Skip to content

Commit

Permalink
annotation and ora for diffbind, fix typo in DEG.Rmd
Browse files Browse the repository at this point in the history
  • Loading branch information
abartlett004 committed Oct 11, 2024
1 parent a0ad6c9 commit 1e718b3
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 5 deletions.
76 changes: 72 additions & 4 deletions inst/templates/chipseq/diffbind/diffbind.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ library(qs)
library(EnhancedVolcano)
library(ggprism)
library(ChIPseeker)
library(msigdbr)
library(fgsea)
if (params$species == 'mouse'){
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
Expand Down Expand Up @@ -265,9 +267,15 @@ results_anno_batch <- annotatePeakInBatch(results_report,
results_anno_batch_df <- results_anno_batch %>% as.data.frame()
entrez_to_symbol <- AnnotationDbi::select(org.Mm.eg.db, results_anno_batch_df$feature,
"ENTREZID", columns = 'SYMBOL') %>%
filter(!is.na(ENTREZID)) %>% distinct()
if(params$species == 'mouse'){
entrez_to_symbol <- AnnotationDbi::select(org.Mm.eg.db, results_anno_batch_df$feature,
"ENTREZID", columns = 'SYMBOL') %>%
filter(!is.na(ENTREZID)) %>% distinct()
} else if (params$species == 'human'){
entrez_to_symbol <- AnnotationDbi::select(org.Hs.eg.db, results_anno_batch_df$feature,
"ENTREZID", columns = 'SYMBOL') %>%
filter(!is.na(ENTREZID)) %>% distinct()
}
results_anno_batch_df <- results_anno_batch_df %>%
left_join(entrez_to_symbol %>% dplyr::select(feature = ENTREZID, gene_name = SYMBOL))
Expand Down Expand Up @@ -316,10 +324,12 @@ results_mod <- results_sig_anno_batch_df %>%
# show <- as.data.frame(results_mod[1:6, c("Fold", "FDR", "gene_name")])
show <- results_mod %>% filter(!is.na(gene_name)) %>% slice_min(n = 6, order_by = FDR)
results_mod <- results_mod %>% mutate(gene_name = ifelse(peak %in% show$peak , gene_name, NA))
EnhancedVolcano(results_mod,
lab= results_mod$gene_name,
pCutoff = 0.05,
# selectLab = c(show$gene_name),
selectLab = c(show$gene_name),
FCcutoff = 0.5,
x = 'Fold',
y = 'FDR',
Expand Down Expand Up @@ -369,6 +379,64 @@ plotDistToTSS(results_sig_anno)

# Functional Enrichment

Over-Representation Analysis (ORA) is a statistical method used to determine whether a predefined set of genes (e.g., genes belonging to a specific biological pathway or function) is over-represented (or enriched) among a list of differentially bound genes (DEGs) from ChIP-seq. Adventages of ORA:

- Simplicity: Easy to perform and interpret.
- Biological Insight: Helps to identify pathways and processes that are significantly affected in the condition studied.
- Prior Knowledge Integration: Utilizes existing biological knowledge through predefined gene sets.

```{r get databases}
if(params$species == 'human'){
all_in_life=get_databases()
} else if (params$species == 'mouse'){
all_in_life = get_databases('Mus musculus')
}
```

```{r ora}
universe_mapping = results_anno_batch_df %>%
filter(!is.na(FDR), !is.na(feature)) %>%
dplyr::select(ENTREZID = feature, SYMBOL = gene_name) %>% distinct()
ora_input = results_anno_batch_df %>%
filter(!is.na(FDR), FDR < 0.01, abs(Fold) > 0.3, !is.na(feature)) %>%
dplyr::select(ENTREZID = feature, SYMBOL = gene_name) %>% distinct()
all = run_fora(ora_input, universe_mapping, all_in_life)
ora_input = results_anno_batch_df %>%
filter(!is.na(FDR), FDR < 0.01, Fold > 0.3, !is.na(feature)) %>%
dplyr::select(ENTREZID = feature, SYMBOL = gene_name) %>% distinct()
up = run_fora(ora_input, universe_mapping, all_in_life)
ora_input = results_anno_batch_df %>%
filter(!is.na(FDR), FDR < 0.01, Fold < -0.3, !is.na(feature)) %>%
dplyr::select(ENTREZID = feature, SYMBOL = gene_name) %>% distinct()
down = run_fora(ora_input, universe_mapping, all_in_life)
```


## Significant pathways using all DB genes

```{r all pathways}
all %>% sanitize_datatable()
```


## Significant pathways using increased DB genes

```{r up pathways}
up %>% sanitize_datatable()
```


## Significant pathways using decreased DB genes

```{r down pathways, results='asis'}
down %>% sanitize_datatable()
```

# R session

List and version of tools used for the report generation.
Expand Down
44 changes: 44 additions & 0 deletions inst/templates/chipseq/libs/load_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,47 @@ make_diffbind_samplesheet <- function(coldata, bam_dir, peaks_dir, column = NULL

return(samplesheet)
}

get_databases=function(sps="human"){
all_in_life=list(
msigdbr(species = sps, category = "H") %>% mutate(gs_subcat="Hallmark"),
# msigdbr(species = "human", category = "C2", subcategory = "CP:REACTOME"),
msigdbr(species = sps, category = "C2", subcategory = "CP:KEGG"),
# msigdbr(species = "human", category = "C2", subcategory = "CP:PID"),
msigdbr(species = sps, category = "C5", subcategory = "GO:BP"),
msigdbr(species = sps, category = "C5", subcategory = "GO:MF")
# msigdbr(species = "human", category = "C5", subcategory = "HPO"),
# msigdbr(species = "human", category = "C3", subcategory = "TFT:GTRD"),
# msigdbr(species = "human", category = "C6") %>% mutate(gs_subcat="Oncogenic")
)
all_in_life
}

run_fora=function(input, uni,all_in_life){
# browser()
total_deg=length(unique(input))/length(unique(uni$ENTREZID))
pathways_ora_all = lapply(all_in_life, function(p){
pathway = split(x = p$entrez_gene, f = p$gs_name)
db_name = paste(p$gs_cat[1], p$gs_subcat[1],sep=":")
respath <- fora(pathways = pathway,
genes = unique(input$ENTREZID),
universe = unique(uni$ENTREZID),
minSize = 15,
maxSize = 500)
# coll_respath = collapsePathwaysORA(respath[order(pval)][padj < 0.1],
# pathway, unique(input$ENTREZID), unique(uni$ENTREZID))
as_tibble(respath) %>%
mutate(database=db_name, NES=(overlap/size)/(total_deg))
}) %>% bind_rows() %>%
mutate(analysis="ORA")
ora_tb = pathways_ora_all %>% unnest(overlapGenes) %>%
group_by(pathway) %>%
left_join(uni, by =c("overlapGenes"="ENTREZID")) %>%
dplyr::select(pathway, padj, NES, SYMBOL, analysis,
database) %>%
group_by(pathway,padj,NES,database,analysis) %>%
summarise(genes=paste(SYMBOL,collapse = ","))
ora_tb

}

2 changes: 1 addition & 1 deletion inst/templates/rnaseq/DE/DEG.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ fa_list=lapply(de_list,function(contrast){
input_entrezid <- AnnotationDbi::select(org.Hs.eg.db, ora_input, 'ENSEMBL', columns = c('ENTREZID', 'SYMBOL'))
up=run_fora(input_entrezid, universe_mapping,all_in_life)
ora_input = res %>% filter(!is.na(padj), padj<0.01, lfc<0.3) %>% pull(gene_id)
ora_input = res %>% filter(!is.na(padj), padj<0.01, lfc<-0.3) %>% pull(gene_id)
#change to the right species
input_entrezid <- AnnotationDbi::select(org.Hs.eg.db, ora_input, 'ENSEMBL', columns = c('ENTREZID', 'SYMBOL'))
down=run_fora(input_entrezid, universe_mapping,all_in_life)
Expand Down

0 comments on commit 1e718b3

Please sign in to comment.