Skip to content

Commit

Permalink
Use GMT name when querying mongo. Check GMT when running data pipeline.
Browse files Browse the repository at this point in the history
  • Loading branch information
mikekucera committed Jun 9, 2024
1 parent 3543440 commit 12ddd6f
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 47 deletions.
5 changes: 5 additions & 0 deletions src/client/components/home/upload-controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ export class UploadController {
const networkID = await res.text();
this.bus.emit('finished', { networkID, requestID });
return networkID;
} else if (res.status == 450) {
// custom status code, error while running create data pipeline
const body = await res.json();
const errors = this.errorMessagesForCreateError(body.details);
this.bus.emit('error', { errors, requestID });
} else {
this.bus.emit('error', { errors: ['could not create demo network'], requestID });
}
Expand Down
32 changes: 25 additions & 7 deletions src/server/datastore.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import MUUID from 'uuid-mongodb';
import _ from 'lodash';
import { fileForEachLine } from './util.js';


// export const DB = 'Human_GOBP_AllPathways_no_GO_iea_June_01_2022_symbol.gmt';
export const DB = 'Human_GOBP_AllPathways_noPFOCR_no_GO_iea_May_01_2024_symbol.gmt';
// These GMT files are loaded into collections with the same name as the file.
// export const GMT_1 = 'Human_GOBP_AllPathways_no_GO_iea_June_01_2022_symbol.gmt';
export const GMT_2 = 'Human_GOBP_AllPathways_noPFOCR_no_GO_iea_May_01_2024_symbol.gmt';

const GENE_RANKS_COLLECTION = 'geneRanks';
const GENE_LISTS_COLLECTION = 'geneLists';
Expand Down Expand Up @@ -114,6 +114,9 @@ class Datastore {
}


/**
* @param dbFileName Name of the GMT file. Use one of the constants at the top of this file.
*/
async loadGenesetDB(path, dbFileName) {
const isLoaded = async () => {
const collections = await this.db.listCollections().toArray();
Expand Down Expand Up @@ -335,7 +338,7 @@ class Datastore {

// Lookup the genes contained in each node
{ $lookup: {
from: DB,
from: geneSetCollection,
localField: "splitNames",
foreignField: "name",
as: "geneSet"
Expand Down Expand Up @@ -524,12 +527,25 @@ class Datastore {
return cursor;
}

async getGeneSetCollectionUsedByNetwork(networkIDString) {
const networkID = makeID(networkIDString);
const network = await this.db
.collection(NETWORKS_COLLECTION)
.findOne(
{ _id: networkID.bson },
{ _id: 0, geneSetCollection: 1 }
);

return network.geneSetCollection;
}

/**
* Returns a cursor of objects of the form:
* [ { "name": "My Gene Set", "description": "blah blah", "genes": ["ABC", "DEF"] }, ... ]
*/
async getGMTCursor(geneSetCollection, networkIDString) {
async getGMTUsedByNetworkCursor(networkIDString) {
const networkID = makeID(networkIDString);
const geneSetCollection = await this.getGeneSetCollectionUsedByNetwork(networkIDString);

const cursor = await this.db
.collection(NETWORKS_COLLECTION)
Expand Down Expand Up @@ -631,8 +647,9 @@ class Datastore {
* The returned array is sorted so that the genes with ranks are first (sorted by rank),
* then the genes without ranks come after (sorted alphabetically).
*/
async getGenesWithRanks(geneSetCollection, networkIDStr, geneSetNames, intersection) {
async getGenesWithRanks(networkIDStr, geneSetNames, intersection) {
const networkID = makeID(networkIDStr);
const geneSetCollection = await this.getGeneSetCollectionUsedByNetwork(networkIDStr);

if(geneSetNames === undefined || geneSetNames.length == 0) {
geneSetNames = await this.getNodeDataSetNames(networkID);
Expand Down Expand Up @@ -697,8 +714,9 @@ class Datastore {
}


async getPathwaysForSearchCursor(geneSetCollection, networkIDStr) {
async getPathwaysForSearchCursor(networkIDStr) {
const networkID = makeID(networkIDStr);
const geneSetCollection = await this.getGeneSetCollectionUsedByNetwork(networkIDStr);

const cursor = await this.db
.collection(NETWORKS_COLLECTION)
Expand Down
5 changes: 3 additions & 2 deletions src/server/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@ import apiRouter from './routes/api/index.js';
import createRouter, { createRouterErrorHandler } from './routes/api/create.js';
import exportRouter from './routes/api/export.js';

import Datastore, { DB } from './datastore.js';
import Datastore, { GMT_2 } from './datastore.js';

console.info('Starting Express');

await Datastore.connect();
await Datastore.initializeGeneSetDB('./public/geneset-db/', DB);
// GMT_1 was used for development, so it does not need to be loaded
await Datastore.initializeGeneSetDB('./public/geneset-db/', GMT_2);


const debugLog = debug('enrichment-map');
Expand Down
23 changes: 17 additions & 6 deletions src/server/routes/api/create.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import fs from 'fs/promises';
import * as Sentry from "@sentry/node";
import bodyParser from 'body-parser';
import fetch from 'node-fetch';
import Datastore, { DB } from '../../datastore.js';
import Datastore, { GMT_2 } from '../../datastore.js';
import { rankedGeneListToDocument, fgseaServiceGeneRanksToDocument } from '../../datastore.js';
import { performance } from 'perf_hooks';
import { saveUserUploadFileToS3 } from './s3.js';
Expand All @@ -18,7 +18,7 @@ import {
const NETWORK_CREATE_ERROR_CODE = 450;

const http = Express.Router();

const GMT_FILE = GMT_2;

// Endpoints accept TSV or CSV
const dataParser = bodyParser.text({
Expand Down Expand Up @@ -132,13 +132,21 @@ async function runDataPipeline({ networkName, contentType, type, classes, body,
let rankedGeneList;
let pathwaysForEM;
if(preranked) {
const { pathways } = await runFGSEApreranked(body, contentType);
const fgseaRes = await runFGSEApreranked(body, contentType);
const { pathways, gmtFile } = fgseaRes;
if(gmtFile !== GMT_FILE) {
throw new CreateError({ step: 'fgsea', detail: 'gmt', message: `FGSEA: wrong GMT. Expected '${GMT_FILE}', got '${gmtFile}'.` });
}
const delim = contentType === 'text/csv' ? ',' : '\t';
rankedGeneList = rankedGeneListToDocument(body, delim);
pathwaysForEM = pathways;
} else {
// Messages from FGSEA are basically just warning about non-finite ranks
const { ranks, pathways, messages } = await runFGSEArnaseq(body, classes, contentType);
const fgseaRes = await runFGSEArnaseq(body, classes, contentType);
const { ranks, pathways, messages, gmtFile } = fgseaRes;
if(gmtFile !== GMT_FILE) {
throw new CreateError({ step: 'fgsea', detail: 'gmt', message: `FGSEA: wrong GMT. Expected '${GMT_FILE}', got '${gmtFile}'.` });
}
sendMessagesToSentry('fgsea', messages);
rankedGeneList = fgseaServiceGeneRanksToDocument(ranks);
pathwaysForEM = pathways;
Expand All @@ -149,12 +157,15 @@ async function runDataPipeline({ networkName, contentType, type, classes, body,
if(isEmptyNetwork(networkJson)) {
throw new CreateError({ step: 'em', detail: 'empty' });
}
if(networkJson.gmtFile !== GMT_FILE) {
throw new CreateError({ step: 'em', detail: 'gmt', message: `EM-Service: wrong GMT. Expected '${GMT_FILE}', got '${networkJson.gmtFile}'.` });
}

let networkID;
try {
perf.mark('mongo');
networkID = await Datastore.createNetwork(networkJson, networkName, type, DB, demo);
await Datastore.initializeGeneRanks(DB, networkID, rankedGeneList);
networkID = await Datastore.createNetwork(networkJson, networkName, type, GMT_FILE, demo);
await Datastore.initializeGeneRanks(GMT_FILE, networkID, rankedGeneList);
res?.send(networkID);
} catch(e) {
throw new CreateError({ step: 'mongo', cause: e });
Expand Down
4 changes: 2 additions & 2 deletions src/server/routes/api/export.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import Express from 'express';
import Datastore, { DB } from '../../datastore.js';
import Datastore from '../../datastore.js';

const http = Express.Router();

Expand Down Expand Up @@ -41,7 +41,7 @@ http.get('/ranks/:netid', async function(req, res, next) {
http.get('/gmt/:netid', async function(req, res, next) {
try {
const { netid } = req.params;
const cursor = await Datastore.getGMTCursor(DB, netid);
const cursor = await Datastore.getGMTUsedByNetworkCursor(netid);

sendDataLines(cursor, res, {
header: 'name\tdescription\tgenes',
Expand Down
28 changes: 3 additions & 25 deletions src/server/routes/api/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import Express from 'express';
import fs from 'fs';
import path, { dirname } from 'path';
import { fileURLToPath } from 'url';
import Datastore, { DB } from '../../datastore.js';
import Datastore from '../../datastore.js';


const __dirname = dirname(fileURLToPath(import.meta.url));
Expand Down Expand Up @@ -85,28 +85,6 @@ http.put('/:netid', async function(req, res, next) {
}
});

/*
* Returns the contents of multiple gene sets, not including ranks.
*/
http.post('/genesets', async function(req, res, next) {
  try {
    const requestedSets = req.body.geneSets;

    // A request without an array of gene set names is answered with 404,
    // exactly like a lookup that yields nothing.
    const geneInfo = Array.isArray(requestedSets)
      ? await Datastore.getGeneSets(DB, requestedSets)
      : null;

    if(geneInfo) {
      res.send(JSON.stringify(geneInfo));
      return;
    }
    res.sendStatus(404);
  } catch (err) {
    // Hand any failure to the Express error middleware.
    next(err);
  }
});


/*
* Returns a ranked gene list.
Expand Down Expand Up @@ -142,7 +120,7 @@ http.post('/:netid/genesets', async function(req, res, next) {
return;
}

const geneInfo = await Datastore.getGenesWithRanks(DB, netid, geneSets, intersection === 'true');
const geneInfo = await Datastore.getGenesWithRanks(netid, geneSets, intersection === 'true');
if(!geneInfo) {
res.sendStatus(404);
} else {
Expand Down Expand Up @@ -194,7 +172,7 @@ http.get('/:netid/pathwaysforsearch', async function(req, res, next) {
try {
const { netid } = req.params;

const cursor = await Datastore.getPathwaysForSearchCursor(DB, netid);
const cursor = await Datastore.getPathwaysForSearchCursor(netid);
await writeCursorToResult(cursor, res);
cursor.close();

Expand Down
3 changes: 2 additions & 1 deletion test/resources/network.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"parameters": {
"geneSetCollection": "geneset_database.gmt",
"parameters": {
"pvalue": 1.0,
"qvalue": 0.1,
"filterByExpressions": true,
Expand Down
8 changes: 4 additions & 4 deletions test/test_datastore.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ describe('Gene Set Queries', () => {
});

it('gets a geneset with ranks', async () => {
const results = await Datastore.getGenesWithRanks(GENESET_DB, networkID, ['GENESET_5']);
const results = await Datastore.getGenesWithRanks(networkID, ['GENESET_5']);
expect(results).to.eql({
minRank: 1,
maxRank: 11,
Expand All @@ -57,7 +57,7 @@ describe('Gene Set Queries', () => {
});

it('gets more than one geneset with ranks', async () => {
const results = await Datastore.getGenesWithRanks(GENESET_DB, networkID, ['GENESET_3', 'GENESET_4']);
const results = await Datastore.getGenesWithRanks(networkID, ['GENESET_3', 'GENESET_4']);
expect(results).to.eql({
minRank: 1,
maxRank: 11,
Expand All @@ -73,7 +73,7 @@ describe('Gene Set Queries', () => {
});

it('gets geneset with ranks intersection', async () => {
const results = await Datastore.getGenesWithRanks(GENESET_DB, networkID, ['GENESET_3', 'GENESET_4'], true);
const results = await Datastore.getGenesWithRanks(networkID, ['GENESET_3', 'GENESET_4'], true);
expect(results).to.eql({
minRank: 1,
maxRank: 11,
Expand All @@ -85,7 +85,7 @@ describe('Gene Set Queries', () => {
});

it('gets all genesets with ranks', async () => {
const results = await Datastore.getGenesWithRanks(GENESET_DB, networkID, []);
const results = await Datastore.getGenesWithRanks(networkID, []);
expect(results).to.eql({
minRank: 1,
maxRank: 11,
Expand Down

0 comments on commit 12ddd6f

Please sign in to comment.