biomedical_id_resolver
v3.11.0
Published
js library for converting biomedical entity ids in batch
Downloads
16
Readme
biomedical_id_resolver.js
js library for resolving biological ids to their equivalent ids in batch
Install
$ npm i biomedical_id_resolver
Usage
const resolve = require('biomedical_id_resolver');
// input should be an object, with semantic type as the key, and array of CURIEs as value
let input = {
"Gene": ["NCBIGene:1017", "NCBIGene:1018", "HGNC:1177"],
"SmallMolecule": ["CHEBI:15377"],
"Disease": ["MONDO:0004976"],
"Cell": ["CL:0002372"]
};
(async () => {
const resolver = new resolve();
console.log(await resolver.resolve(input));
//=> {'NCBIGene:1017': {...}, 'NCBIGene:1018': {...}, 'HGNC:1177': {...}, 'CHEBI:15377': {...}, 'MONDO:0004976': {...}, 'CL:0002372': {...}}
})();
Output Schema
Output is a javascript Object
The root keys are CURIES (e.g. NCBIGene:1017) which are passed in as input
The values represent the resolved identifiers
Each CURIE will have 4 required fields
id: the primary id (selected based on the ranking described in the next section) and label
curies: an array, each element represents a resolved id in CURIE format
type: the semantic type of the identifier
db_ids: original ids from source database, could be curies or non-curies.
If an ID cannot be resolved using the package, it will have an additional field called "flag", with a value equal to "failed"
Example Output
{
"NCBIGene:1017": {
"id": {
"label": "cyclin dependent kinase 2",
"identifier": "NCBIGene:1017"
},
"db_ids": {
"NCBIGene": [
"1017"
],
"ENSEMBL": [
"ENSG00000123374"
],
"HGNC": [
"1771"
],
"SYMBOL": [
"CDK2"
],
"UMLS": [
"C1332733",
"C0108855"
],
"name": [
"cyclin dependent kinase 2"
]
},
"type": "Gene",
"curies": [
"NCBIGene:1017",
"ENSEMBL:ENSG00000123374",
"HGNC:1771",
"SYMBOL:CDK2",
"UMLS:C1332733",
"UMLS:C0108855"
]
}
}
Query Using SRI node normalizer
Usage
const resolver = require('biomedical_id_resolver');
// input must be an object, with semantic type as the key, and array of CURIEs as value
let input = {
"Gene": ["NCBIGene:1017", "NCBIGene:1018", "HGNC:1177"],
"SmallMolecule": ["CHEBI:15377"],
"Disease": ["MONDO:0004976"],
"Cell": ["CL:0002372"]
};
// SRI resolver will figure out the semantic type if the input type is 'unknown', 'undefined', or 'NamedThing'
input = {
"unknown": ["NCBIGene:1017", "MONDO:0004976"],
};
(async () => {
let res = await resolver.resolveSRI(input);
console.log(res);
})();
Example Output
The output contains `id` and `equivalent_identifiers` straight from SRI, as well as the same fields as the base resolver to keep it backwards compatible with it. If the SRI-resolved semantic type doesn't agree with the input semantic type, there will be 2 entries in the array for the CURIE.
{
"NCBIGene:1017": [
{
"id": {
"identifier": "NCBIGene:1017",
"label": "CDK2"
},
"equivalent_identifiers": [
{
"identifier": "NCBIGene:1017",
"label": "CDK2"
},
{
"identifier": "ENSEMBL:ENSG00000123374"
},
{
"identifier": "HGNC:1771",
"label": "CDK2"
},
{
"identifier": "OMIM:116953"
},
{
"identifier": "UMLS:C1332733",
"label": "CDK2 gene"
}
],
"type": [
"biolink:Gene",
"biolink:GeneOrGeneProduct",
"biolink:BiologicalEntity",
"biolink:NamedThing",
"biolink:Entity",
"biolink:MacromolecularMachineMixin"
],
"primaryID": "NCBIGene:1017",
"label": "CDK2",
"attributes": {},
"semanticType": "Gene",
"semanticTypes": [
"biolink:Gene",
"biolink:GeneOrGeneProduct",
"biolink:BiologicalEntity",
"biolink:NamedThing",
"biolink:Entity",
"biolink:MacromolecularMachineMixin"
],
"dbIDs": {
"NCBIGene": [
"1017"
],
"ENSEMBL": [
"ENSG00000123374"
],
"HGNC": [
"1771"
],
"OMIM": [
"116953"
],
"UMLS": [
"C1332733"
],
"name": [
"CDK2",
"CDK2 gene"
]
},
"curies": [
"NCBIGene:1017",
"ENSEMBL:ENSG00000123374",
"HGNC:1771",
"OMIM:116953",
"UMLS:C1332733"
]
}
]
}
Available Semantic Types & prefixes
Gene, Transcript, Protein ID resolution is done through MyGene.info API
Gene
- NCBIGene
- ENSEMBL
- HGNC
- MGI
- OMIM
- UMLS
- SYMBOL
- UniProtKB
- name
Transcript
- ENSEMBL
- SYMBOL
- name
Protein
- UniProtKB
- ENSEMBL
- UMLS
- SYMBOL
- name
Variant ID resolution is done through MyVariant.info API
- SequenceVariant
- CLINVAR
- DBSNP
- HGVS
- MYVARIANT_HG19
SmallMolecule, Drug ID resolution is done through MyChem.info API
SmallMolecule
- PUBCHEM.COMPOUND
- CHEMBL.COMPOUND
- UNII
- CHEBI
- DRUGBANK
- MESH
- CAS
- HMDB
- KEGG.COMPOUND
- INCHI
- INCHIKEY
- UMLS
- LINCS
- name
Drug
- RXCUI
- NDC
- DRUGBANK
- PUBCHEM.COMPOUND
- CHEMBL.COMPOUND
- UNII
- CHEBI
- MESH
- CAS
- HMDB
- KEGG.COMPOUND
- INCHI
- INCHIKEY
- UMLS
- LINCS
- name
Disease, ClinicalFinding ID Resolution is done through MyDisease.info API
Disease
- MONDO
- DOID
- OMIM
- ORPHANET
- EFO
- UMLS
- MESH
- MEDDRA
- NCIT
- SNOMEDCT
- HP
- GARD
- name
ClinicalFinding
- LOINC
- NCIT
- EFO
- name
Pathway ID Resolution is done through biothings.ncats.io/geneset API
- Pathway
- GO
- REACT
- KEGG
- SMPDB
- PHARMGKB.PATHWAYS
- WIKIPATHWAYS
- BIOCARTA
- name
MolecularActivity ID Resolution is done through BioThings Gene Ontology Molecular Activity API
- MolecularActivity
- GO
- REACT
- RHEA
- MetaCyc
- KEGG.REACTION
- name
CellularComponent ID Resolution is done through BioThings Gene Ontology Cellular Component API
- CellularComponent
- GO
- MetaCyc
- name
BiologicalProcess ID Resolution is done through BioThings Gene Ontology Biological Process API
BiologicalProcess
- GO
- REACT
- MetaCyc
- KEGG
- name
AnatomicalEntity ID Resolution is done through BioThings UBERON API
- AnatomicalEntity
- UBERON
- UMLS
- MESH
- NCIT
- name
PhenotypicFeature ID Resolution is done through BioThings HPO API
- PhenotypicFeature
- HP
- EFO
- NCIT
- UMLS
- MEDDRA
- MP
- SNOMEDCT
- MESH
- name
Cell ID Resolution is done through Biothings Cell Ontology API
- Cell
- CL
- NCIT
- MESH
- EFO
- name
Development
- Install Node 12 or later. You can use the package manager of your choice. Tests need to pass in Node 12 and 14.
- Clone this repository.
- Run `npm ci` to install the dependencies.
- Scripts are stored in the `/src` folder.
- Add tests to the `/__tests__` folder.
- Run `npm run release` to bump the version and generate the change log.
- Run `npx depcheck` to check for unused packages in package.json.
CHANGELOG
See CHANGELOG.md