# Recovering gene names and chromosome positions from ENSEMBL IDs
# September 26, 2011
# Alyssa Frazee
#
# Walk-through: given a vector of ENSEMBL gene IDs, query the Ensembl BioMart
# (via the biomaRt package) for the HGNC symbol, chromosome, start/end
# position, and cytogenetic band of each gene.

# 1) Install biomaRt ----
# Only needs to happen once, so skip the (slow, network-bound) installer when
# the package is already available.
# NOTE: biocLite is the installer this script was written against; on current
# Bioconductor releases the equivalent is BiocManager::install("biomaRt").
if (!requireNamespace("biomaRt", quietly = TRUE)) {
  source("http://bioconductor.org/biocLite.R")
  biocLite("biomaRt")
}

# 2) Load biomaRt ----
library(biomaRt)

# 3) Given ENSEMBL IDs, recover gene information ----
# For our example, we will use genes from an ExpressionSet on the website.
# Package names are case-sensitive: the package is "Biobase", not "bioBase".
library(Biobase)
load(url(
  "http://bowtie-bio.sourceforge.net/recount/ExpressionSets/core_eset.RData"
))

# Select the ENSEMBL database with the human dataset.
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Here's a vector of ENSEMBL gene IDs we're interested in
# (core.eset is expected to come from the .RData file loaded above).
ens_ids <- featureNames(core.eset)[1:3]
ens_ids
# [1] "ENSG00000000003" "ENSG00000000005" "ENSG00000000419"

# Use getBM() to recover other information about the genes. The result is
# kept in gene_info so it can be reused, then printed.
gene_info <- getBM(
  attributes = c(
    "ensembl_gene_id",
    "hgnc_symbol",
    "chromosome_name",
    "start_position",
    "end_position",
    "band"
  ),
  filters = "ensembl_gene_id",
  values = ens_ids,
  mart = ensembl
)
gene_info
# Output:
#   ensembl_gene_id hgnc_symbol chromosome_name start_position end_position
# 1 ENSG00000000003      TSPAN6               X       99883667     99894988
# 2 ENSG00000000005        TNMD               X       99839799     99854882
# 3 ENSG00000000419        DPM1              20       49551404     49575092
#     band
# 1  q22.1
# 2  q22.1
# 3 q13.13

# The above recovered the HUGO gene name, chromosome, position, and band.
# listAttributes(ensembl) displays other attributes you can recover.
# For more information on biomaRt, please see the user manual:
# http://www.bioconductor.org/packages/2.2/bioc/vignettes/biomaRt/inst/doc/biomaRt.pdf
# (or run browseVignettes("biomaRt") for the version matching your install).