# An example of OO programming in Bioconductor: # Creating an object of class ExpressionSet (package Biobase) # There are two main parts in this example: # A. Construction of an object of class ExpressionSet from scratch # B. Getting information and basic manipulation with objects of class ExpressionSet # Microarray experiments, usually consist of several conceptually distinct parts: # 1) assay data, # 2) phenotypic meta-data, # 3) feature annotations and meta-data, # 4) description of the experiment. # These data are collected in objects of class ExpressionSet # loading Biobase library library("Biobase") ############################################### # A. Construction of an object of class ExpressionSet from scratch ############################################### ########################################## # 1) Assay data ############################### ########################################## # read-table-geneData: # assay data are usually tab-delimited files dataDirectory <- system.file("extdata", package="Biobase") exprsFile <- file.path(dataDirectory, "exprsData.txt") exprs <- as.matrix(read.table(exprsFile, header=TRUE, sep="\t", row.names=1, as.is=TRUE)) # it is better to check the results ... ALWAYS! class(exprs) dim(exprs) colnames(exprs) head(exprs[,1:5]) # we can create a minimal ExpressionSet object: minimalSet <- new("ExpressionSet", exprs=exprs) # accession to the first two probe sets (genes) using the method exprs exprs(minimalSet)[1:2,] ########################################## # 2) Phenotypic data and metadata ################ ########################################## # Phenotypic data summarizes information about the samples (e.g., sex, age, and treatment # status; referred to as `covariates'). The information describing the samples can be represented # as a table with S rows and V columns, where V is the number of covariates pDataFile <- file.path(dataDirectory, "pData.txt") pData <- read.table(pDataFile, row.names=1, header=TRUE, sep="\t") # some data checking ... dim(pData) rownames(pData) summary(pData) #Note that the number of rows of phenotypic # data match the number of columns of expression data, and indeed that the row and column # names are identically ordered: all(rownames(pData)==colnames(exprs)) # checking classes of the covariates: sapply(pData, class) # meta-data to describe the covariates associated to the patients metadata <- data.frame(labelDescription=c("Patient gender", "Case/control status", "Tumor progress on XYZ scale"), row.names=c("gender", "type", "score")) #Objects of class "AnnotatedDataFrame" put together patients covariates with its metdata: phenoData <- new("AnnotatedDataFrame", data=pData, varMetadata=metadata) phenoData # accessing slots of the object phenoData: phenoData@data phenoData@varMetadata ##: AnnotatedDataFrame-subsetting: phenoData[c("A","Z"),"gender"] pData(phenoData[phenoData$score>0.8,]) ################################################### # 3) feature annotations and meta-data ###################### ############################# # Usually, as experiments are repeated w.r.t. to the same chip a simple # character slot is used, indicating the type of the chip: annotation <- "hgu95av2" # But it is also possible to record information about features that are unique to the experiment # (e.g., flagging particularly relevant features). This is done by creating or modifying an An- # notatedDataFrame like that for phenoData but with row names of the AnnotatedDataFrame # matching rows of the assay data: library(hgu95av2.db); gene.symbols <- unlist(mget(rownames(exprs),hgu95av2SYMBOL)); fData <- data.frame(Affy=rownames(exprs), symbols=gene.symbols); fmetadata <- data.frame(labelDescription=c("Affymetrix accession", "Gene symbol"), row.names=c("Affy", "symbol")) #Objects of class "AnnotatedDataFrame" put together feature annotations with its metdata: featureData <- new("AnnotatedDataFrame", data=fData, varMetadata=fmetadata) featureNames(featureData) <- rownames(exprs); pData(featureData); ################################## # 4) description of the experiment. ################################## # Basic description about the experiment (e.g., the investigator or lab where the experiment # was done, an overall title, and other notes) can be recorded by creating a MIAME object: experimentData <- new("MIAME", name="Pierre Fermat", lab="Francis Galton Lab", contact="pfermat@lab.not.exist", title="Smoking-Cancer Experiment", abstract="An example ExpressionSet", url="www.lab.not.exist", other=list( notes="Created from text files" )) # Putting together 1 + 2+ 3+ 4 = an object of class ExpressionSet exampleSet <- new("ExpressionSet", exprs=exprs, phenoData=phenoData, featureData=featureData, experimentData=experimentData, annotation="hgu95av2") ############################################### # B. Getting information and basic manipulation with objects of class ExpressionSet ############################################### # Info about the class using help: help("ExpressionSet-class") # general info on an object of class ExpressionSet exampleSet # The varLabels method lists the column names of the phenotype data: varLabels(exampleSet) # Accessing the columns of the phenotype data (an AnnotatedDataFrame instance) using $: exampleSet$gender[1:5] exampleSet$gender[1:5] == "Female" #You can retrieve the names of the features using featureNames. # For many microarray datasets, the feature names are the probe set identi ers. featureNames(exampleSet)[1:5] # Getting the sample names: sampleNames(exampleSet)[1:5] # Accession to gene expression data: mat <- exprs(exampleSet) dim(mat) # Accessing phenotypic data in the slot of class "AnnotatedDataFrame": adf <- phenoData(exampleSet) adf # Subsetting operations: # Create a new ExpressionSet consisting of the 10 features and the first 5 samples: subset <- exampleSet[1:10, 1:5] subset dim(subset) featureNames(subset) sampleNames(subset) exprs(subset) # selecting only male patients: males <- exampleSet[ , exampleSet$gender == "Male"] males # selection of high score patients: high.score.patients <- exampleSet[ , exampleSet$score > 0.9] # Construct an object of class ExpressionSet with only female patients that are controls and with score less than 0.6 vv <- exampleSet[ , exampleSet$gender == "Female" & exampleSet$score < 0.6 & exampleSet$type == "Control"]