/
arbor2apeTreeHandler.R
321 lines (272 loc) · 10.7 KB
/
arbor2apeTreeHandler.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# C. Lisle
# KnowledgeVis, LLC
# November 2013
# declare a new S4 style class in R that provides an interface to pass phylogenetic data between Python and R.
# First, we declare only the instance variables and their types. This arbor2apeTreeHandler class can represent an APE
# tree, which is a 4-tuple (numNodes,taxaNames,edge table, branch_lengths). "preInitialized" is a state variable
# that indicates how initialized the object is. It set to TRUE when prerequisites are completed for creating
# the edge Table. "fullyInitialized" is set after the tables and lists are allocated.
# To test the class, read in the definition, then paste the following lines (uncommented) into the R console.
# The result will be a display of the class contents with an entry into each type of tree metadata to test the
# setter functions.
" Example of creating a small tree through the API and converting it to an APE tree.
Root (Node=4)
/ \
/ D (Node=5)
A (N=1) / \
/ \
B (N=2) C (node=3)
"
# Here are the commands to initialize the class and create and plot the APE tree:
#t2 = new("arbor2apeTreeHandler")
#setNumberOfEdges(t2) <- 6
#setNumberOfInternalNodes(t2) <- 3
#setNumberOfTaxa(t2) <- 4
#initializeStorage(t2)
#setTaxaIndex(t2,1,"A")
#setTaxaIndex(t2,2,"B")
#setTaxaIndex(t2,3,"C")
#setTaxaIndex(t2,4,"D")
#setEdgeIndex(t2,1,5,6)
#setEdgeIndex(t2,2,5,7)
#setEdgeIndex(t2,3,6,1)
#setEdgeIndex(t2,4,6,2)
#setEdgeIndex(t2,5,7,3)
#setEdgeIndex(t2,6,7,4)
#setBranchLengthIndex(t1, 2, 5.678)
#buildGeigerTree(t2,myNewTree)
#plot(myNewTree)
# dependency needed for conversion to APE trees
library(geiger)
#**************************
# Class Definition
#**************************
setClass(
"arbor2apeTreeHandler",
representation(
numNodes = "numeric",
numTaxa = "numeric",
numEdges = "numeric",
edgeTable = "matrix",
taxaNames = "array",
branch_lengths = "array",
preInitialized = "logical",
fullyInitialized = "logical"
),
# set the initial state to have no nodes and an empty taxa and branch length lists. The edge table
# is not initialized for the empty object, because the matrix will be instantiated during initialize-storage
prototype(
numNodes = 0,
numTaxa = 0,
numEdges = 0,
preInitialized = FALSE,
fullyInitialized = FALSE
)
)
#**************************
# Methods Name Declarations
#**************************
# methods on the object have to be first declared in the R namespace as generic functions, before the actual
# method code is assigned to the new class. Below is a declaration of the name of each method defined on the class.
# The arguments declared here must match the arguments declared in the method body below. Note the "<-" syntax
# for setters that are to be used with the assignment R operator.
setGeneric (
name= "setNumberOfInternalNodes<-",
def=function(object,value){standardGeneric("setNumberOfInternalNodes<-")}
)
setGeneric (
name= "setNumberOfTaxa<-",
def=function(object,value){standardGeneric("setNumberOfTaxa<-")}
)
setGeneric (
name= "setNumberOfEdges<-",
def=function(object,value){standardGeneric("setNumberOfEdges<-")}
)
setGeneric (
name="initializeStorage",
def=function(object){standardGeneric("initializeStorage")}
)
setGeneric (
name="testForInitialized",
def=function(.Object){standardGeneric("testForInitialized")}
)
setGeneric(
name="setTaxaIndex",
def=function(object,taxaindex,taxaname){standardGeneric("setTaxaIndex")}
)
setGeneric(
name="setEdgeIndex",
def=function(object,edgeindex,edgesource,edgedestination){standardGeneric("setEdgeIndex")}
)
setGeneric(
name="setBranchLengthIndex",
def=function(object,index, branchlength){standardGeneric("setBranchLengthIndex")}
)
setGeneric(
name="buildGeigerTree",
def=function(object,treename){standardGeneric("buildGeigerTree")}
)
#*********************
# Methods
#*********************
# method to set the total number of nodes in the tree. A replace method has to be used to
# assign a modified value to the object (the class instance).
setReplaceMethod(f="setNumberOfInternalNodes",
signature="arbor2apeTreeHandler",
definition = function(object,value) {
cat("setting value of numNodes\n")
print(value)
object@numNodes <- value
testForInitialized(object)
return (object)
}
)
setReplaceMethod(f="setNumberOfTaxa",
signature="arbor2apeTreeHandler",
definition = function(object,value) {
cat("setting value of numTaxa\n")
print(value)
object@numTaxa <- value
testForInitialized(object)
return (object)
}
)
setReplaceMethod(f="setNumberOfEdges",
signature="arbor2apeTreeHandler",
definition = function(object,value) {
cat("setting value of numEdges\n")
print(value)
object@numEdges <- value
testForInitialized(object)
return (object)
}
)
# Check that the number of nodes, edges, and taxa have all been initialized. Once all three have been
# set, then we can set the "initialized" instance variable. This variable is used as a gate to avoid the
# table being setup with an incorrect size and causing runtime errors.
#
# Note: this is a method that modifies the object itself. This requires namespace tricks to accomplish. Generally
# every R function operations in its own namespace and there is no way to have instance variables available, even
# to a method. Here we find the name of the variable using 'deparse' and assign a value to the object in the
# "parent's namespace".
#
setMethod(f="testForInitialized",
signature="arbor2apeTreeHandler",
definition = function(.Object) {
nameObject <- deparse(substitute(.Object))
if (.Object@numNodes > 0) {
if (.Object@numEdges > 0) {
if (.Object@numTaxa > 0) {
.Object@preInitialized <- TRUE
cat("setting object initialized to TRUE\n")
# cause side-effect by assigning value in parent's namespace
assign(nameObject,.Object,envir=parent.frame())
return(invisible())
}
}
}
}
)
# Check that the object is ready for allocating arrays. If so, allocate and set the object state.
setMethod(f="initializeStorage",
signature="arbor2apeTreeHandler",
definition = function(object) {
# keep this line first, see description in other functions
objectName <- deparse(substitute(object))
if (object@preInitialized == TRUE) {
cat("initializing the edge table")
# initialize the edgeTable by assigning the size of the matrix and filling it with zeros
numElements = object@numEdges
object@edgeTable <- matrix(0,nrow=numElements,ncol=2)
object@taxaNames <- array(dim=object@numTaxa)
object@branch_lengths <- array(dim=object@numEdges)
# indicate that object is really ready for data
object@fullyInitialized <- TRUE
# cause side-effect by assigning value in parent's namespace
assign(objectName,object,envir=parent.frame())
return(invisible())
}
else
print("TreeMgr: error; attempt to initialize storage before setting all control variables numEdges, etc.")
}
)
#******
# Methods to set values in the tree
#******
# This method sets the text string value for an entry in the taxaNames list (corresponding to APE's tip.name array)
# The arguments are the index to set and the character string to assign.
setMethod(f="setTaxaIndex",
signature="arbor2apeTreeHandler",
definition=function(object,taxaindex,taxaname){
# this objectName lookup seems to have to be done BEFORE the variable is referenced? It works when it is
# the first line, but not when it is later in the function..
objectName <- deparse(substitute(object))
#set the taxa name at location "index" to value "name"
object@taxaNames[taxaindex] <- taxaname
# cause side effect to update the object
assign(objectName,object,envir=parent.frame())
return(invisible())
}
)
# this method sets the entry in the edge table. It is called with the index, the source node, and the destination node
setMethod(f="setEdgeIndex",
signature="arbor2apeTreeHandler",
definition=function(object,edgeindex, edgesource, edgedestination){
objectName <- deparse(substitute(object))
#set the taxa name at location "index" to value "name"
object@edgeTable[edgeindex,1] <- edgesource
object@edgeTable[edgeindex,2] <- edgedestination
# cause side effect to update the object
assign(objectName,object,envir=parent.frame())
return(invisible())
}
)
# this method sets an entry in the branch_length array to the numeric value passed in the branchlength argument
setMethod(f="setBranchLengthIndex",
signature="arbor2apeTreeHandler",
definition=function(object,index, branchlength){
objectName <- deparse(substitute(object))
#set the branch length at location "index" to value "branchlength"
object@branch_lengths[index] <- branchlength
# cause side effect to update the object
assign(objectName,object,envir=parent.frame())
return(invisible())
}
)
# build an APE tree from the data stored in the instance variables
setMethod(f="buildGeigerTree",
signature="arbor2apeTreeHandler",
definition=function(object,treename) {
treeObjectName <- deparse(substitute(treename))
# copy the edges
treeEdges <- matrix(0,nrow=object@numEdges,ncol=2)
for (index in 1:object@numEdges) {
treeEdges[index,1] <- object@edgeTable[index,1]
treeEdges[index,2] <- object@edgeTable[index,2]
}
# copy the taxanames
newTaxaNames <- array(dim=object@numTaxa)
for (index in 1:object@numTaxa) {
newTaxaNames[index] <- object@taxaNames[index]
}
# initialize all branch-lengths to 1.0. Test that the branch lengths are defined before
# copying them over. If branches are not assigned, the default of 1.0 is given to each node.
# The test is performed by examining that values are not = NA (not allocated)
newBranchLengths <- array(1.0,dim=object@numEdges)
for (index in 1:object@numEdges) {
if (!is.na(object@branch_lengths[index])) {
newBranchLengths[index] <- object@branch_lengths[index]
}
}
# create the 4-tuple that is an APE tree, including optional branch lengths
treename <- list(edge = treeEdges,
tip.label = newTaxaNames,
Nnode = object@numNodes,
edge.length = newBranchLengths)
# set the classname of the APE tree
class(treename) <- "phylo"
assign(treeObjectName,treename,envir=parent.frame())
print("build APE/Geiger tree completed.")
return(invisible())
}
)