/
dna_network.Rd
260 lines (226 loc) · 12.8 KB
/
dna_network.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rDNA.R
\name{dna_network}
\alias{dna_network}
\title{Compute and retrieve a network}
\usage{
dna_network(connection, networkType = "twomode",
statementType = "DNA Statement", variable1 = "organization",
variable1Document = FALSE, variable2 = "concept",
variable2Document = FALSE, qualifier = "agreement",
qualifierAggregation = "ignore", normalization = "no",
isolates = FALSE, duplicates = "include",
start.date = "01.01.1900", stop.date = "31.12.2099",
start.time = "00:00:00", stop.time = "23:59:59", timewindow = "no",
windowsize = 100, excludeValues = list(),
excludeAuthors = character(), excludeSources = character(),
excludeSections = character(), excludeTypes = character(),
invertValues = FALSE, invertAuthors = FALSE, invertSources = FALSE,
invertSections = FALSE, invertTypes = FALSE, fileFormat = NULL,
outfile = NULL, verbose = TRUE)
}
\arguments{
\item{connection}{A \code{dna_connection} object created by the
\code{dna_connection} function.}
\item{networkType}{The kind of network to be computed. Can be
\code{"twomode"}, \code{"onemode"}, or \code{"eventlist"}.}
\item{statementType}{The name of the statement type in which the variable
of interest is nested. For example, \code{"DNA Statement"}.}
\item{variable1}{The first variable for network construction. In a one-mode
network, this is the variable for both the rows and columns. In a
two-mode network, this is the variable for the rows only. In an event
list, this variable is only used to check for duplicates (depending on
the setting of the \code{duplicates} argument).}
\item{variable1Document}{A boolean value indicating whether the first
variable is at the document level (i.e., \code{"author"},
\code{"source"}, \code{"section"}, \code{"type"}, \code{"id"}, or
\code{"title"}).}
\item{variable2}{The second variable for network construction. In a one-mode
network, this is the variable over which the ties are created. For
example, if an organization x organization network is created, and ties
in this network indicate co-reference to a concept, then the second
variable is the \code{"concept"}. In a two-mode network, this is the
variable used for the columns of the network matrix. In an event list,
this variable is only used to check for duplicates (depending on the
setting of the \code{duplicates} argument).}
\item{variable2Document}{A boolean value indicating whether the second
variable is at the document level (i.e., \code{"author"},
\code{"source"}, \code{"section"}, \code{"type"}, \code{"id"}, or
\code{"title"}).}
\item{qualifier}{The qualifier variable. In a one-mode network, this
variable can be used to count only congruence or conflict ties. For
example, in an organization x organization network via common concepts,
a binary \code{"agreement"} qualifier could be used to record only ties
where both organizations have a positive stance on the concept or where
both organizations have a negative stance on the concept. With an
integer qualifier, the tie weight between the organizations would be
proportional to the similarity or distance between the two organizations
on the scale of the integer variable.
In a two-mode network, the qualifier variable can be used to retain only
positive or only negative statements or subtract negative from positive
mentions. All of this depends on the setting of the
\code{qualifierAggregation} argument. For event lists, the qualifier
variable is only used for filtering out duplicates (depending on the
setting of the \code{duplicates} argument).
The qualifier can also be \code{NULL}, in which case it is ignored, meaning
that values in \code{variable1} and \code{variable2} are unconditionally
associated with each other in the network when they co-occur. This is
identical to selecting a qualifier variable and setting
\code{qualifierAggregation = "ignore"}.}
\item{qualifierAggregation}{The aggregation rule for the \code{qualifier}
variable. In one-mode networks, this must be \code{"ignore"} (for
ignoring the qualifier variable), \code{"congruence"} (for recording a
network tie only if both nodes have the same qualifier value in the
binary case or for recording the similarity between the two nodes on the
qualifier variable in the integer case), \code{"conflict"} (for
recording a network tie only if both nodes have a different qualifier
value in the binary case or for recording the distance between the two
nodes on the qualifier variable in the integer case), or
\code{"subtract"} (for subtracting the conflict tie value from the
congruence tie value in each dyad). In two-mode networks, this must be
\code{"ignore"}, \code{"combine"} (for creating multiplex combinations,
e.g., 1 for positive, 2 for negative, and 3 for mixed), or
\code{"subtract"} (for subtracting negative from positive ties). In event
lists, this setting is ignored.}
\item{normalization}{Normalization of edge weights. Valid settings for
one-mode networks are \code{"no"} (for switching off normalization),
\code{"average"} (for average activity normalization), \code{"Jaccard"}
(for Jaccard coefficient normalization), and \code{"cosine"} (for
cosine similarity normalization). Valid settings for two-mode networks
are \code{"no"}, \code{"activity"} (for activity normalization), and
\code{"prominence"} (for prominence normalization).}
\item{isolates}{Should all nodes of the respective variable be included in
the network matrix (\code{isolates = TRUE}), or should only those nodes
be included that are active in the current time period and are not
excluded (\code{isolates = FALSE})?}
\item{duplicates}{Setting for excluding duplicate statements before network
construction. Valid settings are \code{"include"} (for including all
statements in network construction), \code{"document"} (for counting
only one identical statement per document), \code{"week"} (for counting
only one identical statement per calendar week), \code{"month"} (for
counting only one identical statement per calendar month), \code{"year"}
(for counting only one identical statement per calendar year), and
\code{"acrossrange"} (for counting only one identical statement across
the whole time range).}
\item{start.date}{The start date for network construction in the format
"dd.mm.yyyy". All statements before this date will be excluded.}
\item{stop.date}{The stop date for network construction in the format
"dd.mm.yyyy". All statements after this date will be excluded.}
\item{start.time}{The start time for network construction on the specified
\code{start.date}. All statements before this time on the specified date
will be excluded.}
\item{stop.time}{The stop time for network construction on the specified
\code{stop.date}. All statements after this time on the specified date
will be excluded.}
\item{timewindow}{Possible values are \code{"no"}, \code{"events"},
\code{"seconds"}, \code{"minutes"}, \code{"hours"}, \code{"days"},
\code{"weeks"}, \code{"months"}, and \code{"years"}. If \code{"no"} is
selected (= the default setting), no time window will be used. If any of
the time units is selected, a moving time window will be imposed, and
only the statements falling within the time period defined by the window
will be used to create the network. The time window will then be moved
forward by one time unit at a time, and a new network with the new time
boundaries will be created. This is repeated until the end of the overall
time span is reached. All time windows will be saved as separate
networks in a list. The duration of each time window is defined by the
\code{windowsize} argument. For example, this could be used to create a
time window of 6 months which moves forward by one month each time, thus
creating time windows that overlap by five months. If \code{"events"} is
used instead of a natural time unit, the time window will comprise
exactly as many statements as defined in the \code{windowsize} argument.
However, if the start or end statement falls on a date and time where
multiple events happen, those additional events that occur simultaneously
are included because there is no other way to decide which of the
statements should be selected. Therefore the window size is sometimes
extended when the start or end point of a time window is ambiguous in
event time.}
\item{windowsize}{The number of time units of which a moving time window is
comprised. This can be the number of statement events, the number of days
etc., as defined in the \code{timewindow} argument.}
\item{excludeValues}{A list of named character vectors that contains entries
which should be excluded during network construction. For example,
\code{list(concept = c("A", "B"), organization = c("org A", "org B"))}
would exclude all statements containing concepts "A" or "B" or
organizations "org A" or "org B" when the network is constructed. This
is irrespective of whether these values appear in \code{variable1},
\code{variable2}, or the \code{qualifier}. Note that only variables at
the statement level can be used here. There are separate arguments for
excluding statements nested in documents with certain meta-data.}
\item{excludeAuthors}{A character vector of authors. If a statement is
nested in a document where one of these authors is set in the "Author"
meta-data field, the statement is excluded from network construction.}
\item{excludeSources}{A character vector of sources. If a statement is
nested in a document where one of these sources is set in the "Source"
meta-data field, the statement is excluded from network construction.}
\item{excludeSections}{A character vector of sections. If a statement is
nested in a document where one of these sections is set in the "Section"
meta-data field, the statement is excluded from network construction.}
\item{excludeTypes}{A character vector of types. If a statement is
nested in a document where one of these types is set in the "Type"
meta-data field, the statement is excluded from network construction.}
\item{invertValues}{A boolean value indicating whether the entries provided
by the \code{excludeValues} argument should be excluded from network
construction (\code{invertValues = FALSE}) or if they should be the only
values that should be included during network construction
(\code{invertValues = TRUE}).}
\item{invertAuthors}{A boolean value indicating whether the entries provided
by the \code{excludeAuthors} argument should be excluded from network
construction (\code{invertAuthors = FALSE}) or if they should be the
only values that should be included during network construction
(\code{invertAuthors = TRUE}).}
\item{invertSources}{A boolean value indicating whether the entries provided
by the \code{excludeSources} argument should be excluded from network
construction (\code{invertSources = FALSE}) or if they should be the
only values that should be included during network construction
(\code{invertSources = TRUE}).}
\item{invertSections}{A boolean value indicating whether the entries
provided by the \code{excludeSections} argument should be excluded from
network construction (\code{invertSections = FALSE}) or if they should
be the only values that should be included during network construction
(\code{invertSections = TRUE}).}
\item{invertTypes}{A boolean value indicating whether the entries provided
by the \code{excludeTypes} argument should be excluded from network
construction (\code{invertTypes = FALSE}) or if they should be the
only values that should be included during network construction
(\code{invertTypes = TRUE}).}
\item{fileFormat}{An optional file format specification for saving the
resulting network(s) to a file instead of returning an object. Valid values
are \code{"csv"} (for network matrices or event lists), \code{"dl"} (for
UCINET DL full-matrix files), and \code{"graphml"} (for visone .graphml
files). The \code{"graphml"} specification is compatible with time windows.}
\item{outfile}{An optional output file name for saving the resulting
network(s) to a file instead of returning an object.}
\item{verbose}{A boolean value indicating whether details of network
construction should be printed to the R console.}
}
\description{
Compute and retrieve a network from DNA.
}
\details{
This function serves to compute a one-mode or two-mode network or an event
list in DNA and retrieve it as a matrix or data frame, respectively. The
arguments resemble the export options in DNA. It is also possible to compute
a temporal sequence of networks using the moving time window approach, in
which case the networks are retrieved as a list of matrices.
}
\examples{
\dontrun{
dna_init()
conn <- dna_connection(dna_sample())
nw <- dna_network(conn,
networkType = "onemode",
variable1 = "organization",
variable2 = "concept",
qualifier = "agreement",
qualifierAggregation = "congruence",
normalization = "average",
excludeValues = list("concept" =
c("There should be legislation to regulate emissions.")))
# plot network
dna_plotNetwork(nw)
dna_plotHive(nw)
}
}
\author{
Philip Leifeld
}