/
rDNA.R
2160 lines (2058 loc) · 91 KB
/
rDNA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Startup ----------------------------------------------------------------------
dnaEnvironment <- new.env(hash = TRUE, parent = emptyenv())
#' Display version number and date when the package is loaded.
#' @importFrom utils packageDescription
#' @noRd
.onAttach <- function(libname, pkgname) {
desc <- packageDescription(pkgname, libname)
packageStartupMessage(
'Version: ', desc$Version, '\n',
'Date: ', desc$Date, '\n',
'Author: Philip Leifeld (University of Essex)\n',
'Contributors: Tim Henrichsen (University of Warwick),\n',
' Johannes B. Gruber (Vrije Universiteit Amsterdam)\n',
'Project home: github.com/leifeld/dna'
)
}
#' Initialize the connection with DNA
#'
#' Establish a connection between \pkg{rDNA} and the DNA software.
#'
#' To use \pkg{rDNA}, DNA first needs to be initialized. This means that
#' \pkg{rDNA} needs to be told where the DNA executable file, i.e., the jar
#' file, is located. When the \code{dna_init} function is used, the connection
#' to the DNA software is established, and this connection is valid for the rest
#' of the \R session. To initialize a connection with a different DNA version or
#' path, the \R session would need to be restarted first.
#'
#' @param jarfile The file name of the DNA jar file, e.g.,
#' \code{"dna-3.0.7.jar"}. Can be auto-detected using the
#' \code{\link{dna_jar}} function, which looks for a version matching the
#' installed \pkg{rDNA} version in the library path and working directory.
#' @param memory The amount of memory in megabytes to allocate to DNA, for
#' example \code{1024} or \code{4096}.
#' @param returnString Return a character object representing the jar file name?
#'
#' @author Philip Leifeld
#'
#' @examples
#' \dontrun{
#' dna_init()
#' }
#'
#' @export
#' @importFrom rJava .jinit .jnew .jarray
dna_init <- function(jarfile = dna_jar(), memory = 1024, returnString = FALSE) {
if (is.null(jarfile) || length(jarfile) == 0 || is.na(jarfile)) {
stop("Invalid jar file name.")
if (isTRUE(returnString)) {
return(NULL)
}
}
if (!is.character(jarfile) || length(jarfile) > 1 || !grepl("^dna-.+\\.jar$", basename(jarfile))) {
stop("'jarfile' must be a character object of length 1 that points to the DNA jar file.")
}
if (!file.exists(jarfile)) {
stop(paste0("jarfile '", jarfile, "' could not be located."))
}
assign("jar", jarfile, pos = dnaEnvironment)
message(paste("Jar file:", dnaEnvironment[["jar"]]))
.jinit(dnaEnvironment[["jar"]],
force.init = TRUE,
parameters = paste0("-Xmx", memory, "m"))
dnaEnvironment[["dna"]] <- .jnew("dna.Dna", .jarray("headless"))
message("DNA connection established.")
if (isTRUE(returnString)) {
return(jarfile)
}
}
#' Identify and/or download and install the correct DNA jar file
#'
#' Identify and/or download and install the correct DNA jar file.
#'
#' rDNA requires the installation of a DNA jar file to run properly. While it is
#' possible to store the jar file in the respective working directory, it is
#' preferable to install it in the rDNA library installation directory under
#' \code{inst/java/}. The \code{dna_jar} function attempts to find the version
#' of the jar file that matches the installed \pkg{rDNA} version in the
#' \code{inst/java/} sub-directory of the package library path and return the
#' jar file name including its full path. If this fails, it will try to find the
#' jar file in the current working directory and return its file name. If this
#' fails as well, it will attempt to download the matching jar file from GitHub
#' and store it in the library path and return its file name. If this fails, it
#' will attempt to store the downloaded jar file in the working directory and
#' return its file name. If this fails as well, it will clone the current DNA
#' master code from GitHub to a local temporary directory, build the jar file
#' from source, and attempt to store the built jar file in the library path or,
#' if this fails, in the working directory and return the file name of the jar
#' file. If all of this fails, an error message is thrown.
#'
#' @return The file name of the jar file that matches the installed \pkg{rDNA}
#' version, including full path.
#'
#' @author Philip Leifeld
#'
#' @importFrom utils download.file unzip packageVersion
#' @export
dna_jar <- function() {
# detect package version
v <- as.character(packageVersion("rDNA"))
# try to locate jar file in library path and return jar file path
tryCatch({
rdna_dir <- dirname(system.file(".", package = "rDNA"))
jar <- paste0(rdna_dir, "/inst/java/dna-", v, ".jar")
if (file.exists(jar)) {
message("Jar file found in library path.")
return(jar)
}
}, error = function(e) {success <- FALSE})
# try to locate jar file in working directory and return jar file path
tryCatch({
jar <- paste0(getwd(), "/inst/java/dna-", v, ".jar")
if (file.exists(jar)) {
message("Jar file found in working directory.")
return(jar)
}
}, error = function(e) {success <- FALSE})
# try to download from GitHub release directory to library path
tryCatch({
rdna_dir <- dirname(system.file(".", package = "rDNA"))
f <- paste0("https://github.com/leifeld/dna/releases/download/v", v, "/dna-", v, ".jar")
dest <- paste0(rdna_dir, "/inst/java/dna-", v, ".jar")
targetdir <- paste0(rdna_dir, "/", "inst/java/")
dir.create(targetdir, showWarnings = FALSE)
suppressWarnings(download.file(url = f,
destfile = dest,
mode = "wb",
cacheOK = FALSE,
quiet = TRUE))
if (file.exists(dest)) {
message("Jar file downloaded from GitHub to library path.")
return(dest)
}
}, error = function(e) {success <- FALSE})
# try to download from GitHub release directory to working directory
tryCatch({
rdna_dir <- dirname(system.file(".", package = "rDNA"))
f <- paste0("https://github.com/leifeld/dna/releases/download/v", v, "/dna-", v, ".jar")
dest <- paste0(getwd(), "/dna-", v, ".jar")
suppressWarnings(download.file(url = f,
destfile = dest,
mode = "wb",
cacheOK = FALSE,
quiet = TRUE))
if (file.exists(dest)) {
message("Jar file downloaded from GitHub to working directory.")
return(dest)
}
}, error = function(e) {success <- FALSE})
# try to download and build from source
tryCatch({
td <- tempdir()
dest <- paste0(td, "/master.zip")
suppressWarnings(download.file(url = "https://github.com/leifeld/dna/archive/master.zip",
destfile = dest,
mode = "wb",
cacheOK = FALSE,
quiet = TRUE))
unzip(zipfile = dest, overwrite = TRUE, exdir = td)
output <- file.remove(dest)
gradle <- paste0(td, "/dna-master/gradlew")
Sys.chmod(gradle, mode = "0777", use_umask = TRUE)
oldwd <- getwd()
setwd(paste0(td, "/dna-master/"))
system(paste0(gradle, " build"), ignore.stdout = TRUE, ignore.stderr = TRUE)
setwd(oldwd)
builtjar <- paste0(td, "/dna-master/dna/build/libs/dna-", v, ".jar")
if (file.exists(builtjar)) {
message("DNA source code downloaded and jar file built successfully.")
}
}, error = function(e) {success <- FALSE})
# try to copy built jar to library path
tryCatch({
targetdir <- paste0(find.package("rDNA"), "/", "inst/java/")
dir.create(targetdir, recursive = TRUE, showWarnings = FALSE)
dest <- paste0(targetdir, "dna-", v, ".jar")
file.copy(from = builtjar, to = targetdir)
if (file.exists(dest)) {
unlink(paste0(td, "/dna-master"), recursive = TRUE)
message("Jar file copied to library path.")
return(dest)
}
}, error = function(e) {success <- FALSE})
# try to copy built jar to working directory
tryCatch({
dest <- paste0(getwd(), "/dna-", v, ".jar")
file.copy(from = builtjar, to = dest)
if (file.exists(dest)) {
unlink(paste0(td, "/dna-master"), recursive = TRUE)
message("Jar file copied to working directory.")
return(dest)
}
}, error = function(e) {success <- FALSE})
stop("DNA jar file could not be identified or downloaded. Please download ",
"the DNA jar file matching the version number of rDNA and store it in ",
"the inst/java/ directory of your rDNA library installation path or in ",
"your working directory. Your current rDNA version is ", v, ".")
}
#' Provides a small sample database
#'
#' A small sample database to test the functions of rDNA.
#'
#' Copies a small .dna sample file to the current working directory and returns
#' the location of this newly created file.
#'
#' @param overwrite Logical. Should \code{sample.dna} be overwritten if found in
#' the current working directory?
#'
#' @examples
#' \dontrun{
#' dna_init()
#' s <- dna_sample()
#' dna_openDatabase(s)
#' }
#'
#' @author Johannes B. Gruber, Philip Leifeld
#'
#' @export
dna_sample <- function(overwrite = FALSE) {
if (file.exists(paste0(getwd(), "/sample.dna")) & overwrite == FALSE) {
warning("Sample file already exists in working directory. ",
"Use 'overwrite = TRUE' to revert changes in the sample file.")
} else {
file.copy(from = system.file("extdata", "sample.dna", package = "rDNA"),
to = paste0(getwd(), "/sample.dna"),
overwrite = overwrite)
}
return(paste0(getwd(), "/sample.dna"))
}
#' Query the coders in a database
#'
#' Display the coder IDs, names, and colors present in a DNA database.
#'
#' Some functions require knowing the coder ID with which changes should be
#' made. This function queries any database, which does not have to be opened,
#' for their coder IDs, names, and colors, and returns them as a data frame.
#'
#' @param db_url The URL or full path of the database.
#' @param db_type The type of database. Valid values are \code{"sqlite"},
#' \code{postgresql}, and \code{postgresql}.
#' @param db_name The name of the database at the given URL or path. Can be a
#' zero-length character object (\code{""}) for file-based SQLite databases.
#' @param db_port The connection port for the database connection. No port is
#' required (\code{db_port = -1}) for SQLite databases. MySQL databases often
#' use port \code{3306}. PostgreSQL databases often use port \code{5432}. If
#' \code{db_port = NULL}, one of these default values will be selected based
#' on the \code{db_type} argument.
#' @param db_login The login user name for the database. This is the database
#' login user name, not the coder name. Can be a zero-length character object
#' (\code{""}) for SQLite databases.
#' @param db_password The password for the database. This is the database
#' password, not the coder password. Can be a zero-length character object
#' (\code{""}) for SQLite databases.
#'
#' @author Philip Leifeld
#'
#' @examples
#' \dontrun{
#' dna_init()
#' dna_sample()
#' dna_queryCoders("sample.dna")
#' }
#'
#' @export
#' @importFrom rJava .jcall .jevalArray
dna_queryCoders <- function(db_url,
db_type = "sqlite",
db_name = "",
db_port = NULL,
db_login = "",
db_password = "") {
if (is.null(db_port) && !is.null(db_type)) {
if (db_type == "sqlite") {
db_port <- as.integer(-1)
} else if (db_type == "mysql") {
db_port <- as.integer(3306)
} else if (db_type == "postgresql") {
db_port <- as.integer(5432)
}
} else {
db_port <- as.integer(db_port)
}
q <- .jcall(dnaEnvironment[["dna"]]$headlessDna,
"[Ljava/lang/Object;",
"queryCoders",
db_type,
ifelse(db_type == "sqlite", normalizePath(db_url), db_url),
db_name,
db_port,
db_login,
db_password)
names(q) <- c("ID", "Name", "Color")
q <- lapply(q, .jevalArray)
q <- as.data.frame(q, stringsAsFactors = FALSE)
return(q)
}
#' Open a database
#'
#' Open a database in DNA.
#'
#' Open a database in DNA. This can be a SQLite, MySQL, or PostgreSQL database.
#' The database must already have the table structure required for DNA. You must
#' provide the coder ID and password along with the database credentials. To
#' look up coder IDs, use the \code{\link{dna_queryCoders}} function.
#'
#' @param coderId The coder ID of the coder who is opening the database. If an
#' invalid coder ID is supplied (i.e., \code{-1} or similar), the coder ID is
#' queried interactively from the user.
#' @param coderPassword The coder password of the coder who is opening the
#' database. If an empty password is provided (e.g., \code{""}), the password
#' is queried interactively from the user.
#' @param db_url The URL for accessing the database (for remote databases) or
#' the path of the SQLite database file, including file extension.
#' @param db_type The type of database. Valid values are \code{"sqlite"},
#' \code{postgresql}, and \code{postgresql}.
#' @param db_name The name of the database at the given URL or path. Can be a
#' zero-length character object (\code{""}) for file-based SQLite databases.
#' @param db_port The connection port for the database connection. No port is
#' required (\code{db_port = -1}) for SQLite databases. MySQL databases often
#' use port \code{3306}. PostgreSQL databases often use port \code{5432}. If
#' \code{db_port = NULL}, one of these default values will be selected based
#' on the \code{db_type} argument.
#' @param db_login The login user name for the database. This is the database
#' login user name, not the coder name. Can be a zero-length character object
#' (\code{""}) for SQLite databases.
#' @param db_password The password for the database. This is the database
#' password, not the coder password. Can be a zero-length character object
#' (\code{""}) for SQLite databases.
#'
#' @author Philip Leifeld
#'
#' @examples
#' \dontrun{
#' dna_init()
#' dna_sample()
#' dna_openDatabase(coderId = 1,
#' coderPassword = "sample",
#' db_url = "sample.dna")
#' }
#'
#' @export
#' @importFrom rJava .jcall
dna_openDatabase <- function(db_url,
coderId = 1,
coderPassword = "",
db_type = "sqlite",
db_name = "",
db_port = -1,
db_login = "",
db_password = "") {
if (is.null(db_port) && !is.null(db_type)) {
if (db_type == "sqlite") {
db_port <- as.integer(-1)
} else if (db_type == "mysql") {
db_port <- as.integer(3306)
} else if (db_type == "postgresql") {
db_port <- as.integer(5432)
}
} else {
db_port <- as.integer(db_port)
}
if (db_type == "sqlite") {
if (file.exists(db_url)) {
db_url <- normalizePath(db_url)
} else {
stop("Database file not found.")
}
}
if (is.null(coderId) || !is.numeric(coderId) || coderId < 1) {
if (!requireNamespace("askpass", quietly = TRUE)) {
coderId <- as.integer(readline("Coder ID: "))
} else {
coderId <- as.integer(askpass::askpass("Coder ID: "))
}
}
if (is.null(coderId) || length(coderId) == 0) {
coderId <- -1
}
if (is.null(coderPassword) || !is.character(coderPassword) || coderPassword == "") {
if (!requireNamespace("askpass", quietly = TRUE)) {
coderPassword <- readline("Coder password: ")
} else {
coderPassword <- askpass::askpass("Coder password: ")
}
}
if (is.null(coderPassword) || length(coderPassword) == 0) {
coderPassword <- ""
}
q <- .jcall(dnaEnvironment[["dna"]]$headlessDna,
"Z",
"openDatabase",
as.integer(coderId),
coderPassword,
db_type,
db_url,
db_name,
db_port,
db_login,
db_password)
}
#' Print database details
#'
#' Print number of documents and statements and active coder.
#'
#' For the DNA database that is currently open, print the number of documents
#' and statements, the URL, statement types (and their statement counts), and
#' the active coder to the console.
#'
#' @author Philip Leifeld
#'
#' @examples
#' \dontrun{
#' dna_init()
#' dna_sample()
#' dna_openDatabase(coderId = 1,
#' coderPassword = "sample",
#' db_url = "sample.dna")
#' dna_printDetails()
#' }
#'
#' @export
#' @importFrom rJava .jcall
dna_printDetails <- function() {
.jcall(dnaEnvironment[["dna"]]$headlessDna, "V", "printDatabaseDetails")
}
#' Close the open DNA database (if any).
#'
#' Close the DNA database that is currently active (if any).
#'
#' Close the currently active DNA database and display a message confirming that
#' the database was closed.
#'
#' @author Philip Leifeld
#'
#' @examples
#' \dontrun{
#' dna_init()
#' dna_sample()
#' dna_openDatabase(coderId = 1,
#' coderPassword = "sample",
#' db_url = "sample.dna")
#' dna_closeDatabase()
#' }
#'
#' @export
#' @importFrom rJava .jcall
dna_closeDatabase <- function() {
.jcall(dnaEnvironment[["dna"]]$headlessDna, "V", "closeDatabase")
}
#' Save a connection profile to a file
#'
#' Save connection profile for the current coder and database to disk
#'
#' Save the current database URL/path, user name, password, port, database name,
#' and coder to an encrypted JSON file with the extension \code{.dnc}. This file
#' is called a connection profile. It serves as a bookmark and saves you from
#' having to enter and store the full connection details each time you want to
#' access the database. Please make sure you enter the file name with the
#' extension. You are asked to provide the coder password of the currently
#' active coder again, for whom the connection profile is saved. This is just
#' for security reasons. If you do not provide a coder password (e.g., your
#' password is a zero-length character object \code{""}), you are asked to enter
#' the password interactively. If the \pkg{askpass} package is installed, this
#' package will be used to mask the user input; otherwise the password is
#' visible in clear text. Installing the \pkg{askpass} package is strongly
#' recommended.
#'
#' @param file The file name of the connection profile to save.
#' @param coderPassword The clear text coder password. If a zero-length
#' character object (\code{""}) is provided, the user will be prompted
#' for a password interactively.
#'
#' @author Philip Leifeld
#'
#' @examples
#' \dontrun{
#' dna_init()
#' dna_sample()
#' dna_openDatabase(coderId = 1,
#' coderPassword = "sample",
#' db_url = "sample.dna")
#' dna_saveConnectionProfile(file = "my profile.dnc", coderPassword = "sample")
#' }
#'
#' @export
#' @importFrom rJava .jcall
dna_saveConnectionProfile <- function(file, coderPassword = "") {
if (is.null(file) || !is.character(file) || length(file) != 1) {
stop("Please provide a file name for the connection profile.")
}
if (is.null(coderPassword) || !is.character(coderPassword) || coderPassword == "") {
if (!requireNamespace("askpass", quietly = TRUE)) {
coderPassword <- readline("Coder password: ")
} else {
coderPassword <- askpass::askpass("Coder password: ")
}
}
if (is.null(coderPassword) || length(coderPassword) == 0) {
coderPassword <- ""
}
s <- .jcall(dnaEnvironment[["dna"]]$headlessDna,
"Z",
"saveConnectionProfile",
file,
coderPassword)
}
#' Open a connection profile
#'
#' Open a connection profile and establish a connection to the database.
#'
#' Load a connection profile from a \code{.dnc} file. The file contains
#' connection details for a database (like a bookmark) along with the coder ID
#' of the coder who saved the connection profile. By loading the connection
#' profile, a connection to the database will be established by DNA, and the
#' coder saved in the connection profile will be activated. The coder password
#' the user needs to provide is the coder password for the coder saved in the
#' connection profile. It serves to decrypt the information stored in the file
#' and activate the coder in the database connection. If an empty character
#' object is provided as the password (\code{""}), the user will be prompted
#' interactively for a password. If the \pkg{askpass} package is installed, this
#' package will be used to mask the user input; otherwise the password is
#' visible in clear text. Installing the \pkg{askpass} package is strongly
#' recommended.
#'
#' @param file The file name of the connection profile to open.
#' @param coderPassword The clear text coder password. If a zero-length
#' character object (\code{""}) is provided, the user will be prompted
#' for a password interactively.
#'
#' @author Philip Leifeld
#'
#' @examples
#' \dontrun{
#' dna_init()
#' dna_sample()
#' dna_openDatabase(coderId = 1,
#' coderPassword = "sample",
#' db_url = "sample.dna")
#' dna_saveConnectionProfile(file = "my profile.dnc", coderPassword = "sample")
#' dna_closeDatabase()
#' dna_openConnectionProfile(file = "my profile.dnc", coderPassword = "sample")
#' }
#'
#' @export
#' @importFrom rJava .jcall
dna_openConnectionProfile <- function(file, coderPassword = "") {
if (is.null(file) || !is.character(file) || length(file) != 1) {
stop("Please provide a file name for the connection profile.")
}
if (!file.exists(file)) {
stop("File does not exist.")
} else {
file <- normalizePath(file)
}
if (is.null(coderPassword) || !is.character(coderPassword) || coderPassword == "") {
if (!requireNamespace("askpass", quietly = TRUE)) {
coderPassword <- readline("Coder password: ")
} else {
coderPassword <- askpass::askpass("Coder password: ")
}
}
if (is.null(coderPassword) || length(coderPassword) == 0) {
coderPassword <- ""
}
s <- .jcall(dnaEnvironment[["dna"]]$headlessDna,
"Z",
"openConnectionProfile",
file,
coderPassword)
}
#' Get the entities and attributes for a variable
#'
#' Retrieve the entities and their attributes for a variable in DNA
#'
#' This function retrieves the entities and their attributes for a given
#' variable from the DNA database as a \code{dna_attributes} object. Such an
#' object is an extension of a data frame and can be treated as such.
#'
#' There are three ways to use this function: by specifying only the variable
#' ID; by specifying the variable name and its statement type ID; and by
#' specifying the variable name and its statement type name.
#'
#' @param statementType The name of the statement type in which the variable is
#' defined for which entities and values should be retrieved. Only required if
#' \code{variableId} is not supplied. Either \code{statementType} or
#' \code{statementTypeId} must be specified in this case.
#' @param variable The name of the variable for which the entities and
#' attributes should be returned. In addition to this argument, either the
#' statement type name or statement type ID must be supplied to identify the
#' variable correctly. If the \code{variableId} a specified, the
#' \code{variable} argument is unnecessary and the statement type need not be
#' supplied.
#' @param statementTypeId The ID of the statement type in which the variable is
#' defined for which entities and values should be retrieved. Only required if
#' \code{variableId} is not supplied. Either \code{statementType} or
#' \code{statementTypeId} must be specified in this case.
#' @param variableId The ID of the variable for which the entities and
#' attributes should be returned. If this argument is supplied, the other
#' three arguments are unnecessary.
#'
#' @examples
#' \dontrun{
#' dna_init()
#' dna_sample()
#' dna_openDatabase("sample.dna", coderId = 1, coderPassword = "sample")
#'
#' dna_getAttributes(variableId = 1)
#' dna_getAttributes(statementTypeId = 1, variable = "organization")
#' dna_getAttributes(statementType = "DNA Statement", variable = "concept")
#' }
#'
#' @author Philip Leifeld
#'
#' @importFrom rJava .jcall
#' @importFrom rJava J
#' @export
dna_getAttributes <- function(statementType = NULL,
variable = NULL,
statementTypeId = NULL,
variableId = NULL) {
# check if the arguments are valid
statementTypeValid <- TRUE
if (is.null(statementType) || !is.character(statementType) || length(statementType) != 1 || is.na(statementType) || statementType == "") {
statementTypeValid <- FALSE
}
statementTypeIdValid <- TRUE
if (is.null(statementTypeId) || !is.numeric(statementTypeId) || length(statementTypeId) != 1 || is.na(statementTypeId) || statementTypeId %% 1 != 0) {
statementTypeIdValid <- FALSE
}
variableValid <- TRUE
if (is.null(variable) || !is.character(variable) || length(variable) != 1 || is.na(variable) || variable == "") {
variableValid <- FALSE
}
variableIdValid <- TRUE
if (is.null(variableId) || !is.numeric(variableId) || length(variableId) != 1 || is.na(variableId) || variableId %% 1 != 0) {
variableIdValid <- FALSE
}
errorString <- "Please supply 1) a variable ID or 2) a statement type name and a variable name or 3) a statement type ID and a variable name."
if ((!variableValid && !variableIdValid) || (!statementTypeIdValid && !statementTypeValid && !variableIdValid)) {
stop(errorString)
}
if (variableIdValid && variableValid) {
variable <- NULL
variableValid <- FALSE
warning("Both a variable ID and a variable name were supplied. Ignoring the 'variable' argument.")
}
if (statementTypeIdValid && statementTypeValid && !variableIdValid && variableValid) {
statementType <- NULL
statementTypeValid <- FALSE
warning("Both a statement type ID and a statement type name were supplied. Ignoring the 'statementType' argument.")
}
if (variableIdValid && (statementTypeIdValid || statementTypeValid)) {
statementTypeId <- NULL
statementTypeIdValid <- FALSE
statementType <- NULL
statementTypeValid <- FALSE
warning("If a variable ID is provided, a statement type is not necessary. Ignoring the 'statementType' and 'statementTypeId' arguments.")
}
# get the data from the DNA database using rJava
if (variableIdValid) {
a <- .jcall(dnaEnvironment[["dna"]]$headlessDna,
"Lexport/DataFrame;",
"getAttributes",
as.integer(variableId))
} else if (variableValid && statementTypeIdValid) {
a <- .jcall(dnaEnvironment[["dna"]]$headlessDna,
"Lexport/DataFrame;",
"getAttributes",
as.integer(statementTypeId),
variable)
} else if (variableValid && statementTypeValid) {
a <- .jcall(dnaEnvironment[["dna"]]$headlessDna,
"Lexport/DataFrame;",
"getAttributes",
statementType,
variable)
} else {
stop(errorString)
}
# extract the relevant information from the Java reference
varNames <- .jcall(a, "[S", "getVariableNames")
nr <- .jcall(a, "I", "nrow")
nc <- .jcall(a, "I", "ncol")
# create an empty data frame with the first (integer) column for IDs
dat <- cbind(data.frame(ID = integer(nr)),
matrix(character(nr), nrow = nr, ncol = nc - 1))
# populate the data frame
for (i in 0:(nr - 1)) {
for (j in 0:(nc - 1)) {
dat[i + 1, j + 1] <- J(a, "getValue", as.integer(i), as.integer(j))
}
}
rownames(dat) <- NULL
colnames(dat) <- varNames
class(dat) <- c("dna_attributes", class(dat))
return(dat)
}
#' Compute and retrieve a network
#'
#' Compute and retrieve a network from DNA.
#'
#' This function serves to compute a one-mode or two-mode network or an event
#' list in DNA and retrieve it as a matrix or data frame, respectively. The
#' arguments resemble the export options in DNA. It is also possible to compute
#' a temporal sequence of networks using the moving time window approach, in
#' which case the networks are retrieved as a list of matrices.
#'
#' @param networkType The kind of network to be computed. Can be
#' \code{"twomode"}, \code{"onemode"}, or \code{"eventlist"}.
#' @param statementType The name of the statement type in which the variable
#' of interest is nested. For example, \code{"DNA Statement"}.
#' @param variable1 The first variable for network construction. In a one-mode
#' network, this is the variable for both the rows and columns. In a
#' two-mode network, this is the variable for the rows only. In an event
#' list, this variable is only used to check for duplicates (depending on
#' the setting of the \code{duplicates} argument).
#' @param variable1Document A boolean value indicating whether the first
#' variable is at the document level (i.e., \code{"author"},
#' \code{"source"}, \code{"section"}, \code{"type"}, \code{"id"}, or
#' \code{"title"}).
#' @param variable2 The second variable for network construction. In a one-mode
#' network, this is the variable over which the ties are created. For
#' example, if an organization x organization network is created, and ties
#' in this network indicate co-reference to a concept, then the second
#' variable is the \code{"concept"}. In a two-mode network, this is the
#' variable used for the columns of the network matrix. In an event list,
#' this variable is only used to check for duplicates (depending on the
#' setting of the \code{duplicates} argument).
#' @param variable2Document A boolean value indicating whether the second
#' variable is at the document level (i.e., \code{"author"},
#' \code{"source"}, \code{"section"}, \code{"type"}, \code{"id"}, or
#' \code{"title"}
#' @param qualifier The qualifier variable. In a one-mode network, this
#' variable can be used to count only congruence or conflict ties. For
#' example, in an organization x organization network via common concepts,
#' a binary \code{"agreement"} qualifier could be used to record only ties
#' where both organizations have a positive stance on the concept or where
#' both organizations have a negative stance on the concept. With an
#' integer qualifier, the tie weight between the organizations would be
#' proportional to the similarity or distance between the two organizations
#' on the scale of the integer variable. With a short text variable as a
#' qualifier, agreement on common categorical values of the qualifier is
#' required, for example a tie is established (or a tie weight increased) if
#' two actors both refer to the same value on the second variable AND match on
#' the categorical qualifier, for example the type of referral.
#'
#' In a two-mode network, the qualifier variable can be used to retain only
#' positive or only negative statements or subtract negative from positive
#' mentions. All of this depends on the setting of the
#' \code{qualifierAggregation} argument. For event lists, the qualifier
#' variable is only used for filtering out duplicates (depending on the
#' setting of the \code{duplicates} argument.
#'
#' The qualifier can also be \code{NULL}, in which case it is ignored, meaning
#' that values in \code{variable1} and \code{variable2} are unconditionally
#' associated with each other in the network when they co-occur. This is
#' identical to selecting a qualifier variable and setting
#' \code{qualifierAggregation = "ignore"}.
#' @param qualifierDocument A boolean value indicating whether the qualifier
#' variable is at the document level (i.e., \code{"author"},
#' \code{"source"}, \code{"section"}, \code{"type"}, \code{"id"}, or
#' \code{"title"}
#' @param qualifierAggregation The aggregation rule for the \code{qualifier}
#' variable. In one-mode networks, this must be \code{"ignore"} (for
#' ignoring the qualifier variable), \code{"congruence"} (for recording a
#' network tie only if both nodes have the same qualifier value in the
#' binary case or for recording the similarity between the two nodes on the
#' qualifier variable in the integer case), \code{"conflict"} (for
#' recording a network tie only if both nodes have a different qualifier
#' value in the binary case or for recording the distance between the two
#' nodes on the qualifier variable in the integer case), or
#' \code{"subtract"} (for subtracting the conflict tie value from the
#' congruence tie value in each dyad). In two-mode networks, this must be
#' \code{"ignore"}, \code{"combine"} (for creating multiplex combinations,
#' e.g., 1 for positive, 2 for negative, and 3 for mixed), or
#' \code{subtract} (for subtracting negative from positive ties). In event
#' lists, this setting is ignored.
#' @param normalization Normalization of edge weights. Valid settings for
#' one-mode networks are \code{"no"} (for switching off normalization),
#' \code{"average"} (for average activity normalization), \code{"jaccard"}
#' (for Jaccard coefficient normalization), and \code{"cosine"} (for
#' cosine similarity normalization). Valid settings for two-mode networks
#' are \code{"no"}, \code{"activity"} (for activity normalization), and
#' \code{"prominence"} (for prominence normalization).
#' @param isolates Should all nodes of the respective variable be included in
#' the network matrix (\code{isolates = TRUE}), or should only those nodes
#' be included that are active in the current time period and are not
#' excluded (\code{isolates = FALSE})?
#' @param duplicates Setting for excluding duplicate statements before network
#' construction. Valid settings are \code{"include"} (for including all
#' statements in network construction), \code{"document"} (for counting
#' only one identical statement per document), \code{"week"} (for counting
#' only one identical statement per calendar week), \code{"month"} (for
#' counting only one identical statement per calendar month), \code{"year"}
#' (for counting only one identical statement per calendar year), and
#' \code{"acrossrange"} (for counting only one identical statement across
#' the whole time range).
#' @param start.date The start date for network construction in the format
#' \code{"dd.mm.yyyy"}. All statements before this date will be excluded.
#' @param start.time The start time for network construction on the specified
#' \code{start.date}. All statements before this time on the specified date
#' will be excluded.
#' @param stop.date The stop date for network construction in the format
#' \code{"dd.mm.yyyy"}. All statements after this date will be excluded.
#' @param stop.time The stop time for network construction on the specified
#' \code{stop.date}. All statements after this time on the specified date
#' will be excluded.
#' @param timeWindow Possible values are \code{"no"}, \code{"events"},
#' \code{"seconds"}, \code{"minutes"}, \code{"hours"}, \code{"days"},
#' \code{"weeks"}, \code{"months"}, and \code{"years"}. If \code{"no"} is
#' selected (= the default setting), no time window will be used. If any of
#' the time units is selected, a moving time window will be imposed, and
#' only the statements falling within the time period defined by the window
#' will be used to create the network. The time window will then be moved
#' forward by one time unit at a time, and a new network with the new time
#' boundaries will be created. This is repeated until the end of the overall
#' time span is reached. All time windows will be saved as separate
#' networks in a list. The duration of each time window is defined by the
#' \code{windowSize} argument. For example, this could be used to create a
#' time window of 6 months which moves forward by one month each time, thus
#' creating time windows that overlap by five months. If \code{"events"} is
#' used instead of a natural time unit, the time window will comprise
#' exactly as many statements as defined in the \code{windowSize} argument.
#' However, if the start or end statement falls on a date and time where
#' multiple events happen, those additional events that occur simultaneously
#' are included because there is no other way to decide which of the
#' statements should be selected. Therefore the window size is sometimes
#' extended when the start or end point of a time window is ambiguous in
#' event time.
#' @param windowSize The number of time units of which a moving time window is
#' comprised. This can be the number of statement events, the number of days
#' etc., as defined in the \code{"timeWindow"} argument.
#' @param excludeValues A list of named character vectors that contains entries
#' which should be excluded during network construction. For example,
#' \code{list(concept = c("A", "B"), organization = c("org A", "org B"))}
#' would exclude all statements containing concepts "A" or "B" or
#' organizations "org A" or "org B" when the network is constructed. This
#' is irrespective of whether these values appear in \code{variable1},
#' \code{variable2}, or the \code{qualifier}. Note that only variables at
#' the statement level can be used here. There are separate arguments for
#' excluding statements nested in documents with certain meta-data.
#' @param excludeAuthors A character vector of authors. If a statement is
#' nested in a document where one of these authors is set in the "Author"
#' meta-data field, the statement is excluded from network construction.
#' @param excludeSources A character vector of sources. If a statement is
#' nested in a document where one of these sources is set in the "Source"
#' meta-data field, the statement is excluded from network construction.
#' @param excludeSections A character vector of sections. If a statement is
#' nested in a document where one of these sections is set in the "Section"
#' meta-data field, the statement is excluded from network construction.
#' @param excludeTypes A character vector of types. If a statement is
#' nested in a document where one of these types is set in the "Type"
#' meta-data field, the statement is excluded from network construction.
#' @param invertValues A boolean value indicating whether the entries provided
#' by the \code{excludeValues} argument should be excluded from network
#' construction (\code{invertValues = FALSE}) or if they should be the only
#' values that should be included during network construction
#' (\code{invertValues = TRUE}).
#' @param invertAuthors A boolean value indicating whether the entries provided
#' by the \code{excludeAuthors} argument should be excluded from network
#' construction (\code{invertAuthors = FALSE}) or if they should be the
#' only values that should be included during network construction
#' (\code{invertAuthors = TRUE}).
#' @param invertSources A boolean value indicating whether the entries provided
#' by the \code{excludeSources} argument should be excluded from network
#' construction (\code{invertSources = FALSE}) or if they should be the
#' only values that should be included during network construction
#' (\code{invertSources = TRUE}).
#' @param invertSections A boolean value indicating whether the entries
#' provided by the \code{excludeSections} argument should be excluded from
#' network construction (\code{invertSections = FALSE}) or if they should
#' be the only values that should be included during network construction
#' (\code{invertSections = TRUE}).
#' @param invertTypes A boolean value indicating whether the entries provided
#' by the \code{excludeTypes} argument should be excluded from network
#' construction (\code{invertTypes = FALSE}) or if they should be the
#' only values that should be included during network construction
#' (\code{invertTypes = TRUE}).
#' @param fileFormat An optional file format specification for saving the
#' resulting network(s) to a file instead of returning an object. Valid values
#' are \code{"csv"} (for network matrices or event lists), \code{"dl"} (for
#' UCINET DL full-matrix files), and \code{"graphml"} (for visone .graphml
#' files).
#' @param outfile An optional output file name for saving the resulting
#' network(s) to a file instead of returning an object.
#'
#' @examples
#' \dontrun{
#' dna_init()
#' dna_sample()
#' dna_openDatabase("sample.dna", coderId = 1, coderPassword = "sample")
#' nw <- dna_network(networkType = "onemode",
#' variable1 = "organization",
#' variable2 = "concept",
#' qualifier = "agreement",
#' qualifierAggregation = "congruence",
#' normalization = "average",
#' excludeValues = list("concept" =
#' c("There should be legislation to regulate emissions.")))
#' }
#'
#' @author Philip Leifeld
#'
#' @importFrom rJava .jarray
#' @importFrom rJava .jcall
#' @importFrom rJava .jnull
#' @importFrom rJava J
#' @export
dna_network <- function(networkType = "twomode",
statementType = "DNA Statement",
variable1 = "organization",
variable1Document = FALSE,
variable2 = "concept",
variable2Document = FALSE,
qualifier = "agreement",
qualifierDocument = FALSE,
qualifierAggregation = "ignore",
normalization = "no",
isolates = FALSE,
duplicates = "include",
start.date = "01.01.1900",
stop.date = "31.12.2099",
start.time = "00:00:00",
stop.time = "23:59:59",
timeWindow = "no",
windowSize = 100,
excludeValues = list(),
excludeAuthors = character(),
excludeSources = character(),
excludeSections = character(),
excludeTypes = character(),
invertValues = FALSE,
invertAuthors = FALSE,
invertSources = FALSE,
invertSections = FALSE,
invertTypes = FALSE,
fileFormat = NULL,
outfile = NULL) {
# wrap the vectors of exclude values for document variables into Java arrays
excludeAuthors <- .jarray(excludeAuthors)
excludeSources <- .jarray(excludeSources)
excludeSections <- .jarray(excludeSections)
excludeTypes <- .jarray(excludeTypes)
# compile exclude variables and values vectors
dat <- matrix("", nrow = length(unlist(excludeValues)), ncol = 2)
count <- 0
if (length(excludeValues) > 0) {
for (i in 1:length(excludeValues)) {
if (length(excludeValues[[i]]) > 0) {
for (j in 1:length(excludeValues[[i]])) {
count <- count + 1
dat[count, 1] <- names(excludeValues)[i]
dat[count, 2] <- excludeValues[[i]][j]
}
}
}
var <- dat[, 1]
val <- dat[, 2]
} else {
var <- character()
val <- character()
}
var <- .jarray(var) # array of variable names of each excluded value
val <- .jarray(val) # array of values to be excluded
# encode R NULL as Java null value if necessary
if (is.null(qualifier) || is.na(qualifier)) {