Skip to content

Commit

Permalink
Added arguments 'outfile' and 'fileFormat' to dna_network. Closes #153.
Browse files Browse the repository at this point in the history
  • Loading branch information
Philip Leifeld committed Feb 17, 2019
1 parent 94077df commit 7d41c9b
Show file tree
Hide file tree
Showing 8 changed files with 930 additions and 808 deletions.
2 changes: 1 addition & 1 deletion DNA/src/dna/Dna.java
Expand Up @@ -20,7 +20,7 @@ public class Dna {
PrintStream console;

public Dna() {
date = "2019-02-16";
date = "2019-02-17";
version = "2.0 beta 24";
}

Expand Down
749 changes: 748 additions & 1 deletion DNA/src/dna/export/ExportHelper.java

Large diffs are not rendered by default.

766 changes: 9 additions & 757 deletions DNA/src/dna/export/ExporterGUI.java

Large diffs are not rendered by default.

77 changes: 76 additions & 1 deletion DNA/src/dna/export/ExporterR.java
Expand Up @@ -227,6 +227,8 @@ public String rShow() {
* @param invertSources boolean indicating whether the document-level source values should be included (= true) rather than excluded
* @param invertSections boolean indicating whether the document-level section values should be included (= true) rather than excluded
* @param invertTypes boolean indicating whether the document-level type values should be included (= true) rather than excluded
* @param outfile String with a file name under which the resulting network should be saved.
* @param fileFormat String with the file format. Valid values are "csv", "dl", "graphml", and null (for no file export).
* @param verbose Report progress to the console?
* @return A Matrix object containing the resulting one-mode or two-mode network
* @throws Exception
Expand All @@ -236,7 +238,7 @@ public void rNetwork(String networkType, String statementType, String variable1,
String duplicates, String startDate, String stopDate, String startTime, String stopTime, String timewindow, int windowsize,
String[] excludeVariables, String[] excludeValues, String[] excludeAuthors, String[] excludeSources, String[] excludeSections,
String[] excludeTypes, boolean invertValues, boolean invertAuthors, boolean invertSources, boolean invertSections,
boolean invertTypes, boolean verbose) throws Exception {
boolean invertTypes, String outfile, String fileFormat, boolean verbose) throws Exception {

// step 1: preprocess arguments
int max = 5;
Expand Down Expand Up @@ -476,6 +478,20 @@ public void rNetwork(String networkType, String statementType, String variable1,
if (verbose == true) {
System.out.print("Done.\n");
}

// check file export format arguments
if (fileFormat != null && !fileFormat.equals("csv") && !fileFormat.equals("dl") && !fileFormat.equals("graphml")) {
	throw new Exception("'fileFormat' must be 'csv', 'dl', 'graphml', or NULL.");
}
// Append the extension that matches the file format unless the file name already ends with it
// (compared case-insensitively). Both arguments must be present: a file name without a file
// format previously triggered a NullPointerException on fileFormat.equals(...) here.
if (outfile != null && fileFormat != null) {
	// fileFormat has been validated above, so it is one of "csv", "dl", or "graphml"
	String extension = "." + fileFormat;
	if (!outfile.toLowerCase().endsWith(extension)) {
		outfile = outfile + extension;
	}
}

// step 2: filter
boolean filterEmptyFields = true;
Expand Down Expand Up @@ -678,6 +694,65 @@ public void rNetwork(String networkType, String statementType, String variable1,
}
System.out.println("(" + step + "/" + max + "): Retrieving results.");
}

// file export: only attempted when both a file format and a file name were supplied
if (fileFormat != null && outfile != null) {
	// The event-list label was spelled both "Event List" and "Event list" in this block, so at
	// least one branch could never match; compare case-insensitively to cover either spelling.
	boolean eventList = networkType.equalsIgnoreCase("Event list");
	boolean noTimeWindow = timewindow.equals("no time window");
	// set to true only after an export routine has actually been called, so that the
	// success message below is not printed after an "Aborting." message
	boolean exported = false;

	if (fileFormat.equals("dl")) {
		if (eventList) {
			System.err.println("The DL file format does not currently support event lists. Aborting.");
		} else if (!noTimeWindow) {
			System.err.println("The DL file format does not currently support time windows. Aborting.");
		} else if (networkType.equals("One-mode network")) {
			exportHelper.exportDL(this.matrix, outfile, false);
			exported = true;
		} else if (networkType.equals("Two-mode network")) {
			exportHelper.exportDL(this.matrix, outfile, true);
			exported = true;
		}
	} else if (fileFormat.equals("graphml")) {
		if (eventList) {
			// reported on System.err like the DL and CSV abort messages
			System.err.println("The graphml file format does not currently support event lists. Aborting.");
		} else {
			String[] values1 = exportHelper.retrieveValues(filteredStatements, this.data.getDocuments(), variable1, variable1Document);
			String[] values2 = exportHelper.retrieveValues(filteredStatements, this.data.getDocuments(), variable2, variable2Document);
			int[] frequencies1 = exportHelper.countFrequencies(values1, names1);
			int[] frequencies2 = exportHelper.countFrequencies(values2, names2);
			// constant-first comparison: no NullPointerException if the lookup yields null
			boolean qualifierBinary = "boolean".equals(st.getVariables().get(qualifier));
			boolean twoMode = networkType.equals("Two-mode network");
			if (noTimeWindow) {
				exportHelper.exportGraphml(matrix, twoMode, st, outfile, variable1, variable2, frequencies1, frequencies2,
						this.data.getAttributes(), qualifierAggregation, qualifierBinary);
			} else {
				// One file per time window: a zero-padded running number is inserted between the
				// file name stem and its last eight characters.
				// NOTE(review): assumes outfile ends in ".graphml" (eight characters), which the
				// argument check earlier in this method appears to guarantee -- confirm.
				int extensionLength = ".graphml".length();
				String stem = outfile.substring(0, outfile.length() - extensionLength);
				String extension = outfile.substring(outfile.length() - extensionLength);
				int digits = String.valueOf(this.timeWindowMatrices.size()).length();
				for (int i = 0; i < this.timeWindowMatrices.size(); i++) {
					String counter = "-" + String.format("%0" + digits + "d", i + 1);
					exportHelper.exportGraphml(this.timeWindowMatrices.get(i), twoMode, st, stem + counter + extension,
							variable1, variable2, frequencies1, frequencies2, this.data.getAttributes(), qualifierAggregation, qualifierBinary);
				}
			}
			exported = true;
		}
	} else if (fileFormat.equals("csv")) {
		if (!noTimeWindow) {
			System.err.println("The CSV file export does not currently support time windows. Aborting.");
		} else if (eventList) {
			exportHelper.eventCSV(filteredStatements, this.data.getDocuments(), st, outfile);
			exported = true;
		} else {
			exportHelper.exportCSV(this.matrix, outfile);
			exported = true;
		}
	}

	if (verbose == true && exported) {
		System.out.println("Network(s) exported to '" + outfile + "'.");
	}
}
}

/**
Expand Down
9 changes: 6 additions & 3 deletions manual/dna-manual.Rnw
Expand Up @@ -649,7 +649,8 @@ For more experienced users, here is a short version of the steps described below
\item In \R: install the necessary \R\ packages \texttt{rJava} and \texttt{remotes}.
\item In \R: install \rdna\ via
<<eval=FALSE, results = 'tex', message = FALSE>>=
remotes::install_github("leifeld/dna/rDNA@*release", INSTALL_opts = "--no-multiarch")
remotes::install_github("leifeld/dna/rDNA@*release",
INSTALL_opts = "--no-multiarch")
@
\end{enumerate}

Expand Down Expand Up @@ -966,7 +967,8 @@ Since we only need one function from the package \texttt{remotes} at this point,
Instead, you can write \code{remotes::} and then type the function you want to use.\footnote{The option \code{INSTALL\_opts = "--no-multiarch"} should normally not be necessary, but prevents errors on some operating systems.
Since \texttt{remotes} tries to test both the 32-bit and 64-bit version of a package during installation, the process inevitably fails as only one architecture of \java\ is available.}
<<eval=FALSE, results = 'tex', message = FALSE>>=
remotes::install_github("leifeld/dna/rDNA@*release", INSTALL_opts = "--no-multiarch")
remotes::install_github("leifeld/dna/rDNA@*release",
INSTALL_opts = "--no-multiarch")
@

After this is done as well, the final step of the installation is to test if \rdna\ can be loaded into \R\ correctly and to perform a basic operation with it---opening \dna\ from within \R.
Expand All @@ -987,7 +989,8 @@ Note that when updating \rdna\ you will also need the latest release of \dna\ fo

<<eval=FALSE, results = 'tex'>>=
# install.packages("remotes")
remotes::install_github("leifeld/dna/rDNA@*release", INSTALL_opts = "--no-multiarch")
remotes::install_github("leifeld/dna/rDNA@*release",
INSTALL_opts = "--no-multiarch")
dna_downloadJar() # update DNA as well to have matching versions
@

Expand Down
2 changes: 1 addition & 1 deletion rDNA/DESCRIPTION
@@ -1,5 +1,5 @@
Package: rDNA
Version: 2.1.15
Version: 2.1.16
Date: 2019-02-17
Title: Discourse Network Analysis in R
Authors@R:
Expand Down
121 changes: 78 additions & 43 deletions rDNA/R/rDNA.R
Expand Up @@ -4704,6 +4704,13 @@ print.dna_scale <- function(x, ...) {
#' construction (\code{invertTypes = FALSE}) or if they should be the
#' only values that should be included during network construction
#' (\code{invertTypes = TRUE}).
#' @param fileFormat An optional file format specification for saving the
#' resulting network(s) to a file instead of returning an object. Valid values
#' are \code{"csv"} (for network matrices or event lists), \code{"dl"} (for
#' UCINET DL full-matrix files), and \code{"graphml"} (for visone .graphml
#' files). The \code{"graphml"} specification is compatible with time windows.
#' @param outfile An optional output file name for saving the resulting
#' network(s) to a file instead of returning an object. A file is only
#' written if \code{fileFormat} is specified as well.
#' @param verbose A boolean value indicating whether details of network
#' construction should be printed to the R console.
#'
Expand Down Expand Up @@ -4761,6 +4768,8 @@ dna_network <- function(connection,
invertSources = FALSE,
invertSections = FALSE,
invertTypes = FALSE,
fileFormat = NULL,
outfile = NULL,
verbose = TRUE) {

# check and convert exclude arguments
Expand Down Expand Up @@ -4834,6 +4843,27 @@ dna_network <- function(connection,
stop("'normalization' must be 'no', 'activity', or 'prominence' when networkType = 'twomode'.")
}

# validate the optional file export arguments: the format must be one of the
# three known values, and the combination with time windows and event lists
# must be one of the supported ones
if (!is.null(fileFormat) && !fileFormat %in% c("csv", "dl", "graphml")) {
stop("'fileFormat' must be 'csv', 'dl', or 'graphml'.")
}
if (!is.null(fileFormat) && timewindow != "no" && fileFormat %in% c("csv", "dl")) {
stop("Only .graphml files are currently compatible with time windows.")
}
if (!is.null(fileFormat) && networkType == "eventlist" && fileFormat %in% c("dl", "graphml")) {
stop("Only .csv files are currently compatible with event lists.")
}
# NOTE(review): despite its name, 'fileExport' is TRUE when 'outfile' or
# 'fileFormat' is missing, i.e., when NO file export was requested; it is
# FALSE when both were supplied. Consider renaming wherever it is read.
if (is.null(outfile) || is.null(fileFormat)) {
fileExport <- TRUE
} else {
fileExport <- FALSE
}
# replace R NULL values by Java null references of class String
if (is.null(fileFormat)) {
fileFormat <- .jnull(class = "java/lang/String")
}
if (is.null(outfile)) {
outfile <- .jnull(class = "java/lang/String")
}

# call Java function to create network
.jcall(connection$dna_connection,
"V",
Expand Down Expand Up @@ -4866,52 +4896,57 @@ dna_network <- function(connection,
invertSources,
invertSections,
invertTypes,
outfile,
fileFormat,
verbose
)
if (networkType == "eventlist") {
objects <- .jcall(connection$dna_connection, "[Ljava/lang/Object;", "getEventListColumnsR", simplify = TRUE)
columnNames <- .jcall(connection$dna_connection, "[S", "getEventListColumnsRNames", simplify = TRUE)
dta <- data.frame(id = .jevalArray(objects[[1]]))
dta$time <- as.POSIXct(.jevalArray(objects[[2]]), origin = "1970-01-01")
dta$docId <- .jevalArray(objects[[3]])
dta$docTitle <- .jevalArray(objects[[4]])
dta$docAuthor <- .jevalArray(objects[[5]])
dta$docSource <- .jevalArray(objects[[6]])
dta$docSection <- .jevalArray(objects[[7]])
dta$docType <- .jevalArray(objects[[8]])
for (i in 1:length(columnNames)) {
dta[[columnNames[i]]] <- .jevalArray(objects[[i + 8]])
}
attributes(dta)$call <- match.call()
class(dta) <- c("dna_eventlist", class(dta))
return(dta)
} else if (timewindow == "no") {
mat <- .jcall(connection$dna_connection, "[[D", "getMatrix", simplify = TRUE)
rownames(mat) <- .jcall(connection$dna_connection, "[S", "getRowNames", simplify = TRUE)
colnames(mat) <- .jcall(connection$dna_connection, "[S", "getColumnNames", simplify = TRUE)
attributes(mat)$call <- match.call()
class(mat) <- c(paste0("dna_network_", networkType), class(mat))
return(mat)
} else {
timeLabels <- .jcall(connection$dna_connection, "[J", "getTimeWindowTimes", simplify = TRUE)
timeLabels <- as.POSIXct(timeLabels, origin = "1970-01-01")
numStatements <- .jcall(connection$dna_connection, "[I", "getTimeWindowNumStatements", simplify = TRUE)
mat <- list()
for (t in 1:length(timeLabels)) {
m <- .jcall(connection$dna_connection, "[[D", "getTimeWindowNetwork", as.integer(t - 1), simplify = TRUE)
rownames(m) <- .jcall(connection$dna_connection, "[S", "getTimeWindowRowNames", as.integer(t - 1), simplify = TRUE)
colnames(m) <- .jcall(connection$dna_connection, "[S", "getTimeWindowColumnNames", as.integer(t - 1), simplify = TRUE)
attributes(m)$call <- match.call()
class(m) <- c(paste0("dna_network_", networkType), class(m))
mat[[t]] <- m

if (isTRUE(fileExport)) {
if (networkType == "eventlist") {
objects <- .jcall(connection$dna_connection, "[Ljava/lang/Object;", "getEventListColumnsR", simplify = TRUE)
columnNames <- .jcall(connection$dna_connection, "[S", "getEventListColumnsRNames", simplify = TRUE)
dta <- data.frame(id = .jevalArray(objects[[1]]))
dta$time <- as.POSIXct(.jevalArray(objects[[2]]), origin = "1970-01-01")
dta$docId <- .jevalArray(objects[[3]])
dta$docTitle <- .jevalArray(objects[[4]])
dta$docAuthor <- .jevalArray(objects[[5]])
dta$docSource <- .jevalArray(objects[[6]])
dta$docSection <- .jevalArray(objects[[7]])
dta$docType <- .jevalArray(objects[[8]])
for (i in 1:length(columnNames)) {
dta[[columnNames[i]]] <- .jevalArray(objects[[i + 8]])
}
attributes(dta)$call <- match.call()
class(dta) <- c("dna_eventlist", class(dta))
return(dta)
} else if (timewindow == "no") {
mat <- .jcall(connection$dna_connection, "[[D", "getMatrix", simplify = TRUE)
rownames(mat) <- .jcall(connection$dna_connection, "[S", "getRowNames", simplify = TRUE)
colnames(mat) <- .jcall(connection$dna_connection, "[S", "getColumnNames", simplify = TRUE)
attributes(mat)$call <- match.call()
class(mat) <- c(paste0("dna_network_", networkType), class(mat))
return(mat)
} else {
timeLabels <- .jcall(connection$dna_connection, "[J", "getTimeWindowTimes", simplify = TRUE)
timeLabels <- as.POSIXct(timeLabels, origin = "1970-01-01")
numStatements <- .jcall(connection$dna_connection, "[I", "getTimeWindowNumStatements", simplify = TRUE)
mat <- list()
for (t in 1:length(timeLabels)) {
m <- .jcall(connection$dna_connection, "[[D", "getTimeWindowNetwork", as.integer(t - 1), simplify = TRUE)
rownames(m) <- .jcall(connection$dna_connection, "[S", "getTimeWindowRowNames", as.integer(t - 1), simplify = TRUE)
colnames(m) <- .jcall(connection$dna_connection, "[S", "getTimeWindowColumnNames", as.integer(t - 1), simplify = TRUE)
attributes(m)$call <- match.call()
class(m) <- c(paste0("dna_network_", networkType), class(m))
mat[[t]] <- m
}
dta <- list()
dta$networks <- mat
dta$time <- timeLabels
dta$numStatements <- numStatements
attributes(dta)$call <- match.call()
class(dta) <- c(paste0("dna_network_", networkType, "_timewindows"), class(dta))
return(dta)
}
dta <- list()
dta$networks <- mat
dta$time <- timeLabels
dta$numStatements <- numStatements
attributes(dta)$call <- match.call()
class(dta) <- c(paste0("dna_network_", networkType, "_timewindows"), class(dta))
return(dta)
}
}

Expand Down
12 changes: 11 additions & 1 deletion rDNA/man/dna_network.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7d41c9b

Please sign in to comment.