-
Notifications
You must be signed in to change notification settings - Fork 219
/
LatexOperatorDocGenerator.java
251 lines (230 loc) · 10.6 KB
/
LatexOperatorDocGenerator.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
/**
* Copyright (C) 2001-2019 RapidMiner GmbH
*/
package com.rapidminer.doc;
import java.io.PrintWriter;
import java.io.StringReader;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Tools;
/**
* Formats operator documentation in LaTeX style.
*
* @rapidminer.todo Lookup class when link is found and decide which tag to use (op, ioobj, ...)
* @author Simon Fischer, Ingo Mierswa
*/
public class LatexOperatorDocGenerator extends AbstractOperatorDocGenerator {
public static final String[][] TAGS = { { "", "" }, // operator
{ "\\operator{", "}" }, // operator name
{ "\\group{", "}" }, // group
{ Tools.getLineSeparator() + "\\begin{parameters}", "\\end{parameters}" }, // parameter list
{ "", "" }, // parameter item
{ "\\reqpar[", "]" }, // required parameter
{ "\\optpar[", "]" }, // optional parameter
{ "", "" }, // parameter description
{ "\\paragraph{Short description:} ", "" }, // short description
{ "\\opdescr ", "" }, // operator description
{ "\\begin{opin} ", "\\end{opin}" }, // input classes list
{ "\\begin{opout} ", "\\end{opout}" }, // output classes list
{ "\\item[", "]" }, // IO class
{ Tools.getLineSeparator() + "\\paragraph{Inner operators:}", Tools.getLineSeparator() }, // inner operators
{ Tools.getLineSeparator() + "\\begin{values}", "\\end{values}" }, // value list
{ "", "" }, // value item
{ "\\val[", "]" }, // value name
{ "", "" }, // value description
{ "\\index{", "}" }, // index entry
{ "\\par References: ", "" }, // reference section
{ "\\cite{", "}" }, // reference entry
{ Tools.getLineSeparator() + "\\paragraph{Further information:}", Tools.getLineSeparator() }, // technical information (external references)
{ "\\emph{", "}" + Tools.getLineSeparator() }, // deprecation info
{ Tools.getLineSeparator() + "\\paragraph{Learner capabilities:}", Tools.getLineSeparator() } // learner capabilities
};
public String getOpenTag(int tagNo) {
return TAGS[tagNo][0];
}
public String getCloseTag(int tagNo) {
return TAGS[tagNo][1];
}
public String marginIcon(String iconName) {
String fig = "\\includegraphics{graphics/" + iconName + "}";
return "\\marginpar[\\flushright" + fig + "]{" + fig + "}";
}
public String escape(String toEscape) {
String escaped = toEscape;
escaped = escaped.replaceAll("MACRO_START", "\\\\% \\\\{"); // hack for macro definitions
escaped = escaped.replaceAll("MACRO_END", "\\\\}"); // hack for macro definitions
escaped = escaped.replaceAll("_", "\\\\_");
escaped = escaped.replaceAll("\\$", "\\\\\\$");
escaped = escaped.replaceAll("\u221E", "\\$\\\\infty\\$");
escaped = escaped.replaceAll("ä", "\\\\\"a");
escaped = escaped.replaceAll("ö", "\\\\\"o");
escaped = escaped.replaceAll("ü", "\\\\\"u");
escaped = escaped.replaceAll("Ä", "\\\\\"A");
escaped = escaped.replaceAll("Ö", "\\\\\"O");
escaped = escaped.replaceAll("Ü", "\\\\\"U");
escaped = escaped.replaceAll("ß", "\\\\\"s");
escaped = escaped.replaceAll(" ", "\\\\ ");
escaped = escaped.replaceAll("(\\w)"", "$1''");
escaped = escaped.replaceAll(""", "``");
escaped = escaped.replaceAll("#", "\\\\#");
escaped = escaped.replaceAll("\\[", "\\{\\[\\}");
escaped = escaped.replaceAll("\\]", "\\{\\]\\}");
escaped = escaped.replaceAll("RapidMiner", "\\\\RAPIDMINER");
escaped = escaped.replaceAll("\\\\s", "\\$\\\\backslash\\$s"); // hack for regular expressions (ExampleSource)
escaped = escaped.replaceAll("\\\\t", "\\$\\\\backslash\\$t"); // hack for regular expressions (ExampleSource)
escaped = escaped.replaceAll("\\|", "\\$|\\$");
escaped = escaped.replaceAll("\\^", "");
return escaped;
}
public void beginGroup(String groupName, PrintWriter out) {
out.println("\\pagebreak[4]");
if (groupName != null) {
groupName = groupName.replace(' ', '_');
out.println("\\input{OpGroup" + groupName + ".tex}");
} else {
out.println("\\section{Basic operators}");
}
}
public void endGroup(String groupName, PrintWriter out) {
out.println("\\vfill");
}
public String transformHTMLJavadocComment(String comment, final Class clazz, final String operatorName) {
try {
SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
comment = "<body>" + comment + "</body>";
final StringBuffer transformed = new StringBuffer();
final Stack<String> closingTagStack = new Stack<String>();
parser.parse(new InputSource(new StringReader(comment)), new DefaultHandler() {
public void characters(char[] ch, int start, int length) throws SAXException {
transformed.append(ch, start, length);
}
public InputSource resolveEntity(String publicId, String systemId) throws SAXException {
LogService.getGlobal().log("Entity: " + publicId, LogService.STATUS);
String latex;
if (systemId.equals(""")) {
latex = "``";
} else if (systemId.equals("ä")) {
latex = "\\\"a";
} else if (systemId.equals("ö")) {
latex = "\\\"o";
} else if (systemId.equals("ü")) {
latex = "\\\"u";
} else if (systemId.equals("Ä")) {
latex = "\\\"A";
} else if (systemId.equals("Ö")) {
latex = "\\\"O";
} else if (systemId.equals("Ü")) {
latex = "\\\"U";
} else if (systemId.equals("ß")) {
latex = "\\\"s";
} else if (systemId.equals(" ")) {
latex = "\\ ";
} else {
LogService.getGlobal().log("Unknown entity: " + systemId, LogService.WARNING);
latex = systemId;
}
return new InputSource(new StringReader(latex));
}
public void endElement(String uri, String localName, String qName) {
transformed.append(closingTagStack.pop());
}
public void startElement(String uri, String localName, String qName, Attributes attributes) {
qName = qName.toLowerCase();
if (qName.equals("code")) {
transformed.append("\\java{");
closingTagStack.push("}");
} else if (qName.equals("em")) {
transformed.append("\\emph{");
closingTagStack.push("}");
} else if (qName.equals("var")) {
transformed.append("\\para{");
closingTagStack.push("}");
} else if (qName.equals("b")) {
LogService.getGlobal().log(operatorName + " (" + clazz.getName() + "): physical markup used (b,i, or tt).", LogService.WARNING);
transformed.append("\\textbf{");
closingTagStack.push("}");
} else if (qName.equals("i")) {
LogService.getGlobal().log(operatorName + " (" + clazz.getName() + "): physical markup used (b,i, or tt).", LogService.WARNING);
transformed.append("\\textit{");
closingTagStack.push("}");
} else if (qName.equals("tt")) {
LogService.getGlobal().log(operatorName + " (" + clazz.getName() + "): physical markup used (b,i, or tt).", LogService.WARNING);
transformed.append("\\texttt{");
closingTagStack.push("}");
} else if (qName.equals("center")) {
transformed.append(Tools.getLineSeparator() + "\\begin{center}" + Tools.getLineSeparator());
closingTagStack.push(Tools.getLineSeparator() + "\\end{center}" + Tools.getLineSeparator());
} else if (qName.equals("ol")) {
transformed.append(Tools.getLineSeparator() + "\\begin{enumerate}" + Tools.getLineSeparator());
closingTagStack.push(Tools.getLineSeparator() + "\\end{enumerate}" + Tools.getLineSeparator());
} else if (qName.equals("ul")) {
transformed.append(Tools.getLineSeparator() + "\\begin{itemize}" + Tools.getLineSeparator());
closingTagStack.push(Tools.getLineSeparator() + "\\end{itemize}" + Tools.getLineSeparator());
} else if (qName.equals("li")) {
transformed.append(Tools.getLineSeparator() + "\\item ");
closingTagStack.push("");
} else if (qName.equals("dl")) {
transformed.append(Tools.getLineSeparator() + "\\begin{description}" + Tools.getLineSeparator());
closingTagStack.push(Tools.getLineSeparator() + "\\end{description}" + Tools.getLineSeparator());
} else if (qName.equals("dt")) {
transformed.append(Tools.getLineSeparator() + "\\item[");
closingTagStack.push("]");
} else if (qName.equals("dd")) {
// nothing for dd
closingTagStack.push("");
} else if (qName.equals("body")) {
transformed.append("");
closingTagStack.push("");
} else if (qName.equals("sup")) {
transformed.append("$^{");
closingTagStack.push("}$");
} else if (qName.equals("sub")) {
transformed.append("$_{");
closingTagStack.push("}$");
} else if (qName.equals("br")) {
transformed.append("\\par" + Tools.getLineSeparator());
closingTagStack.push("");
} else if (qName.equals("p")) {
transformed.append("\\par" + Tools.getLineSeparator());
closingTagStack.push("");
} else if (qName.equals("a")) {
closingTagStack.push("\\footnote{\\url{" + attributes.getValue("href") + "}}");
} else if (qName.equals("h1") || qName.equals("h2") || qName.equals("h3") || qName.equals("h4") || qName.equals("h5")) {
transformed.append(Tools.getLineSeparator() + "\\paragraph{");
closingTagStack.push("}");
} else if (qName.equals("pre")) {
transformed.append("\\begin{verbatim}");
closingTagStack.push("\\end{verbatim}");
} else {
transformed.append("");
closingTagStack.push("");
LogService.getGlobal().log("Unknown tag: " + qName + " (" + operatorName + " (" + clazz.getName() + "))", LogService.WARNING);
}
}
});
StringBuffer linksReplaced = new StringBuffer();
Pattern pattern = Pattern.compile("\\{@link (.*?)\\}");
Matcher matcher = pattern.matcher(transformed);
while (matcher.find()) {
String classname = matcher.group(1);
int period = classname.lastIndexOf(".");
if (period != -1)
classname = classname.substring(period + 1);
matcher.appendReplacement(linksReplaced, "\\\\op{" + classname + "}");
}
matcher.appendTail(linksReplaced);
return linksReplaced.toString();
} catch (Throwable e) {
LogService.getGlobal().log(operatorName + " (" + clazz.getName() + "): " + e, LogService.ERROR);
return "Cannot parse class comment: " + e;
}
}
}