-
Notifications
You must be signed in to change notification settings - Fork 184
/
ExHtmlLoadOptions.cs
347 lines (285 loc) · 14.9 KB
/
ExHtmlLoadOptions.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
// Copyright (c) 2001-2024 Aspose Pty Ltd. All Rights Reserved.
//
// This file is part of Aspose.Words. The source code in this file
// is only intended as a supplement to the documentation, and is provided
// "as is", without warranty of any kind, either expressed or implied.
//////////////////////////////////////////////////////////////////////////
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Aspose.Pdf.Text;
using Aspose.Words;
using Aspose.Words.DigitalSignatures;
using Aspose.Words.Drawing;
using Aspose.Words.Fields;
using Aspose.Words.Loading;
using Aspose.Words.Markup;
using Aspose.Words.Saving;
using NUnit.Framework;
namespace ApiExamples
{
[TestFixture]
internal class ExHtmlLoadOptions : ApiExampleBase
{
[TestCase(true, Category = "SkipMono")]
[TestCase(false, Category = "SkipMono")]
public void SupportVml(bool supportVml)
{
    //ExStart
    //ExFor:HtmlLoadOptions.#ctor
    //ExFor:HtmlLoadOptions.SupportVml
    //ExSummary:Shows how to support conditional comments while loading an HTML document.
    // When "SupportVml" is true, VML code is taken into account while parsing the loaded document.
    HtmlLoadOptions loadOptions = new HtmlLoadOptions { SupportVml = supportVml };

    // The input document holds a JPEG image inside "<!--[if gte vml 1]>" tags,
    // and a different PNG image inside "<![if !vml]>" tags.
    // With the "SupportVml" flag set to "true", Aspose.Words will load the JPEG.
    // With the flag set to "false", Aspose.Words will only load the PNG.
    Document doc = new Document(MyDir + "VML conditional.htm", loadOptions);

    ImageType expectedImageType = supportVml ? ImageType.Jpeg : ImageType.Png;
    Shape firstShape = (Shape)doc.GetChild(NodeType.Shape, 0, true);

    Assert.AreEqual(expectedImageType, firstShape.ImageData.ImageType);
    //ExEnd

    // Outside of the published example, also verify the dimensions of the loaded image.
    TestUtil.VerifyImageInShape(400, 400, expectedImageType, firstShape);
}
//ExStart
//ExFor:HtmlLoadOptions.WebRequestTimeout
//ExSummary:Shows how to set a time limit for web requests when loading a document with external resources linked by URLs.
[Test] //ExSkip
public void WebRequestTimeout()
{
    // Create a new HtmlLoadOptions object and verify its default timeout threshold for a web request.
    HtmlLoadOptions options = new HtmlLoadOptions();

    // When loading an Html document with resources externally linked by a web address URL,
    // Aspose.Words will abort web requests that fail to fetch the resources within this time limit, in milliseconds.
    Assert.AreEqual(100000, options.WebRequestTimeout);

    // Set a WarningCallback that will record all warnings that occur during loading.
    ListDocumentWarnings warningCallback = new ListDocumentWarnings();
    options.WarningCallback = warningCallback;

    // Prepare a document with an image that is linked by URL.
    // Loading this image requires a web request, which has to complete within our time limit.
    string html = $@"
<html>
<img src=""{ImageUrl}"" alt=""Aspose logo"" style=""width:400px;height:400px;"">
</html>
";

    // Set an unreasonable timeout limit so that the web request for the image cannot complete in time.
    options.WebRequestTimeout = 0;

    // Keep the stream alive for as long as we work with the document, then dispose of it.
    using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(html)))
    {
        Document doc = new Document(stream, options);

        // The callback hands back the same list every time, so fetch it once.
        List<WarningInfo> warnings = warningCallback.Warnings();
        Assert.AreEqual(2, warnings.Count);

        // A web request that fails to obtain an image within the time limit will still produce an image.
        // However, the image will be the red 'x' that commonly signifies missing images.
        Shape imageShape = (Shape)doc.GetChild(NodeType.Shape, 0, true);
        Assert.AreEqual(924, imageShape.ImageData.ImageBytes.Length);

        // Our custom callback has picked up the warnings from the timed out web request.
        Assert.AreEqual(WarningSource.Html, warnings[0].Source);
        Assert.AreEqual(WarningType.DataLoss, warnings[0].WarningType);
        Assert.AreEqual($"Couldn't load a resource from \'{ImageUrl}\'.", warnings[0].Description);

        Assert.AreEqual(WarningSource.Html, warnings[1].Source);
        Assert.AreEqual(WarningType.DataLoss, warnings[1].WarningType);
        Assert.AreEqual("Image has been replaced with a placeholder.", warnings[1].Description);

        doc.Save(ArtifactsDir + "HtmlLoadOptions.WebRequestTimeout.docx");
    }
}
/// <summary>
/// Collects every warning reported during a document loading operation into a List.
/// </summary>
private class ListDocumentWarnings : IWarningCallback
{
    private readonly List<WarningInfo> mWarnings = new List<WarningInfo>();

    /// <summary>
    /// Records the given warning by appending it to the collected list.
    /// </summary>
    public void Warning(WarningInfo info) => mWarnings.Add(info);

    /// <summary>
    /// Returns the list that holds all warnings recorded so far.
    /// </summary>
    public List<WarningInfo> Warnings() => mWarnings;
}
//ExEnd
[Test]
public void LoadHtmlFixed()
{
    // Convert a .docx document to fixed-page HTML so that we have a fixed-page file to load back.
    string fixedHtmlPath = ArtifactsDir + "HtmlLoadOptions.Fixed.html";

    Document doc = new Document(MyDir + "Rendering.docx");
    HtmlFixedSaveOptions saveOptions = new HtmlFixedSaveOptions();
    saveOptions.SaveFormat = SaveFormat.HtmlFixed;
    doc.Save(fixedHtmlPath, saveOptions);

    // Load the fixed-page HTML with a callback that records all warnings raised during loading.
    ListDocumentWarnings warningCallback = new ListDocumentWarnings();
    HtmlLoadOptions loadOptions = new HtmlLoadOptions { WarningCallback = warningCallback };
    doc = new Document(fixedHtmlPath, loadOptions);

    // Exactly one warning is expected, and it reports the fixed-page structure.
    List<WarningInfo> warnings = warningCallback.Warnings();
    Assert.AreEqual(1, warnings.Count);

    WarningInfo warning = warnings[0];
    Assert.AreEqual(WarningSource.Html, warning.Source);
    Assert.AreEqual(WarningType.MajorFormattingLoss, warning.WarningType);
    Assert.AreEqual("The document is fixed-page HTML. Its structure may not be loaded correctly.", warning.Description);
}
[Test]
public void EncryptedHtml()
{
    //ExStart
    //ExFor:HtmlLoadOptions.#ctor(String)
    //ExSummary:Shows how to encrypt an Html document, and then open it using a password.
    // Sign an encrypted .docx, and write the result out as an encrypted HTML document.
    CertificateHolder certificateHolder = CertificateHolder.Create(MyDir + "morzal.pfx", "aw");

    SignOptions signOptions = new SignOptions();
    signOptions.Comments = "Comment";
    signOptions.SignTime = DateTime.Now;
    signOptions.DecryptionPassword = "docPassword";

    string sourcePath = MyDir + "Encrypted.docx";
    string encryptedHtmlPath = ArtifactsDir + "HtmlLoadOptions.EncryptedHtml.html";
    DigitalSignatureUtil.Sign(sourcePath, encryptedHtmlPath, certificateHolder, signOptions);

    // Reading the document back requires its decryption password,
    // which we supply through the HtmlLoadOptions constructor.
    HtmlLoadOptions loadOptions = new HtmlLoadOptions("docPassword");

    Assert.AreEqual(signOptions.DecryptionPassword, loadOptions.Password);

    Document doc = new Document(encryptedHtmlPath, loadOptions);
    string loadedText = doc.GetText().Trim();

    Assert.AreEqual("Test encrypted document.", loadedText);
    //ExEnd
}
[Test]
public void BaseUri()
{
    //ExStart
    //ExFor:HtmlLoadOptions.#ctor(LoadFormat,String,String)
    //ExFor:LoadOptions.#ctor(LoadFormat, String, String)
    //ExFor:LoadOptions.LoadFormat
    //ExFor:LoadFormat
    //ExSummary:Shows how to specify a base URI when opening an html document.
    // The .html document that we are about to load links an image by a relative URI,
    // but the image is stored in a different location. The relative URI therefore has to be
    // resolved into an absolute one, and an HtmlLoadOptions object lets us supply the base URI for that.
    HtmlLoadOptions loadOptions = new HtmlLoadOptions(LoadFormat.Html, "", ImageDir);

    Assert.AreEqual(LoadFormat.Html, loadOptions.LoadFormat);

    Document doc = new Document(MyDir + "Missing image.html", loadOptions);

    // The link was broken in the input .html, but the custom base URI repaired it.
    NodeCollection shapes = doc.GetChildNodes(NodeType.Shape, true);
    Shape imageShape = (Shape)shapes[0];

    Assert.True(imageShape.IsImage);

    // The saved document will display the image that was missing.
    string outputPath = ArtifactsDir + "HtmlLoadOptions.BaseUri.docx";
    doc.Save(outputPath);
    //ExEnd

    // Reload the saved document and make sure that the image data made it into the output.
    Document savedDoc = new Document(outputPath);
    Shape savedShape = (Shape)savedDoc.GetChild(NodeType.Shape, 0, true);

    Assert.True(savedShape.ImageData.ImageBytes.Length > 0);
}
[Test]
public void GetSelectAsSdt()
{
    //ExStart
    //ExFor:HtmlLoadOptions.PreferredControlType
    //ExSummary:Shows how to set preferred type of document nodes that will represent imported <input> and <select> elements.
    const string html = @"
<html>
<select name='ComboBox' size='1'>
<option value='val1'>item1</option>
<option value='val2'></option>
</select>
</html>
";
    // Ask the loader to represent imported controls as structured document tags.
    HtmlLoadOptions htmlLoadOptions = new HtmlLoadOptions
    {
        PreferredControlType = HtmlControlType.StructuredDocumentTag
    };

    byte[] htmlBytes = Encoding.UTF8.GetBytes(html);
    Document doc = new Document(new MemoryStream(htmlBytes), htmlLoadOptions);

    // Take the first structured document tag found in the loaded document.
    StructuredDocumentTag tag =
        (StructuredDocumentTag)doc.GetChildNodes(NodeType.StructuredDocumentTag, true)[0];
    //ExEnd

    // Both <option> elements are expected as list items that carry their "value" attributes.
    Assert.AreEqual(2, tag.ListItems.Count);
    Assert.AreEqual("val1", tag.ListItems[0].Value);
    Assert.AreEqual("val2", tag.ListItems[1].Value);
}
/// <summary>
/// Loads an HTML snippet with a single text input using default load options
/// and verifies that it is imported as one form field carrying the input's value.
/// </summary>
[Test]
public void GetInputAsFormField()
{
    const string html = @"
<html>
<input type='text' value='Input value text' />
</html>
";
    // By default, "HtmlLoadOptions.PreferredControlType" value is "HtmlControlType.FormField".
    // So, we do not set this value.
    HtmlLoadOptions htmlLoadOptions = new HtmlLoadOptions();

    // Keep the stream alive while we work with the document, then dispose of it.
    using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(html)))
    {
        Document doc = new Document(stream, htmlLoadOptions);
        NodeCollection nodes = doc.GetChildNodes(NodeType.FormField, true);

        Assert.AreEqual(1, nodes.Count);

        FormField formField = (FormField) nodes[0];
        Assert.AreEqual("Input value text", formField.Result);
    }
}
/// <summary>
/// Loads an HTML document that contains a noscript element with the given
/// "IgnoreNoscriptElements" setting, and saves the result as a PDF into the artifacts directory.
/// </summary>
/// <param name="ignoreNoscriptElements">Value assigned to "HtmlLoadOptions.IgnoreNoscriptElements".</param>
[TestCase(true)]
[TestCase(false)]
public void IgnoreNoscriptElements(bool ignoreNoscriptElements)
{
    //ExStart
    //ExFor:HtmlLoadOptions.IgnoreNoscriptElements
    //ExSummary:Shows how to ignore <noscript> HTML elements.
    const string html = @"
<html>
<head>
<title>NOSCRIPT</title>
<meta http-equiv=""Content-Type"" content=""text/html; charset=utf-8"">
<script type=""text/javascript"">
alert(""Hello, world!"");
</script>
</head>
<body>
<noscript><p>Your browser does not support JavaScript!</p></noscript>
</body>
</html>";
    HtmlLoadOptions htmlLoadOptions = new HtmlLoadOptions();
    htmlLoadOptions.IgnoreNoscriptElements = ignoreNoscriptElements;

    // Keep the stream alive while we work with the document, then dispose of it.
    using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(html)))
    {
        Document doc = new Document(stream, htmlLoadOptions);
        doc.Save(ArtifactsDir + "HtmlLoadOptions.IgnoreNoscriptElements.pdf");
    }
    //ExEnd
}
/// <summary>
/// Runs <see cref="IgnoreNoscriptElements"/> and then checks the text extracted from the PDF it saved:
/// the text must be empty when noscript elements are ignored, and must equal the noscript paragraph otherwise.
/// </summary>
/// <param name="ignoreNoscriptElements">Setting forwarded to <see cref="IgnoreNoscriptElements"/>.</param>
[TestCase(true)]
[TestCase(false)]
public void UsePdfDocumentForIgnoreNoscriptElements(bool ignoreNoscriptElements)
{
    IgnoreNoscriptElements(ignoreNoscriptElements);

    // Aspose.Pdf.Document is disposable; release it once the text has been extracted.
    // NOTE(review): presumably the PDF document keeps the artifact file open until disposed,
    // which would get in the way of the next test case rewriting the same file - confirm.
    using (Aspose.Pdf.Document pdfDoc = new Aspose.Pdf.Document(ArtifactsDir + "HtmlLoadOptions.IgnoreNoscriptElements.pdf"))
    {
        TextAbsorber textAbsorber = new TextAbsorber();
        textAbsorber.Visit(pdfDoc);

        Assert.AreEqual(ignoreNoscriptElements ? "" : "Your browser does not support JavaScript!", textAbsorber.Text);
    }
}
/// <summary>
/// Loads an HTML document with nested bordered div elements using the given block import mode,
/// and saves the result as a .docx into the artifacts directory.
/// </summary>
/// <param name="blockImportMode">Value assigned to "HtmlLoadOptions.BlockImportMode".</param>
[TestCase(BlockImportMode.Preserve)]
[TestCase(BlockImportMode.Merge)]
public void BlockImport(BlockImportMode blockImportMode)
{
    //ExStart
    //ExFor:HtmlLoadOptions.BlockImportMode
    //ExFor:BlockImportMode
    //ExSummary:Shows how properties of block-level elements are imported from HTML-based documents.
    const string html = @"
<html>
<div style='border:dotted'>
<div style='border:solid'>
<p>paragraph 1</p>
<p>paragraph 2</p>
</div>
</div>
</html>";
    HtmlLoadOptions loadOptions = new HtmlLoadOptions();
    // Set the new mode of import HTML block-level elements.
    loadOptions.BlockImportMode = blockImportMode;

    // Keep the stream alive while we work with the document, then dispose of it.
    using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(html)))
    {
        Document doc = new Document(stream, loadOptions);
        doc.Save(ArtifactsDir + "HtmlLoadOptions.BlockImport.docx");
    }
    //ExEnd
}
[Test]
public void FontFaceRules()
{
    //ExStart:FontFaceRules
    //GistId:5f20ac02cb42c6b08481aa1c5b0cd3db
    //ExFor:HtmlLoadOptions.SupportFontFaceRules
    //ExSummary:Shows how to load declared "@font-face" rules.
    // Turn on support for "@font-face" rules before loading the document.
    HtmlLoadOptions loadOptions = new HtmlLoadOptions { SupportFontFaceRules = true };
    Document doc = new Document(MyDir + "Html with FontFace.html", loadOptions);

    // The first font info entry of the loaded document is expected to carry this name.
    string firstFontName = doc.FontInfos[0].Name;

    Assert.AreEqual("Bitstream Vera Serif Bold", firstFontName);
    //ExEnd:FontFaceRules
}
}
}