/
CharsetFormatter.cs
95 lines (83 loc) · 3.56 KB
/
CharsetFormatter.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Options;
using Microsoft.CodeAnalysis.Text;
using Microsoft.Extensions.Logging;
namespace Microsoft.CodeAnalysis.Tools.Formatters
{
/// <summary>
/// Document formatter that re-encodes a file so that its encoding matches the
/// <c>charset</c> value found in the analyzer config options.
/// </summary>
internal sealed class CharsetFormatter : DocumentFormatter
{
    /// <summary>Description reported when a file's encoding has to be fixed.</summary>
    protected override string FormatWarningDescription => Resources.Fix_file_encoding;

    /// <summary>
    /// UTF-8 encoding that does not emit a byte order mark. Created once and reused,
    /// rather than allocating a new instance on every access.
    /// </summary>
    private static Encoding Utf8 { get; } = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);

    /// <summary>ISO-8859-1 (Latin-1) encoding, looked up on demand by name.</summary>
    private static Encoding Latin1 => Encoding.GetEncoding("iso-8859-1");

    /// <summary>
    /// Produces the source text for <paramref name="document"/> in the configured charset.
    /// </summary>
    /// <param name="document">The document being formatted (not inspected here).</param>
    /// <param name="sourceText">The current text of the document.</param>
    /// <param name="optionSet">Formatting options (not used by this formatter).</param>
    /// <param name="analyzerConfigOptions">Options that may carry the <c>charset</c> setting.</param>
    /// <param name="formatOptions">Global format options (not used by this formatter).</param>
    /// <param name="logger">Logger (not used by this formatter).</param>
    /// <param name="cancellationToken">Token that cancels the scheduled work.</param>
    /// <returns>
    /// The original <paramref name="sourceText"/> when no charset is configured, when the text
    /// already uses the configured encoding, or when both encodings produce identical bytes;
    /// otherwise a new <see cref="SourceText"/> with the same content, the configured encoding
    /// and the original checksum algorithm.
    /// </returns>
    protected override Task<SourceText> FormatFileAsync(
        Document document,
        SourceText sourceText,
        OptionSet optionSet,
        AnalyzerConfigOptions? analyzerConfigOptions,
        FormatOptions formatOptions,
        ILogger logger,
        CancellationToken cancellationToken)
    {
        // The token is handed to Task.Run so that a cancelled run does not start the work at all.
        return Task.Run(
            () =>
            {
                if (!TryGetCharset(analyzerConfigOptions, out var encoding)
                    || sourceText.Encoding?.Equals(encoding) == true
                    || IsEncodingEquivalent(sourceText, encoding))
                {
                    return sourceText;
                }

                return SourceText.From(sourceText.ToString(), encoding, sourceText.ChecksumAlgorithm);
            },
            cancellationToken);
    }

    /// <summary>
    /// Determines whether writing the text with its current encoding and with
    /// <paramref name="encoding"/> yields exactly the same bytes.
    /// </summary>
    /// <param name="sourceText">The text whose current encoding is compared.</param>
    /// <param name="encoding">The encoding requested by configuration.</param>
    /// <returns><c>true</c> when both byte sequences are identical.</returns>
    /// <exception cref="System.InvalidOperationException">
    /// <paramref name="sourceText"/> has no encoding associated with it.
    /// </exception>
    private static bool IsEncodingEquivalent(SourceText sourceText, Encoding encoding)
    {
        if (sourceText.Encoding is null)
        {
            throw new System.InvalidOperationException("source text did not have an identifiable encoding");
        }

        var text = sourceText.ToString();
        var originalBytes = GetEncodedBytes(text, sourceText.Encoding);
        var encodedBytes = GetEncodedBytes(text, encoding);

        // Compare lengths first so differing sizes are rejected without walking the arrays.
        return originalBytes.Length == encodedBytes.Length
            && originalBytes.SequenceEqual(encodedBytes);
    }

    /// <summary>
    /// Writes <paramref name="text"/> through a <see cref="StreamWriter"/> that uses
    /// <paramref name="encoding"/> and returns the bytes that were produced.
    /// </summary>
    /// <param name="text">The text to encode.</param>
    /// <param name="encoding">The encoding used by the writer.</param>
    /// <returns>All bytes written to the underlying in-memory stream.</returns>
    private static byte[] GetEncodedBytes(string text, Encoding encoding)
    {
        // Start with a large initial capacity, double the character count with additional space for the BOM.
        // This is only the initial size; the stream is expandable if the output turns out larger.
        using var stream = new MemoryStream(text.Length * 2 + 3);
        using var streamWriter = new StreamWriter(stream, encoding);
        streamWriter.Write(text);

        // Flush before reading so buffered characters reach the stream.
        streamWriter.Flush();
        return stream.ToArray();
    }

    /// <summary>
    /// Reads the <c>charset</c> option, if present, and maps it to an <see cref="Encoding"/>.
    /// </summary>
    /// <param name="analyzerConfigOptions">The options to query; may be <c>null</c>.</param>
    /// <param name="encoding">The mapped encoding when the option exists; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when a <c>charset</c> option was found.</returns>
    private static bool TryGetCharset(AnalyzerConfigOptions? analyzerConfigOptions, [NotNullWhen(true)] out Encoding? encoding)
    {
        if (analyzerConfigOptions is object &&
            analyzerConfigOptions.TryGetValue("charset", out var charsetOption))
        {
            encoding = GetCharset(charsetOption);
            return true;
        }

        encoding = null;
        return false;
    }

    /// <summary>
    /// Maps a <c>charset</c> option value to the corresponding <see cref="Encoding"/>.
    /// </summary>
    /// <param name="charsetOption">The option value, e.g. <c>latin1</c> or <c>utf-16le</c>.</param>
    /// <returns>
    /// The matching encoding. Any value that is not listed explicitly maps to UTF-8 without a BOM.
    /// </returns>
    public static Encoding GetCharset(string charsetOption)
    {
        return charsetOption switch
        {
            "latin1" => Latin1,
            "utf-8-bom" => Encoding.UTF8, // UTF-8 with BOM Marker
            "utf-16be" => Encoding.BigEndianUnicode, // Big Endian with BOM Marker
            "utf-16le" => Encoding.Unicode, // Little Endian with BOM Marker
            _ => Utf8,
        };
    }
}
}