Skip to content

Commit

Permalink
Merge pull request #1705 from anisurrahman75/fix-xml-decoding
Browse files Browse the repository at this point in the history
Add charset converter to convert non `utf-8` char
  • Loading branch information
mialinx committed May 12, 2024
2 parents 497759e + 504ddb8 commit 9bff846
Showing 1 changed file with 4 additions and 16 deletions.
20 changes: 4 additions & 16 deletions internal/databases/sqlserver/blob/xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@ package blob

import (
"bytes"
"encoding/binary"
"encoding/xml"
"fmt"
"golang.org/x/net/html/charset"
"io"

"strings"
"unicode/utf16"
"unicode/utf8"
)

const (
Expand Down Expand Up @@ -121,9 +119,11 @@ func (bl *XBlockListIn) MarshalXML(e *xml.Encoder, start xml.StartElement) error

func ParseBlocklistXML(data []byte) (*XBlockListIn, error) {
bl := &XBlockListIn{}
data = utf16utf8(data, binary.LittleEndian)
d := xml.NewDecoder(bytes.NewBuffer(data))
d.CharsetReader = func(s string, r io.Reader) (io.Reader, error) {
if s == "utf-16" {
return charset.NewReader(r, "charset=utf-16")
}
return r, nil
}
err := d.Decode(bl)
Expand All @@ -133,18 +133,6 @@ func ParseBlocklistXML(data []byte) (*XBlockListIn, error) {
return bl, nil
}

// utf16utf8 re-encodes UTF-16 bytes as UTF-8.
//
// b is read as a sequence of 2-byte code units in byte order o. When b has an
// odd length, the dangling final byte cannot form a code unit and is replaced
// by U+FFFD (utf8.RuneError). Surrogate handling is delegated to utf16.Decode.
func utf16utf8(b []byte, o binary.ByteOrder) []byte {
	pairs := len(b) / 2
	hasDanglingByte := len(b)%2 != 0

	// Reserve room for every full code unit plus the optional replacement.
	units := make([]uint16, 0, pairs+1)
	for off := 0; off < 2*pairs; off += 2 {
		units = append(units, o.Uint16(b[off:]))
	}
	if hasDanglingByte {
		units = append(units, utf8.RuneError)
	}

	decoded := utf16.Decode(units)
	return []byte(string(decoded))
}

type XBlockListOut struct {
XMLName xml.Name `xml:"BlockList"`
CommittedBlocks struct {
Expand Down

0 comments on commit 9bff846

Please sign in to comment.