Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tutorial07 中关于将数据类型字符串 转 json文本字符串的 问题 #227

Open
smalluncle opened this issue Jul 23, 2022 · 0 comments

Comments

@smalluncle
Copy link

smalluncle commented Jul 23, 2022

问题：TEST_ROUNDTRIP("\"\\uD834\\uDD1E\"") 不通过。
D:\code\CPP_projects\json\json-tutorial-master\tutorial07_answer\test.c:414: expect: "\uD834\uDD1E" actual: "饾劄"
394/395 (99.75%) passed

1、在 JSON 解析器中，像 "\"\\uD834\\uDD1E\"" 这样的字符串会被解析为 "\xF0\x9D\x84\x9E" 这种格式（UTF-8 字节序列）的字符串；
2、在 JSON 生成器中，传入 "\xF0\x9D\x84\x9E" 这种格式的字符串去生成 JSON 文本字符串时，代码遇到不小于 0x20 的字节 ch 会直接 PUTC(c, ch) 原样输出，这是不是有问题呢？是不是应该考虑转义成 \uXXXX 形式的 Unicode 转义？

下面是answer中未优化的代码
/*
 * Emit the JSON string form of the len bytes at s through the PUTC/PUTS
 * macros (defined elsewhere in the project), wrapped in double quotes.
 *
 * The quote, the backslash and the five control characters that have a
 * short form are written as two-character escapes. Any other byte below
 * 0x20 is written as \u00XX with upper-case hex digits. Every remaining
 * byte, including bytes >= 0x80, is emitted unchanged.
 *
 * s must not be NULL.
 */
static void lept_stringify_string(lept_context* c, const char* s, size_t len) {
    static const char hex_digits[] = "0123456789ABCDEF";
    size_t pos;

    assert(s != NULL);
    PUTC(c, '"');
    for (pos = 0; pos < len; pos++) {
        const unsigned char byte = (unsigned char)s[pos];

        if (byte == '\"') {
            PUTS(c, "\\\"", 2);
        } else if (byte == '\\') {
            PUTS(c, "\\\\", 2);
        } else if (byte == '\b') {
            PUTS(c, "\\b", 2);
        } else if (byte == '\f') {
            PUTS(c, "\\f", 2);
        } else if (byte == '\n') {
            PUTS(c, "\\n", 2);
        } else if (byte == '\r') {
            PUTS(c, "\\r", 2);
        } else if (byte == '\t') {
            PUTS(c, "\\t", 2);
        } else if (byte < 0x20) {
            /* byte fits in two hex digits, so the escape is always \u00XX */
            char escape[6];
            escape[0] = '\\';
            escape[1] = 'u';
            escape[2] = '0';
            escape[3] = '0';
            escape[4] = hex_digits[byte >> 4];
            escape[5] = hex_digits[byte & 0x0F];
            PUTS(c, escape, 6);
        } else {
            PUTC(c, s[pos]);
        }
    }
    PUTC(c, '"');
}

下面是我写的代码,对于上述问题的测试是通过了的,不知道我理解的对不对
/*
 * Write the low `len` hexadecimal digits of `code` into `str`, most
 * significant digit first, using upper-case letters, then NUL-terminate.
 *
 * str must have room for len + 1 bytes. When len is larger than the number
 * of hex digits in `code`, the leading positions are filled with '0'.
 */
static void hex2str(unsigned short code, char* str, int len) {
    static const char digits[] = "0123456789ABCDEF";
    int pos = len;

    str[pos] = '\0';
    /* Fill from the right: each step consumes the lowest remaining nibble. */
    while (pos > 0) {
        pos--;
        str[pos] = digits[code & 0xf];
        code >>= 4;
    }
}
/*
 * Decode one multi-byte UTF-8 sequence starting at p, where at most `avail`
 * bytes (avail >= 1) may be read.
 *
 * On success stores the code point in *cp and returns the sequence length
 * (2, 3 or 4). Returns 0, leaving *cp untouched, when p does not start a
 * well-formed sequence: not a lead byte, truncated input, a bad continuation
 * byte, an overlong form, an encoded surrogate (U+D800..U+DFFF) or a value
 * above U+10FFFF.
 */
static size_t lept_decode_utf8_multibyte(const unsigned char* p, size_t avail, unsigned long* cp) {
    /* Smallest code point that legitimately needs a sequence of length n. */
    static const unsigned long min_cp[5] = { 0, 0, 0x80, 0x800, 0x10000 };
    const unsigned char lead = p[0];
    unsigned long u;
    size_t n;
    size_t k;

    if (lead >= 0xC0 && lead <= 0xDF) {
        n = 2;
        u = lead & 0x1Fu;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
        n = 3;
        u = lead & 0x0Fu;
    } else if (lead >= 0xF0 && lead <= 0xF7) {
        n = 4;
        u = lead & 0x07u;   /* low three bits only */
    } else {
        return 0;
    }
    if (avail < n) {
        return 0;           /* sequence would run past the end of the input */
    }
    for (k = 1; k < n; k++) {
        if ((p[k] & 0xC0) != 0x80) {
            return 0;
        }
        u = (u << 6) | (p[k] & 0x3Fu);
    }
    if (u < min_cp[n] || u > 0x10FFFFul || (u >= 0xD800 && u <= 0xDFFF)) {
        return 0;
    }
    *cp = u;
    return n;
}

/* Emit one \uXXXX escape for the 16-bit code unit `unit`. */
static void lept_put_u_escape(lept_context* c, unsigned short unit) {
    char hex[5];            /* four digits plus the NUL that hex2str writes */

    hex2str(unit, hex, 4);
    PUTS(c, "\\u", 2);
    PUTS(c, hex, 4);
}

/*
 * Emit the JSON string form of the len bytes at s through the PUTC/PUTS
 * macros, wrapped in double quotes, producing ASCII-only output for
 * well-formed UTF-8 input.
 *
 *  - '"' and '\\' are backslash-escaped; \b \f \n \r \t use their short
 *    escapes; every other byte below 0x20 (including NUL) becomes \u00XX.
 *  - Other ASCII bytes (0x20..0x7F) are emitted unchanged.
 *  - A well-formed multi-byte UTF-8 sequence is decoded and written as
 *    \uXXXX, or as a UTF-16 surrogate pair \uD8xx\uDCxx for code points
 *    above U+FFFF.
 *  - A byte that does not start a well-formed sequence is emitted
 *    unchanged, so no input byte is dropped or replaced.
 *
 * Escaping non-ASCII characters is optional in JSON (RFC 8259, section 7);
 * emitting the raw UTF-8 bytes would be equally valid output.
 *
 * s must point to at least len readable bytes.
 */
static void lept_stringify_string(lept_context* c, const char* s, size_t len) {
    const unsigned char* bytes = (const unsigned char*)s;
    size_t i = 0;

    PUTC(c, '"');
    while (i < len) {
        const unsigned char ch = bytes[i];
        unsigned long cp = 0;
        size_t n;

        if (ch < 0x80) {
            switch (ch) {
                case '\"': PUTS(c, "\\\"", 2); break;
                case '\\': PUTS(c, "\\\\", 2); break;
                case '\b': PUTS(c, "\\b", 2); break;
                case '\f': PUTS(c, "\\f", 2); break;
                case '\n': PUTS(c, "\\n", 2); break;
                case '\r': PUTS(c, "\\r", 2); break;
                case '\t': PUTS(c, "\\t", 2); break;
                default:
                    if (ch < 0x20) {
                        /* Remaining control characters must be escaped, not dropped. */
                        lept_put_u_escape(c, ch);
                    } else {
                        PUTC(c, (char)ch);
                    }
                    break;
            }
            i++;
            continue;
        }

        n = lept_decode_utf8_multibyte(bytes + i, len - i, &cp);
        if (n == 0) {
            /* Not valid UTF-8 here: pass the byte through untouched. */
            PUTC(c, s[i]);
            i++;
            continue;
        }

        if (cp >= 0x10000ul) {
            /* Split the 20-bit offset into high and low surrogates. */
            const unsigned long offset = cp - 0x10000ul;
            lept_put_u_escape(c, (unsigned short)(0xD800u + (offset >> 10)));
            lept_put_u_escape(c, (unsigned short)(0xDC00u + (offset & 0x3FFu)));
        } else {
            lept_put_u_escape(c, (unsigned short)cp);
        }
        i += n;
    }
    PUTC(c, '"');
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant