Skip to content

Commit

Permalink
Merge pull request #68 from esl/max-element-size
Browse files Browse the repository at this point in the history
Fix and improve the check for maximum element size
  • Loading branch information
NelsonVides committed Dec 22, 2023
2 parents aa62735 + b9500bd commit 74263a9
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 43 deletions.
74 changes: 46 additions & 28 deletions c_src/exml.cpp
Expand Up @@ -81,7 +81,7 @@ namespace {

struct Parser {
ustring stream_tag;
std::uint64_t max_child_size = 0;
std::uint64_t max_element_size = 0;
bool infinite_stream = false;

static thread_local std::vector<unsigned char> buffer;
Expand Down Expand Up @@ -459,10 +459,10 @@ static ERL_NIF_TERM create(ErlNifEnv *env, int argc,
void *mem = enif_alloc_resource(parser_type, sizeof(Parser));
Parser *parser = new (mem) Parser;

ErlNifUInt64 max_child_size;
if (!enif_get_uint64(env, argv[0], &max_child_size))
ErlNifUInt64 max_element_size;
if (!enif_get_uint64(env, argv[0], &max_element_size))
return enif_make_badarg(env);
parser->max_child_size = static_cast<std::uint64_t>(max_child_size);
parser->max_element_size = static_cast<std::uint64_t>(max_element_size);
if (enif_compare(atom_true, argv[1]) == 0)
parser->infinite_stream = true;

Expand Down Expand Up @@ -491,17 +491,23 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int argc,
ParseCtx ctx{env, parser};
xml_document::ParseResult result;
ERL_NIF_TERM element;
const char *error_msg = nullptr;

xml_document &doc = get_static_doc();
Parser::term_buffer.clear();

auto parseStreamOpen = [&] {
result = doc.parse<parse_open_only()>(Parser::buffer.data() + offset);
if (!result.has_error) {
auto name_tag = node_name(doc.impl.first_node());
parser->stream_tag =
if (parser->max_element_size &&
result.rest - Parser::buffer.data() - offset > parser->max_element_size) {
error_msg = "element too big";
} else {
auto name_tag = node_name(doc.impl.first_node());
parser->stream_tag =
ustring{std::get<0>(name_tag), std::get<1>(name_tag)};
element = make_stream_start_tuple(ctx, doc.impl.first_node());
element = make_stream_start_tuple(ctx, doc.impl.first_node());
}
}
};

Expand All @@ -515,10 +521,20 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int argc,
parser->stream_tag;
};

if (parser->infinite_stream) {
auto parseElement = [&] {
result = doc.parse<parse_one()>(Parser::buffer.data() + offset);
if (!result.has_error)
element = make_xmlel(ctx, doc.impl.first_node());
if (!result.has_error) {
if (parser->max_element_size &&
result.rest - Parser::buffer.data() - offset > parser->max_element_size) {
error_msg = "element too big";
} else {
element = make_xmlel(ctx, doc.impl.first_node());
}
}
};

if (parser->infinite_stream) {
parseElement();
} else if (parser->stream_tag.empty()) {
parseStreamOpen();
} else if (has_stream_closing_tag(parser, offset)) {
Expand All @@ -527,9 +543,7 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int argc,
result.rest = &*Parser::buffer.rbegin();
element = make_stream_end_tuple(ctx);
} else {
result = doc.parse<parse_one()>(Parser::buffer.data() + offset);
if (!result.has_error)
element = make_xmlel(ctx, doc.impl.first_node());
parseElement();
}

if (result.eof && hasStreamReopen()) {
Expand All @@ -538,26 +552,30 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int argc,
}

if (result.eof) {
if (parser->max_child_size &&
Parser::buffer.size() - offset >= parser->max_child_size)
return enif_make_tuple2(
env, atom_error,
enif_make_string(env, "child element too big", ERL_NIF_LATIN1));

result.rest = Parser::buffer.data() + offset;
element = atom_undefined;
// Return an error if an incomplete element has at least max_element_size characters.
if (parser->max_element_size &&
Parser::buffer.size() - offset > parser->max_element_size) {
error_msg = "element too big";
} else {
result.rest = Parser::buffer.data() + offset;
element = atom_undefined;
}
} else if (result.has_error) {
return enif_make_tuple2(
env, atom_error,
enif_make_string(env, result.error_message.c_str(), ERL_NIF_LATIN1));
error_msg = result.error_message.c_str();
}

if (!error_msg) {
// Return an error when null character is found.
std::size_t rest_size = &Parser::buffer.back() - result.rest;
if (std::strlen(reinterpret_cast<const char*>(result.rest)) != rest_size)
error_msg = "null character found in buffer";
}

// Raise an exception when null character is found.
std::size_t rest_size = &Parser::buffer.back() - result.rest;
if (std::strlen(reinterpret_cast<const char*>(result.rest)) != rest_size)
if (error_msg) {
return enif_make_tuple2(
env, atom_error,
enif_make_string(env, "null character found in buffer", ERL_NIF_LATIN1));
enif_make_string(env, error_msg, ERL_NIF_LATIN1));
}

return enif_make_tuple3(
env, atom_ok, element,
Expand Down
14 changes: 7 additions & 7 deletions src/exml_stream.erl
Expand Up @@ -29,11 +29,11 @@
-type stop() :: #xmlstreamend{}.
-type element() :: exml_nif:stream_element().
-type parser() :: #parser{}.
%% infinite_stream - no distinct "stream start" or "stream end", only #xmlel{} will be returned
%% max_child_size - specifies maximum byte size of any child of the root element. The byte size is
%% counted from the start tag until the opening character of its end tag. Disabled
%% if set to 0 (default).
-type parser_opt() :: {infinite_stream, boolean()} | {max_child_size, non_neg_integer()}.
%% infinite_stream - No distinct "stream start" or "stream end", only #xmlel{} will be returned.
%% max_element_size - Specifies maximum byte size of any parsed XML element.
%% The only exception is the "stream start" element,
%% for which only the size of the opening tag is limited.
-type parser_opt() :: {infinite_stream, boolean()} | {max_element_size, non_neg_integer()}.

%%%===================================================================
%%% Public API
Expand All @@ -45,9 +45,9 @@ new_parser() ->

-spec new_parser([parser_opt()]) -> {ok, parser()} | {error, any()}.
new_parser(Opts)->
MaxChildSize = proplists:get_value(max_child_size, Opts, 0),
MaxElementSize = proplists:get_value(max_element_size, Opts, 0),
InfiniteStream = proplists:get_value(infinite_stream, Opts, false),
case exml_nif:create(MaxChildSize, InfiniteStream) of
case exml_nif:create(MaxElementSize, InfiniteStream) of
{ok, EventParser} ->
{ok, #parser{event_parser = EventParser, buffer = []}};
Error ->
Expand Down
55 changes: 47 additions & 8 deletions test/exml_stream_tests.erl
Expand Up @@ -160,7 +160,7 @@ conv_attr_test() ->
-define(RESTART_TEST_STREAM, <<"<stream:stream xmlns:stream='something'><quote/><stream:stream xmlns:stream='else'><other/><things/>">>).

stream_restarts_test() ->
{ok, Parser0} = exml_stream:new_parser([{start_tag, <<"stream:stream">>}]),
{ok, Parser0} = exml_stream:new_parser([]),
{ok, _Parser1, Elements} = exml_stream:parse(Parser0, ?RESTART_TEST_STREAM),
?assertMatch(
[#xmlstreamstart{name = <<"stream:stream">>},
Expand All @@ -174,7 +174,7 @@ stream_restarts_test() ->
"<?xml version='1.0'?><stream:stream xmlns:stream='a'><other/>">>).

stream_restarts_with_xml_declaration_test() ->
{ok, Parser0} = exml_stream:new_parser([{start_tag, <<"stream:stream">>}]),
{ok, Parser0} = exml_stream:new_parser([]),
{ok, _Parser1, Elements} = exml_stream:parse(Parser0, ?RESTART_TEST_STREAM_XMLDEC),
?assertMatch(
[#xmlstreamstart{name = <<"stream:stream">>},
Expand All @@ -183,14 +183,53 @@ stream_restarts_with_xml_declaration_test() ->
#xmlel{name = <<"other">>}],
Elements).

stream_max_child_size_test() ->
{ok, Parser0} = exml_stream:new_parser([{max_child_size, 15}]),
{ok, Parser1, _} = exml_stream:parse(Parser0, <<"<stream><root>">>),
{ok, Parser2, _} = exml_stream:parse(Parser1, <<"<a></a>">>),
?assertEqual({error, "child element too big"}, exml_stream:parse(Parser2, <<"<b>456</b>">>)).
stream_max_opening_tag_size_test() ->
{ok, Parser0} = exml_stream:new_parser([{max_element_size, 10}]),
?assertMatch({ok, _Parser1, [#xmlstreamstart{},
#xmlel{}]}, exml_stream:parse(Parser0, <<"<stream89><a></a>">>)),
{ok, Parser2} = exml_stream:new_parser([{max_element_size, 9}]),
?assertEqual({error, "element too big"}, exml_stream:parse(Parser2, <<"<stream89><a></a>">>)).

stream_max_element_size_test() ->
{ok, Parser0} = exml_stream:new_parser([{max_element_size, 10}]),
{ok, Parser1, Elements0} = exml_stream:parse(Parser0, <<"<stream><a></a>">>),
?assertMatch([#xmlstreamstart{}, #xmlel{}], Elements0),
?assertEqual({error, "element too big"}, exml_stream:parse(Parser1, <<"<c><d/></c>">>)).

stream_max_text_element_size_test() ->
{ok, Parser0} = exml_stream:new_parser([{max_element_size, 10}]),
{ok, Parser1, Elements0} = exml_stream:parse(Parser0, <<"<stream><a>123</a><b>123</b>">>),
?assertMatch([#xmlstreamstart{}, #xmlel{}, #xmlel{}], Elements0),
?assertEqual({error, "element too big"}, exml_stream:parse(Parser1, <<"<c>1234</c>">>)).

stream_max_incomplete_element_size_test() ->
{ok, Parser0} = exml_stream:new_parser([{max_element_size, 10}]),
{ok, Parser1, Elements0} = exml_stream:parse(Parser0, <<"<stream><a>123</a><b>123</b>">>),
?assertMatch([#xmlstreamstart{}, #xmlel{}, #xmlel{}], Elements0),
%% Element <c> has 10 characters, but it's incomplete, so it would be too big
?assertEqual({error, "element too big"}, exml_stream:parse(Parser1, <<"<c>1234</c">>)).

stream_max_chunked_element_size_test() ->
{ok, Parser0} = exml_stream:new_parser([{max_element_size, 10}]),
{ok, Parser1, Elements0} = exml_stream:parse(Parser0, <<"<stream><a><b/>">>),
?assertMatch([#xmlstreamstart{}], Elements0),
?assertEqual({error, "element too big"}, exml_stream:parse(Parser1, <<" ">>)).

stream_max_root_element_size_test() ->
{ok, Parser0} = exml_stream:new_parser([{max_element_size, 10}, {infinite_stream, true}]),
{ok, Parser1, Elements0} = exml_stream:parse(Parser0, <<"<a>123</a><b>123</b>">>),
?assertMatch([#xmlel{}, #xmlel{}], Elements0),
?assertEqual({error, "element too big"}, exml_stream:parse(Parser1, <<"<c><d/></c>">>)).

stream_max_incomplete_root_element_size_test() ->
{ok, Parser0} = exml_stream:new_parser([{max_element_size, 10}, {infinite_stream, true}]),
{ok, Parser1, Elements0} = exml_stream:parse(Parser0, <<"<a>123</a><b>123</b>">>),
?assertMatch([#xmlel{}, #xmlel{}], Elements0),
%% Element <c> has 10 characters, but it will have at least one more character
?assertEqual({error, "element too big"}, exml_stream:parse(Parser1, <<"<c>1234</c">>)).

infinite_stream_partial_chunk_test() ->
{ok, Parser0} = exml_stream:new_parser([{infinite_stream, true}, {autoreset, true}]),
{ok, Parser0} = exml_stream:new_parser([{infinite_stream, true}]),
{ok, Parser1, Open} = exml_stream:parse(Parser0, <<"<open xmlns='urn:ietf:params:xml:ns:xmpp-framing' to='i.am.banana.com' version='1.0'/>">>),
?assertEqual(
[#xmlel{name = <<"open">>,
Expand Down

0 comments on commit 74263a9

Please sign in to comment.