Skip to content

Commit

Permalink
z_string: normalize ÿ to y (#90)
Browse files Browse the repository at this point in the history
* z_string: normalize ÿ to y

* Add normalization of Ÿ

* Add to lower/upper for ÿ - change tests
  • Loading branch information
mworrell committed Mar 12, 2024
1 parent f3f0346 commit b3da34b
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
10 changes: 7 additions & 3 deletions src/z_string.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
%% coding: utf-8

%% @author Marc Worrell <marc@worrell.nl>
%% @copyright 2009-2020 Marc Worrell
%% @copyright 2009-2024 Marc Worrell
%% @doc String related functions
%% @end

%% @todo Check valid chars for filenames, allow chinese, japanese, etc?
%% CJK Unified Ideographs Extension A: Range: 3400-4DBF
%% CJK Unified Ideographs: Range: 4E00-9FAF
%% Kangxi Radicals: Range 2F00-2FDF
%% See also: http://www.utf8-chartable.de/

%% Copyright 2009-2020 Marc Worrell
%% Copyright 2009-2024 Marc Worrell
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -317,6 +318,7 @@ to_lower(<<"Œ"/utf8,T/binary>>, Acc) -> to_lower(T, <<Acc/binary,197,147>>);
to_lower(<<"Ã"/utf8,T/binary>>, Acc) -> to_lower(T, <<Acc/binary,195,163>>);
to_lower(<<"Ñ"/utf8,T/binary>>, Acc) -> to_lower(T, <<Acc/binary,195,177>>);
to_lower(<<"Õ"/utf8,T/binary>>, Acc) -> to_lower(T, <<Acc/binary,195,181>>);
to_lower(<<"Ÿ"/utf8,T/binary>>, Acc) -> to_lower(T, <<Acc/binary,"ÿ"/utf8>>);
% Cyrillic support
to_lower(<<"А"/utf8,T/binary>>, Acc) -> to_lower(T, <<Acc/binary,208,176>>);
to_lower(<<"Б"/utf8,T/binary>>, Acc) -> to_lower(T, <<Acc/binary,208,177>>);
Expand Down Expand Up @@ -418,6 +420,7 @@ to_upper(<<"œ"/utf8,T/binary>>, Acc) -> to_upper(T, <<Acc/binary,197,146>>);
to_upper(<<"ã"/utf8,T/binary>>, Acc) -> to_upper(T, <<Acc/binary,195,131>>);
to_upper(<<"ñ"/utf8,T/binary>>, Acc) -> to_upper(T, <<Acc/binary,195,145>>);
to_upper(<<"õ"/utf8,T/binary>>, Acc) -> to_upper(T, <<Acc/binary,195,149>>);
to_upper(<<"ÿ"/utf8,T/binary>>, Acc) -> to_upper(T, <<Acc/binary,"Ÿ"/utf8>>);
% Cyrillic support
to_upper(<<"а"/utf8,T/binary>>, Acc) -> to_upper(T, <<Acc/binary,208,144>>);
to_upper(<<"б"/utf8,T/binary>>, Acc) -> to_upper(T, <<Acc/binary,208,145>>);
Expand Down Expand Up @@ -578,7 +581,8 @@ normalize(<<"Ø"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$o>>);
normalize(<<"å"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$a>>);
normalize(<<"Å"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$a>>);
normalize(<<"€"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$e>>);
normalize(<<"ÿ"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$i,$j>>);
normalize(<<"ÿ"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$y>>);
normalize(<<"Ÿ"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$y>>);
normalize(<<"ã"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$a>>);
normalize(<<"ñ"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$n>>);
normalize(<<"õ"/utf8,T/binary>>, Acc) -> normalize(T, <<Acc/binary,$o>>);
Expand Down
7 changes: 4 additions & 3 deletions test/z_string_test.erl
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ normalize_test() ->
?assertEqual(<<"a"/utf8>>, z_string:normalize(<<"å"/utf8>>)),
?assertEqual(<<"a"/utf8>>, z_string:normalize(<<"Å"/utf8>>)),
?assertEqual(<<"e"/utf8>>, z_string:normalize(<<"€"/utf8>>)),
?assertEqual(<<"ij"/utf8>>, z_string:normalize(<<"ÿ"/utf8>>)),
?assertEqual(<<"y"/utf8>>, z_string:normalize(<<"ÿ"/utf8>>)),
?assertEqual(<<"y"/utf8>>, z_string:normalize(<<"Ÿ"/utf8>>)),
?assertEqual(<<"a"/utf8>>, z_string:normalize(<<"ã"/utf8>>)),
?assertEqual(<<"n"/utf8>>, z_string:normalize(<<"ñ"/utf8>>)),
?assertEqual(<<"o"/utf8>>, z_string:normalize(<<"õ"/utf8>>)),
Expand Down Expand Up @@ -192,7 +193,7 @@ normalize_test() ->
?assertEqual(<<"a"/utf8>>, z_string:normalize("å")),
?assertEqual(<<"a"/utf8>>, z_string:normalize("Å")),
?assertEqual(<<"e"/utf8>>, z_string:normalize("€")),
?assertEqual(<<"ij"/utf8>>, z_string:normalize("ÿ")),
?assertEqual(<<"y"/utf8>>, z_string:normalize("ÿ")),
?assertEqual(<<"a"/utf8>>, z_string:normalize("ã")),
?assertEqual(<<"n"/utf8>>, z_string:normalize("ñ")),
?assertEqual(<<"o"/utf8>>, z_string:normalize("õ")),
Expand Down Expand Up @@ -234,7 +235,7 @@ normalize_test() ->
?assertEqual(<<"a"/utf8>>, z_string:normalize('å')),
?assertEqual(<<"a"/utf8>>, z_string:normalize('Å')),
?assertEqual(<<"e"/utf8>>, z_string:normalize('€')),
?assertEqual(<<"ij"/utf8>>, z_string:normalize('ÿ')),
?assertEqual(<<"y"/utf8>>, z_string:normalize('ÿ')),
?assertEqual(<<"a"/utf8>>, z_string:normalize('ã')),
?assertEqual(<<"n"/utf8>>, z_string:normalize('ñ')),
?assertEqual(<<"o"/utf8>>, z_string:normalize('õ')),
Expand Down

0 comments on commit b3da34b

Please sign in to comment.