Skip to content

Commit

Permalink
Merge pull request #600 from reox/fix-regex
Browse files Browse the repository at this point in the history
fixing py2 regex for attribute value cleaning
  • Loading branch information
reox committed Jan 4, 2019
2 parents 7c64adc + 4e9c65e commit d349e82
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 6 deletions.
18 changes: 16 additions & 2 deletions androguard/core/bytecodes/axml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from lxml import etree
import logging
import re
import sys
import binascii

log = logging.getLogger("androguard.axml")
Expand Down Expand Up @@ -820,6 +821,9 @@ class AXMLPrinter:
A Reference Implementation can be found at http://androidxref.com/9.0.0_r3/xref/frameworks/base/tools/aapt/XMLNode.cpp
"""
__charrange = None
__replacement = None

def __init__(self, raw_buff):
self.axml = AXMLParser(raw_buff)

Expand Down Expand Up @@ -975,6 +979,16 @@ def _fix_value(self, value):
:param value: a value to clean
:return: the cleaned value
"""
if not self.__charrange or not self.__replacement:
if sys.maxunicode == 0xFFFF:
# Fix for python 2.x (narrow builds): surrogate pairs do not match in regex
self.__charrange = re.compile(u'^([\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD]|[\uD800-\uDBFF][\uDC00-\uDFFF])*$')
# TODO: this regex is slightly wrong... surrogates are not matched as pairs.
self.__replacement = re.compile(u'[^\u0020-\uDBFF\u0009\u000A\u000D\uE000-\uFFFD\uDC00-\uDFFF]')
else:
self.__charrange = re.compile(u'^[\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]*$')
self.__replacement = re.compile(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]')

# Reading string until \x00. This is the same as aapt does.
if "\x00" in value:
self.packerwarning = True
Expand All @@ -984,10 +998,10 @@ def _fix_value(self, value):
binascii.hexlify(value.encode("utf-8"))))
value = value[:value.find("\x00")]

if not re.match(u'^[\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]*$', value):
if not self.__charrange.match(value):
log.warning("Invalid character in value found. Replacing with '_'.")
self.packerwarning = True
value = re.sub(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]', '_', value)
value = self.__replacement.sub('_', value)
return value

def _print_namespace(self, uri):
Expand Down
11 changes: 8 additions & 3 deletions docs/intro/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,19 @@ Just use

.. code-block:: bash
$ pip install -U androguard[magic,graphing,GUI]
$ pip install -U androguard[magic,GUI]
to install androguard.
In order to use features which use :code:`dot`, you need Graphviz_ installed.
This is not a python dependency but a binary package! Please follow the installation instructions for GraphvizInstall_.

You can also make use of a `virtualenv` to separate the installation from your system-wide packages:

.. code-block:: bash
$ virtualenv venv-androguard
$ . venv-androguard/bin/activate
$ pip install -U androguard[magic,graphing,GUI]
$ pip install -U androguard[magic,GUI]
pip should install all required packages too.

Expand Down Expand Up @@ -59,10 +61,13 @@ if you like to install the GUI as well, use

.. code-block:: bash
$ pip install .[magic,GUI,graphing]
$ pip install .[magic,GUI]
The dependencies, defined in :code:`setup.py` will be automatically installed.

If you are installing the libraries using :code:`pip`, make sure you download the correct packages.
For example, there are a lot of implementations of the :code:`magic` library.
Get the one that is shipped with the file command (see `Fine Free File Command <http://www.darwinsys.com/file/>`_) or use `filemagic`, which should work as well.

.. _Graphviz: https://graphviz.org/
.. _GraphvizInstall: https://graphviz.org/download/
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
'matplotlib',
'asn1crypto>=0.24.0',
'click',
'pydot>=1.4.1',
]

# python version specific library versions:
Expand Down Expand Up @@ -90,7 +91,6 @@
# If you are installing on debian you can use python3-magic instead, which fulfills the dependency to file-magic
'magic': ['file-magic'],
'docs': ['sphinx', "sphinxcontrib-programoutput>0.8", 'sphinx_rtd_theme'],
'graphing': ['pydot'],
'tests': ['mock>=2.0', 'nose', 'codecov', 'coverage', 'nose-timer'],
},
setup_requires=['setuptools'],
Expand Down
25 changes: 25 additions & 0 deletions tests/test_axml.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,31 @@ def xml_compare(x1, x2, reporter=None):


class AXMLTest(unittest.TestCase):
def testReplacement(self):
    """
    Check the cleaning helpers of the AXMLPrinter.

    ``_fix_value`` and ``_fix_name`` are called directly on a printer that
    was created from a minimal AXML buffer, and each result is compared
    against the expected cleaned string.
    :return:
    """
    # Fake, Empty AXML file
    empty_axml = (b"\x03\x00\x08\x00\x24\x00\x00\x00"
                  b"\x01\x00\x1c\x00\x1c\x00\x00\x00"
                  b"\x00\x00\x00\x00\x00\x00\x00\x00"
                  b"\x00\x00\x00\x00"
                  b"\x00\x00\x00\x00\x00\x00\x00\x00")
    printer = axml.AXMLPrinter(empty_axml)

    self.assertIsNotNone(printer)

    # (input, expected output) pairs for attribute value cleaning
    value_cases = [
        # plain text is returned unchanged
        (u"hello world", u"hello world"),
        # newline, carriage return and a BMP character are kept
        (u"Foobar \u000a\u000d\u0b12", u"Foobar \u000a\u000d\u0b12"),
        # a character outside of the BMP is kept as well
        (u"hello \U00011234", u"hello \U00011234"),
        # U+FFFF is expected to be replaced by an underscore
        (u"\uFFFF", u"_"),
        # everything from the first NUL byte on is expected to be dropped
        ("hello\x00world", u"hello"),
    ]
    for raw, expected in value_cases:
        self.assertEqual(printer._fix_value(raw), expected)

    # (input, expected output) pairs for name cleaning
    name_cases = [
        (u"foobar", u"foobar"),
        (u"5foobar", u"_5foobar"),
        (u"android:foobar", u"foobar"),
        (u"5:foobar", u"_5_foobar"),
    ]
    for raw, expected in name_cases:
        self.assertEqual(printer._fix_name(raw), expected)

def testAndroidManifest(self):
filenames = [
"examples/axml/AndroidManifest.xml",
Expand Down

0 comments on commit d349e82

Please sign in to comment.