From d2b520851bd10719a3f0de1caf00a48457a961d1 Mon Sep 17 00:00:00 2001 From: David Conran Date: Thu, 30 Sep 2021 08:03:55 +1000 Subject: [PATCH] Fix potential security issue with `scrape_supported_devices.py` (#1619) Note: Tool is used internally by library developers to automatically generate documentation prior to a release and to warn when files are missing the required documentation. * Update Regex used to eliminate potential denial of service. - Deemed a Low severity & impact threat. - Does NOT require or necessitate a new version/release of the library. - The tool is only used after review of code/PR etc which would already be very suspicious. - The tool is not something a user would typically use or run. (i.e. Internal use only) - Very unlikely to be exploited as the tool is not typically automatically run. - Expected worst case scenario would be the Continuous Integration tests failing due to a timeout, or a Developer losing some CPU time if they didn't notice a malicious PR/commit. (It would be very very obvious) - Remediation tested using supplied Proof of Concept code. i.e. Guaranteed Sub mSecond CPU use instead of exponential CPU use. - Addressed all other regex as a precaution as well via sensible range limitations. * Documented the Regex used via `re.VERBOSE` and made easier to read/follow. * Simplified some of the Regex. Fixes #1616 Thanks to @srikanthprathi for reporting the issue & providing POC via the Huntr.Dev team. Thanks to Huntr.Dev team for bringing the issue to our attention. 
(Kudos to @zidingz, @JamieSlome) Ref: https://huntr.dev/bounties/4da00a75-50dc-458b-acc6-cc216e1c854a/ --- tools/scrape_supported_devices.py | 33 +++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/tools/scrape_supported_devices.py b/tools/scrape_supported_devices.py index 979e33004..f40b3e959 100755 --- a/tools/scrape_supported_devices.py +++ b/tools/scrape_supported_devices.py @@ -10,15 +10,32 @@ CODE_URL = "https://github.com/crankyoldgit/IRremoteESP8266/blob/master/src/ir_" -BRAND_MODEL = re.compile(r"Brand: *(?P<brand>.+), *Model: *(?P<model>.+)") -ENUMS = re.compile(r"enum (\w+) {(.+?)};", re.DOTALL) -ENUM_ENTRY = re.compile(r"^\s+(\w+)", re.MULTILINE) -DECODED_PROTOCOLS = re.compile(r".*(?:results->decode_type *=.*?|" - r"typeguess\s*=\s*decode_type_t::)(\w+);") -AC_FN = re.compile(r"ir_(.+)\.h") -AC_MODEL_ENUM_RE = re.compile(r"(.+)_ac_remote_model_t") +BRAND_MODEL = re.compile(r""" + Brand:\s{1,20} # "Brand:" label followd by between 1 and 20 whitespace chars. + \b(?P<brand>.{1,40})\b # The actual brand of the device, max 40 chars. + \s{0,10}, # Followed by at most 10 whitespace chars, then a comma. + \s{1,20} # The between 1 and 20 whitespace chars. + Model:\s{1,20} # "Model:" label followd by between 1 and 20 whitespace chars. + \b(?P<model>.{1,80}) # The model info of the device, max 80 chars. + \s{0,5}$ # Followed by at most 5 whitespaces before the end of line. + """, re.VERBOSE) +ENUMS = re.compile(r"enum (\w{1,60}) {(.{1,5000}?)};", re.DOTALL) +ENUM_ENTRY = re.compile(r"^\s{1,80}(\w{1,80})", re.MULTILINE) +DECODED_PROTOCOLS = re.compile(r""" + .{0,80} # Ignore upto an 80 char line of whitespace/code etc. + # Now look for code that looks like we are assigning the Protocol type. + # There are two typical styles used: + (?:results->decode_type # The first style. + | # Or + typeguess) # The second style + \s{0,5}=\s{0,5} # The assignment operator and potential whitespace + (?:decode_type_t::)? 
# The protocol could have an optional type prefix. + (\w{1,40}); # Finally, the last word of code should be the Protocol. + """, re.VERBOSE) +AC_FN = re.compile(r"ir_(.{1,80})\.h") +AC_MODEL_ENUM_RE = re.compile(r"(.{1,40})_ac_remote_model_t") IRSEND_FN_RE = re.compile(r"IRsend\.h") -ALL_FN = re.compile(r"ir_(.+)\.(h|cpp)") +ALL_FN = re.compile(r"ir_(.{1,80})\.(h|cpp)") EXCLUDED_PROTOCOLS = ["UNKNOWN", "UNUSED", "kLastDecodeType", "typeguess"] EXCLUDED_ACS = ["Magiquest", "NEC"]