Skip to content

Commit

Permalink
Reuse subtree exclusion in python encoding linter
Browse files Browse the repository at this point in the history
Rewrites the python encoding lint check to make use of
common exclusion logic.
  • Loading branch information
davidgumberg committed Apr 25, 2024
1 parent ca783f5 commit 46caa5c
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 73 deletions.
73 changes: 0 additions & 73 deletions test/lint/lint-python-utf8-encoding.py

This file was deleted.

5 changes: 5 additions & 0 deletions test/lint/test_runner/src/exclude.rs
Expand Up @@ -78,6 +78,11 @@ pub fn get_pathspecs_exclude_locale_dependence() -> Vec<String> {
get_pathspecs_exclude(Some(&["src/tinyformat.h"]))
}

/// Build the exclusion pathspecs used by the python encoding lint check.
///
/// This check passes `None` to `get_pathspecs_exclude`, i.e. it requests no
/// additional paths beyond whatever that helper supplies on its own
/// (presumably the shared subtree exclusions; confirm against its definition).
pub fn get_pathspecs_exclude_python_encoding() -> Vec<String> {
    let extra_excludes = None;
    get_pathspecs_exclude(extra_excludes)
}

/// Return the pathspecs for spelling check related excludes
pub fn get_pathspecs_exclude_spelling() -> Vec<String> {
get_pathspecs_exclude(Some(&[
Expand Down
71 changes: 71 additions & 0 deletions test/lint/test_runner/src/main.rs
Expand Up @@ -29,6 +29,11 @@ fn get_linter_list() -> Vec<&'static Linter> {
name: "doc",
lint_fn: lint_doc
},
&Linter {
description: "Check that Python's `open` and `check_output` are invoked with an `encoding=` arg.",
name: "python_encoding",
lint_fn: lint_python_encoding
},
&Linter {
description: "Check that header files have include guards",
name: "include_guards",
Expand Down Expand Up @@ -921,6 +926,72 @@ fn lint_spelling() -> LintResult {
Ok(())
}

/// Run `git grep -E` with `pattern_args` against files matching the `*.py`
/// pathspec, minus the python-encoding exclusion pathspecs, and return the
/// matching lines as a single string.
///
/// # Errors
///
/// Returns a message when git cannot be run, when `git grep` exits with a
/// status other than 0 (matches found) or 1 (no matches), or when its output
/// is not valid UTF-8. Without the status check a failing grep would produce
/// empty output and be indistinguishable from "nothing to report".
fn git_grep_python_sources(pattern_args: &[&str]) -> Result<String, String> {
    let output = git()
        .args(["grep", "-E"])
        .args(pattern_args)
        .args(["--", "*.py"])
        .args(exclude::get_pathspecs_exclude_python_encoding())
        .output()
        .map_err(|e| format!("Failed to run git grep: {e}"))?;

    match output.status.code() {
        // 0: at least one line matched, 1: no line matched. Both are normal.
        Some(0 | 1) => {}
        // Any other exit code (or termination by signal) means grep itself failed.
        _ => {
            return Err(format!(
                "git grep failed ({}): {}",
                output.status,
                String::from_utf8_lossy(&output.stderr).trim_end()
            ));
        }
    }

    String::from_utf8(output.stdout)
        .map_err(|e| format!("git grep produced non-UTF-8 output: {e}"))
}

/// Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to
/// avoid potential issues on the BSDs where the locale is not always set.
///
/// Two searches are performed over the Python sources:
/// * calls to `open(` that neither pass an `encoding=` of ascii/utf8/utf-8 nor
///   pass `**kwargs` or a quoted mode containing `b` as the second argument;
/// * calls to `check_output(` with `text=True` but without such an `encoding=`.
///
/// Every offending line is printed. The function returns `Err` if any were
/// found, or if one of the underlying `git grep` invocations failed.
fn lint_python_encoding() -> LintResult {
    let mut encoding_error = false;

    let bad_opens = git_grep_python_sources(&[
        "-e",
        r#" open\("#,
        "--and",
        "--not",
        "-e",
        r#"open\(.*encoding=.(ascii|utf8|utf-8)."#,
        "--and",
        "--not",
        "-e",
        r#"open\([^,]*, (\*\*kwargs|['"][^'"]*b.*['"])"#,
    ])?;

    for bad_open in bad_opens.lines() {
        encoding_error = true;
        println!(
            r#"
Python's open(...) seems to be used to open text files without explicitly specifying encoding='(ascii|utf8|utf-8):'
{bad_open}
"#
        );
    }

    let bad_check_outputs = git_grep_python_sources(&[
        "-e",
        r#" check_output\(.*text=True"#,
        "--and",
        "--not",
        "-e",
        r#"check_output\(.*encoding=.(ascii|utf8|utf-8)."#,
    ])?;

    for bad_check_output in bad_check_outputs.lines() {
        encoding_error = true;
        println!(
            r#"
Python's check_output(...) seems to be used to get program outputs without explicitly specifying encoding='(ascii|utf8|utf-8):'
{bad_check_output}
"#
        );
    }

    if encoding_error {
        Err(r#"
^^^
The lint check: 'python_encoding' found one or more attempts to read text files or program output without setting an encoding.
Advice not applicable in this specific case? Add an exception by updating the exceptions list.
"#
        .to_string())
    } else {
        Ok(())
    }
}

fn run_all_python_linters() -> LintResult {
let mut good = true;
let lint_dir = get_git_root().join("test/lint");
Expand Down

0 comments on commit 46caa5c

Please sign in to comment.