pypa · gaborbernat · Apr 23, 2024 · Apr 23, 2024 · Apr 23, 2024 · Apr 23, 2024
diff --git a/docs/changelog/2709.bugfix.rst b/docs/changelog/2709.bugfix.rst
@@ -0,0 +1 @@
+allow builtin discovery to discover specific interpreters (e.g. ``python3.12``) given an unspecific spec (e.g. ``python3``) - by :user:`flying-sheep`.
diff --git a/src/virtualenv/discovery/builtin.py b/src/virtualenv/discovery/builtin.py
@@ -3,23 +3,35 @@
 import logging
 import os
 import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Callable
 
 from virtualenv.info import IS_WIN
 
 from .discover import Discover
 from .py_info import PythonInfo
 from .py_spec import PythonSpec
 
+if TYPE_CHECKING:
+    from argparse import ArgumentParser
+    from collections.abc import Generator, Iterable, Mapping, Sequence
+
+    from virtualenv.app_data.base import AppData
+
 
 class Builtin(Discover):
+    python_spec: Sequence[str]
+    app_data: AppData
+    try_first_with: Sequence[str]
+
     def __init__(self, options) -> None:
         super().__init__(options)
         self.python_spec = options.python or [sys.executable]
         self.app_data = options.app_data
         self.try_first_with = options.try_first_with
 
     @classmethod
-    def add_parser_arguments(cls, parser):
+    def add_parser_arguments(cls, parser: ArgumentParser) -> None:
         parser.add_argument(
             "-p",
             "--python",
@@ -41,7 +53,7 @@ def add_parser_arguments(cls, parser):
             help="try first these interpreters before starting the discovery",
         )
 
-    def run(self):
+    def run(self) -> PythonInfo | None:
         for python_spec in self.python_spec:
             result = get_interpreter(python_spec, self.try_first_with, self.app_data, self._env)
             if result is not None:
@@ -53,7 +65,9 @@ def __repr__(self) -> str:
         return f"{self.__class__.__name__} discover of python_spec={spec!r}"
 
 
-def get_interpreter(key, try_first_with, app_data=None, env=None):
+def get_interpreter(
+    key, try_first_with: Iterable[str], app_data: AppData | None = None, env: Mapping[str, str] | None = None
+) -> PythonInfo | None:
     spec = PythonSpec.from_string_spec(key)
     logging.info("find interpreter for spec %r", spec)
     proposed_paths = set()
@@ -70,7 +84,12 @@ def get_interpreter(key, try_first_with, app_data=None, env=None):
     return None
 
 
-def propose_interpreters(spec, try_first_with, app_data, env=None):  # noqa: C901, PLR0912
+def propose_interpreters(  # noqa: C901, PLR0912
+    spec: PythonSpec,
+    try_first_with: Iterable[str],
+    app_data: AppData | None = None,
+    env: Mapping[str, str] | None = None,
+) -> Generator[tuple[PythonInfo, bool], None, None]:
     # 0. try with first
     env = os.environ if env is None else env
     for py_exe in try_first_with:
@@ -104,34 +123,35 @@ def propose_interpreters(spec, try_first_with, app_data, env=None):  # noqa: C90
             for interpreter in propose_interpreters(spec, app_data, env):
                 yield interpreter, True
     # finally just find on path, the path order matters (as the candidates are less easy to control by end user)
-    paths = get_paths(env)
     tested_exes = set()
-    for pos, path in enumerate(paths):
-        path_str = str(path)
-        logging.debug(LazyPathDump(pos, path_str, env))
-        for candidate, match in possible_specs(spec):
-            found = check_path(candidate, path_str)
-            if found is not None:
-                exe = os.path.abspath(found)
-                if exe not in tested_exes:
-                    tested_exes.add(exe)
-                    interpreter = PathPythonInfo.from_exe(exe, app_data, raise_on_error=False, env=env)
-                    if interpreter is not None:
-                        yield interpreter, match
-
-
-def get_paths(env):
+    find_candidates = path_exe_finder(spec)
+    for pos, path in enumerate(get_paths(env)):
+        logging.debug(LazyPathDump(pos, path, env))
+        for exe, impl_must_match in find_candidates(path):
+            if exe in tested_exes:
+                continue
+            tested_exes.add(exe)
+            interpreter = PathPythonInfo.from_exe(str(exe), app_data, raise_on_error=False, env=env)
+            if interpreter is not None:
+                yield interpreter, impl_must_match
+
+
+def get_paths(env: Mapping[str, str]) -> Generator[Path, None, None]:
     path = env.get("PATH", None)
     if path is None:
         try:
             path = os.confstr("CS_PATH")
         except (AttributeError, ValueError):
             path = os.defpath
-    return [] if not path else [p for p in path.split(os.pathsep) if os.path.exists(p)]
+    if not path:
+        return None
+    for p in map(Path, path.split(os.pathsep)):
+        if p.exists():
+            yield p
 
 
 class LazyPathDump:
-    def __init__(self, pos, path, env) -> None:
+    def __init__(self, pos: int, path: Path, env: Mapping[str, str]) -> None:
         self.pos = pos
         self.path = path
         self.env = env
@@ -140,35 +160,35 @@ def __repr__(self) -> str:
         content = f"discover PATH[{self.pos}]={self.path}"
         if self.env.get("_VIRTUALENV_DEBUG"):  # this is the over the board debug
             content += " with =>"
-            for file_name in os.listdir(self.path):
+            for file_path in self.path.iterdir():
                 try:
-                    file_path = os.path.join(self.path, file_name)
-                    if os.path.isdir(file_path) or not os.access(file_path, os.X_OK):
+                    if file_path.is_dir() or not (file_path.stat().st_mode & os.X_OK):
                         continue
                 except OSError:
                     pass
                 content += " "
-                content += file_name
+                content += file_path.name
         return content
 
 
-def check_path(candidate, path):
-    _, ext = os.path.splitext(candidate)
-    if sys.platform == "win32" and ext != ".exe":
-        candidate += ".exe"
-    if os.path.isfile(candidate):
-        return candidate
-    candidate = os.path.join(path, candidate)
-    if os.path.isfile(candidate):
-        return candidate
-    return None
-
-
-def possible_specs(spec):
-    # 4. then maybe it's something exact on PATH - if it was direct lookup implementation no longer counts
-    yield spec.str_spec, False
-    # 5. or from the spec we can deduce a name on path  that matches
-    yield from spec.generate_names()
+def path_exe_finder(spec: PythonSpec) -> Callable[[Path], Generator[tuple[Path, bool], None, None]]:
+    """Given a spec, return a function that can be called on a path to find all matching files in it."""
+    pat = spec.generate_re(windows=sys.platform == "win32")
+    direct = spec.str_spec
+    if sys.platform == "win32":
+        direct = f"{direct}.exe"
+
+    def path_exes(path: Path) -> Generator[tuple[Path, bool], None, None]:
+        # 4. then maybe it's something exact on PATH - if it was direct lookup implementation no longer counts
+        yield (path / direct), False
+        # 5. or from the spec we can deduce if a name on path matches
+        for exe in path.iterdir():
+            match = pat.fullmatch(exe.name)
+            if match:
+                # the implementation must match when we find “python[ver]”
+                yield exe.absolute(), match["impl"] == "python"
+
+    return path_exes
 
 
 class PathPythonInfo(PythonInfo):

diff --git a/src/virtualenv/discovery/py_info.py b/src/virtualenv/discovery/py_info.py
@@ -344,7 +344,7 @@ def current(cls, app_data=None):
         return cls._current
 
     @classmethod
-    def current_system(cls, app_data=None):
+    def current_system(cls, app_data=None) -> PythonInfo:
         """
         This locates the current host interpreter information. This might be different than what we run into in case
         the host python has been upgraded from underneath us.

diff --git a/src/virtualenv/discovery/py_spec.py b/src/virtualenv/discovery/py_spec.py
@@ -2,10 +2,8 @@
 
 from __future__ import annotations
 
-import contextlib
 import os
 import re
-from collections import OrderedDict
 
 from virtualenv.info import fs_is_case_sensitive
 
@@ -15,7 +13,16 @@
 class PythonSpec:
     """Contains specification about a Python Interpreter."""
 
-    def __init__(self, str_spec, implementation, major, minor, micro, architecture, path) -> None:  # noqa: PLR0913
+    def __init__(  # noqa: PLR0913
+        self,
+        str_spec: str,
+        implementation: str | None,
+        major: int | None,
+        minor: int | None,
+        micro: int | None,
+        architecture: int | None,
+        path: str | None,
+    ) -> None:
         self.str_spec = str_spec
         self.implementation = implementation
         self.major = major
@@ -25,7 +32,7 @@ def __init__(self, str_spec, implementation, major, minor, micro, architecture,
         self.path = path
 
     @classmethod
-    def from_string_spec(cls, string_spec):  # noqa: C901, PLR0912
+    def from_string_spec(cls, string_spec: str):  # noqa: C901, PLR0912
         impl, major, minor, micro, arch, path = None, None, None, None, None, None
         if os.path.isabs(string_spec):  # noqa: PLR1702
             path = string_spec
@@ -67,26 +74,18 @@ def _int_or_none(val):
 
         return cls(string_spec, impl, major, minor, micro, arch, path)
 
-    def generate_names(self):
-        impls = OrderedDict()
-        if self.implementation:
-            # first consider implementation as it is
-            impls[self.implementation] = False
-            if fs_is_case_sensitive():
-                # for case sensitive file systems consider lower and upper case versions too
-                # trivia: MacBooks and all pre 2018 Windows-es were case insensitive by default
-                impls[self.implementation.lower()] = False
-                impls[self.implementation.upper()] = False
-        impls["python"] = True  # finally consider python as alias, implementation must match now
-        version = self.major, self.minor, self.micro
-        with contextlib.suppress(ValueError):
-            version = version[: version.index(None)]
-
-        for impl, match in impls.items():
-            for at in range(len(version), -1, -1):
-                cur_ver = version[0:at]
-                spec = f"{impl}{'.'.join(str(i) for i in cur_ver)}"
-                yield spec, match
+    def generate_re(self, *, windows: bool) -> re.Pattern:
+        """Generate a regular expression for matching against a filename."""
+        version = r"{}(\.{}(\.{})?)?".format(
+            *(r"\d+" if v is None else v for v in (self.major, self.minor, self.micro))
+        )
+        impl = "python" if self.implementation is None else f"python|{re.escape(self.implementation)}"
+        suffix = r"\.exe" if windows else ""
+        # Try matching `direct` first, so the `direct` group is filled when possible.
+        return re.compile(
+            rf"(?P<impl>{impl})(?P<v>{version}){suffix}$",
+            flags=re.IGNORECASE if fs_is_case_sensitive() else 0,
+        )
 
     @property
     def is_abs(self):

diff --git a/tests/unit/discovery/test_discovery.py b/tests/unit/discovery/test_discovery.py
@@ -5,6 +5,7 @@
 import sys
 from argparse import Namespace
 from pathlib import Path
+from typing import TYPE_CHECKING
 from uuid import uuid4
 
 import pytest
@@ -13,6 +14,9 @@
 from virtualenv.discovery.py_info import PythonInfo
 from virtualenv.info import fs_supports_symlink
 
+if TYPE_CHECKING:
+    from virtualenv.app_data.base import AppData
+
 
 @pytest.mark.skipif(not fs_supports_symlink(), reason="symlink not supported")
 @pytest.mark.parametrize("case", ["mixed", "lower", "upper"])
@@ -40,6 +44,26 @@ def test_discovery_via_path(monkeypatch, case, tmp_path, caplog, session_app_dat
     assert interpreter is not None
 
 
+@pytest.mark.skipif(not fs_supports_symlink(), reason="symlink not supported")
+def test_discovery_via_path_specific(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path, caplog: pytest.LogCaptureFixture, session_app_data: AppData
+):
+    """Test that a generic spec (e.g. python3) can be used to find a specific interpreter (e.g. python3.12)"""
+    caplog.set_level(logging.DEBUG)
+    current = PythonInfo.current_system(session_app_data)
+    name = "somethingVeryCryptic"
+    spec = f"{name}{current.version_info.major}"
+    exe_name = f"{name}{'.'.join(str(i) for i in current.version_info[:2])}{'.exe' if sys.platform == 'win32' else ''}"
+    target = tmp_path / current.install_path("scripts")
+    target.mkdir(parents=True)
+    executable = target / exe_name
+    os.symlink(sys.executable, str(executable))
+    monkeypatch.setenv("PATH", str(target))
+    interpreter = get_interpreter(spec, [])
+
+    assert interpreter is not None
+
+
 def test_discovery_via_path_not_found(tmp_path, monkeypatch):
     monkeypatch.setenv("PATH", str(tmp_path))
     interpreter = get_interpreter(uuid4().hex, [])

diff --git a/tox.ini b/tox.ini
@@ -33,10 +33,10 @@ set_env =
     _COVERAGE_SRC = {envsitepackagesdir}/virtualenv
 commands =
     coverage erase
-    coverage run -m pytest {posargs:--junitxml {toxworkdir}/junit.{envname}.xml tests --int}
+    coverage run -m pytest {posargs:--junitxml "{toxworkdir}/junit.{envname}.xml" tests --int}
     coverage combine
     coverage report --skip-covered --show-missing
-    coverage xml -o {toxworkdir}/coverage.{envname}.xml
+    coverage xml -o "{toxworkdir}/coverage.{envname}.xml"
     coverage html -d {envtmpdir}/htmlcov --show-contexts  --title virtualenv-{envname}-coverage
 uv_seed = true