# Source code for fs.glob

"""Useful functions for working with glob patterns.
"""

from __future__ import unicode_literals

import typing
from functools import partial

import re
from collections import namedtuple

from ._repr import make_repr
from .lrucache import LRUCache
from .path import iteratepath


# One glob hit as yielded by `Globber`: the matched path and the info object
# that the filesystem walker produced for it.
GlobMatch = namedtuple("GlobMatch", ["path", "info"])
# Totals returned by `Globber.count`.
Counts = namedtuple("Counts", ["files", "directories", "data"])
# Totals returned by `Globber.count_lines`.
LineCounts = namedtuple("LineCounts", ["lines", "non_blank"])

if typing.TYPE_CHECKING:
    from typing import (
        Iterator,
        List,
        Optional,
        Pattern,
        Text,
        Tuple,
        Iterable,
        Callable,
    )
    from .base import FS


# Module-wide cache of compiled glob patterns.
# Keys are ``(pattern, case_sensitive)`` and values are
# ``(levels, compiled_regex)``, where ``levels`` is the depth reported by
# `_translate_glob` (``None`` for recursive patterns).
# NOTE(review): 1000 is presumably the maximum number of cached entries --
# confirm against `fs.lrucache.LRUCache`.
_PATTERN_CACHE = LRUCache(
    1000
)  # type: LRUCache[Tuple[Text, bool], Tuple[Optional[int], Pattern]]


def _split_pattern_by_sep(pattern):
    # type: (Text) -> List[Text]
    """Split a glob pattern at its directory seperators (/).

    Takes into account escaped cases like [/].
    """
    indices = [-1]
    bracket_open = False
    for i, c in enumerate(pattern):
        if c == "/" and not bracket_open:
            indices.append(i)
        elif c == "[":
            bracket_open = True
        elif c == "]":
            bracket_open = False

    indices.append(len(pattern))
    return [pattern[i + 1 : j] for i, j in zip(indices[:-1], indices[1:])]


def _translate(pattern):
    # type: (Text) -> Text
    """Translate a glob pattern without '**' to a regular expression.

    There is no way to quote meta-characters.

    Arguments:
        pattern (str): A glob pattern.

    Returns:
        str: A regex equivalent to the given pattern.

    """
    i, n = 0, len(pattern)
    res = []
    while i < n:
        c = pattern[i]
        i = i + 1
        if c == "*":
            if i < n and pattern[i] == "*":
                raise ValueError("glob._translate does not support '**' patterns.")
            res.append("[^/]*")
        elif c == "?":
            res.append("[^/]")
        elif c == "[":
            j = i
            if j < n and pattern[j] == "!":
                j = j + 1
            if j < n and pattern[j] == "]":
                j = j + 1
            while j < n and pattern[j] != "]":
                j = j + 1
            if j >= n:
                res.append("\\[")
            else:
                stuff = pattern[i:j].replace("\\", "\\\\")
                i = j + 1
                if stuff[0] == "!":
                    stuff = "^/" + stuff[1:]
                elif stuff[0] == "^":
                    stuff = "\\" + stuff
                res.append("[%s]" % stuff)
        else:
            res.append(re.escape(c))
    return "".join(res)


def _translate_glob(pattern):
    # type: (Text) -> Tuple[Optional[int], Text]
    """Translate a glob pattern to a regular expression.

    Each path component is translated with `_translate`; a component
    containing ``**`` is split around it and the pieces are joined so that
    the ``**`` may span any number of directory levels.

    There is no way to quote meta-characters.

    Arguments:
        pattern (str): A glob pattern.

    Returns:
        Tuple[Optional[int], Text]: The first component describes the levels
            of depth this glob pattern goes to; basically the number of "/" in
            the pattern plus one. If there is a "**" in the glob pattern, the
            depth is basically unbounded, and this component is `None` instead.
            The second component is the regular expression.

    """
    recursive = False
    re_patterns = [""]
    # NOTE(review): `iteratepath` presumably yields the "/"-separated
    # components of the pattern -- confirm against `fs.path`.
    for component in iteratepath(pattern):
        if "**" in component:
            recursive = True
            split = component.split("**")
            split_re = [_translate(s) for s in split]
            re_patterns.append("/?" + ".*/?".join(split_re))
        else:
            re_patterns.append("/" + _translate(component))
    # Anchor with \Z rather than $: under the (?m) flag "$" also matches just
    # before any newline, so a path such as "a.py\nx" would wrongly match
    # "*.py". \Z only matches at the very end of the string.
    terminator = "/\\Z" if pattern.endswith("/") else "\\Z"
    re_glob = "(?ms)^" + "".join(re_patterns) + terminator
    return pattern.count("/") + 1 if not recursive else None, re_glob


def match(pattern, path):
    # type: (str, str) -> bool
    """Compare a glob pattern with a path (case sensitive).

    The compiled pattern is kept in the module-level pattern cache, so
    repeated calls with the same pattern do not recompile it. A non-empty
    path that does not start with ``/`` is matched as if it did.

    Arguments:
        pattern (str): A glob pattern.
        path (str): A path.

    Returns:
        bool: ``True`` if the path matches the pattern.

    Example:

        >>> from fs.glob import match
        >>> match("**/*.py", "/fs/glob.py")
        True

    """
    cache_key = (pattern, True)  # True marks the case-sensitive variant
    try:
        _levels, re_pattern = _PATTERN_CACHE[cache_key]
    except KeyError:
        levels, re_str = _translate_glob(pattern)
        re_pattern = re.compile(re_str)
        _PATTERN_CACHE[cache_key] = (levels, re_pattern)
    # The generated regex always expects a leading "/".
    if path and not path.startswith("/"):
        path = "/" + path
    return bool(re_pattern.match(path))


def imatch(pattern, path):
    # type: (str, str) -> bool
    """Compare a glob pattern with a path (case insensitive).

    The compiled pattern is kept in the module-level pattern cache, so
    repeated calls with the same pattern do not recompile it. A non-empty
    path that does not start with ``/`` is matched as if it did.

    Arguments:
        pattern (str): A glob pattern.
        path (str): A path.

    Returns:
        bool: ``True`` if the path matches the pattern.

    """
    cache_key = (pattern, False)  # False marks the case-insensitive variant
    try:
        _levels, re_pattern = _PATTERN_CACHE[cache_key]
    except KeyError:
        levels, re_str = _translate_glob(pattern)
        re_pattern = re.compile(re_str, re.IGNORECASE)
        _PATTERN_CACHE[cache_key] = (levels, re_pattern)
    # The generated regex always expects a leading "/".
    if path and not path.startswith("/"):
        path = "/" + path
    return bool(re_pattern.match(path))


def match_any(patterns, path):
    # type: (Iterable[Text], Text) -> bool
    """Test if a path matches any of a list of patterns.

    Will return `True` if ``patterns`` is an empty list.

    Note:
        Emptiness is decided by truthiness, so only falsy values (an empty
        list or tuple, `None`) count as "no patterns"; an exhausted or
        empty generator does not.

    Arguments:
        patterns (list): A list of wildcard pattern, e.g ``["*.py",
            "*.pyc"]``
        path (str): A resource path.

    Returns:
        bool: `True` if the path matches at least one of the patterns.

    """
    if not patterns:
        return True
    return any(match(pattern, path) for pattern in patterns)


def imatch_any(patterns, path):
    # type: (Iterable[Text], Text) -> bool
    """Test if a path matches any of a list of patterns (case insensitive).

    Will return `True` if ``patterns`` is an empty list.

    Note:
        Emptiness is decided by truthiness, so only falsy values (an empty
        list or tuple, `None`) count as "no patterns"; an exhausted or
        empty generator does not.

    Arguments:
        patterns (list): A list of wildcard pattern, e.g ``["*.py",
            "*.pyc"]``
        path (str): A resource path.

    Returns:
        bool: `True` if the path matches at least one of the patterns.

    """
    if not patterns:
        return True
    return any(imatch(pattern, path) for pattern in patterns)


def get_matcher(patterns, case_sensitive, accept_prefix=False):
    # type: (Iterable[Text], bool, bool) -> Callable[[Text], bool]
    """Get a callable that matches paths against the given patterns.

    Arguments:
        patterns (list): A list of wildcard pattern. e.g. ``["*.py",
            "*.pyc"]``
        case_sensitive (bool): If ``True``, then the callable will be case
            sensitive, otherwise it will be case insensitive.
        accept_prefix (bool): If ``True``, the name is
            not required to match the patterns themselves
            but only need to be a prefix of a string that does.

    Returns:
        callable: a matcher that will return `True` if the paths given as
        an argument matches any of the given patterns, or if no patterns
        exist.

    Example:
        >>> from fs import glob
        >>> is_python = glob.get_matcher(['*.py'], True)
        >>> is_python('__init__.py')
        True
        >>> is_python('foo.txt')
        False

    """
    # Materialise the patterns once. The returned matcher is called many
    # times, so a one-shot iterable (e.g. a generator) would be exhausted
    # after the first call, and an empty one would not be recognised as
    # "no patterns" by a plain truthiness test.
    patterns = list(patterns or ())
    if not patterns:
        return lambda path: True

    if accept_prefix:
        # Besides each pattern itself, accept every leading run of its
        # components, both with and without a trailing "/".
        expanded = []
        for pattern in patterns:
            components = _split_pattern_by_sep(pattern)
            for end in range(1, len(components)):
                prefix = "/".join(components[:end])
                expanded.append(prefix)
                expanded.append(prefix + "/")
            expanded.append(pattern)
        patterns = expanded

    matcher = match_any if case_sensitive else imatch_any
    return partial(matcher, patterns)


class Globber(object):
    """A generator of glob results.

    Iterating an instance walks the filesystem and yields a
    `~fs.glob.GlobMatch` for every resource whose path matches the pattern.
    The `count`, `count_lines` and `remove` methods perform their own walk
    each time they are called.
    """

    def __init__(
        self,
        fs,
        pattern,
        path="/",
        namespaces=None,
        case_sensitive=True,
        exclude_dirs=None,
    ):
        # type: (FS, str, str, Optional[List[str]], bool, Optional[List[str]]) -> None
        """Create a new Globber instance.

        Arguments:
            fs (~fs.base.FS): A filesystem object
            pattern (str): A glob pattern, e.g. ``"**/*.py"``
            path (str): A path to a directory in the filesystem.
            namespaces (list): A list of additional info namespaces.
            case_sensitive (bool): If ``True``, the path matching will be
                case *sensitive* i.e. ``"FOO.py"`` and ``"foo.py"`` will be
                different, otherwise path matching will be case *insensitive*.
            exclude_dirs (list): A list of patterns to exclude when searching,
                e.g. ``["*.git"]``.

        """
        self.fs = fs
        self.pattern = pattern
        self.path = path
        self.namespaces = namespaces
        self.case_sensitive = case_sensitive
        self.exclude_dirs = exclude_dirs

    def __repr__(self):
        # Each keyword is passed as a (value, default) pair.
        # NOTE(review): presumably `make_repr` omits arguments that equal
        # their default -- confirm against `fs._repr`.
        return make_repr(
            self.__class__.__name__,
            self.fs,
            self.pattern,
            path=(self.path, "/"),
            namespaces=(self.namespaces, None),
            case_sensitive=(self.case_sensitive, True),
            exclude_dirs=(self.exclude_dirs, None),
        )

    def _make_iter(self, search="breadth", namespaces=None):
        # type: (str, Optional[List[str]]) -> Iterator[GlobMatch]
        """Walk the filesystem and yield a `GlobMatch` for each matching path.

        Arguments:
            search (str): Walk order passed through to the walker
                (``"breadth"`` or ``"depth"``).
            namespaces (list, optional): Info namespaces to request; when
                falsy, the namespaces given to the constructor are used.

        Yields:
            GlobMatch: The matched path (directories carry a trailing
            ``/``) and its info object.

        """
        cache_key = (self.pattern, self.case_sensitive)
        try:
            levels, re_pattern = _PATTERN_CACHE[cache_key]
        except KeyError:
            levels, re_str = _translate_glob(self.pattern)
            re_pattern = re.compile(re_str, 0 if self.case_sensitive else re.IGNORECASE)
            # Store the compiled pattern (as `match` / `imatch` do) so that
            # later iterations and other globbers can reuse it.
            _PATTERN_CACHE[cache_key] = (levels, re_pattern)

        for path, info in self.fs.walk.info(
            path=self.path,
            namespaces=namespaces or self.namespaces,
            max_depth=levels,
            search=search,
            exclude_dirs=self.exclude_dirs,
        ):
            if info.is_dir:
                # Directories are matched (and reported) with a trailing
                # separator, which lets patterns ending in "/" select them.
                path += "/"
            if re_pattern.match(path):
                yield GlobMatch(path, info)

    def __iter__(self):
        # type: () -> Iterator[GlobMatch]
        """Get an iterator of :class:`fs.glob.GlobMatch` objects."""
        return self._make_iter()

    def count(self):
        # type: () -> Counts
        """Count files / directories / data in matched paths.

        The ``size`` of every matched resource, directories included, is
        added to ``data``.

        Example:
            >>> my_fs.glob('**/*.py').count()
            Counts(files=2, directories=0, data=55)

        Returns:
            `~Counts`: A named tuple containing results.

        """
        directories = 0
        files = 0
        data = 0
        # NOTE(review): the "details" namespace is requested presumably
        # because `info.size` is read from it -- confirm against `fs.info`.
        for _path, info in self._make_iter(namespaces=["details"]):
            if info.is_dir:
                directories += 1
            else:
                files += 1
            data += info.size
        return Counts(directories=directories, files=files, data=data)

    def count_lines(self):
        # type: () -> LineCounts
        """Count the lines in the matched files.

        Matched directories are skipped. A line counts as non-blank when
        something remains after stripping trailing whitespace.

        Returns:
            `~LineCounts`: A named tuple containing line counts.

        Example:
            >>> my_fs.glob('**/*.py').count_lines()
            LineCounts(lines=4, non_blank=3)

        """
        lines = 0
        non_blank = 0
        for path, info in self._make_iter():
            if info.is_file:
                # Close each file as soon as it has been read, rather than
                # leaving open handles behind for the garbage collector.
                with self.fs.open(path, "rb") as binary_file:
                    for line in binary_file:
                        lines += 1
                        if line.rstrip():
                            non_blank += 1
        return LineCounts(lines=lines, non_blank=non_blank)

    def remove(self):
        # type: () -> int
        """Remove all matched paths.

        Matched directories are removed together with their contents.

        Returns:
            int: Number of file and directories removed.

        Example:
            >>> my_fs.glob('**/*.pyc').remove()
            2

        """
        removes = 0
        # NOTE(review): a depth-first walk is requested, presumably so that
        # nested matches are handled before their parent directories --
        # confirm against the walker documentation.
        for path, info in self._make_iter(search="depth"):
            if info.is_dir:
                self.fs.removetree(path)
            else:
                self.fs.remove(path)
            removes += 1
        return removes


class BoundGlobber(object):
    """A `~fs.glob.Globber` object bound to a filesystem.

    An instance of this object is available on every Filesystem object
    as the `~fs.base.FS.glob` property.

    """

    __slots__ = ["fs"]

    def __init__(self, fs):
        # type: (FS) -> None
        """Create a new bound Globber.

        Arguments:
            fs (FS): A filesystem object to bind to.

        """
        self.fs = fs

    def __repr__(self):
        return make_repr(self.__class__.__name__, self.fs)

    def __call__(
        self, pattern, path="/", namespaces=None, case_sensitive=True, exclude_dirs=None
    ):
        # type: (str, str, Optional[List[str]], bool, Optional[List[str]]) -> Globber
        """Match resources on the bound filesystem against a glob pattern.

        Arguments:
            pattern (str): A glob pattern, e.g. ``"**/*.py"``
            path (str): A path to a directory in the filesystem; passed
                through to `Globber` unchanged.
            namespaces (list): A list of additional info namespaces.
            case_sensitive (bool): If ``True``, the path matching will be
                case *sensitive* i.e. ``"FOO.py"`` and ``"foo.py"`` will
                be different, otherwise path matching will be case **insensitive**.
            exclude_dirs (list): A list of patterns to exclude when searching,
                e.g. ``["*.git"]``.

        Returns:
            `Globber`: An object that may be queried for the glob matches.

        """
        return Globber(
            self.fs,
            pattern,
            path,
            namespaces=namespaces,
            case_sensitive=case_sensitive,
            exclude_dirs=exclude_dirs,
        )