Source code for fs.glob

"""Useful functions for working with glob patterns.
"""

from __future__ import unicode_literals

import typing
from functools import partial

import re
from collections import namedtuple

from ._repr import make_repr
from .lrucache import LRUCache
from .path import iteratepath


# A single glob result: the matched path paired with the info object that the
# filesystem walker produced for it.
GlobMatch = namedtuple("GlobMatch", ["path", "info"])
# Totals returned by `Globber.count`: number of matched files, number of
# matched directories, and the summed ``info.size`` of the matched files.
Counts = namedtuple("Counts", ["files", "directories", "data"])
# Totals returned by `Globber.count_lines`: every line read from the matched
# files, and the subset that is non-empty after ``rstrip()``.
LineCounts = namedtuple("LineCounts", ["lines", "non_blank"])

if typing.TYPE_CHECKING:
    from typing import (
        Iterator,
        List,
        Optional,
        Pattern,
        Text,
        Tuple,
        Iterable,
        Callable,
    )
    from .base import FS


# Module-wide cache of translated glob patterns.  Entries are keyed by
# ``(pattern, case_sensitive)`` and hold ``(levels, compiled_regex)`` as
# produced by `_translate_glob` followed by `re.compile`.
# NOTE(review): 1000 is presumably the maximum number of cached entries --
# confirm against `LRUCache`.
_PATTERN_CACHE = LRUCache(
    1000
)  # type: LRUCache[Tuple[Text, bool], Tuple[Optional[int], Pattern]]


def _split_pattern_by_sep(pattern):
    # type: (Text) -> List[Text]
    """Split a glob pattern into components at its directory separators (/).

    A ``/`` that appears after a ``[`` and before the next ``]`` (for
    example in ``[/]``) is kept inside its component instead of being
    treated as a separator.

    Arguments:
        pattern (str): A glob pattern.

    Returns:
        list: The components of the pattern, in order. An empty pattern
            yields a list holding a single empty string.

    """
    components = []
    start = 0
    in_brackets = False
    for position, char in enumerate(pattern):
        if char == "/" and not in_brackets:
            # Close the current component and start the next one just
            # after the separator.
            components.append(pattern[start:position])
            start = position + 1
        elif char == "[":
            in_brackets = True
        elif char == "]":
            in_brackets = False

    # Whatever follows the last separator is the final component.
    components.append(pattern[start:])
    return components


def _translate(pattern):
    # type: (Text) -> Text
    """Translate a glob pattern without '**' to a regular expression.

    ``*`` and ``?`` never match a ``/``, and neither does a negated
    bracket set (``[!...]``). A backslash is not an escape character:
    it is matched literally.

    Arguments:
        pattern (str): A glob pattern.

    Returns:
        str: A regex equivalent to the given pattern.

    Raises:
        ValueError: If the pattern contains ``**``.

    """
    i, n = 0, len(pattern)
    res = []
    while i < n:
        c = pattern[i]
        i = i + 1
        if c == "*":
            if i < n and pattern[i] == "*":
                raise ValueError("glob._translate does not support '**' patterns.")
            res.append("[^/]*")
        elif c == "?":
            res.append("[^/]")
        elif c == "[":
            # Locate the closing bracket. A "]" placed first in the set
            # (optionally after "!") is a literal member, not the end.
            j = i
            if j < n and pattern[j] == "!":
                j = j + 1
            if j < n and pattern[j] == "]":
                j = j + 1
            while j < n and pattern[j] != "]":
                j = j + 1
            if j >= n:
                # No closing bracket: treat the "[" as a literal character.
                res.append("\\[")
            else:
                # Escape backslashes so they stay literal, and escape a
                # literal "]" so it cannot terminate the regex character
                # class early. Without this, "[!]]" became "[^/]]" because
                # the "/" inserted below moves "]" out of first position.
                stuff = pattern[i:j].replace("\\", "\\\\").replace("]", "\\]")
                i = j + 1
                if stuff[0] == "!":
                    # Negated sets additionally exclude the separator.
                    stuff = "^/" + stuff[1:]
                elif stuff[0] == "^":
                    # A leading "^" is a literal member in glob syntax.
                    stuff = "\\" + stuff
                res.append("[%s]" % stuff)
        else:
            res.append(re.escape(c))
    return "".join(res)


def _translate_glob(pattern):
    # type: (Text) -> Tuple[Optional[int], Text]
    """Translate a glob pattern, possibly containing '**', to a regex.

    Each path component of the pattern is translated on its own with
    `_translate`; a component containing ``**`` is split around it and
    the pieces are joined so that they may span directory levels.

    Arguments:
        pattern (str): A glob pattern.

    Returns:
        Tuple[Optional[int], Text]: A pair ``(levels, regex)``. ``levels``
            is the number of "/" in the pattern plus one, or `None` when
            the pattern contains "**" and the depth is therefore
            unbounded. ``regex`` is the regular expression source.

    """
    is_recursive = False
    pieces = []
    for component in iteratepath(pattern):
        if "**" not in component:
            pieces.append("/" + _translate(component))
            continue
        is_recursive = True
        translated = [_translate(part) for part in component.split("**")]
        pieces.append("/?" + ".*/?".join(translated))

    # A pattern ending in "/" only matches paths that also end in "/".
    if pattern.endswith("/"):
        terminator = "/$"
    else:
        terminator = "$"
    re_glob = "(?ms)^" + "".join(pieces) + terminator

    if is_recursive:
        levels = None
    else:
        levels = pattern.count("/") + 1
    return levels, re_glob


def match(pattern, path):
    # type: (str, str) -> bool
    """Test a path against a glob pattern (case sensitive).

    The compiled pattern is looked up in, and on a miss stored into,
    the module-level pattern cache.

    Arguments:
        pattern (str): A glob pattern.
        path (str): A path. A non-empty path that does not start with
            "/" has one prepended before matching.

    Returns:
        bool: ``True`` if the path matches the pattern.

    Example:
        >>> from fs.glob import match
        >>> match("**/*.py", "/fs/glob.py")
        True

    """
    cache_key = (pattern, True)
    try:
        _levels, compiled = _PATTERN_CACHE[cache_key]
    except KeyError:
        depth, regex_source = _translate_glob(pattern)
        compiled = re.compile(regex_source)
        _PATTERN_CACHE[cache_key] = (depth, compiled)

    if path and path[0] != "/":
        path = "/" + path
    return compiled.match(path) is not None
def imatch(pattern, path):
    # type: (str, str) -> bool
    """Test a path against a glob pattern (case insensitive).

    The compiled pattern is looked up in, and on a miss stored into,
    the module-level pattern cache.

    Arguments:
        pattern (str): A glob pattern.
        path (str): A path. A non-empty path that does not start with
            "/" has one prepended before matching.

    Returns:
        bool: ``True`` if the path matches the pattern.

    """
    cache_key = (pattern, False)
    try:
        _levels, compiled = _PATTERN_CACHE[cache_key]
    except KeyError:
        depth, regex_source = _translate_glob(pattern)
        compiled = re.compile(regex_source, re.IGNORECASE)
        _PATTERN_CACHE[cache_key] = (depth, compiled)

    if path and path[0] != "/":
        path = "/" + path
    return compiled.match(path) is not None
def match_any(patterns, path):
    # type: (Iterable[Text], Text) -> bool
    """Check whether a path matches at least one of several patterns.

    An empty ``patterns`` list is treated as "match everything" and
    yields `True`.

    Arguments:
        patterns (list): A list of wildcard pattern, e.g ``["*.py",
            "*.pyc"]``
        path (str): A resource path.

    Returns:
        bool: `True` if the path matches at least one of the patterns.

    """
    return not patterns or any(match(candidate, path) for candidate in patterns)
def imatch_any(patterns, path):
    # type: (Iterable[Text], Text) -> bool
    """Check whether a path matches any of several patterns, ignoring case.

    An empty ``patterns`` list is treated as "match everything" and
    yields `True`.

    Arguments:
        patterns (list): A list of wildcard pattern, e.g ``["*.py",
            "*.pyc"]``
        path (str): A resource path.

    Returns:
        bool: `True` if the path matches at least one of the patterns.

    """
    return not patterns or any(imatch(candidate, path) for candidate in patterns)
def get_matcher(patterns, case_sensitive, accept_prefix=False):
    # type: (Iterable[Text], bool, bool) -> Callable[[Text], bool]
    """Build a callable that tests paths against the given patterns.

    Arguments:
        patterns (list): A list of wildcard pattern. e.g. ``["*.py",
            "*.pyc"]``
        case_sensitive (bool): If ``True``, then the callable will be case
            sensitive, otherwise it will be case insensitive.
        accept_prefix (bool): If ``True``, the name is
            not required to match the patterns themselves
            but only need to be a prefix of a string that does.

    Returns:
        callable: a matcher that will return `True` if the paths given as
        an argument matches any of the given patterns, or if no patterns
        exist.

    Example:
        >>> from fs import glob
        >>> is_python = glob.get_matcher(['*.py'], True)
        >>> is_python('__init__.py')
        True
        >>> is_python('foo.txt')
        False

    """
    if not patterns:
        return lambda path: True

    if accept_prefix:
        expanded = []
        for pattern in patterns:
            components = _split_pattern_by_sep(pattern)
            # Every leading run of components is accepted too, both with
            # and without a trailing separator.
            for stop in range(1, len(components)):
                prefix = "/".join(components[:stop])
                expanded.extend((prefix, prefix + "/"))
            expanded.append(pattern)
        patterns = expanded

    if case_sensitive:
        return partial(match_any, patterns)
    return partial(imatch_any, patterns)
class Globber(object):
    """A generator of glob results."""

    def __init__(
        self,
        fs,
        pattern,
        path="/",
        namespaces=None,
        case_sensitive=True,
        exclude_dirs=None,
    ):
        # type: (FS, str, str, Optional[List[str]], bool, Optional[List[str]]) -> None
        """Create a new Globber instance.

        Arguments:
            fs (~fs.base.FS): A filesystem object
            pattern (str): A glob pattern, e.g. ``"**/*.py"``
            path (str): A path to a directory in the filesystem.
            namespaces (list): A list of additional info namespaces.
            case_sensitive (bool): If ``True``, the path matching will be
                case *sensitive* i.e. ``"FOO.py"`` and ``"foo.py"`` will be
                different, otherwise path matching will be case *insensitive*.
            exclude_dirs (list): A list of patterns to exclude when searching,
                e.g. ``["*.git"]``.

        """
        self.fs = fs
        self.pattern = pattern
        self.path = path
        self.namespaces = namespaces
        self.case_sensitive = case_sensitive
        self.exclude_dirs = exclude_dirs

    def __repr__(self):
        return make_repr(
            self.__class__.__name__,
            self.fs,
            self.pattern,
            path=(self.path, "/"),
            namespaces=(self.namespaces, None),
            case_sensitive=(self.case_sensitive, True),
            exclude_dirs=(self.exclude_dirs, None),
        )

    def _make_iter(self, search="breadth", namespaces=None):
        # type: (str, List[str]) -> Iterator[GlobMatch]
        """Walk the filesystem and yield every resource matching the pattern.

        Arguments:
            search (str): Search order passed through to the walker
                (``"breadth"`` here; `remove` uses ``"depth"``).
            namespaces (list): Info namespaces to request; falls back to
                the namespaces given to the constructor when empty.

        Yields:
            GlobMatch: ``(path, info)`` for each match. Directory paths
                carry a trailing "/".

        """
        cache_key = (self.pattern, self.case_sensitive)
        try:
            levels, re_pattern = _PATTERN_CACHE[cache_key]
        except KeyError:
            levels, re_str = _translate_glob(self.pattern)
            re_pattern = re.compile(re_str, 0 if self.case_sensitive else re.IGNORECASE)
            # Remember the compiled pattern, as `match` / `imatch` do, so
            # repeated iteration does not translate and compile it again.
            _PATTERN_CACHE[cache_key] = (levels, re_pattern)

        for path, info in self.fs.walk.info(
            path=self.path,
            namespaces=namespaces or self.namespaces,
            max_depth=levels,
            search=search,
            exclude_dirs=self.exclude_dirs,
        ):
            if info.is_dir:
                # Directories are matched (and reported) with a trailing
                # separator so that patterns ending in "/" can select them.
                path += "/"
            if re_pattern.match(path):
                yield GlobMatch(path, info)

    def __iter__(self):
        # type: () -> Iterator[GlobMatch]
        """Get an iterator of :class:`fs.glob.GlobMatch` objects."""
        return self._make_iter()

    def count(self):
        # type: () -> Counts
        """Count files / directories / data in matched paths.

        Example:
            >>> my_fs.glob('**/*.py').count()
            Counts(files=2, directories=0, data=55)

        Returns:
            `~Counts`: A named tuple containing results.

        """
        directories = 0
        files = 0
        data = 0
        # The "details" namespace is requested because ``info.size`` is read.
        for _path, info in self._make_iter(namespaces=["details"]):
            if info.is_dir:
                directories += 1
            else:
                files += 1
                data += info.size
        return Counts(directories=directories, files=files, data=data)

    def count_lines(self):
        # type: () -> LineCounts
        """Count the lines in the matched files.

        Returns:
            `~LineCounts`: A named tuple containing line counts.

        Example:
            >>> my_fs.glob('**/*.py').count_lines()
            LineCounts(lines=4, non_blank=3)

        """
        lines = 0
        non_blank = 0
        for path, info in self._make_iter():
            if info.is_file:
                # Use a context manager so each file is closed as soon as
                # it has been read, instead of leaking one handle per match.
                with self.fs.open(path, "rb") as binary_file:
                    for line in binary_file:
                        lines += 1
                        if line.rstrip():
                            non_blank += 1
        return LineCounts(lines=lines, non_blank=non_blank)

    def remove(self):
        # type: () -> int
        """Remove all matched paths.

        Returns:
            int: Number of file and directories removed.

        Example:
            >>> my_fs.glob('**/*.pyc').remove()
            2

        """
        removes = 0
        for path, info in self._make_iter(search="depth"):
            if info.is_dir:
                self.fs.removetree(path)
            else:
                self.fs.remove(path)
            removes += 1
        return removes
class BoundGlobber(object):
    """A `~fs.glob.Globber` factory bound to one filesystem.

    An instance of this object is available on every Filesystem object
    as the `~fs.base.FS.glob` property.

    """

    __slots__ = ["fs"]

    def __init__(self, fs):
        # type: (FS) -> None
        """Create a new bound Globber.

        Arguments:
            fs (FS): A filesystem object to bind to.

        """
        self.fs = fs

    def __repr__(self):
        class_name = self.__class__.__name__
        return make_repr(class_name, self.fs)

    def __call__(
        self, pattern, path="/", namespaces=None, case_sensitive=True, exclude_dirs=None
    ):
        # type: (str, str, Optional[List[str]], bool, Optional[List[str]]) -> Globber
        """Match resources on the bound filesystem against a glob pattern.

        Arguments:
            pattern (str): A glob pattern, e.g. ``"**/*.py"``
            path (str): Passed to `Globber` as its ``path`` argument.
            namespaces (list): A list of additional info namespaces.
            case_sensitive (bool): If ``True``, the path matching will be
                case *sensitive* i.e. ``"FOO.py"`` and ``"foo.py"`` will
                be different, otherwise path matching will be case
                **insensitive**.
            exclude_dirs (list): A list of patterns to exclude when searching,
                e.g. ``["*.git"]``.

        Returns:
            `Globber`: An object that may be queried for the glob matches.

        """
        options = {
            "namespaces": namespaces,
            "case_sensitive": case_sensitive,
            "exclude_dirs": exclude_dirs,
        }
        return Globber(self.fs, pattern, path, **options)