From e09446a6f44b9070ea70a9760bc82dee0bbcb687 Mon Sep 17 00:00:00 2001 From: Daniele Serafini <me@danieleserafini.eu> Date: Tue, 15 Jun 2021 16:30:41 +0100 Subject: [PATCH] encode exclude patterns before extracting regex objects - add typing annotations to avoid such errors in the future Fixes T3383 --- swh/model/cli.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/swh/model/cli.py b/swh/model/cli.py index 52f98aa3..7e497608 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -5,7 +5,7 @@ import os import sys -from typing import Dict, List, Optional +from typing import Dict, Iterable, Optional # WARNING: do not import unnecessary things here to keep cli startup time under # control @@ -75,7 +75,7 @@ def swhid_of_file_content(data) -> CoreSWHID: ) -def model_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> Directory: +def model_of_dir(path: bytes, exclude_patterns: Iterable[bytes] = None) -> Directory: from swh.model.from_disk import accept_all_directories, ignore_directories_patterns dir_filter = ( @@ -87,7 +87,7 @@ def model_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> Directory return Directory.from_disk(path=path, dir_filter=dir_filter) -def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> CoreSWHID: +def swhid_of_dir(path: bytes, exclude_patterns: Iterable[bytes] = None) -> CoreSWHID: from swh.model.hashutil import hash_to_bytes obj = model_of_dir(path, exclude_patterns) @@ -150,7 +150,9 @@ def swhid_of_git_repo(path) -> CoreSWHID: ) -def identify_object(obj_type, follow_symlinks, exclude_patterns, obj) -> str: +def identify_object( + obj_type: str, follow_symlinks: bool, exclude_patterns: Iterable[bytes], obj +) -> str: from urllib.parse import urlparse if obj_type == "auto": @@ -177,9 +179,7 @@ def identify_object(obj_type, follow_symlinks, exclude_patterns, obj) -> str: if obj_type == "content": swhid = str(swhid_of_file(path)) elif obj_type == "directory": - swhid = str( 
- swhid_of_dir(path, [pattern.encode() for pattern in exclude_patterns]) - ) + swhid = str(swhid_of_dir(path, exclude_patterns)) elif obj_type == "origin": swhid = str(swhid_of_origin(obj)) elif obj_type == "snapshot": @@ -275,6 +275,9 @@ def identify( from functools import partial import logging + if exclude_patterns: + exclude_patterns = set(pattern.encode() for pattern in exclude_patterns) + if verify and len(objects) != 1: raise click.BadParameter("verification requires a single object") -- GitLab