From 6a38c4ad047df00f8c41cf2072b18babc68522f5 Mon Sep 17 00:00:00 2001
From: Pierre-Yves David <pierre-yves.david@ens-lyon.org>
Date: Wed, 28 Sep 2022 19:28:23 +0200
Subject: [PATCH] from_disk: fix some of the pattern checking logic

The patterns were validated from $PWD and later applied to paths relative
to `root_path`. So we shuffle a bit of code to test them against
`root_path`. We make absolute patterns relative in the same go.

This code comes from swh-scanner and should probably get an
overhaul; however, for now we start by making it not broken.
---
 swh/model/from_disk.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 86ecf123..8795b1fb 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -300,16 +300,20 @@ def extract_regex_objs(
 
     Args:
       root_path (bytes): path to the root directory
-      patterns (list of byte): patterns to match
+      patterns (list of byte): shell patterns to match
 
      Yields:
         an SRE_Pattern object
     """
     absolute_root_path = os.path.abspath(root_path)
     for pattern in patterns:
-        for path in glob.glob(pattern):
-            absolute_path = os.path.abspath(path)
-            if not absolute_path.startswith(absolute_root_path):
+        if os.path.isabs(pattern):
+            pattern = os.path.relpath(pattern, root_path)
+        # python 3.10 has a `root_dir` argument for glob, but not the previous
+        # version. So we adjust the pattern
+        test_pattern = os.path.join(absolute_root_path, pattern)
+        for path in glob.glob(test_pattern):
+            if os.path.isabs(path) and not path.startswith(absolute_root_path):
                 error_msg = (
                     b'The path "' + path + b'" is not a subdirectory or relative '
                     b'to the root directory path: "' + root_path + b'"'
@@ -326,7 +330,7 @@ def ignore_directories_patterns(root_path: bytes, patterns: Iterable[bytes]):
 
     Args:
       root_path (bytes): path of the root directory
-      patterns (list of byte): patterns to ignore
+      patterns (list of bytes): patterns to ignore
 
     Returns:
       a directory filter for :func:`directory_to_objects`
-- 
GitLab