From 15fa84cf7e35608a0a7a7bd4b0f9e5baeb90742d Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Fri, 3 Dec 2021 17:46:07 +0100
Subject: [PATCH] debian: Update last_update for a package when required

A Debian package can have sources coming from multiple suites,
so we need to make sure the last_update field of the ListedOrigin
model is updated when the suite currently being processed has a
more recent modification time for its sources index.

Related to T2400
---
 swh/lister/debian/lister.py            |  9 +++++++++
 swh/lister/debian/tests/test_lister.py | 20 ++++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py
index 25372357..d31a33dd 100644
--- a/swh/lister/debian/lister.py
+++ b/swh/lister/debian/lister.py
@@ -255,6 +255,15 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
                 }
             )
 
+            if self.listed_origins[origin_url].last_update is None or (
+                self.last_sources_update is not None
+                and self.last_sources_update  # type: ignore
+                > self.listed_origins[origin_url].last_update
+            ):
+                # bump the Debian package's last_update when the sources index
+                # currently being processed has a more recent modification date
+                self.listed_origins[origin_url].last_update = self.last_sources_update
+
             # add package version key to the set of found versions
             self.package_versions[package_name].add(package_version_key)
 
diff --git a/swh/lister/debian/tests/test_lister.py b/swh/lister/debian/tests/test_lister.py
index 1ce5de2d..0aa97041 100644
--- a/swh/lister/debian/tests/test_lister.py
+++ b/swh/lister/debian/tests/test_lister.py
@@ -4,7 +4,8 @@
 # See top-level LICENSE file for more information
 
 from collections import defaultdict
-from email.utils import formatdate
+from datetime import datetime
+from email.utils import formatdate, parsedate_to_datetime
 import os
 from pathlib import Path
 from typing import Dict, List, Set, Tuple
@@ -38,6 +39,7 @@ from swh.scheduler.interface import SchedulerInterface
 _mirror_url = "http://deb.debian.org/debian"
 _suites = ["stretch", "buster", "bullseye"]
 _components = ["main", "foo"]
+_last_modified = {}
 
 SourcesText = str
 
@@ -70,19 +72,22 @@ def _init_test(
 
     suite_pkg_info: DebianSuitePkgSrcInfo = {}
 
-    for suite, sources in debian_sources.items():
+    for i, (suite, sources) in enumerate(debian_sources.items()):
+        # make sure a different date is generated for each suite
+        last_modified = formatdate(timeval=datetime.now().timestamp() + i, usegmt=True)
         suite_pkg_info[suite] = defaultdict(list)
         for pkg_src in Sources.iter_paragraphs(sources):
             suite_pkg_info[suite][pkg_src["Package"]].append(pkg_src)
+            # remember the date of the latest suite providing this package
+            global _last_modified
+            _last_modified[pkg_src["Package"]] = last_modified
 
         for idx_url, compression in lister.debian_index_urls(suite, _components[0]):
             if compression:
                 requests_mock.get(idx_url, status_code=404)
             else:
                 requests_mock.get(
-                    idx_url,
-                    text=sources,
-                    headers={"Last-Modified": formatdate(usegmt=True)},
+                    idx_url, text=sources, headers={"Last-Modified": last_modified},
                 )
 
         for idx_url, _ in lister.debian_index_urls(suite, _components[1]):
@@ -127,7 +132,10 @@ def _check_listed_origins(
                     ]
 
                     assert filtered_origins
-                    assert filtered_origins[0].last_update is not None
+                    expected_last_update = parsedate_to_datetime(
+                        _last_modified[pkg_src["Package"]]
+                    )
+                    assert filtered_origins[0].last_update == expected_last_update
                     packages = filtered_origins[0].extra_loader_arguments["packages"]
                     # check the version info are available
                     assert package_version_key in packages
-- 
GitLab