Skip to content
Snippets Groups Projects
Commit 26ee05f3 authored by Thibault Allançon's avatar Thibault Allançon
Browse files

cache: put by-date/ entries in direntry cache

Since those entries are updated in the background, we invalidate the
cache on a regular basis.

Closes T2864. This should also fix the second problem of T2830, namely
that get_entries(by_date, offset) was not reliable because the offset
assumed the entries stayed the same between calls.
parent b59cf2a1
No related branches found
No related tags found
No related merge requests found
......@@ -353,6 +353,10 @@ class DirEntryCache:
self.move_to_end(key)
return value
def __delitem__(self, key: Any) -> None:
    """Evict *key* from the cache, deducting its size from the RAM tally."""
    # Look the entry up first so its size can be subtracted before removal.
    evicted = self[key]
    self.used_ram -= self.sizeof(evicted)
    super().__delitem__(key)
def __setitem__(self, key: Any, value: Any) -> None:
if key in self:
self.move_to_end(key)
......@@ -363,7 +367,6 @@ class DirEntryCache:
while self.used_ram > self.max_ram and self:
oldest = next(iter(self))
self.used_ram -= self.sizeof(oldest)
del self[oldest]
def __init__(self, conf: Dict[str, Any]):
......@@ -390,12 +393,11 @@ class DirEntryCache:
if isinstance(direntry, (CacheDir, CacheDir.ArtifactShardBySwhid, OriginDir)):
# The `cache/` and `origin/` directories are populated on the fly
pass
elif (
isinstance(direntry, RevisionHistoryShardByDate)
and not direntry.is_status_done
):
# The `by-date/' directory is populated in parallel so only cache it
# once it has finished fetching all data from the API
pass
else:
self.lru_cache[direntry.inode] = entries
def invalidate(self, direntry: FuseDirEntry) -> None:
    """Drop the cached entries for *direntry*, silently ignoring misses.

    Uses `del` (not `.pop`) so the LRU's `__delitem__` override runs and
    keeps the RAM accounting consistent.
    """
    inode = direntry.inode
    try:
        del self.lru_cache[inode]
    except KeyError:
        # Nothing cached for this inode — nothing to invalidate.
        pass
......@@ -220,22 +220,36 @@ class RevisionHistory(FuseDirEntry):
swhid: SWHID
async def prefill_caches(self) -> None:
async def prefill_by_date_cache(self, by_date_dir: FuseDirEntry) -> None:
    """Warm the metadata cache for every revision in this history.

    The `by-date/` direntry cache is invalidated every 100 API calls so
    readers see fresh entries while prefilling runs in the background,
    and once more at the end so the final state is always visible.
    """
    api_calls = 0
    for swhid in await self.fuse.get_history(self.swhid):
        # Skip revisions whose metadata is already cached.
        if await self.fuse.cache.metadata.get(swhid):
            continue
        await self.fuse.get_metadata(swhid)
        api_calls += 1
        # Periodically refresh the temporarily cached by-date/ direntry.
        if api_calls % 100 == 0:
            self.fuse.cache.direntry.invalidate(by_date_dir)
    # Make sure to expose the latest entries once prefilling is done.
    self.fuse.cache.direntry.invalidate(by_date_dir)
async def compute_entries(self) -> AsyncIterator[FuseEntry]:
# Run it concurrently because of the many API calls necessary
asyncio.create_task(self.prefill_caches())
yield self.create_child(
by_date_dir = self.create_child(
RevisionHistoryShardByDate,
name="by-date",
mode=int(EntryMode.RDONLY_DIR),
history_swhid=self.swhid,
)
# Run it concurrently because of the many API calls necessary
asyncio.create_task(self.prefill_by_date_cache(by_date_dir))
yield by_date_dir
yield self.create_child(
RevisionHistoryShardByHash,
name="by-hash",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment