Skip to content
Snippets Groups Projects
Verified Commit 72a8c2ed authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

deposit moderation view: Improve deposit types visualization

This makes the deposit's type explicit, either 'code' or 'meta'. The uri column is now
filled depending on that type. To do so, the deposit's raw xml content is parsed to
try and extract that information, which may or may not have been provided.

If not found, this uses the same heuristics as before (either use the origin_url if
provided or extract the origin out of the swhid_context).

Related to T3680
parent 57a83b9a
No related branches found
Tags v0.0.375
1 merge request!739deposit moderation view: Improve deposit types visualization
/**
* Copyright (C) 2018-2021 The Software Heritage developers
* Copyright (C) 2018-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
/**
 * Render a SWHID table cell as a link to its browse page.
 *
 * The value is only transformed when `type` is 'display' and `data` is a
 * non-empty value starting with 'swh'; in every other case `data` is
 * returned untouched.
 *
 * @param {?string} data - cell value, expected to be a SWHID (possibly with
 *   ';'-separated qualifiers) or a null/empty value.
 * @param {string} type - render type requested by the caller.
 * @returns {?string} an HTML anchor for display, otherwise `data` as received.
 */
function genSwhLink(data, type) {
  if (type === 'display' && data && data.startsWith('swh')) {
    const browseUrl = Urls.browse_swhid(data);
    // Put each qualifier on its own line so long SWHIDs stay readable
    const formattedSWHID = data.replace(/;/g, ';<br/>');
    return `<a href="${browseUrl}">${formattedSWHID}</a>`;
  }
  return data;
}
/**
 * Render a URL table cell as a clickable link.
 *
 * @param {?string} data - cell value, expected to be a URL or a null/empty value.
 * @param {string} type - render type requested by the caller.
 * @returns {?string} an HTML anchor whose target and label are the
 *   URI-encoded value when `type` is 'display' and `data` is truthy;
 *   otherwise `data` exactly as received.
 */
function genLink(data, type) {
  // Guard clause: anything that is not a non-empty display value passes through
  if (type !== 'display' || !data) {
    return data;
  }
  const sData = encodeURI(data);
  return `<a href="${sData}">${sData}</a>`;
}
......@@ -55,23 +61,14 @@ export function initDepositAdmin(username, isStaff) {
name: 'id'
},
{
data: 'swhid_context',
name: 'swhid_context',
data: 'type',
name: 'type'
},
{
data: 'uri',
name: 'uri',
render: (data, type, row) => {
if (data && type === 'display') {
const originPattern = ';origin=';
const originPatternIdx = data.indexOf(originPattern);
if (originPatternIdx !== -1) {
let originUrl = data.slice(originPatternIdx + originPattern.length);
const nextSepPattern = ';';
const nextSepPatternIdx = originUrl.indexOf(nextSepPattern);
if (nextSepPatternIdx !== -1) { /* Remove extra context */
originUrl = originUrl.slice(0, nextSepPatternIdx);
}
return `<a href="${originUrl}">${originUrl}</a>`;
}
}
return data;
return genLink(data, type);
}
},
{
......
/**
* Copyright (C) 2020-2021 The Software Heritage developers
* Copyright (C) 2020-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
......@@ -14,30 +14,36 @@ describe('Test admin deposit page', function() {
responseDeposits = [
{
'id': 614,
'type': 'code',
'external_id': 'ch-de-1',
'reception_date': '2020-05-18T13:48:27Z',
'status': 'done',
'status_detail': null,
'swhid': 'swh:1:dir:ef04a768',
'swhid_context': 'swh:1:dir:ef04a768;origin=https://w.s.o/c-d-1;visit=swh:1:snp:b234be1e;anchor=swh:1:rev:d24a75c9;path=/'
'swhid_context': 'swh:1:dir:ef04a768;origin=https://w.s.o/c-d-1;visit=swh:1:snp:b234be1e;anchor=swh:1:rev:d24a75c9;path=/',
'uri': 'https://w.s.o/c-d-1'
},
{
'id': 613,
'type': 'code',
'external_id': 'ch-de-2',
'reception_date': '2020-05-18T11:20:16Z',
'status': 'done',
'status_detail': null,
'swhid': 'swh:1:dir:181417fb',
'swhid_context': 'swh:1:dir:181417fb;origin=https://w.s.o/c-d-2;visit=swh:1:snp:8c32a2ef;anchor=swh:1:rev:3d1eba04;path=/'
'swhid_context': 'swh:1:dir:181417fb;origin=https://w.s.o/c-d-2;visit=swh:1:snp:8c32a2ef;anchor=swh:1:rev:3d1eba04;path=/',
'uri': 'https://w.s.o/c-d-2'
},
{
'id': 612,
'type': 'code',
'external_id': 'ch-de-3',
'reception_date': '2020-05-18T11:20:16Z',
'status': 'rejected',
'status_detail': 'incomplete deposit!',
'swhid': null,
'swhid_context': null
'swhid_context': null,
'uri': null
}
];
// those are computed from the
......@@ -87,6 +93,8 @@ describe('Test admin deposit page', function() {
assert.isNotNull(deposit);
assert.isNotNull(responseDeposit);
expect(deposit.id).to.be.equal(responseDeposit['id']);
expect(deposit.uri).to.be.equal(responseDeposit['uri']);
expect(deposit.type).to.be.equal(responseDeposit['type']);
expect(deposit.external_id).to.be.equal(responseDeposit['external_id']);
expect(deposit.status).to.be.equal(responseDeposit['status']);
expect(deposit.status_detail).to.be.equal(responseDeposit['status_detail']);
......
# Copyright (C) 2018-2021 The Software Heritage developers
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -13,7 +13,11 @@ from django.shortcuts import render
from swh.web.admin.adminurls import admin_route
from swh.web.auth.utils import ADMIN_LIST_DEPOSIT_PERMISSION
from swh.web.common.utils import get_deposits_list
from swh.web.common.utils import (
get_deposits_list,
parse_swh_deposit_origin,
parse_swh_metadata_provenance,
)
def _can_list_deposits(user):
......@@ -70,9 +74,11 @@ def _admin_deposit_list(request):
data = paginator.page(page).object_list
table_data["recordsTotal"] = deposits_count
table_data["recordsFiltered"] = len(deposits)
table_data["data"] = [
{
data_list = []
for d in data:
data_dict = {
"id": d["id"],
"type": d["type"],
"external_id": d["external_id"],
"reception_date": d["reception_date"],
"status": d["status"],
......@@ -80,13 +86,39 @@ def _admin_deposit_list(request):
"swhid": d["swhid"],
"swhid_context": d["swhid_context"],
}
for d in data
]
provenance = None
raw_metadata = d["raw_metadata"]
# Try to determine provenance out of the raw metadata
if raw_metadata and d["type"] == "meta": # metadata provenance
provenance = parse_swh_metadata_provenance(d["raw_metadata"])
elif raw_metadata and d["type"] == "code":
provenance = parse_swh_deposit_origin(raw_metadata)
if not provenance and d["origin_url"]:
provenance = d["origin_url"]
# Finally, if still not found, we determine uri using the swhid
if not provenance and d["swhid_context"]:
# Trying to compute the origin as we did before in the js
from swh.model.swhids import QualifiedSWHID
swhid = QualifiedSWHID.from_string(d["swhid_context"])
provenance = swhid.origin
data_dict["uri"] = provenance # could be None
# This could be large. As this is not displayed yet, drop it to avoid
# cluttering the data dict
data_dict.pop("raw_metadata", None)
data_list.append(data_dict)
table_data["data"] = data_list
except Exception as exc:
sentry_sdk.capture_exception(exc)
table_data["error"] = (
"An error occurred while retrieving " "the list of deposits !"
)
table_data[
"error"
] = "An error occurred while retrieving the list of deposits !"
return JsonResponse(table_data)
# Copyright (C) 2017-2021 The Software Heritage developers
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -8,6 +8,7 @@ import os
import re
from typing import Any, Dict, List, Optional
import urllib.parse
from xml.etree import ElementTree
from bs4 import BeautifulSoup
from docutils.core import publish_parts
......@@ -447,3 +448,69 @@ def redirect_to_new_route(request, new_route, permanent=True):
request_path = resolve(request.path_info)
args = {**request_path.kwargs, **request.GET.dict()}
return redirect(reverse(new_route, query_params=args), permanent=permanent,)
# XML namespace prefixes used by the ElementPath queries below
NAMESPACES = {
    "swh": "https://www.softwareheritage.org/schema/2018/deposit",
    "schema": "http://schema.org/",
}


def parse_swh_metadata_provenance(raw_metadata: str) -> Optional[str]:
    """Parse swh metadata-provenance out of the raw metadata deposit. If found, returns the
    value, None otherwise.

    .. code-block:: xml

       <swh:deposit>
         <swh:metadata-provenance>
           <schema:url>https://example.org/metadata/url</schema:url>
         </swh:metadata-provenance>
       </swh:deposit>

    Args:
        raw_metadata: raw metadata out of deposits received

    Returns:
        Either the metadata provenance url (stripped of surrounding whitespace)
        if any or None otherwise

    Raises:
        xml.etree.ElementTree.ParseError: if raw_metadata is not well-formed XML

    """
    metadata = ElementTree.fromstring(raw_metadata)
    url = metadata.findtext(
        "swh:deposit/swh:metadata-provenance/schema:url", namespaces=NAMESPACES,
    )
    # findtext gives None when nothing matches and "" for an empty element;
    # pretty-printed documents may also pad the url with whitespace/newlines
    if url is not None:
        url = url.strip()
    return url or None


def parse_swh_deposit_origin(raw_metadata: str) -> Optional[str]:
    """Parses <swh:add_to_origin> and <swh:create_origin> from metadata document,
    if any. They are mutually exclusive and tested as such in the deposit.

    .. code-block:: xml

       <swh:deposit>
         <swh:create_origin>
           <swh:origin url='https://example.org/repo/software123/'/>
         </swh:create_origin>
       </swh:deposit>

    .. code-block:: xml

       <swh:deposit>
         <swh:add_to_origin>
           <swh:origin url='https://example.org/repo/software123/'/>
         </swh:add_to_origin>
       </swh:deposit>

    Args:
        raw_metadata: raw metadata out of deposits received

    Returns:
        The one not null (and not blank) if any, None otherwise

    Raises:
        xml.etree.ElementTree.ParseError: if raw_metadata is not well-formed XML

    """
    metadata = ElementTree.fromstring(raw_metadata)
    for origin_tag in ("create_origin", "add_to_origin"):
        elt = metadata.find(
            f"swh:deposit/swh:{origin_tag}/swh:origin[@url]", namespaces=NAMESPACES
        )
        if elt is None:
            continue
        url = elt.attrib["url"].strip()
        if url:
            # A blank url attribute is treated like a missing one
            return url
    return None
......@@ -30,19 +30,21 @@ See top-level LICENSE file for more information
<div>
Toggle column:
<a class="toggle-col" href="#" data-column="0">id</a> -
<a class="toggle-col" href="#" data-column="1">origin</a> -
<a class="toggle-col" href="#" data-column="2">reception date</a> -
<a class="toggle-col" href="#" data-column="3">status</a> -
<a class="toggle-col col-hidden" href="#" data-column="4">status detail</a> -
<a class="toggle-col col-hidden" href="#" data-column="5">directory</a> -
<a class="toggle-col col-hidden" href="#" data-column="6">directory with context</a>
<a class="toggle-col" href="#" data-column="1">type</a> -
<a class="toggle-col" href="#" data-column="2">uri</a> -
<a class="toggle-col" href="#" data-column="3">reception date</a> -
<a class="toggle-col" href="#" data-column="4">status</a> -
<a class="toggle-col col-hidden" href="#" data-column="5">status detail</a> -
<a class="toggle-col col-hidden" href="#" data-column="6">directory</a> -
<a class="toggle-col col-hidden" href="#" data-column="7">directory with context</a>
</div>
<br/>
<table id="swh-admin-deposit-list" class="table swh-table swh-table-striped" width="100%">
<thead>
<tr>
<th>id</th>
<th>origin</th>
<th>type</th>
<th>uri</th>
<th>reception date</th>
<th>status</th>
<th>status detail</th>
......
# Copyright (C) 2017-2021 The Software Heritage developers
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from base64 import b64encode
import datetime
from os.path import join
from urllib.parse import quote
import pytest
......@@ -314,3 +314,44 @@ def test_is_swh_web_staging(request_factory, server_name):
def test_is_swh_web_production(request_factory):
request = request_factory.get("/", SERVER_NAME=SWH_WEB_SERVER_NAME)
assert utils.is_swh_web_production(request)
@pytest.mark.parametrize(
    "raw_metadata_file,expected_url",
    [
        ("raw-metadata-provenance.xml", "https://example.org/metadata/provenance"),
        ("raw-metadata-no-swh.xml", None),
    ],
)
def test_parse_swh_provenance(datadir, raw_metadata_file, expected_url):
    """The provenance url is extracted from the fixture document when present,
    and None is returned when the document carries no swh deposit element."""
    with open(join(datadir, "deposit", raw_metadata_file), "r") as xml_file:
        xml_content = xml_file.read()

    assert utils.parse_swh_metadata_provenance(xml_content) == expected_url
@pytest.mark.parametrize(
    "raw_metadata_file,expected_url",
    [
        (
            "raw-metadata-create-origin.xml",
            "https://example.org/metadata/create-origin",
        ),
        (
            "raw-metadata-add-to-origin.xml",
            "https://example.org/metadata/add-to-origin",
        ),
        ("raw-metadata-no-swh.xml", None),
    ],
)
def test_parse_swh_origins(datadir, raw_metadata_file, expected_url):
    """The origin url is extracted from either a create_origin or an
    add_to_origin fixture, and None is returned when neither is present."""
    with open(join(datadir, "deposit", raw_metadata_file), "r") as xml_file:
        xml_content = xml_file.read()

    assert utils.parse_swh_deposit_origin(xml_content) == expected_url
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<title>Awesome Compiler</title>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<author>dudess</author>
<swh:deposit>
<swh:add_to_origin>
<swh:origin url="https://example.org/metadata/add-to-origin" />
</swh:add_to_origin>
</swh:deposit>
</entry>
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<title>Awesome Compiler</title>
<id>urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a</id>
<author>dudess</author>
<swh:deposit>
<swh:create_origin>
<swh:origin url="https://example.org/metadata/create-origin" />
</swh:create_origin>
</swh:deposit>
</entry>
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
<title>Awesome Compiler</title>
<id>urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a</id>
<author>dudess</author>
</entry>
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:schema="http://schema.org/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<title>Awesome Compiler</title>
<id>urn:uuid:1225c695-cfb8-4ebb-daaaa-80da344efa6a</id>
<author>dudess</author>
<swh:deposit>
<swh:metadata-provenance>
<schema:url>https://example.org/metadata/provenance</schema:url>
</swh:metadata-provenance>
</swh:deposit>
</entry>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment