diff --git a/swh/web/api/apiresponse.py b/swh/web/api/apiresponse.py index 3529af68528a566201eb6f73d43341b6be0c5d87..94ef92bd53c0fa2f9d282206693e094e3b1adb4c 100644 --- a/swh/web/api/apiresponse.py +++ b/swh/web/api/apiresponse.py @@ -26,8 +26,10 @@ from swh.web.utils.exc import ( BadInputExc, ForbiddenExc, LargePayloadExc, + MaskedObjectException, NotFoundExc, UnauthorizedExc, + masked_to_common_types, sentry_capture_exception, ) @@ -198,6 +200,11 @@ def error_response( doc_data: documentation data for HTML response """ + error_data = { + "exception": exception.__class__.__name__, + "reason": str(exception), + } + error_code = 500 if isinstance(exception, BadInputExc): error_code = 400 @@ -207,6 +214,9 @@ def error_response( error_code = 404 elif isinstance(exception, ForbiddenExc): error_code = 403 + elif isinstance(exception, MaskedObjectException): + error_code = 403 + error_data["masked"] = masked_to_common_types(exception) elif isinstance(exception, LargePayloadExc): error_code = 413 elif isinstance(exception, StorageDBError): @@ -217,10 +227,6 @@ def error_response( error_code = exception.status_code error_opts = {"status": error_code} - error_data = { - "exception": exception.__class__.__name__, - "reason": str(exception), - } if getattr(request, "accepted_media_type", None) == "text/html": error_data["reason"] = escape(error_data["reason"]) diff --git a/swh/web/api/tests/test_apiresponse.py b/swh/web/api/tests/test_apiresponse.py index e71d76de08318ef4ee1fca01aec7f922c1f3ba28..f54f563b1899a1e73571c633f710be818a264a6b 100644 --- a/swh/web/api/tests/test_apiresponse.py +++ b/swh/web/api/tests/test_apiresponse.py @@ -20,6 +20,9 @@ from swh.web.api.apiresponse import ( make_api_response, transform, ) +from swh.web.browse.tests.views.conftest import ( # noqa: F401 + make_masked_object_exception, +) from swh.web.tests.django_asserts import assert_contains from swh.web.tests.helpers import check_http_get_response, check_http_post_response from swh.web.utils 
# swh/web/api/tests/test_apiresponse.py
def test_error_response_handler_for_masked(
    mocker, api_client, content, make_masked_object_exception  # noqa: F811
):
    """The API error handler answers 403 and lists the masked SWHID.

    ``archive.stat_counters`` is patched to raise a masked-object exception
    built for the ``content`` fixture, then the stat counters endpoint is
    queried through the API client.
    """
    masked_swhid = f"swh:1:cnt:{content['sha1_git']}"

    archive_mock = mocker.patch("swh.web.api.views.stat.archive")
    archive_mock.stat_counters.side_effect = make_masked_object_exception(
        masked_swhid
    )

    response = api_client.get(reverse("api-1-stat-counters"))

    assert response.status_code == 403
    assert masked_swhid in response.data["masked"]


# swh/web/browse/tests/views/conftest.py
@pytest.fixture
def make_masked_object_exception():
    """Provide a factory building a ``MaskedObjectException`` for one SWHID.

    The factory takes the textual SWHID and returns an exception built from
    a mapping of the parsed SWHID to a single decision-pending status.
    """

    def _build_exception(swhid_str: str):
        # Imports are kept local to the factory.
        from uuid import UUID

        from swh.model.swhids import ExtendedSWHID
        from swh.storage.exc import MaskedObjectException
        from swh.storage.proxies.masking.db import MaskedState, MaskedStatus

        masked_swhid = ExtendedSWHID.from_string(swhid_str)
        pending_status = MaskedStatus(
            state=MaskedState.DECISION_PENDING,
            request=UUID("da785a27-7e59-4a35-b82a-a5ae3714407c"),
        )
        return MaskedObjectException({masked_swhid: [pending_status]})

    return _build_exception
# swh/web/browse/tests/views/test_content.py
def test_content_masked(
    client, archive_data, mocker, content, make_masked_object_exception
):
    """Browsing a masked content yields 403 in HTML, JSON and YAML."""
    masked_exc = make_masked_object_exception(f"swh:1:cnt:{content['sha1_git']}")
    archive_mock = mocker.patch("swh.web.browse.utils.archive")
    archive_mock.lookup_content.side_effect = masked_exc

    url = reverse("browse-content", url_args={"query_string": content["sha1"]})

    # The HTML answer must be rendered with the dedicated template.
    check_html_get_response(client, url, status_code=403, template_used="masked.html")
    for media_type in ("application/json", "application/yaml"):
        check_http_get_response(
            client, url, content_type=media_type, status_code=403
        )


# swh/web/browse/tests/views/test_directory.py
def test_directory_masked(client, mocker, directory, make_masked_object_exception):
    """Browsing a masked directory yields 403 in HTML, JSON and YAML."""
    masked_exc = make_masked_object_exception(f"swh:1:dir:{directory}")
    archive_mock = mocker.patch("swh.web.browse.utils.archive")
    archive_mock.lookup_directory.side_effect = masked_exc

    url = reverse("browse-directory", url_args={"sha1_git": directory})

    # The HTML answer must be rendered with the dedicated template.
    check_html_get_response(client, url, status_code=403, template_used="masked.html")
    for media_type in ("application/json", "application/yaml"):
        check_http_get_response(
            client, url, content_type=media_type, status_code=403
        )


# swh/web/browse/tests/views/test_origin.py
def test_browse_visits_origin_masked(
    client, mocker, origin, make_masked_object_exception
):
    """Browsing the visits of a masked origin yields 403 in HTML, JSON and YAML."""
    import hashlib

    # The origin SWHID is derived from the SHA1 of the UTF-8 encoded URL.
    origin_sha1 = hashlib.sha1(origin["url"].encode("utf-8")).hexdigest()
    masked_exc = make_masked_object_exception(f"swh:1:ori:{origin_sha1}")
    archive_mock = mocker.patch("swh.web.browse.views.origin.archive")
    archive_mock.lookup_origin.side_effect = masked_exc

    url = reverse("browse-origin-visits", query_params={"origin_url": origin["url"]})

    # The HTML answer must be rendered with the dedicated template.
    check_html_get_response(client, url, status_code=403, template_used="masked.html")
    for media_type in ("application/json", "application/yaml"):
        check_http_get_response(
            client, url, content_type=media_type, status_code=403
        )
# swh/web/browse/tests/views/test_release.py
def test_release_masked(client, mocker, release, make_masked_object_exception):
    """Browsing a masked release yields 403 in HTML, JSON and YAML."""
    masked_exc = make_masked_object_exception(f"swh:1:rel:{release}")
    archive_mock = mocker.patch("swh.web.browse.views.release.archive")
    archive_mock.lookup_release.side_effect = masked_exc

    url = reverse("browse-release", url_args={"sha1_git": release})

    # The HTML answer must be rendered with the dedicated template.
    check_html_get_response(client, url, status_code=403, template_used="masked.html")
    for media_type in ("application/json", "application/yaml"):
        check_http_get_response(
            client, url, content_type=media_type, status_code=403
        )
# swh/web/browse/tests/views/test_revision.py
def test_revision_masked(client, mocker, revision, make_masked_object_exception):
    """Browsing a masked revision yields 403 in HTML, JSON and YAML.

    ``archive.lookup_revision`` is patched in the revision views module so
    that it raises a masked-object exception for the ``revision`` fixture.
    """
    masked_object_exception = make_masked_object_exception(f"swh:1:rev:{revision}")

    mock_archive = mocker.patch("swh.web.browse.views.revision.archive")
    mock_archive.lookup_revision.side_effect = masked_object_exception

    # Computed once: the original built the very same URL twice in a row.
    url = reverse("browse-revision", url_args={"sha1_git": revision})

    check_html_get_response(client, url, status_code=403, template_used="masked.html")
    check_http_get_response(
        client, url, content_type="application/json", status_code=403
    )
    check_http_get_response(
        client, url, content_type="application/yaml", status_code=403
    )
# swh/web/browse/tests/views/test_snapshot.py
@pytest.mark.parametrize("browse_context", ["log", "branches", "releases"])
def test_snapshot_masked(
    client, mocker, browse_context, snapshot, make_masked_object_exception
):
    """Each snapshot browse view yields 403 when the snapshot is masked.

    ``get_snapshot_context`` is patched to raise a masked-object exception,
    then the view selected by ``browse_context`` is requested as HTML, JSON
    and YAML.
    """
    mocker.patch(
        "swh.web.browse.snapshot_context.get_snapshot_context",
        side_effect=make_masked_object_exception(f"swh:1:snp:{snapshot}"),
    )

    view_name = f"browse-snapshot-{browse_context}"
    url = reverse(view_name, url_args={"snapshot_id": snapshot})

    # The HTML answer must be rendered with the dedicated template.
    check_html_get_response(client, url, status_code=403, template_used="masked.html")
    for media_type in ("application/json", "application/yaml"):
        check_http_get_response(
            client, url, content_type=media_type, status_code=403
        )
# swh/web/utils/exc.py
def masked_to_common_types(exc: MaskedObjectException):
    """Convert ``exc.masked`` to common types, suitable for
    JSON and YAML encoding.

    ExtendedSWHID keys become strings and each MaskedStatus becomes a dict
    with two string values: ``request`` (the request identifier) and
    ``status`` (the state name, lower-cased, with ``_`` replaced by ``-``).

    Args:
        exc: exception whose ``masked`` attribute maps masked objects to
            the list of their statuses

    Returns:
        A dict made only of ``str``, ``list`` and ``dict`` values.
    """
    return {
        str(swhid): [
            {
                # ``status.request`` can be a ``uuid.UUID``; stringify it so
                # the result really contains only common types.
                "request": str(status.request),
                "status": status.state.name.lower().replace("_", "-"),
            }
            for status in statuses
        ]
        for swhid, statuses in exc.masked.items()
    }


def _accepted_media_types(accept_header: str) -> list:
    """Return the media ranges of an ``Accept`` header, most preferred first.

    Parameters other than ``q`` are ignored, ranges whose weight is zero or
    cannot be parsed are dropped, and equally weighted ranges keep the order
    they have in the header.
    """
    weighted = []
    for position, item in enumerate(accept_header.split(",")):
        media_range, *params = (part.strip() for part in item.split(";"))
        if not media_range:
            continue
        quality = 1.0
        for param in params:
            name, _, value = param.partition("=")
            if name.strip().lower() == "q":
                try:
                    quality = float(value)
                except ValueError:
                    quality = 0.0
        if quality > 0:
            weighted.append((-quality, position, media_range.lower()))
    return [media_range for _, _, media_range in sorted(weighted)]


def _generate_masked_object_page(
    request: HttpRequest, exc: MaskedObjectException
) -> HttpResponse:
    """Build the 403 response returned when masked objects are requested.

    ``handle_view_exception`` delegates to this function for
    ``MaskedObjectException``. The representation follows the ``Accept``
    header of the request, JSON being used when the header is missing:

    - ``application/json`` or ``*/*``: JSON document
    - ``application/yaml``: YAML document
    - anything else: the ``masked.html`` page

    The header is parsed into media ranges honouring ``q`` weights, so values
    such as ``application/json; charset=utf-8`` or
    ``application/json, text/plain, */*`` select JSON instead of falling
    through to the HTML page.

    Args:
        request: the request being answered
        exc: the exception describing the masked objects

    Returns:
        A response with status code 403.
    """
    error_code = 403  # Forbidden
    error_data = {
        "error": http_status_code_message[error_code],
        "reason": str(exc),
        "masked": masked_to_common_types(exc),
    }

    accept_header = request.headers.get("Accept", "application/json")

    for media_type in _accepted_media_types(accept_header):
        if media_type in ("application/json", "*/*"):
            return JsonResponse(
                error_data,
                status=error_code,
            )
        if media_type == "application/yaml":
            return HttpResponse(
                YAMLRenderer().render(error_data),
                content_type="application/yaml",
                status=error_code,
            )
        if media_type in ("text/html", "application/xhtml+xml", "text/*"):
            # HTML is preferred over any machine-readable format listed later.
            break

    return render(
        request,
        "masked.html",
        {
            "error_code": error_code,
            "error_message": "Access restricted",
            "error_description": str(exc),
            # The template reads the state objects, so it gets the raw mapping.
            "masked": exc.masked,
        },
        status=error_code,
    )
General Public License version 3, or any later version +See top-level LICENSE file for more information +{% endcomment %} + +{% load static %} + +<div class="swh-http-error"> + <div class="swh-http-error-head">Error</div> + <div class="swh-http-error-code"> + <img class="hidden-xs swh-image-error" + src="{% static 'img/swh-logo.svg' %}" + alt="swh logo" /> + {{ error_code }} + <img class="hidden-xs swh-image-error" + src="{% static 'img/swh-logo.svg' %}" + alt="swh logo" /> + </div> + <h3>{{ error_message }}</h3> + {% block error-description %} + {% endblock error-description %} + + {% if not iframe_mode %} + <div> + <a class="btn" onclick="window.history.back();"> + <i class="mdi mdi-arrow-left" aria-hidden="true"></i> + Go back to previous page + </a> + or + <a class="btn" href="{% url 'swh-web-homepage' %}"> + <i class="mdi mdi-arrow-left" aria-hidden="true"></i> + Go back to homepage + </a> + </div> + {% endif %} +</div> diff --git a/swh/web/webapp/templates/includes/http-error.html b/swh/web/webapp/templates/includes/http-error.html index 4c686947c768d63fa1fccd86e22a782cd654dd87..b6776c1ccb285ca92c9a60b7622f39bdc5da93c6 100644 --- a/swh/web/webapp/templates/includes/http-error.html +++ b/swh/web/webapp/templates/includes/http-error.html @@ -1,5 +1,7 @@ +{% extends "./error-base.html" %} + {% comment %} -Copyright (C) 2018-2023 The Software Heritage developers +Copyright (C) 2018-2024 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information @@ -7,32 +9,8 @@ See top-level LICENSE file for more information {% load static %} -<div class="swh-http-error"> - <div class="swh-http-error-head">Error</div> - <div class="swh-http-error-code"> - <img class="hidden-xs swh-image-error" - src="{% static 'img/swh-logo.svg' %}" - alt="swh logo" /> - {{ error_code }} - <img class="hidden-xs swh-image-error" - 
src="{% static 'img/swh-logo.svg' %}" - alt="swh logo" /> - </div> - <h3>{{ error_message }}</h3> +{% block error-description %} <div class="swh-http-error-desc"> <pre>{{ error_description }}</pre> - {% if not iframe_mode %} - <div> - <a class="btn" onclick="window.history.back();"> - <i class="mdi mdi-arrow-left" aria-hidden="true"></i> - Go back to previous page - </a> - or - <a class="btn" href="{% url 'swh-web-homepage' %}"> - <i class="mdi mdi-arrow-left" aria-hidden="true"></i> - Go back to homepage - </a> - </div> - {% endif %} </div> -</div> +{% endblock error-description %} diff --git a/swh/web/webapp/templates/includes/masking-error.html b/swh/web/webapp/templates/includes/masking-error.html new file mode 100644 index 0000000000000000000000000000000000000000..05ff126ab38c7a24ce6164fc7a7e1e1de57a816c --- /dev/null +++ b/swh/web/webapp/templates/includes/masking-error.html @@ -0,0 +1,38 @@ +{% extends "./error-base.html" %} + +{% comment %} +Copyright (C) 2024 The Software Heritage developers +See the AUTHORS file at the top-level directory of this distribution +License: GNU Affero General Public License version 3, or any later version +See top-level LICENSE file for more information +{% endcomment %} + +{% block error-description %} + <div class="container text-left"> + <p>Some requested objects are currently under restricted access:</p> + <ul> + {% for swhid, statuses in masked.items %} + <li> + <code>{{ swhid }}</code> + {% if statuses %} + due to the request{{ statuses|length|pluralize }}: + <ul> + {% for status in statuses %} + <li> + <code>{{ status.request }}</code> + {% if status.state.name == "RESTRICTED" %} + (permanent restriction) + {% else %} + (temporary restriction) + {% endif %} + </li> + {% endfor -%} + </ul> + {% endif %} + </li> + {% empty %} + <li>Object list unavailable. 
Sorry.</li> + {% endfor %} + </ul> + </div> +{% endblock error-description %} diff --git a/swh/web/webapp/templates/masked.html b/swh/web/webapp/templates/masked.html new file mode 100644 index 0000000000000000000000000000000000000000..b27edd703d5449c487ab270df984bda905bacfae --- /dev/null +++ b/swh/web/webapp/templates/masked.html @@ -0,0 +1,20 @@ +{% extends "layout.html" %} + +{% comment %} +Copyright (C) 2024 The Software Heritage developers +See the AUTHORS file at the top-level directory of this distribution +License: GNU Affero General Public License version 3, or any later version +See top-level LICENSE file for more information +{% endcomment %} + +{% block page_title %} + Access restricted (Error {{ error_code }}) +{% endblock page_title %} + +{% block navbar-content %} + <h4>Access restricted</h4> +{% endblock navbar-content %} + +{% block content %} + {% include "includes/masking-error.html" %} +{% endblock content %}