From 793b0d18e453bb65ae3b8ea9ef693771c9e7d75f Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Wed, 17 Apr 2019 13:45:53 +0200
Subject: [PATCH 1/2] deposit: Wrongly formatted xml should return a 400 bad
 request

This applies to both multipart deposits and metadata-only (atom) deposits.

This is a new kind of badly formatted input, which previously ended up
as a 500 internal server error.

Related T1638
---
 swh/deposit/api/common.py                     | 24 ++++++++--
 swh/deposit/errors.py                         | 15 +++++-
 swh/deposit/parsers.py                        | 13 +++++-
 swh/deposit/tests/api/test_deposit_atom.py    | 15 ++++++
 .../tests/api/test_deposit_multipart.py       | 46 +++++++++++++++++++
 5 files changed, 106 insertions(+), 7 deletions(-)

diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
index b83846d8..cb972894 100644
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,7 +27,8 @@ from ..errors import (
     MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT,
     CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED,
     make_error_response_from_dict, FORBIDDEN,
-    NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED
+    NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED,
+    ParserError, PARSING_ERROR
 )
 from ..models import (
     Deposit, DepositRequest, DepositCollection,
@@ -502,8 +503,15 @@ class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta):
         if precondition_status_response:
             return precondition_status_response
 
-        raw_metadata, metadata = self._read_metadata(
-            data['application/atom+xml'])
+        try:
+            raw_metadata, metadata = self._read_metadata(
+                data['application/atom+xml'])
+        except ParserError:
+            return make_error_dict(
+                PARSING_ERROR,
+                'Malformed xml metadata',
+                "The xml received is malformed. "
+                "Please ensure your metadata file is correctly formatted.")
 
         # actual storage of data
         deposit = self._deposit_put(deposit_id=deposit_id,
@@ -560,7 +568,15 @@ class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta):
             - 415 (unsupported media type) if a wrong media type is provided
 
         """
-        raw_metadata, metadata = self._read_metadata(req.data)
+        try:
+            raw_metadata, metadata = self._read_metadata(req.data)
+        except ParserError:
+            return make_error_dict(
+                BAD_REQUEST,
+                'Malformed xml metadata',
+                "The xml received is malformed. "
+                "Please ensure your metadata file is correctly formatted.")
+
         if not metadata:
             return make_error_dict(
                 BAD_REQUEST,
diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py
index f81601cc..bd51a451 100644
--- a/swh/deposit/errors.py
+++ b/swh/deposit/errors.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017  The Software Heritage developers
+# Copyright (C) 2017-2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -20,6 +20,14 @@ CHECKSUM_MISMATCH = 'checksum-mismatch'
 MEDIATION_NOT_ALLOWED = 'mediation-not-allowed'
 METHOD_NOT_ALLOWED = 'method-not-allowed'
 MAX_UPLOAD_SIZE_EXCEEDED = 'max_upload_size_exceeded'
+PARSING_ERROR = 'parsing-error'
+
+
+class ParserError(ValueError):
+    """Specific parsing error detected when parsing the xml metadata input
+
+    """
+    pass
 
 
 ERRORS = {
@@ -53,6 +61,11 @@ ERRORS = {
         'iri': 'http://purl.org/net/sword/error/ErrorBadRequest',
         'tag': 'sword:ErrorBadRequest',
     },
+    PARSING_ERROR: {
+        'status': status.HTTP_400_BAD_REQUEST,
+        'iri': 'http://purl.org/net/sword/error/ErrorBadRequest',
+        'tag': 'sword:ErrorBadRequest',
+    },
     MEDIATION_NOT_ALLOWED: {
         'status': status.HTTP_412_PRECONDITION_FAILED,
         'iri': 'http://purl.org/net/sword/error/MediationNotAllowed',
diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py
index 52fcc10b..70f328fd 100644
--- a/swh/deposit/parsers.py
+++ b/swh/deposit/parsers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018  The Software Heritage developers
+# Copyright (C) 2017-2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -14,6 +14,9 @@ from django.conf import settings
 from rest_framework.parsers import BaseParser
 from rest_framework.parsers import FileUploadParser
 from rest_framework.parsers import MultiPartParser
+from xml.parsers.expat import ExpatError
+
+from swh.deposit.errors import ParserError
 
 
 class SWHFileUploadZipParser(FileUploadParser):
@@ -76,8 +79,14 @@ def parse_xml(raw_content):
     Args:
         raw_content (bytes): The content to parse
 
+    Raises:
+        ParserError in case of a malformed xml
+
     Returns:
         content parsed as dict.
 
     """
-    return SWHXMLParser().parse(raw_content)
+    try:
+        return SWHXMLParser().parse(raw_content)
+    except ExpatError as e:
+        raise ParserError(str(e))
diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py
index 4220b846..5c9180aa 100644
--- a/swh/deposit/tests/api/test_deposit_atom.py
+++ b/swh/deposit/tests/api/test_deposit_atom.py
@@ -297,6 +297,21 @@ and other stuff</description>
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
 
     def test_post_deposit_atom_without_slug_header_is_bad_request(self):
+    def test_post_deposit_atom_400_with_parsing_error(self):
+        """Posting parsing error prone atom should return 400
+
+        """
+        atom_entry_data_parsing_error_prone = b"""<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"</entry>
+  <title>Composing a Web of Audio Applications</title>
+  <clienhal</client>
+</entry>
+"""
+        response = self.client.post(
+            reverse(COL_IRI, args=[self.collection.name]),
+            content_type='application/atom+xml;type=entry',
+            data=atom_entry_data_parsing_error_prone)
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
         """Posting an atom entry without a slug header should return a 400
 
         """
diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py
index 8ba2a2e3..05a03832 100644
--- a/swh/deposit/tests/api/test_deposit_multipart.py
+++ b/swh/deposit/tests/api/test_deposit_multipart.py
@@ -400,3 +400,49 @@ class DepositMultipartTestCase(APITestCase, WithAuthTestCase, BasicTestCase,
             'application/x-tar) and 1 atom+xml entry for '
             'multipart deposit' in response.content.decode('utf-8')
         )
+
+    def test_post_deposit_multipart_400_when_badly_formatted_xml(self):
+        # given
+        url = reverse(COL_IRI, args=[self.collection.name])
+
+        data_atom_entry_ko = b"""<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"
+        xmlns:dcterms="http://purl.org/dc/terms/">
+    <titleTitle</title>
+    <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+</entry>
+"""
+
+        archive_content = b'some content representing archive'
+        archive = InMemoryUploadedFile(
+            BytesIO(archive_content),
+            field_name='archive0',
+            name='archive0',
+            content_type='application/zip',
+            size=len(archive_content),
+            charset=None)
+
+        atom_entry = InMemoryUploadedFile(
+            BytesIO(data_atom_entry_ko),
+            field_name='atom0',
+            name='atom0',
+            content_type='application/atom+xml; charset="utf-8"',
+            size=len(data_atom_entry_ko),
+            charset='utf-8')
+
+        # when
+        response = self.client.post(
+            url,
+            format='multipart',
+            data={
+                'archive': archive,
+                'atom_entry': atom_entry,
+            },
+            # + headers
+            HTTP_IN_PROGRESS='false',
+            HTTP_SLUG='external-id',
+        )
+
+        self.assertIn(b'Malformed xml metadata', response.content)
+        self.assertEqual(response.status_code,
+                         status.HTTP_400_BAD_REQUEST)
-- 
GitLab


From 3b19233c6d668f2bedbb268d06746bde45960390 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Wed, 17 Apr 2019 14:17:49 +0200
Subject: [PATCH 2/2] tests: Reformat some deposit tests

---
 swh/deposit/tests/api/test_deposit_atom.py | 218 +++++++--------------
 1 file changed, 74 insertions(+), 144 deletions(-)

diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py
index 5c9180aa..b04da6d6 100644
--- a/swh/deposit/tests/api/test_deposit_atom.py
+++ b/swh/deposit/tests/api/test_deposit_atom.py
@@ -74,139 +74,6 @@ and other stuff</description>
     <runtimePlatform>all</runtimePlatform>
 </entry>"""
 
-        self.atom_entry_data2 = b"""<?xml version="1.0"?>
-<entry xmlns="http://www.w3.org/2005/Atom">
-    <external_identifier>%s</external_identifier>
-</entry>"""
-
-        self.atom_entry_data_empty_body = b"""<?xml version="1.0"?>
-<entry xmlns="http://www.w3.org/2005/Atom"></entry>"""
-
-        self.atom_entry_data3 = b"""<?xml version="1.0"?>
-<entry xmlns="http://www.w3.org/2005/Atom">
-    <something>something</something>
-</entry>"""
-
-        self.atom_entry_data_atom_only = b"""<?xml version="1.0"?>
-            <entry xmlns="http://www.w3.org/2005/Atom">
-                <title>Awesome Compiler</title>
-                <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
-                <external_identifier>1785io25c695</external_identifier>
-                <updated>2017-10-07T15:17:08Z</updated>
-                <author>some awesome author</author>
-        </entry>"""
-
-        self.atom_entry_data_codemeta = b"""<?xml version="1.0"?>
-            <entry xmlns="http://www.w3.org/2005/Atom"
-                     xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
-                <title>Awesome Compiler</title>
-                <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
-                <external_identifier>1785io25c695</external_identifier>
-                <codemeta:id>1785io25c695</codemeta:id>
-                <codemeta:url>origin url</codemeta:url>
-                <codemeta:identifier>other identifier, DOI, ARK</codemeta:identifier>
-                <codemeta:applicationCategory>Domain</codemeta:applicationCategory>
-
-                <codemeta:description>description</codemeta:description>
-                <codemeta:keywords>key-word 1</codemeta:keywords>
-                <codemeta:keywords>key-word 2</codemeta:keywords>
-                <codemeta:dateCreated>creation date</codemeta:dateCreated>
-                <codemeta:datePublished>publication date</codemeta:datePublished>
-                <codemeta:releaseNotes>comment</codemeta:releaseNotes>
-                <codemeta:referencePublication>
-                  <codemeta:name> article name</codemeta:name>
-                  <codemeta:identifier> article id </codemeta:identifier>
-                </codemeta:referencePublication>
-                <codemeta:isPartOf>
-                    <codemeta:type> Collaboration/Projet </codemeta:type>
-                    <codemeta:name> project name</codemeta:name>
-                    <codemeta:identifier> id </codemeta:identifier>
-                </codemeta:isPartOf>
-                <codemeta:relatedLink>see also </codemeta:relatedLink>
-                <codemeta:funding>Sponsor A  </codemeta:funding>
-                <codemeta:funding>Sponsor B</codemeta:funding>
-                <codemeta:operatingSystem>Platform/OS </codemeta:operatingSystem>
-                <codemeta:softwareRequirements>dependencies </codemeta:softwareRequirements>
-                <codemeta:softwareVersion>Version</codemeta:softwareVersion>
-                <codemeta:developmentStatus>active </codemeta:developmentStatus>
-                <codemeta:license>
-                    <codemeta:name>license</codemeta:name>
-                    <codemeta:url>url spdx</codemeta:url>
-                </codemeta:license>
-                <codemeta:runtimePlatform>.Net Framework 3.0 </codemeta:runtimePlatform>
-                <codemeta:runtimePlatform>Python2.3</codemeta:runtimePlatform>
-                <codemeta:author>
-                    <codemeta:name> author1 </codemeta:name>
-                    <codemeta:affiliation> Inria </codemeta:affiliation>
-                    <codemeta:affiliation> UPMC </codemeta:affiliation>
-                </codemeta:author>
-                <codemeta:author>
-                    <codemeta:name> author2 </codemeta:name>
-                    <codemeta:affiliation> Inria </codemeta:affiliation>
-                    <codemeta:affiliation> UPMC </codemeta:affiliation>
-                </codemeta:author>
-                <codemeta:codeRepository>http://code.com</codemeta:codeRepository>
-                <codemeta:programmingLanguage>language 1</codemeta:programmingLanguage>
-                <codemeta:programmingLanguage>language 2</codemeta:programmingLanguage>
-                <codemeta:issueTracker>http://issuetracker.com</codemeta:issueTracker>
-            </entry>"""  # noqa
-
-        self.atom_entry_data_dc_codemeta = b"""<?xml version="1.0"?>
-        <entry xmlns="http://www.w3.org/2005/Atom"
-               xmlns:dcterms="http://purl.org/dc/terms/"
-               xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
-
-
-            <external_identifier>%s</external_identifier>
-            <dcterms:identifier>hal-01587361</dcterms:identifier>
-            <dcterms:identifier>https://hal.inria.fr/hal-01587361</dcterms:identifier>
-            <dcterms:identifier>https://hal.inria.fr/hal-01587361/document</dcterms:identifier>
-            <dcterms:identifier>https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip</dcterms:identifier>
-            <dcterms:identifier>doi:10.5281/zenodo.438684</dcterms:identifier>
-            <dcterms:title xml:lang="en">The assignment problem</dcterms:title>
-            <dcterms:title xml:lang="fr">AffectationRO</dcterms:title>
-            <dcterms:creator>Gruenpeter, Morane</dcterms:creator>
-            <dcterms:subject>[INFO] Computer Science [cs]</dcterms:subject>
-            <dcterms:subject>[INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO]</dcterms:subject>
-            <dcterms:type>SOFTWARE</dcterms:type>
-            <dcterms:abstract xml:lang="en">Project in OR: The assignment problemA java implementation for the assignment problem first release</dcterms:abstract>
-            <dcterms:abstract xml:lang="fr">description fr</dcterms:abstract>
-            <dcterms:created>2015-06-01</dcterms:created>
-            <dcterms:available>2017-10-19</dcterms:available>
-            <dcterms:language>en</dcterms:language>
-
-
-            <codemeta:url>url stable</codemeta:url>
-            <codemeta:version>Version sur hal </codemeta:version>
-            <codemeta:softwareVersion>Version entre par lutilisateur</codemeta:softwareVersion>
-            <codemeta:keywords>Mots-cls</codemeta:keywords>
-            <codemeta:releaseNotes>Commentaire</codemeta:releaseNotes>
-            <codemeta:referencePublication>Rfrence interne </codemeta:referencePublication>
-            <codemeta:isPartOf>
-                <codemeta:type> Collaboration/Projet </codemeta:type>
-                <codemeta:name> nom du projet</codemeta:name>
-                <codemeta:identifier> id </codemeta:identifier>
-            </codemeta:isPartOf>
-            <codemeta:relatedLink>Voir aussi  </codemeta:relatedLink>
-            <codemeta:funding>Financement  </codemeta:funding>
-            <codemeta:funding>Projet ANR </codemeta:funding>
-            <codemeta:funding>Projet Europen </codemeta:funding>
-            <codemeta:operatingSystem>Platform/OS </codemeta:operatingSystem>
-            <codemeta:softwareRequirements>Dpendances </codemeta:softwareRequirements>
-            <codemeta:developmentStatus>Etat du dveloppement </codemeta:developmentStatus>
-            <codemeta:license>
-                <codemeta:name>license</codemeta:name>
-                <codemeta:url>url spdx</codemeta:url>
-            </codemeta:license>
-            <codemeta:runtimePlatform>Outils de dveloppement- outil no1 </codemeta:runtimePlatform>
-            <codemeta:runtimePlatform>Outils de dveloppement- outil no2 </codemeta:runtimePlatform>
-            <codemeta:codeRepository>http://code.com</codemeta:codeRepository>
-            <codemeta:programmingLanguage>language 1</codemeta:programmingLanguage>
-            <codemeta:programmingLanguage>language 2</codemeta:programmingLanguage>
-        </entry>"""  # noqa
-
-        self.atom_entry_tei = b"""<TEI><teiHeader><fileDesc><titleStmt><title>HAL TEI export of hal-01587083</title></titleStmt><publicationStmt><distributor>CCSD</distributor><availability status="restricted"><licence target="http://creativecommons.org/licenses/by/4.0/">Distributed under a Creative Commons Attribution 4.0 International License</licence></availability><date when="2017-10-03T17:21:03+02:00"/></publicationStmt><sourceDesc><p part="N">HAL API platform</p></sourceDesc></fileDesc></teiHeader><text><body><listBibl><biblFull><titleStmt><title xml:lang="en">questionnaire software metadata</title><author role="aut"><persName><forename type="first">Morane</forename><surname>Gruenpeter</surname></persName><email type="md5">7de56c632362954fa84172cad80afe4e</email><email type="domain">inria.fr</email><ptr type="url" target="moranegg.github.io"/><idno type="halauthorid">1556733</idno><affiliation ref="#struct-474639"/></author><editor role="depositor"><persName><forename>Morane</forename><surname>Gruenpeter</surname></persName><email type="md5">f85a43a5fb4a2e0778a77e017f28c8fd</email><email type="domain">gmail.com</email></editor></titleStmt><editionStmt><edition n="v1" type="current"><date type="whenSubmitted">2017-09-29 11:21:32</date><date type="whenModified">2017-10-03 17:20:13</date><date type="whenReleased">2017-10-03 17:20:13</date><date type="whenProduced">2017-09-29</date><date type="whenEndEmbargoed">2017-09-29</date><ref type="file" target="https://hal.inria.fr/hal-01587083/document"><date notBefore="2017-09-29"/></ref><ref type="file" subtype="author" n="1" target="https://hal.inria.fr/hal-01587083/file/questionnaire.zip"><date notBefore="2017-09-29"/></ref></edition><respStmt><resp>contributor</resp><name key="442239"><persName><forename>Morane</forename><surname>Gruenpeter</surname></persName><email type="md5">f85a43a5fb4a2e0778a77e017f28c8fd</email><email 
type="domain">gmail.com</email></name></respStmt></editionStmt><publicationStmt><distributor>CCSD</distributor><idno type="halId">hal-01587083</idno><idno type="halUri">https://hal.inria.fr/hal-01587083</idno><idno type="halBibtex">gruenpeter:hal-01587083</idno><idno type="halRefHtml">2017</idno><idno type="halRef">2017</idno></publicationStmt><seriesStmt/><notesStmt/><sourceDesc><biblStruct><analytic><title xml:lang="en">questionnaire software metadata</title><author role="aut"><persName><forename type="first">Morane</forename><surname>Gruenpeter</surname></persName><email type="md5">7de56c632362954fa84172cad80afe4e</email><email type="domain">inria.fr</email><ptr type="url" target="moranegg.github.io"/><idno type="halauthorid">1556733</idno><affiliation ref="#struct-474639"/></author></analytic><monogr><imprint/></monogr></biblStruct></sourceDesc><profileDesc><langUsage><language ident="en">English</language></langUsage><textClass><classCode scheme="halDomain" n="info">Computer Science [cs]</classCode><classCode scheme="halTypology" n="SOFTWARE">Software</classCode></textClass></profileDesc></biblFull></listBibl></body><back><listOrg type="structures"><org type="laboratory" xml:id="struct-474639" status="VALID"><orgName>IRILL</orgName><orgName type="acronym">Initiative pour la Recherche et l'Innovation sur le Logiciel Libre</orgName><desc><address><country key="FR"/></address><ref type="url">https://www.irill.org/</ref></desc><listRelation><relation active="#struct-93591" type="direct"/><relation active="#struct-300009" type="direct"/><relation active="#struct-300301" type="direct"/></listRelation></org><org type="institution" xml:id="struct-93591" status="VALID"><orgName>Universite Pierre et Marie Curie - Paris 6</orgName><orgName type="acronym">UPMC</orgName><desc><address><addrLine>4 place Jussieu - 75005 Paris</addrLine><country key="FR"/></address><ref type="url">http://www.upmc.fr/</ref></desc></org><org type="institution" xml:id="struct-300009" 
status="VALID"><orgName>Institut National de Recherche en Informatique et en Automatique</orgName><orgName type="acronym">Inria</orgName><desc><address><addrLine>Domaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex</addrLine><country key="FR"/></address><ref type="url">http://www.inria.fr/en/</ref></desc></org><org type="institution" xml:id="struct-300301" status="VALID"><orgName>Universite Paris Diderot - Paris 7</orgName><orgName type="acronym">UPD7</orgName><desc><address><addrLine>5 rue Thomas-Mann - 75205 Paris cedex 13</addrLine><country key="FR"/></address><ref type="url">http://www.univ-paris-diderot.fr</ref></desc></org></listOrg></back></text></TEI>"""  # noqa
-
         self.atom_entry_data_badly_formatted = b"""<?xml version="1.0"?>
 <entry xmlns="http://www.w3.org/2005/Atom"</entry>"""
 
@@ -250,7 +117,7 @@ and other stuff</description>
 </entry>
 """  # noqa
 
-    def test_post_deposit_atom_entry_serialization_error(self):
+    def test_post_deposit_atom_201_even_with_decimal(self):
         """Posting an initial atom entry should return 201 with deposit receipt
 
         """
@@ -276,17 +143,20 @@ and other stuff</description>
         sw_version = dr.metadata.get('codemeta:softwareVersion')
         self.assertEqual(sw_version, '10.4')
 
-    def test_post_deposit_atom_empty_body_request(self):
+    def test_post_deposit_atom_400_with_empty_body(self):
         """Posting empty body request should return a 400 response
 
         """
+        atom_entry_data_empty_body = b"""<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"></entry>"""
+
         response = self.client.post(
             reverse(COL_IRI, args=[self.collection.name]),
             content_type='application/atom+xml;type=entry',
-            data=self.atom_entry_data_empty_body)
+            data=atom_entry_data_empty_body)
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
 
-    def test_post_deposit_atom_badly_formatted_is_a_bad_request(self):
+    def test_post_deposit_atom_400_badly_formatted_atom(self):
         """Posting a badly formatted atom should return a 400 response
 
         """
@@ -296,7 +166,6 @@ and other stuff</description>
             data=self.atom_entry_data_badly_formatted)
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
 
-    def test_post_deposit_atom_without_slug_header_is_bad_request(self):
     def test_post_deposit_atom_400_with_parsing_error(self):
         """Posting parsing error prone atom should return 400
 
@@ -312,6 +181,8 @@ and other stuff</description>
             content_type='application/atom+xml;type=entry',
             data=atom_entry_data_parsing_error_prone)
         self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+
+    def test_post_deposit_atom_400_without_slug_header(self):
         """Posting an atom entry without a slug header should return a 400
 
         """
@@ -329,14 +200,19 @@ and other stuff</description>
         self.assertEqual(response.status_code,
                          status.HTTP_400_BAD_REQUEST)
 
-    def test_post_deposit_atom_unknown_collection(self):
+    def test_post_deposit_atom_404_unknown_collection(self):
         """Posting an atom entry to an unknown collection should return a 404
 
         """
+        atom_entry_data3 = b"""<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom">
+    <something>something</something>
+</entry>"""
+
         response = self.client.post(
             reverse(COL_IRI, args=['unknown-one']),
             content_type='application/atom+xml;type=entry',
-            data=self.atom_entry_data3,
+            data=atom_entry_data3,
             HTTP_SLUG='something')
         self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
 
@@ -389,8 +265,59 @@ and other stuff</description>
         with self.assertRaises(Deposit.DoesNotExist):
             Deposit.objects.get(external_id=external_id)
 
-        atom_entry_data = self.atom_entry_data_dc_codemeta % (
-            external_id.encode('utf-8'), )
+        atom_entry_data = b"""<?xml version="1.0"?>
+        <entry xmlns="http://www.w3.org/2005/Atom"
+               xmlns:dcterms="http://purl.org/dc/terms/"
+               xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
+
+
+            <external_identifier>%s</external_identifier>
+            <dcterms:identifier>hal-01587361</dcterms:identifier>
+            <dcterms:identifier>https://hal.inria.fr/hal-01587361</dcterms:identifier>
+            <dcterms:identifier>https://hal.inria.fr/hal-01587361/document</dcterms:identifier>
+            <dcterms:identifier>https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip</dcterms:identifier>
+            <dcterms:identifier>doi:10.5281/zenodo.438684</dcterms:identifier>
+            <dcterms:title xml:lang="en">The assignment problem</dcterms:title>
+            <dcterms:title xml:lang="fr">AffectationRO</dcterms:title>
+            <dcterms:creator>Gruenpeter, Morane</dcterms:creator>
+            <dcterms:subject>[INFO] Computer Science [cs]</dcterms:subject>
+            <dcterms:subject>[INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO]</dcterms:subject>
+            <dcterms:type>SOFTWARE</dcterms:type>
+            <dcterms:abstract xml:lang="en">Project in OR: The assignment problemA java implementation for the assignment problem first release</dcterms:abstract>
+            <dcterms:abstract xml:lang="fr">description fr</dcterms:abstract>
+            <dcterms:created>2015-06-01</dcterms:created>
+            <dcterms:available>2017-10-19</dcterms:available>
+            <dcterms:language>en</dcterms:language>
+
+
+            <codemeta:url>url stable</codemeta:url>
+            <codemeta:version>Version sur hal </codemeta:version>
+            <codemeta:softwareVersion>Version entre par lutilisateur</codemeta:softwareVersion>
+            <codemeta:keywords>Mots-cls</codemeta:keywords>
+            <codemeta:releaseNotes>Commentaire</codemeta:releaseNotes>
+            <codemeta:referencePublication>Rfrence interne </codemeta:referencePublication>
+            <codemeta:isPartOf>
+                <codemeta:type> Collaboration/Projet </codemeta:type>
+                <codemeta:name> nom du projet</codemeta:name>
+                <codemeta:identifier> id </codemeta:identifier>
+            </codemeta:isPartOf>
+            <codemeta:relatedLink>Voir aussi  </codemeta:relatedLink>
+            <codemeta:funding>Financement  </codemeta:funding>
+            <codemeta:funding>Projet ANR </codemeta:funding>
+            <codemeta:funding>Projet Europen </codemeta:funding>
+            <codemeta:operatingSystem>Platform/OS </codemeta:operatingSystem>
+            <codemeta:softwareRequirements>Dpendances </codemeta:softwareRequirements>
+            <codemeta:developmentStatus>Etat du dveloppement </codemeta:developmentStatus>
+            <codemeta:license>
+                <codemeta:name>license</codemeta:name>
+                <codemeta:url>url spdx</codemeta:url>
+            </codemeta:license>
+            <codemeta:runtimePlatform>Outils de dveloppement- outil no1 </codemeta:runtimePlatform>
+            <codemeta:runtimePlatform>Outils de dveloppement- outil no2 </codemeta:runtimePlatform>
+            <codemeta:codeRepository>http://code.com</codemeta:codeRepository>
+            <codemeta:programmingLanguage>language 1</codemeta:programmingLanguage>
+            <codemeta:programmingLanguage>language 2</codemeta:programmingLanguage>
+        </entry>"""  % external_id.encode('utf-8')  # noqa
 
         # when
         response = self.client.post(
@@ -430,7 +357,7 @@ and other stuff</description>
         with self.assertRaises(Deposit.DoesNotExist):
             Deposit.objects.get(external_id=external_id)
 
-        atom_entry_data = self.atom_entry_tei
+        atom_entry_data = b"""<TEI><teiHeader><fileDesc><titleStmt><title>HAL TEI export of hal-01587083</title></titleStmt><publicationStmt><distributor>CCSD</distributor><availability status="restricted"><licence target="http://creativecommons.org/licenses/by/4.0/">Distributed under a Creative Commons Attribution 4.0 International License</licence></availability><date when="2017-10-03T17:21:03+02:00"/></publicationStmt><sourceDesc><p part="N">HAL API platform</p></sourceDesc></fileDesc></teiHeader><text><body><listBibl><biblFull><titleStmt><title xml:lang="en">questionnaire software metadata</title><author role="aut"><persName><forename type="first">Morane</forename><surname>Gruenpeter</surname></persName><email type="md5">7de56c632362954fa84172cad80afe4e</email><email type="domain">inria.fr</email><ptr type="url" target="moranegg.github.io"/><idno type="halauthorid">1556733</idno><affiliation ref="#struct-474639"/></author><editor role="depositor"><persName><forename>Morane</forename><surname>Gruenpeter</surname></persName><email type="md5">f85a43a5fb4a2e0778a77e017f28c8fd</email><email type="domain">gmail.com</email></editor></titleStmt><editionStmt><edition n="v1" type="current"><date type="whenSubmitted">2017-09-29 11:21:32</date><date type="whenModified">2017-10-03 17:20:13</date><date type="whenReleased">2017-10-03 17:20:13</date><date type="whenProduced">2017-09-29</date><date type="whenEndEmbargoed">2017-09-29</date><ref type="file" target="https://hal.inria.fr/hal-01587083/document"><date notBefore="2017-09-29"/></ref><ref type="file" subtype="author" n="1" target="https://hal.inria.fr/hal-01587083/file/questionnaire.zip"><date notBefore="2017-09-29"/></ref></edition><respStmt><resp>contributor</resp><name key="442239"><persName><forename>Morane</forename><surname>Gruenpeter</surname></persName><email type="md5">f85a43a5fb4a2e0778a77e017f28c8fd</email><email 
type="domain">gmail.com</email></name></respStmt></editionStmt><publicationStmt><distributor>CCSD</distributor><idno type="halId">hal-01587083</idno><idno type="halUri">https://hal.inria.fr/hal-01587083</idno><idno type="halBibtex">gruenpeter:hal-01587083</idno><idno type="halRefHtml">2017</idno><idno type="halRef">2017</idno></publicationStmt><seriesStmt/><notesStmt/><sourceDesc><biblStruct><analytic><title xml:lang="en">questionnaire software metadata</title><author role="aut"><persName><forename type="first">Morane</forename><surname>Gruenpeter</surname></persName><email type="md5">7de56c632362954fa84172cad80afe4e</email><email type="domain">inria.fr</email><ptr type="url" target="moranegg.github.io"/><idno type="halauthorid">1556733</idno><affiliation ref="#struct-474639"/></author></analytic><monogr><imprint/></monogr></biblStruct></sourceDesc><profileDesc><langUsage><language ident="en">English</language></langUsage><textClass><classCode scheme="halDomain" n="info">Computer Science [cs]</classCode><classCode scheme="halTypology" n="SOFTWARE">Software</classCode></textClass></profileDesc></biblFull></listBibl></body><back><listOrg type="structures"><org type="laboratory" xml:id="struct-474639" status="VALID"><orgName>IRILL</orgName><orgName type="acronym">Initiative pour la Recherche et l'Innovation sur le Logiciel Libre</orgName><desc><address><country key="FR"/></address><ref type="url">https://www.irill.org/</ref></desc><listRelation><relation active="#struct-93591" type="direct"/><relation active="#struct-300009" type="direct"/><relation active="#struct-300301" type="direct"/></listRelation></org><org type="institution" xml:id="struct-93591" status="VALID"><orgName>Universite Pierre et Marie Curie - Paris 6</orgName><orgName type="acronym">UPMC</orgName><desc><address><addrLine>4 place Jussieu - 75005 Paris</addrLine><country key="FR"/></address><ref type="url">http://www.upmc.fr/</ref></desc></org><org type="institution" xml:id="struct-300009" 
status="VALID"><orgName>Institut National de Recherche en Informatique et en Automatique</orgName><orgName type="acronym">Inria</orgName><desc><address><addrLine>Domaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex</addrLine><country key="FR"/></address><ref type="url">http://www.inria.fr/en/</ref></desc></org><org type="institution" xml:id="struct-300301" status="VALID"><orgName>Universite Paris Diderot - Paris 7</orgName><orgName type="acronym">UPD7</orgName><desc><address><addrLine>5 rue Thomas-Mann - 75205 Paris cedex 13</addrLine><country key="FR"/></address><ref type="url">http://www.univ-paris-diderot.fr</ref></desc></org></listOrg></back></text></TEI>"""  # noqa
 
         # when
         response = self.client.post(
@@ -493,7 +420,10 @@ and other stuff</description>
         deposit_requests = DepositRequest.objects.filter(deposit=deposit)
         self.assertEqual(len(deposit_requests), 1)
 
-        atom_entry_data = self.atom_entry_data2 % external_id.encode('utf-8')
+        atom_entry_data = b"""<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom">
+    <external_identifier>%s</external_identifier>
+</entry>""" % external_id.encode('utf-8')
 
         update_uri = response._headers['location'][1]
 
-- 
GitLab