Skip to content
Snippets Groups Projects
Commit 8e72996d authored by vlorentz's avatar vlorentz
Browse files

Fix merging documents with @list elements.

This happens when we find two metadata files each with at least two authors.
parent 1ef78733
No related branches found
Tags v0.0.166
No related merge requests found
......@@ -186,7 +186,20 @@ def merge_documents(documents):
merged_document[SCHEMA_URI + 'sameAs'].append(value)
else:
for value in values:
if value not in merged_document[key]:
if isinstance(value, dict) and set(value) == {'@list'}:
# Value is of the form {'@list': [item1, item2]}
# instead of the usual [item1, item2].
# We need to merge the inner lists (and mostly
# preserve order).
merged_value = merged_document.setdefault(
key, {'@list': []})
for subvalue in value['@list']:
# merged_value must be of the form
# {'@list': [item1, item2]}; as it is the same
# type as value, which is an @list.
if subvalue not in merged_value['@list']:
merged_value['@list'].append(subvalue)
elif value not in merged_document[key]:
merged_document[key].append(value)
return compact(merged_document)
......@@ -156,3 +156,128 @@ def test_merge_documents_duplicate_ids():
"name": ['test_1', 'test_1b', 'test_2']
}
assert results == expected_results
def test_merge_documents_lists():
    """Check that two @list author values are merged into one list."""
    context = 'https://doi.org/10.5063/schema/codemeta-2.0'
    first_document = {
        '@context': context,
        'author': {'@list': [{'name': 'test_1'}]},
    }
    second_document = {
        '@context': context,
        'author': {'@list': [{'name': 'test_2'}]},
    }

    merged = merge_documents([first_document, second_document])

    # The expected authors appear in the order the documents were given.
    assert merged == {
        '@context': context,
        'author': [{'name': 'test_1'}, {'name': 'test_2'}],
    }
def test_merge_documents_lists_duplicates():
    """Check that an author present in both @list values is kept once."""
    context = 'https://doi.org/10.5063/schema/codemeta-2.0'
    first_document = {
        '@context': context,
        'author': {'@list': [{'name': 'test_1'}]},
    }
    # The second document repeats 'test_1' after a new author.
    second_document = {
        '@context': context,
        'author': {'@list': [{'name': 'test_2'}, {'name': 'test_1'}]},
    }

    merged = merge_documents([first_document, second_document])

    # The repeated author must not show up a second time.
    assert merged == {
        '@context': context,
        'author': [{'name': 'test_1'}, {'name': 'test_2'}],
    }
def test_merge_documents_list_left():
    """Check merging a single author (first) with an @list (second)."""
    context = 'https://doi.org/10.5063/schema/codemeta-2.0'
    singleton_document = {
        '@context': context,
        'author': {'name': 'test_1'},
    }
    list_document = {
        '@context': context,
        'author': {'@list': [{'name': 'test_2'}]},
    }

    merged = merge_documents([singleton_document, list_document])

    # Both authors are expected in a single flat list.
    assert merged == {
        '@context': context,
        'author': [{'name': 'test_1'}, {'name': 'test_2'}],
    }
def test_merge_documents_list_right():
    """Check merging an @list (first) with a single author (second)."""
    context = 'https://doi.org/10.5063/schema/codemeta-2.0'
    list_document = {
        '@context': context,
        'author': {'@list': [{'name': 'test_1'}]},
    }
    singleton_document = {
        '@context': context,
        'author': {'name': 'test_2'},
    }

    merged = merge_documents([list_document, singleton_document])

    # Both authors are expected in a single flat list.
    assert merged == {
        '@context': context,
        'author': [{'name': 'test_1'}, {'name': 'test_2'}],
    }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment