@pytest.mark.parametrize(
    "test_file_path, files_path, expected_diff",
    [
        (
            "dataset-harvested-fsd.json",
            None,
            {
                # Added items usually do not matter, as they are just artifacts of
                # linking objects properly. For example, here the creator is a member
                # of an organization that gets an additional identifier from the
                # database. The identifier was not originally part of the v2 dataset
                # organization description, but is added because it is part of the
                # Organization object in v3.
                #
                # Examples of diff entries of that kind (kept for reference only):
                # "dictionary_item_added": [
                #     "root['research_dataset']['creator'][0]['member_of']['identifier']",
                #     "root['research_dataset']['creator'][1]['member_of']['identifier']",
                # ],
                # "dictionary_item_removed": [
                #     "root['research_dataset']['access_rights']['license'][0]['title']['und']"
                # ],
                "dictionary_item_added": ["root['research_dataset']['issued']"],
                "dictionary_item_removed": [
                    "root['research_dataset']['total_files_byte_size']",
                ],
            },
        ),
        (
            "dataset-ida-files-qvain-created.json",
            "files-ida.json",
            {},
        ),
        (
            "dataset-remote-qvain-created.json",
            None,
            {
                "dictionary_item_removed": [
                    "root['research_dataset']['total_remote_resources_byte_size']",
                ],
            },
        ),
        (
            "dataset-remote-qvain-extra-contributor.json",
            None,
            {
                "dictionary_item_removed": [
                    "root['research_dataset']['total_remote_resources_byte_size']",
                ],
            },
        ),
    ],
)
@pytest.mark.django_db
def test_v2_to_v3_dataset_conversion(
    data_catalog,
    data_catalog_att,
    funder_type_reference_data,
    license_reference_data,
    test_file_path,
    files_path,
    expected_diff,
):
    """Check the compatibility diff of a legacy dataset after updating it from legacy data.

    The dataset JSON is loaded from the ``testdata`` directory next to this
    module, stored as a ``LegacyDataset``, updated via ``update_from_legacy()``
    and the diff reported by ``LegacyCompatibility`` is compared with
    ``expected_diff``.

    Args:
        data_catalog, data_catalog_att, funder_type_reference_data,
        license_reference_data: pytest fixtures that are requested but never
            read in the body; presumably they create database rows the
            conversion depends on -- confirm against the fixture definitions.
        test_file_path: File name of the dataset JSON inside ``testdata``.
        files_path: File name of a JSON list of legacy file records inside
            ``testdata``, or ``None`` when the dataset has no files.
        expected_diff: The exact diff dictionary the conversion must produce.
    """
    # NOTE(review): presumably the test datasets refer to a contract with this
    # legacy id -- confirm against the JSON fixtures.
    factories.ContractFactory(legacy_id=123)

    # Data prep
    test_data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "testdata")

    # JSON fixtures are read as UTF-8 explicitly so the result does not depend
    # on the locale encoding of the machine running the tests.
    with open(os.path.join(test_data_path, test_file_path), encoding="utf-8") as json_file:
        data = json.load(json_file)

    file_ids = None
    if files_path:
        with open(os.path.join(test_data_path, files_path), encoding="utf-8") as json_file:
            file_data = json.load(json_file)
        # Create the legacy files first so their ids can be attached to the dataset.
        for f in file_data:
            File.create_from_legacy(f)
        file_ids = [f["id"] for f in file_data]

    v2_dataset = LegacyDataset(
        id=data["identifier"],
        dataset_json=data,
        legacy_file_ids=file_ids,
    )
    v2_dataset.save()
    v2_dataset.update_from_legacy()

    diff = LegacyCompatibility(v2_dataset).get_compatibility_diff()
    assert diff == expected_diff