Assign all datasets to DatasetVersions based on legacy data.
Intended for use in a data migration. Does the following:
- Assign datasets to the same DatasetVersions as the other datasets in dataset_json.dataset_version_set
- Add the id of each dataset in dataset_version_set to DatasetVersions.legacy_versions
Source code in src/apps/core/models/legacy_versions.py
def migrate_dataset_versions(apps=django_apps):
    """Assign all datasets to DatasetVersions based on legacy data.

    Intended for use in a data migration. Does the following:
    - Assign datasets to the same DatasetVersions as the other datasets
      in dataset_json.dataset_version_set
    - Add the id of each dataset in dataset_version_set to
      DatasetVersions.legacy_versions
    - Delete DatasetVersions that end up without any datasets

    Legacy records whose ``dataset_version_set`` is an explicit JSON null
    are skipped; they carry no version information.

    Args:
        apps: Registry object providing ``get_model``. Defaults to
            ``django_apps``; in a migration this is presumably the registry
            handed to the migration function.
    """
    versions_model = apps.get_model("core", "DatasetVersions")
    legacy_dataset_model = apps.get_model("core", "LegacyDataset")
    dataset_model = apps.get_model("core", "Dataset")

    # Collect all datasets and version sets by their identifiers
    datasets_by_id = dataset_model.objects.only("id", "dataset_versions_id").in_bulk()
    version_sets_by_id = versions_model.objects.in_bulk()

    # Get all legacy dataset_version_set lists
    legacy_versions_data = legacy_dataset_model.objects.filter(
        dataset_json__dataset_version_set__isnull=False
    ).values_list("dataset_json__dataset_version_set", flat=True)

    # Collect identifiers, merge all sets containing at least one common dataset.
    # The key-level ``isnull=False`` lookup only checks that the key exists, so
    # a stored JSON null still matches and arrives here as None; skip it
    # instead of failing with a TypeError when iterating.
    legacy_version_data_ids = [
        [UUID(version["identifier"]) for version in version_set]
        for version_set in legacy_versions_data
        if version_set is not None
    ]
    legacy_version_sets = merge_sets(legacy_version_data_ids)

    # Both lists are handed to the helper for every merged set and persisted
    # in bulk afterwards (presumably the helper appends to them in place).
    dataset_updates = []
    new_version_sets = []
    for version_set in legacy_version_sets:
        _assign_dataset_version(
            version_set=version_set,
            datasets_by_id=datasets_by_id,
            version_sets_by_id=version_sets_by_id,
            new_dataset_versions=new_version_sets,
            dataset_updates=dataset_updates,
            versions_model=versions_model,
        )

    # New version sets are created before the dataset rows are updated.
    versions_model.objects.bulk_create(new_version_sets)
    versions_model.objects.bulk_update(version_sets_by_id.values(), fields=["legacy_versions"])
    dataset_model.objects.bulk_update(dataset_updates, fields=["dataset_versions_id"])

    # Remove versions without datasets
    versions_model.objects.filter(datasets__isnull=True).delete()