From b00ce7c75e2d76fe7ca87c404f69de270ac41c08 Mon Sep 17 00:00:00 2001 From: Vlad0n20 Date: Wed, 18 Mar 2026 15:35:14 +0200 Subject: [PATCH] Update command, add tests --- osf/management/commands/force_archive.py | 9 +- .../management_commands/test_force_archive.py | 99 ++++++++++++++++++- 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/osf/management/commands/force_archive.py b/osf/management/commands/force_archive.py index 42d6c213fa4..7b6d18b6c65 100644 --- a/osf/management/commands/force_archive.py +++ b/osf/management/commands/force_archive.py @@ -329,7 +329,11 @@ def handle_file_operation(file_tree, reg, file_obj, log, obj_cache): elif log.action == 'osf_storage_file_updated': return modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), revert=True) elif log.action == 'addon_file_moved': - parent = BaseFileNode.objects.get(_id__in=obj_cache, name='/{}'.format(log.params['source']['materialized']).rstrip('/').split('/')[-2]) + if '/' not in log.params['source']['materialized'].rstrip('/'): + # Root dir — parent is the root storage folder (name='') + parent = BaseFileNode.objects.get(_id__in=obj_cache, name='') + else: + parent = BaseFileNode.objects.get(_id__in=obj_cache, name='/{}'.format(log.params['source']['materialized']).rstrip('/').split('/')[-2]) return modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), move_under=parent) elif log.action == 'addon_file_renamed': return modify_file_tree_recursive(reg._id, file_tree, file_obj, cached=bool(file_obj._id in obj_cache), rename=log.params['source']['name']) @@ -356,7 +360,8 @@ def revert_log_actions(file_tree, reg, obj_cache, permissible_addons): def build_file_tree(reg, node_settings, *args, **kwargs): n = reg.registered_from - obj_cache = set(n.files.values_list('_id', flat=True)) + ct_id = ContentType.objects.get_for_model(n.__class__()).id + obj_cache = set(BaseFileNode.objects.filter(target_object_id=n.id, target_content_type_id=ct_id).values_list('_id', flat=True)) def _recurse(file_obj, node): ct_id = ContentType.objects.get_for_model(node.__class__()).id diff --git a/osf_tests/management_commands/test_force_archive.py b/osf_tests/management_commands/test_force_archive.py index cdd134a02d3..38612abdab0 100644 --- a/osf_tests/management_commands/test_force_archive.py +++ b/osf_tests/management_commands/test_force_archive.py @@ -4,7 +4,7 @@ from addons.osfstorage.models import OsfStorageFile, OsfStorageFolder from osf.models import NodeLog, BaseFileNode from osf.models.files import TrashedFileNode, TrashedFolder -from osf.management.commands.force_archive import get_file_obj_from_log, build_file_tree, DEFAULT_PERMISSIBLE_ADDONS +from osf.management.commands.force_archive import get_file_obj_from_log, build_file_tree, handle_file_operation, DEFAULT_PERMISSIBLE_ADDONS from osf_tests.factories import NodeFactory, RegistrationFactory @@ -192,3 +192,100 @@ def get_root(self): names = [child['object'].name for child in tree['children']] assert 'file1.txt' in names assert 'file2.txt' not in names + + @pytest.mark.django_db + def test_obj_cache_includes_folders(self, node, reg, permissible_addons): + """ + Regression: n.files is a GenericRelation to OsfStorageFile only, so folder _ids were + never in obj_cache. The fix uses BaseFileNode.objects.filter(...) which includes folders. + """ + from django.contrib.contenttypes.models import ContentType + + folder = OsfStorageFolder.create(target=node, name='myfolder') + folder.save() + root_folder = OsfStorageFolder.create(target=node, name='') + root_folder.save() + + # Demonstrate the BUG: n.files (GenericRelation to OsfStorageFile) omits folders + old_obj_cache = set(node.files.values_list('_id', flat=True)) + assert folder._id not in old_obj_cache, 'Folders must NOT appear via n.files (demonstrating the bug)' + assert root_folder._id not in old_obj_cache, 'Root folder must NOT appear via n.files (demonstrating the bug)' + + # Demonstrate the FIX: BaseFileNode.objects.filter(...) includes files AND folders + ct_id = ContentType.objects.get_for_model(node.__class__()).id + new_obj_cache = set( + BaseFileNode.objects.filter( + target_object_id=node.id, + target_content_type_id=ct_id, + ).values_list('_id', flat=True) + ) + assert folder._id in new_obj_cache, 'Folders must appear in fixed obj_cache' + assert root_folder._id in new_obj_cache, 'Root folder must appear in fixed obj_cache' + + +class TestHandleFileOperation: + + @pytest.fixture + def node(self): + return NodeFactory(title='Test Node', category='project') + + @pytest.fixture + def reg(self, node): + return RegistrationFactory(project=node, registered_date=timezone.now()) + + @pytest.mark.django_db + def test_addon_file_moved_from_root_dir(self, node, reg): + """ + Regression: when materialized='/' (root dir moved between nodes), the old code did: + '/{}'.format('/').rstrip('/') -> '' + ''.split('/') -> [''] (only 1 element) + [''][-2] -> IndexError: list index out of range + The fix detects the root-dir case and looks up the folder by name='' directly. + """ + from django.contrib.contenttypes.models import ContentType + + root_folder = OsfStorageFolder.create(target=node, name='') + root_folder.save() + file = OsfStorageFile.create(target=node, name='file.txt') + file.save() + file.move_under(root_folder) + + ct_id = ContentType.objects.get_for_model(node.__class__()).id + obj_cache = set( + BaseFileNode.objects.filter( + target_object_id=node.id, + target_content_type_id=ct_id, + ).values_list('_id', flat=True) + ) + + file_tree = { + 'object': root_folder, + 'name': '', + 'deleted': False, + 'version': None, + 'children': [ + {'object': file, 'name': 'file.txt', 'deleted': False, 'version': None, 'children': []} + ] + } + + # materialized='/' is the actual crash case: moving a root dir between nodes + log = NodeLog.objects.create( + node=node, + action='addon_file_moved', + params={ + 'source': { + 'materialized': '/', # root dir: triggers IndexError in old code + 'name': '', + }, + 'destination': { + 'materialized': '/', + 'name': '', + } + }, + date=timezone.now(), + ) + + # Old code: '/{}'.format('/').rstrip('/') = '' -> ''.split('/')[-2] -> IndexError + # Fixed code: detects no '/' in materialized.rstrip('/') and uses name='' directly + result_tree, noop = handle_file_operation(file_tree, reg, file, log, obj_cache) + assert result_tree is not None