Skip to content

Commit

Permalink
Use bulk API in mass_delete.py example script | refs #39550 (#34)
Browse files Browse the repository at this point in the history
  • Loading branch information
thomas-mckay authored May 13, 2024
1 parent a4cfd20 commit 068ef94
Showing 1 changed file with 127 additions and 59 deletions.
186 changes: 127 additions & 59 deletions examples/mass_delete.py
Original file line number Diff line number Diff line change
@@ -1,102 +1,170 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
"""
Delete all media described by oids in a CSV file (first column)
WARNING: THIS CANNOT BE CANCELED BE VERY CAREFUL WITH THIS SCRIPT
WARNING: ENABLE THE RECYCLE-BIN ON YOUR PLATFORM BEFORE RUNNING THIS SCRIPT.
IF YOU DON'T, OR IF YOU USE THE "--permanent" FLAG, YOUR ACTIONS WILL BE IRREVERSIBLE.
By default, does just predict how much space is freed
By default, the script just reports the space that would be freed (no actual
deletions).
$ python examples/mass_delete.py --conf ubicast.json --csv media.csv
v12345649684
...
Deleting 7 media would have freed 4.1 GB
To really delete:
To actually perform the deletions, pass "--apply":
$ python examples/mass_delete.py --conf ubicast.json --csv media.csv --apply
If you've made a mistake, assuming the recycle-bin is active on your platform,
and you didn't use "--permanent", you can revert your actions by manually selecting
and restoring content from the recycle-bin.
"""

'''
import argparse
import os
from pathlib import Path
import sys
import argparse

GB = 1000 * 1000 * 1000


if __name__ == '__main__':
try:
from ms_client.client import MediaServerClient
except ModuleNotFoundError:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from ms_client.client import MediaServerClient

parser = argparse.ArgumentParser()
GB = 1000 * 1000 * 1000


def format_size(size_bytes: int) -> str:
"""
Return human-readable size with automatic suffix.
"""
for unit in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
if abs(size_bytes) < 1000:
return f'{size_bytes:.1f}{unit}B'
size_bytes /= 1000
return f'{size_bytes:.1f}YB'


def _delete_medias(
msc: MediaServerClient,
oids: list[str],
permanent: bool = False,
apply: bool = False
):
mode = '[APPLY] ' if apply else '[DRY-RUN] '
print(f'{mode}Fetching catalog.')

catalog = msc.get_catalog('flat')
oids = set(oids)
to_delete = {}
for obj_type, objects in catalog.items():
for obj in objects:
if obj['oid'] in oids:
to_delete[obj['oid']] = obj['storage_used']

print(f'{mode}Found {len(to_delete)} objects in the catalog matching your CSV.')

if apply:
print(f'{mode}Starting deletion of {len(to_delete)} catalog objects.')
params = {'oids': list(to_delete.keys())}
if permanent:
params['force'] = 'yes'
deleted_statuses = msc.api('catalog/bulk-delete/', method='post', data=params)['statuses']

deleted_count = 0
deleted_size = 0
for object_id, status in deleted_statuses.items():
if status['status'] == 200:
deleted_count += 1
deleted_size += to_delete[object_id]
else:
print(f'{mode}Media {object_id} could not be deleted: {status.get("message")}')

print(f'{mode}Deleted {deleted_count} VODs, freed {format_size(deleted_size)}.')
else:
oids = list(to_delete.keys())
total_size = sum(to_delete.values())
print(f'{mode}Would have deleted {len(oids)} VODs: {oids}')
print(
f'{mode}Deleting these VODs would have freed {format_size(total_size)}.'
)


def delete_medias_from_csv(sys_args):
parser = argparse.ArgumentParser(
'mass_delete',
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
'--conf',
help='Path to the configuration file.',
required=True,
type=str
)

parser.add_argument(
'--csv',
help='Path to CSV file; the first column is expected to be the OID. Lines starting with "#" will be ignored',
help='Path to CSV file; the first column is expected to be the OID. '
'Lines starting with "#" will be ignored',
required=True,
type=str
type=Path
)

parser.add_argument(
'--csv-separator',
help='CSV separator',
default='\t',
type=str
)

parser.add_argument(
'--permanent',
action='store_true',
default=False,
help='Bypass the recycle-bin. With this flag, videos will be deleted without the '
'possibility of restoration, even if the recycle-bin is activated on the '
'platform. Beware, if the recycle-bin is not activated on the platform, '
'medias will be deleted forever whether this flag is passed or not. For '
'videos to be deleted to the recycle-bin, you need to activate the '
'recycle-bin on the platform AND omit this flag.'
)
parser.add_argument(
'--apply',
action='store_true',
default=False,
help='Really delete; without this flag, an estimation of the freed space will be printed instead.'
help='Really delete; without this flag, an estimation of the freed space will be '
'printed instead.'
)

args = parser.parse_args()

args = parser.parse_args(sys_args)
msc = MediaServerClient(args.conf)
# ping
print(msc.api('/'))

with open(args.csv, 'r') as f:
csv_data = f.read().strip()
count = 0
freed = 0
lines = [line for line in csv_data.split('\n') if (line and not line.startswith('#'))]
total_lines = len(lines)
print(f'About to delete {total_lines} media')
# there is a limit to how many subprocesses can be launched
if total_lines > 30000:
print('We recommend against deleting that many files at once')
sys.exit(1)

for index, line in enumerate(lines):
oid = line.split(args.csv_separator)[0]
if oid:
params = {'oid': oid, 'full': 'yes'}
try:
print(f'[{index+1}/{total_lines}] About to delete {oid}')
info = msc.api('medias/get/', params=params)['info']
freed += info['storage_used']
if args.apply:
data = {
'oid': oid,
'delete_metadata': 'yes',
'delete_resources': 'yes',
}
print(f'Deleting {oid}')
msc.api('medias/delete/', method='post', data=data)
count += 1
except Exception as e:
print(f'Error on {oid}: {e}')
freed_gb = round(freed / GB, 1)
if not args.apply:
print(f'Deleting {count} media would have freed {freed_gb} GB ({freed} bytes)')
else:
print(f'Deleted {count} media, freed {freed_gb} GB ({freed} bytes)')
msc.conf['TIMEOUT'] = max(600, msc.conf['TIMEOUT'])

# Ping
print(f'Server url: {msc.conf["SERVER_URL"]}')
print(f'Mediaserver version: {msc.api("/")["mediaserver"]}')
oids = [
clean_line.split(args.csv_separator)[0].strip()
for line in args.csv.read_text().strip().split('\n')
if (clean_line := line.strip().strip('\r')) and not line.startswith('#')
]

if args.apply:
answer = input(
f'The script is running in normal mode. {len(oids)} medias will be deleted.\n'
'Please ensure that the recycle-bin is enabled on your platform '
f'{msc.conf["SERVER_URL"]}/admin/settings/#id_trash_enabled \n'
'Proceed ? [y / n]'
)
if answer.lower() not in ['yes', 'y']:
sys.exit(0)
else:
print(
'The script is running in dry-run mode. No media will be deleted. '
f'A report of the storage used by the {len(oids)} medias in your CSV will be '
'generated.'
)
_delete_medias(msc, oids, permanent=args.permanent, apply=args.apply)


if __name__ == '__main__':
delete_medias_from_csv(sys.argv[1:])

0 comments on commit 068ef94

Please sign in to comment.