From 42158ebd33eb4ddf5dc2843a63998b4d8523be54 Mon Sep 17 00:00:00 2001 From: Stephan Steinbach <61017+ssteinbach@users.noreply.github.com> Date: Wed, 17 Jul 2024 14:30:11 -0700 Subject: [PATCH] Second draft of file bundle adapter docs. (#1709) * Second draft of file bundle adapter docs. * attempting to clarify documentation & docstrings in the code * add note about which files are compressed * clarify CWD constraint of relative input paths --------- Co-authored-by: ssteinbach --- docs/tutorials/otio-filebundles.md | 134 +++++++++++------- .../adapters/file_bundle_utils.py | 32 ++--- .../opentimelineio/adapters/otiod.py | 21 +-- .../opentimelineio/adapters/otioz.py | 27 ++-- .../opentimelineio/url_utils.py | 76 +++++----- 5 files changed, 170 insertions(+), 120 deletions(-) diff --git a/docs/tutorials/otio-filebundles.md b/docs/tutorials/otio-filebundles.md index 02b505b990..b0f63e95b7 100644 --- a/docs/tutorials/otio-filebundles.md +++ b/docs/tutorials/otio-filebundles.md @@ -2,91 +2,129 @@ ## Overview -This document describes OpenTimelineIO's file bundle formats, otiod and otioz. The intent is that they make it easier to package and send or archive OpenTimelineIO data and associated media. +This document describes OpenTimelineIO's file bundle formats, `otiod` and `otioz`, as well as how to use the internal adapters that read and write them. -## Source Timeline +The OTIOZ/D File Bundle formats package OpenTimelineIO data and associated media into a single file. This can be useful for sending, archiving and interchange of a single unit that collects cut information and media together. -For creating otio bundles, an OTIO file is used as input, whose media references are composed only of `ExternalReference` that have a target_url field pointing at a media file with a unique basename, because file bundles have a flat namespace for media. 
For example, if there are media references that point at: +## OTIOZ/D File Bundle Format Details -`/project_a/academy_leader.mov` +There are two encodings for OTIO file bundles, OTIOZ and OTIOD. OTIOD is an encoding in the file system that uses a directory hierarchy of files. OTIOZ is the identical structure packed into a single .zip file, currently using the python `zipfile` library. Both contain a content.otio entry at the top level which contains the cut information for the bundle. -and: +### Structure -`/project_b/academy_leader.mov` +File bundles have a consistent structure: -Because the basename of both files is `academy_leader.mov`, this will be an error. The adapters have different policies for how to handle media references. See below for more information. +OTIOD: + +``` +something.otiod (directory) +├── content.otio (file) +└── media (directory) + ├── media1 (file) +    ├── media2 (file) +    └── media3 (file) +``` -### URL Format +OTIOZ (adds the version.txt file and is encoded in a zipfile): -The file bundle adapters expect the `target_url` field of the `media_reference` to be in one of two forms (as produced by python's urlparse library): +``` +something.otioz (zipfile) +├── content.otio (compressed file) +├── version.txt (compressed file) +└── media (directory) + ├── media1 (uncompressed file) +    ├── media2 (uncompressed file) +    ├── media3 (uncompressed file) +    └── ... (uncompressed files) +``` -- absolute path: "file:///path/to/some/file" (encodes "/path/to/some/file") -- relative path: "path/to/some/file" (assumes the path is relative to the current working directory when invoking the adapter). +### content.otio file + +This is an OpenTimelineIO file whose media references are either `MissingReference`s, or `ExternalReference`s with target_urls that are relative paths pointing into the `media` directory. 
-## Structure +### version.txt file -File bundles, regardless of how they're encoded, have a consistent structure: +This file encodes the otioz version of the file, with no other text, in the form: ``` -something.otioz -├── content.otio -├── version -└── media - ├── media1 -    ├── media2 -    └── media3 +1.0.0 ``` +### "media" Directory -### content.otio file +The `media` directory contains all the media files that the `ExternalReference`s `target_url`s in the `content.otio` point at, in a flat structure. Each media file must have a unique basename, but can be encoded in whichever codec/container the user wishes (otio is unable to decode or encode the media files). -This is a normal OpenTimelineIO whose media references are either ExternalReferences with relative target_urls pointing into the `media` directory or `MissingReference`. +## Adapter Usage -### version.txt file +## Read Adapter Behavior -This file encodes the otioz version of the file, in the form 1.0.0. +When a bundle is read from disk using the OpenTimelineIO Python API (using the adapters.read_from_* functions), only the `content.otio` file is read and parsed. -### Media Directory +For example, to view the timeline (not the media) of an otioz file in `otioview`, you can run: -The media directory contains all the media files in a flat structure. They must have unique basenames, but can be encoded in whichever codec/container the user wishes (otio is unable to decode or encode the media files). +`otioview sommething.otioz` -## Read Behavior +Because this will _only_ read the `content.otio` from the bundle, it is usually a fast operation to run. None of the media is decoded or unzipped during this process. -When a bundle is read from disk, the `content.otio` file is extracted from the bundle and returned. 
For example, to view the timeline (not the media) of an otioz file in `otioview`, you can run: +### extract_to_directory Optional Argument -`otioview sommething.otioz` +extract_to_directory: if a value other than `None` is passed in, will extract the contents of the bundle into the directory at the path passed into the `extract_to_directory` argument. For the OTIOZ adapter, this will unzip the associated media. -This will _only_ read the `content.otio` from the bundle, so is usually a fast operation to run. +### absolute_media_reference_paths Optional Argument -## MediaReferencePolicy +The OTIOD adapter additionally has an argument `absolute_media_reference_paths` which will convert all the media references in the bundle to be absolute paths if `True` is passed. Default is `False`. -When building a file bundle using the OTIOZ/OTIOD adapters, you can set the 'media reference policy', which is described by an enum in the file_bundle_utils module. The policies can be: +### Read Adapter Example -- (default) ErrorIfNotFile: will raise an exception if a media reference is found that is of type `ExternalReference` but that does not point at a `target_url`. -- MissingIfNotFile: will replace any media references that meet the above condition with a `MissingReference`, preserving the original media reference in the metadata of the new `MissingReference`. -- AllMissing: will replace all media references with `MissingReference`, preserving the original media reference in metadata on the new object. +Extract the contents of the bundle and convert to an rv playlist: -When running in `AllMissing` mode, no media will be put into the bundle. +`otioconvert -i /var/tmp/some_file.otioz -a extract_to_directory=/var/tmp/example_directory -o /var/tmp/example_directory/some_file.rv` -## OTIOD +## Write Adapter -The OTIOD adapter will build a bundle in a directory stucture on disk. 
The adapter will gather up all the files it can and copy them to the destination directory, and then build the `.otio` file with local relative path references into that directory. +### Source Timeline Constraints -## OTIOZ +For creating otio bundles using the provided python adapter, an OTIO file is used as input. There are some constraints on the source timeline. -The OTIOZ adapter will build a bundle into a zipfile (using the zipfile library). The adapter will write media into the zip file uncompressed and the content.otio with compression. +#### Unique Basenames -### Optional Arguments: +Because file bundles have a flat namespace for media, and media will be copied into the bundle, the `ExternalReference` media references in the source OTIO must have `target_url` fields pointing at media files with unique basenames. -- Read: - - extract_to_directory: if a value other than `None` is passed in, will extract the contents of the bundle into the directory at the path passed into the `extract_to_directory` argument. +For example, if there are media references that point at: -## Example usage in otioconvert +`/project_a/academy_leader.mov` -### Convert an otio into a zip bundle +and: + +`/project_b/academy_leader.mov` + +Because the basename of both files is `academy_leader.mov`, this will be an error. The adapters have different policies for how to handle media references. See below for more information. + +#### Expected Source Timeline External Reference URL Format + +The file bundle adapters expect the `target_url` field of any `media_reference`s in the source timeline to be in one of two forms (as produced by python's [urlparse](https://docs.python.org/3/library/urllib.parse.html) library): + +- absolute path: "file:///path/to/some/file" (encodes "/path/to/some/file") +- relative path: "path/to/some/file" (the path is relative to the current working directory of the command running the adapter on the source timeline). 
+ +### MediaReferencePolicy Option + +When building a file bundle using the OTIOZ/OTIOD adapters, you can set the 'media reference policy', which is described by an enum in the file_bundle_utils module. The policies can be: + +- (default) `ErrorIfNotFile`: will raise an exception if a media reference is found that is of type `ExternalReference` but whose `target_url` does not point at a file (that is, its URL scheme is neither `file` nor empty). +- `MissingIfNotFile`: will replace any media references that meet the above condition with a `MissingReference`, preserving the original media reference in the metadata of the new `MissingReference`. +- `AllMissing`: will replace all media references with `MissingReference`, preserving the original media reference in metadata on the new object. + +When running in `AllMissing` mode, no media will be put into the bundle. + +To use this argument with `otioconvert` from the command line, you can use the `-A` flag with the argument name `media_policy`: + +``` +otioconvert -i path/to/input_file.otio -o path/to/output_file.otioz -A media_policy="AllMissing" +``` -`otioconvert -i somefile.otio -o /var/tmp/somefile.otioz` +### Write Adapter Example -### Extract the contents of the bundle and convert to an rv playlist +Convert an otio into a zip bundle: -`otioconvert -i /var/tmp/somefile.otioz -a extract_to_directory=/var/tmp/somefile -o /var/tmp/somefile/somefile.rv` +`otioconvert -i some_file.otio -o /var/tmp/some_file.otioz` diff --git a/src/py-opentimelineio/opentimelineio/adapters/file_bundle_utils.py b/src/py-opentimelineio/opentimelineio/adapters/file_bundle_utils.py index 3cccff1289..818299cafa 100644 --- a/src/py-opentimelineio/opentimelineio/adapters/file_bundle_utils.py +++ b/src/py-opentimelineio/opentimelineio/adapters/file_bundle_utils.py @@ -6,14 +6,13 @@ import os import copy - from .. 
import ( exceptions, schema, url_utils, ) -import urllib.parse as urlparse +import urllib # versioning @@ -58,21 +57,17 @@ def _guarantee_unique_basenames(path_list, adapter_name): new_basename = os.path.basename(fn) if new_basename in basename_to_source_fn: raise exceptions.OTIOError( - "Error: the {} adapter requires that the media files have " - "unique basenames. File '{}' and '{}' have matching basenames" - " of: '{}'".format( - adapter_name, - fn, - basename_to_source_fn[new_basename], - new_basename - ) + f"Error: the {adapter_name} adapter requires that the media" + f" files have unique basenames. File '{fn}' and" + f" '{basename_to_source_fn[new_basename]}' have matching" + f" basenames of: '{new_basename}'" ) basename_to_source_fn[new_basename] = fn def _prepped_otio_for_bundle_and_manifest( input_otio, # otio to process - media_policy, # what to do with media references + media_policy, # how to handle media references (see: MediaReferencePolicy) adapter_name, # just for error messages ): """ Create a new OTIO based on input_otio that has had media references @@ -86,6 +81,8 @@ def _prepped_otio_for_bundle_and_manifest( their bundles. This is considered an internal API. + + media_policy is expected to be of type MediaReferencePolicy. """ # make sure the incoming OTIO isn't edited @@ -109,17 +106,18 @@ def _prepped_otio_for_bundle_and_manifest( # not an ExternalReference, ignoring it. 
continue - parsed_url = urlparse.urlparse(target_url) + parsed_url = urllib.parse.urlparse(target_url) - # ensure that the urlscheme is either file or "" + # ensure that the urlscheme is either "file" or "" # file means "absolute path" - # none is interpreted as a relative path, relative to cwd + # "" is interpreted as a relative path, relative to cwd of the python + # process if parsed_url.scheme not in ("file", ""): if media_policy is MediaReferencePolicy.ErrorIfNotFile: raise NotAFileOnDisk( - "The {} adapter only works with media reference" - " target_url attributes that begin with 'file:'. Got a " - "target_url of: '{}'".format(adapter_name, target_url) + f"The {adapter_name} adapter only works with media" + " reference target_url attributes that begin with 'file:'." + f" Got a target_url of: '{target_url}'" ) if media_policy is MediaReferencePolicy.MissingIfNotFile: cl.media_reference = reference_cloned_and_missing( diff --git a/src/py-opentimelineio/opentimelineio/adapters/otiod.py b/src/py-opentimelineio/opentimelineio/adapters/otiod.py index 77028dcd8f..f00f4a74a4 100644 --- a/src/py-opentimelineio/opentimelineio/adapters/otiod.py +++ b/src/py-opentimelineio/opentimelineio/adapters/otiod.py @@ -26,7 +26,11 @@ import urllib.parse as urlparse -def read_from_file(filepath, absolute_media_reference_paths=False): +def read_from_file( + filepath, + # convert the media_reference paths to absolute paths + absolute_media_reference_paths=False, +): result = otio_json.read_from_file( os.path.join(filepath, utils.BUNDLE_PLAYLIST_PATH) ) @@ -53,6 +57,8 @@ def read_from_file(filepath, absolute_media_reference_paths=False): def write_to_file( input_otio, filepath, + # see documentation in file_bundle_utils for more information on the + # media_policy media_policy=utils.MediaReferencePolicy.ErrorIfNotFile, dryrun=False ): @@ -64,18 +70,14 @@ def write_to_file( if not os.path.exists(os.path.dirname(filepath)): raise exceptions.OTIOError( - "Directory '{}' does not exist, 
cannot create '{}'.".format( - os.path.dirname(filepath), - filepath - ) + f"Directory '{os.path.dirname(filepath)}' does not exist, cannot" + f" create '{filepath}'." ) if not os.path.isdir(os.path.dirname(filepath)): raise exceptions.OTIOError( - "'{}' is not a directory, cannot create '{}'.".format( - os.path.dirname(filepath), - filepath - ) + f"'{os.path.dirname(filepath)}' is not a directory, cannot create" + f" '{filepath}'." ) # general algorithm for the file bundle adapters: @@ -125,7 +127,6 @@ def write_to_file( os.mkdir(filepath) - # write the otioz file to the temp directory otio_json.write_to_file( result_otio, os.path.join(filepath, utils.BUNDLE_PLAYLIST_PATH) diff --git a/src/py-opentimelineio/opentimelineio/adapters/otioz.py b/src/py-opentimelineio/opentimelineio/adapters/otioz.py index 89d950903a..4ccba931c7 100644 --- a/src/py-opentimelineio/opentimelineio/adapters/otioz.py +++ b/src/py-opentimelineio/opentimelineio/adapters/otioz.py @@ -26,13 +26,17 @@ from . import ( file_bundle_utils as utils, - otio_json + otio_json, ) import pathlib -def read_from_file(filepath, extract_to_directory=None): +def read_from_file( + filepath, + # if provided, will extract contents of zip to this directory + extract_to_directory=None, +): if not zipfile.is_zipfile(filepath): raise exceptions.OTIOError(f"Not a zipfile: {filepath}") @@ -44,22 +48,20 @@ def read_from_file(filepath, extract_to_directory=None): if not os.path.exists(extract_to_directory): raise exceptions.OTIOError( - "Directory '{}' does not exist, cannot unpack otioz " - "there.".format(extract_to_directory) + f"Directory '{extract_to_directory}' does not exist, cannot" + " unpack otioz there." 
) if os.path.exists(output_media_directory): raise exceptions.OTIOError( - "Error: '{}' already exists on disk, cannot overwrite while " - " unpacking OTIOZ file '{}'.".format( - output_media_directory, - filepath - ) - + f"Error: '{output_media_directory}' already exists on disk, " + f"cannot overwrite while unpacking OTIOZ file '{filepath}'." ) with zipfile.ZipFile(filepath, 'r') as zi: - result = otio_json.read_from_string(zi.read(utils.BUNDLE_PLAYLIST_PATH)) + result = otio_json.read_from_string( + zi.read(utils.BUNDLE_PLAYLIST_PATH) + ) if extract_to_directory: zi.extractall(extract_to_directory) @@ -70,10 +72,11 @@ def read_from_file(filepath, extract_to_directory=None): def write_to_file( input_otio, filepath, + # see documentation in file_bundle_utils for more information on the + # media_policy media_policy=utils.MediaReferencePolicy.ErrorIfNotFile, dryrun=False ): - if os.path.exists(filepath): raise exceptions.OTIOError( f"'{filepath}' exists, will not overwrite." diff --git a/src/py-opentimelineio/opentimelineio/url_utils.py b/src/py-opentimelineio/opentimelineio/url_utils.py index a1c0f49908..a67ad527a2 100644 --- a/src/py-opentimelineio/opentimelineio/url_utils.py +++ b/src/py-opentimelineio/opentimelineio/url_utils.py @@ -4,32 +4,34 @@ """Utilities for conversion between urls and file paths""" import os - -from urllib import ( - parse as urlparse, - request -) -from pathlib import ( - PurePath, - PureWindowsPath, - PurePosixPath -) +import urllib +from urllib import request +import pathlib def url_from_filepath(fpath): - """Convert a filesystem path to an url in a portable way using / path sep""" + """Convert a filesystem path to an url in a portable way. 
+ + ensures that `fpath` conforms to the following pattern: + * if it is an absolute path, "file:///path/to/thing" + * if it is a relative path, "path/to/thing" + + In other words, if you pass in: + * "/var/tmp/thing.otio" -> "file:///var/tmp/thing.otio" + * "subdir/thing.otio" -> "subdir/thing.otio" + """ try: # appears to handle absolute windows paths better, which are absolute # and start with a drive letter. - return urlparse.unquote(PurePath(fpath).as_uri()) + return urllib.parse.unquote(pathlib.PurePath(fpath).as_uri()) except ValueError: # scheme is "file" for absolute paths, else "" scheme = "file" if os.path.isabs(fpath) else "" # handles relative paths - return urlparse.urlunparse( - urlparse.ParseResult( + return urllib.parse.urlunparse( + urllib.parse.ParseResult( scheme=scheme, path=fpath, netloc="", @@ -45,45 +47,53 @@ def filepath_from_url(urlstr): Take an url and return a filepath. URLs can either be encoded according to the `RFC 3986`_ standard or not. - Additionally, Windows mapped drive letter and UNC paths need to be accounted for - when processing URL(s); however, there are `ongoing discussions`_ about how to best - handle this within Python developer community. This function is meant to cover - these scenarios in the interim. + Additionally, Windows mapped drive letter and UNC paths need to be + accounted for when processing URL(s); however, there are `ongoing + discussions`_ about how to best handle this within Python developer + community. This function is meant to cover these scenarios in the interim. .. _RFC 3986: https://tools.ietf.org/html/rfc3986#section-2.1 .. 
_ongoing discussions: https://discuss.python.org/t/file-uris-in-python/15600 """ # Parse provided URL - parsed_result = urlparse.urlparse(urlstr) + parsed_result = urllib.parse.urlparse(urlstr) # De-encode the parsed path - decoded_parsed_path = urlparse.unquote(parsed_result.path) + decoded_parsed_path = urllib.parse.unquote(parsed_result.path) # Convert the parsed URL to a path - filepath = PurePath(request.url2pathname(decoded_parsed_path)) + filepath = pathlib.PurePath( + request.url2pathname(decoded_parsed_path) + ) # If the network location is a window drive, reassemble the path - if PureWindowsPath(parsed_result.netloc).drive: - filepath = PurePath(parsed_result.netloc + decoded_parsed_path) + if pathlib.PureWindowsPath(parsed_result.netloc).drive: + filepath = pathlib.PurePath(parsed_result.netloc + decoded_parsed_path) - # If the specified index is a windows drive, then append it to the other parts - elif PureWindowsPath(filepath.parts[0]).drive: - filepath = PurePosixPath(filepath.drive, *filepath.parts[1:]) + # If the specified index is a windows drive, then append it to the other + # parts + elif pathlib.PureWindowsPath(filepath.parts[0]).drive: + filepath = pathlib.PurePosixPath(filepath.drive, *filepath.parts[1:]) # If the specified index is a windows drive, then offset the path - elif PureWindowsPath(filepath.parts[1]).drive: - # Remove leading "/" if/when `request.url2pathname` yields "/S:/path/file.ext" - filepath = PurePosixPath(*filepath.parts[1:]) + elif pathlib.PureWindowsPath(filepath.parts[1]).drive: + # Remove leading "/" if/when `request.url2pathname` yields + # "/S:/path/file.ext" + filepath = pathlib.PurePosixPath(*filepath.parts[1:]) # Should catch UNC paths, # as parsing "file:///some/path/to/file.ext" doesn't provide a netloc elif parsed_result.netloc and parsed_result.netloc != 'localhost': - # Paths of type: "file://host/share/path/to/file.ext" provide "host" as netloc - filepath = PurePath('//', parsed_result.netloc + 
decoded_parsed_path) + # Paths of type: "file://host/share/path/to/file.ext" provide "host" as + # netloc + filepath = pathlib.PurePath( + '//', + parsed_result.netloc + decoded_parsed_path + ) - # Executing `as_posix` on Windows seems to generate a path with only - # 1 leading `/`, so we insert another `/` at the front of the string path + # Executing `as_posix` on Windows seems to generate a path with only 1 + # leading `/`, so we insert another `/` at the front of the string path # to match Linux and Windows UNC conventions and return it. conformed_filepath = filepath.as_posix() if not conformed_filepath.startswith('//'):