Skip to content

Commit

Permalink
Merge pull request #4 from abhinaukumar/v0.2_dev
Browse files Browse the repository at this point in the history
Version 0.2.0 Updates
  • Loading branch information
abhinaukumar authored Mar 29, 2024
2 parents 64e97bc + e6fa65e commit 2ad785f
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 48 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2022 Abhinau Kumar Venkataramanan
Copyright (c) 2024 Abhinau Kumar Venkataramanan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@

project = 'VideoLIB'
author = 'Abhinau Kumar'
copyright = '2023, Abhinau Kumar Venkataramanan'
copyright = '2024, Abhinau Kumar Venkataramanan'

# The full version, including alpha/beta/rc tags
release = '0.1.3'
release = '0.2.0'

# -- General configuration ---------------------------------------------------

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Python requirements file.
numpy
scikit-video
matplotlib
matplotlib
imageio
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
name='videolib',
author='Abhinau Kumar',
author_email='[email protected]',
version='0.1.2',
version='0.2.0',
url='https://github.com/abhinaukumar/videolib',
description='Package for easy Video IO and color conversion in Python.',
install_requires=['numpy', 'scikit-video', 'matplotlib'],
Expand Down
8 changes: 8 additions & 0 deletions videolib/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,11 @@ def center(self) -> Any:
raise IndexError('Empty buffer has no center')
center_index = (self._back_index + self.buf_size//2 + 1) % self.buf_size
return self._buf[center_index]


def __getitem__(self, index: int) -> Any:
    '''
    Return the element stored `index` slots behind `_back_index`.

    Args:
        index: Offset counted backwards from `_back_index`. Must lie in the range 0 to `buf_size - 1`.

    Returns:
        Any: The element stored in the addressed slot.

    Raises:
        IndexError: If the buffer is empty or index is out of range.
    '''
    if self.isempty():
        raise IndexError('Empty buffer.')
    if index < 0 or index >= self.buf_size:
        raise IndexError(f'Index must be in the range 0-{self.buf_size-1}')
    # Wrap explicitly, like `center` does, instead of relying on Python's
    # negative indexing when the offset runs past the start of the storage.
    return self._buf[(self._back_index - index) % self.buf_size]
8 changes: 8 additions & 0 deletions videolib/standards.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,11 @@ def range(self) -> int:
rec_2100_pq,
rec_2100_hlg
]

# Lookup table from standard name to standard object, built once at import time.
_standards_dict = {standard.name: standard for standard in supported_standards}


def get_standard(name: str) -> Standard:
    '''
    Look up a supported standard by its name.

    Args:
        name: Name of the standard, matched against the `name` attribute of the entries of `supported_standards`.

    Returns:
        Standard: The supported standard having the given name.

    Raises:
        ValueError: If no supported standard has the given name.
    '''
    try:
        return _standards_dict[name]
    except KeyError:
        # The KeyError adds nothing for the caller; suppress the implicit chaining.
        raise ValueError(f'Invalid standard name: {name!r}') from None

174 changes: 131 additions & 43 deletions videolib/video.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
import os
import subprocess
import datetime

from typing import Any, BinaryIO, Dict, Tuple, Optional, Union
from warnings import warn
import json

import numpy as np
import skvideo.io
import imageio

from . import cvt_color
from . import standards

_datatypes = ['rgb', 'linear_rgb', 'bgr', 'linear_bgr', 'yuv', 'linear_yuv', 'xyz']
TEMP_DIR = '/tmp'


class Frame:
'''
Class defining a frame, either of a video or an image.
Supported native color representations: :code:`rgb`, :code:`linear_rgb`, :code:`bgr`, :code:`linear_bgr`, :code:`yuv`, :code:`linear_yuv`, :code:`xyz`.
Class defining a frame, either of a video or an image.
Supported native color representations: :code:`rgb`, :code:`linear_rgb`, :code:`bgr`, :code:`linear_bgr`, :code:`yuv`, :code:`linear_yuv`, :code:`xyz`.
Access as :code:`frame.<color_space>`. For all others, use the :obj:`~videolib.cvt_color` submodule.
'''
def __init__(
Expand Down Expand Up @@ -84,7 +90,7 @@ def _assert_or_make_1channel(img: np.ndarray) -> np.ndarray:
img: 1-channel image, possibly with extra dimensions.
Returns:
1-channel image with no extra dimensions.
np.ndarray: 1-channel image with no extra dimensions.
Raises:
ValueError: If img cannot be squeezed to 2 dimensions.
Expand Down Expand Up @@ -114,7 +120,7 @@ def _assert_or_make_3channel(img: np.ndarray) -> np.ndarray:
img: 3-channel image, possibly with extra dimensions.
Returns:
3-channel image with no extra dimensions.
np.ndarray: 3-channel image with no extra dimensions.
Raises:
ValueError: If img cannot be squeezed to 3 dimensions and channels.
Expand Down Expand Up @@ -147,7 +153,7 @@ def _lift_to_multichannel(img: np.ndarray, channels: int = 3) -> np.ndarray:
channels: Number of channels in the output image.
Returns:
Lifted image.
np.ndarray: Lifted image.
'''
img = Frame._assert_or_make_1channel(img)
return np.tile(np.expand_dims(img, -1), [1, 1, channels])
Expand Down Expand Up @@ -308,7 +314,7 @@ def __init__(
self.quantization: int = quantization
self.dither: bool = dither

self._frame_stride = 1.5 * np.dtype(self.standard).itemsize
self._frame_stride = 1.5 * np.dtype(self.standard.dtype).itemsize

if self.quantization is None and self.dither is True:
warn('Dithering is not applied when quantization is not applied.', RuntimeWarning)
Expand All @@ -317,21 +323,31 @@ def __init__(
elif self.quantization is not None and self.quantization > self.standard.range:
raise ValueError('Quantization value must not exceed the range of the standard')

self._range = 'Full'

self._allowed_formats = ['raw', 'encoded', 'sdr_image', 'hdr_image']

if format is None:
ext = self.file_path.split('.')[-1]
if ext == 'yuv':
format = 'raw'
elif ext in ['mp4', 'mov', 'avi']:
elif ext in ['mp4', 'mov', 'avi', 'webm']:
format = 'encoded'
elif ext in ['jpg', 'png', 'bmp', 'tiff']:
format = 'sdr_image'
if self.standard not in standards.low_bitdepth_standards:
raise ValueError('Extension \'{ext}\' can only be used with 8-bit standards.')
elif ext in ['hdr', 'exr']:
format = 'hdr_image'
if self.standard != standards.radiance_hdr:
raise ValueError('Extension \'{ext}\' can only be used with RadianceHDR.')
else:
raise ValueError('Format unknown for files of type \'{}\''.format(ext))
raise ValueError(f'Format unknown for files of type \'{ext}\'')

if format not in ['encoded', 'raw']:
raise ValueError('Invalid format. Must be one of \'encoded\' or \'raw\'.')
if np.dtype(self.standard.dtype).type != np.uint8 and format != 'raw':
raise ValueError(f'Format \'{format}\' is not supported for videos of standard {self.standard.name}.')
else:
self.format = format
if format not in self._allowed_formats:
raise ValueError(f'Invalid format. Must be one of {self._allowed_formats}.')

self.format = format

if (self.mode == 'r' or self.format == 'raw') and len(out_dict) != 0:
warn('out_dict is only used when mode is \'w\' and format is \'encoded\'. Ignoring.')
Expand All @@ -347,28 +363,99 @@ def __init__(
self._file_object: BinaryIO = open(file_path, '{}b'.format(self.mode))
elif self.format == 'encoded':
if self.mode == 'r':
self._file_object = skvideo.io.FFmpegReader(file_path)
self._decode_encoded_video()
self._file_object: BinaryIO = open(self._temp_path, 'rb')
elif self.mode == 'w':
self._file_object = skvideo.io.FFmpegWriter(file_path, outputdict=self.out_dict)
elif 'image' in self.format:
self._img = Frame(self.standard, self.quantization, self.dither)
rgb = imageio.imread(file_path).astype('float64')
if self.format == 'hdr_image':
rgb = (rgb - np.min(rgb)) / (np.max(rgb) - np.min(rgb))
self._img.linear_rgb = rgb
else:
self._img.rgb = rgb

self.num_frames: int = 0
if self.mode == 'r':
self._frames_loaded_from_next: int = 0
if self.format == 'raw':
self._file_object.seek(0, os.SEEK_END)
size_in_bytes = self._file_object.tell()
self.num_frames = size_in_bytes // int(self.width * self.height * self._frame_stride)
self._file_object.seek(0)
if self.width is None or self.height is None:
raise ValueError('Must set values of width and height when reading a raw video.')
elif self.format == 'encoded':
(self.num_frames, height, width, _) = self._file_object.getShape() # N x H x W x C
if (self.height is not None and height != self.height) or (self.width is not None and width != self.width):
raise ValueError('Input width and height does not match video\'s dimensions.')
else:
self.height = height
self.width = width
self._file_frame_generator = self._file_object.nextFrame()
self._file_object.seek(0, os.SEEK_END)
size_in_bytes = self._file_object.tell()
self.num_frames = size_in_bytes // int(self.width * self.height * self._frame_stride)
self._file_object.seek(0)
if self.width is None or self.height is None:
raise ValueError('Must set values of width and height when reading a raw video.')
elif 'image' in self.format:
self.num_frames = 1
self.width = self._img.width
self.height = self._img.height

@property
def bit_depth(self) -> int:
    '''
    Bit depth of the video, as defined by its standard.

    Returns:
        int: Value of the `bitdepth` attribute of `self.standard`.
    '''
    standard = self.standard
    return standard.bitdepth

@property
def _offset(self) -> float:
    '''
    Offset subtracted from pixel values when normalizing limited-range video.

    Returns:
        float: 16 for 8-bit and 64 for 10-bit video when `_range` is 'Limited'. 0 for any other bit depth or range.
    '''
    # Anything other than limited range needs no offset correction.
    if self._range != 'Limited':
        return 0
    limited_range_offsets = {8: 16, 10: 64}
    return limited_range_offsets.get(self.bit_depth, 0)

@property
def _scale(self) -> float:
    '''
    Scale factor applied to pixel values when normalizing limited-range video.

    Returns:
        float: 255 / (235 - 16) for 8-bit and 1023 / (940 - 64) for 10-bit video when `_range` is 'Limited'. 1 for any other bit depth or range.
    '''
    # Anything other than limited range is left unscaled.
    if self._range != 'Limited':
        return 1
    limited_range_scales = {
        8: 255 / (235 - 16),
        10: 1023 / (940 - 64),
    }
    return limited_range_scales.get(self.bit_depth, 1)

def _decode_encoded_video(self):
    '''
    Decode the encoded input video into a temporary raw YUV 4:2:0 file.

    Runs MediaInfo on `self.file_path` to obtain the video track's dimensions, rotation, bit depth and colour range,
    validates them against the values set on this object, then runs ffmpeg to write raw video to `self._temp_path`.
    Sets `self._temp_path`, `self.height`, `self.width`, `self.bytes_per_pixel` and `self._range`.

    Raises:
        ValueError: If no video track is found, if the dimensions do not match the ones provided, or if the bit depth does not match the standard's.
        subprocess.CalledProcessError: If mediainfo or ffmpeg exits with a non-zero status.
    '''
    timestamp = '{0:%Y_%m_%d_%H_%M_%S}'.format(datetime.datetime.now())
    temp_name = f'EncodedReader_temp_{self.file_path.replace("/", "_")}_{timestamp}.yuv'
    self._temp_path = os.path.join(TEMP_DIR, temp_name)

    json_string = subprocess.check_output(['mediainfo', '--Output=JSON', self.file_path], stdin=subprocess.DEVNULL)
    d = json.loads(json_string)

    # Tolerate a missing or null 'media'/'track' entry so that unexpected
    # MediaInfo output surfaces as the ValueError below, not a TypeError/KeyError.
    media = d.get('media') or {}
    tracks = media.get('track') or []
    v_track = next((track for track in tracks if track.get('@type') == 'Video'), None)
    if v_track is None:
        raise ValueError(f'File {self.file_path} does not have a video track or MediaInfo returned unexpected output.')

    width = int(v_track['Width'])
    height = int(v_track['Height'])
    # The 'Rotation' field is not always present; treat a missing value as no rotation.
    rotation = float(v_track.get('Rotation', 0))
    # Flip width and height if portrait mode
    if rotation not in [0, 180, -180]:
        width, height = height, width

    if (self.height is not None and height != self.height) or (self.width is not None and width != self.width):
        raise ValueError('Input width and height does not match video\'s dimensions.')
    self.height = height
    self.width = width

    bit_depth = int(v_track['BitDepth'])
    if self.bit_depth != bit_depth:
        raise ValueError('Video bit depth does not match standard\'s bitdepth')

    self.bytes_per_pixel = 1.5 * np.ceil(self.bit_depth / 8)
    pix_fmt = 'yuv420p'
    if self.bit_depth != 8:
        pix_fmt += f'{self.bit_depth}le'

    # Keep the current range when MediaInfo does not report one.
    self._range = v_track.get('colour_range', self._range)

    cmd = [
        'ffmpeg',
        '-i', self.file_path,
        '-c:v', 'rawvideo',
        '-pix_fmt', pix_fmt,
        '-y',
        self._temp_path
    ]

    try:
        # check=True: a failed decode must not be silently treated as a valid (empty or partial) raw file.
        subprocess.run(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
    except subprocess.CalledProcessError:
        # Do not leave a partial temporary file behind when decoding fails.
        if os.path.exists(self._temp_path):
            os.remove(self._temp_path)
        raise

def reset(self) -> None:
'''
Expand Down Expand Up @@ -416,20 +503,19 @@ def get_frame(self, frame_ind: int) -> Frame:
raise ValueError('Encoded videos must be read sequentially.')

frame = Frame(self.standard, self.quantization, self.dither)
if self.format == 'raw':
if self.format in ['raw', 'encoded']:
self._file_object.seek(int(self.width * self.height * frame_ind * self._frame_stride))

y1 = np.fromfile(self._file_object, self.standard.dtype, (self.width * self.height))
u1 = np.fromfile(self._file_object, self.standard.dtype, (self.width * self.height) >> 2)
v1 = np.fromfile(self._file_object, self.standard.dtype, (self.width * self.height) >> 2)

y = np.reshape(y1, (self.height, self.width)).astype('float64')
u = np.reshape(u1, (self.height >> 1, self.width >> 1)).repeat(2, axis=0).repeat(2, axis=1).astype('float64')
v = np.reshape(v1, (self.height >> 1, self.width >> 1)).repeat(2, axis=0).repeat(2, axis=1).astype('float64')

frame.yuv = np.stack((y, u, v), axis=-1)
elif self.format == 'encoded':
frame.rgb = next(self._file_frame_generator).astype('float64')
yuv = np.stack((y, u, v), axis=-1)
# Normalize the pixel values based on the determined range
frame.yuv = (yuv - self._offset) * self._scale
else:
frame = self._img
return frame

def __getitem__(self, frame_ind: int) -> Frame:
Expand All @@ -443,9 +529,9 @@ def __getitem__(self, frame_ind: int) -> Frame:
Frame: Frame object containing the frame in YUV format.
'''
if self.mode == 'w':
raise OSError('Cannot index video in write mode.')
if self.format == 'encoded':
raise OSError('Cannot index encoded video.')
raise IndexError('Cannot index video in write mode.')
if self.format not in ['raw', 'encoded']:
raise IndexError(f'Cannot index {self.format} format.')
if frame_ind >= self.num_frames or frame_ind < -self.num_frames:
raise IndexError('Frame index out of range.')

Expand Down Expand Up @@ -491,9 +577,8 @@ def write_yuv_frame(self, yuv: np.ndarray) -> None:
def write_rgb_frame(self, rgb: np.ndarray) -> None:
'''
Adds RGB frame array to file on disk.
Args:
yuv: YUV data to be written.
rgb: RGB data to be written.
'''
if self.mode == 'r':
raise OSError('Cannot write RGB frame in read mode.')
Expand Down Expand Up @@ -543,4 +628,7 @@ def __enter__(self):

# Close video file when exiting 'with' statement
def __exit__(self, exc_type, exc_value, traceback):
    '''
    Release resources when leaving a 'with' statement.

    Calls `close()` once for 'raw' and 'encoded' formats only.
    For encoded videos opened in read mode, the temporary file at `self._temp_path` is removed afterwards,
    even if `close()` raises.

    Args:
        exc_type: Type of the exception raised inside the 'with' body, if any. Unused.
        exc_value: The exception raised inside the 'with' body, if any. Unused.
        traceback: Traceback of that exception, if any. Unused.
    '''
    try:
        if self.format in ['raw', 'encoded']:
            self.close()
    finally:
        # Remove the temporary file regardless of whether close() succeeded.
        if self.format == 'encoded' and self.mode == 'r':
            os.remove(self._temp_path)

0 comments on commit 2ad785f

Please sign in to comment.