From 755f40f971199498667c8a85e474fd87c5845c46 Mon Sep 17 00:00:00 2001 From: Seth Grover Date: Mon, 16 Sep 2024 10:44:46 -0600 Subject: [PATCH] for #36, ability to select audio stream with --audio-stream-index, and list audio tracks with --audio-stream-list --- README.md | 24 +++-- setup.cfg | 2 +- src/cleanvid/__init__.py | 2 +- src/cleanvid/cleanvid.py | 187 ++++++++++++++++++++++++++++----------- 4 files changed, 156 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 65f39c4..768f0db 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ 1. The user provides as input a video file and matching `.srt` subtitle file. If subtitles are not provided explicitly, they will be extracted from the video file if possible; if not, [`subliminal`](https://github.com/Diaoul/subliminal) is used to attempt to download the best matching `.srt` file. 2. [`pysrt`](https://github.com/byroot/pysrt) is used to parse the `.srt` file, and each entry is checked against a [list](./src/cleanvid/swears.txt) of profanity or other words or phrases you'd like muted. Mappings can be provided (eg., map "sh*t" to "poop"), otherwise the word will be replaced with *****. 3. A new "clean" `.srt` file is created. with *only* those phrases containing the censored/replaced objectional language. -4. [`ffmpeg`](https://www.ffmpeg.org/) is used to create a cleaned video file. This file contains the original video stream, but the audio stream is muted during the segments containing objectional language. The audio stream is re-encoded as AAC and remultiplexed back together with the video. Optionally, the clean `.srt` file can be embedded in the cleaned video file as a subtitle track. +4. [`ffmpeg`](https://www.ffmpeg.org/) is used to create a cleaned video file. This file contains the original video stream, but the specified audio stream is muted during the segments containing objectional language. That audio stream is re-encoded and remultiplexed back together with the video. Optionally, the clean `.srt` file can be embedded in the cleaned video file as a subtitle track. You can then use your favorite media player to play the cleaned video file together with the cleaned subtitles. @@ -49,10 +49,9 @@ To install FFmpeg, use your operating system's package manager or install binari ## usage ``` -usage: cleanvid.py [-h] [-s ] -i [-o ] - [--plex-auto-skip-json ] [--plex-auto-skip-id ] - [--subs-output ] [-w ] [-l ] [-p ] [-e] [-f] [--subs-only] [--offline] [--edl] [-r] [-b] - [-v VPARAMS] [-a APARAMS] +usage: cleanvid [-h] [-s ] -i [-o ] [--plex-auto-skip-json ] [--plex-auto-skip-id ] [--subs-output ] + [-w ] [-l ] [-p ] [-e] [-f] [--subs-only] [--offline] [--edl] [--json] [--re-encode-video] [--re-encode-audio] [-b] [-v VPARAMS] [-a APARAMS] + [-d] [--audio-stream-index ] [--audio-stream-list] [--threads-input ] [--threads-encoding ] [--threads ] options: -h, --help show this help message and exit @@ -71,7 +70,7 @@ options: -w , --swears text file containing profanity (with optional mapping) -l , --lang - language for srt download (default is "eng") + language for extracting srt from video file or srt download (default is "eng") -p , --pad pad (seconds) around profanity -e, --embed-subs embed subtitles in resulting video file @@ -79,12 +78,23 @@ options: --subs-only only operate on subtitles (do not alter audio) --offline don't attempt to download subtitles --edl generate MPlayer EDL file with mute actions (also implies --subs-only) - -r, --re-encode Re-encode video + --json generate JSON file with muted subtitles and their contents + --re-encode-video Re-encode video + --re-encode-audio Re-encode audio -b, --burn Hard-coded subtitles (implies re-encode) -v VPARAMS, --video-params VPARAMS Video parameters for ffmpeg (only if re-encoding) -a APARAMS, --audio-params APARAMS Audio parameters for ffmpeg + -d, --downmix Downmix to stereo (if not already stereo) + --audio-stream-index + Index of audio stream to process + --audio-stream-list Show list of audio streams (to get index for --audio-stream-index) + --threads-input + ffmpeg global options -threads value + --threads-encoding + ffmpeg encoding options -threads value + --threads ffmpeg -threads value (for both global options and encoding) ``` ### Docker diff --git a/setup.cfg b/setup.cfg index c71d36d..ba46d0f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = cleanvid -version = 1.6.0 +version = 1.7.0 author = Seth Grover author_email = mero.mero.guero@gmail.com description = cleanvid is a little script to mute profanity in video files. diff --git a/src/cleanvid/__init__.py b/src/cleanvid/__init__.py index 37e91bc..21c67fe 100644 --- a/src/cleanvid/__init__.py +++ b/src/cleanvid/__init__.py @@ -1,6 +1,6 @@ """cleanvid is a little script to mute profanity in video files.""" -__version__ = "1.6.0" +__version__ = "1.7.0" __author__ = "Seth Grover " __all__ = [] diff --git a/src/cleanvid/cleanvid.py b/src/cleanvid/cleanvid.py index b199ca9..0fc476b 100755 --- a/src/cleanvid/cleanvid.py +++ b/src/cleanvid/cleanvid.py @@ -51,6 +51,21 @@ def GetFormatAndStreamInfo(vidFileSpec): return result +######## GetAudioStreamsInfo ############################################### +def GetAudioStreamsInfo(vidFileSpec): + result = None + if os.path.isfile(vidFileSpec): + ffprobeCmd = ( + "ffprobe -loglevel quiet -select_streams a -show_entries stream=index,codec_name,sample_rate,channel_layout:stream_tags=language -of json \"" + + vidFileSpec + + "\"" + ) + ffprobeResult = delegator.run(ffprobeCmd, block=True) + if ffprobeResult.return_code == 0: + result = json.loads(ffprobeResult.out) + return result + + ######## GetStreamSubtitleMap ############################################### def GetStreamSubtitleMap(vidFileSpec): result = None @@ -202,6 +217,7 @@ class VidCleaner(object): unalteredVideo = False subsLang = SUBTITLE_DEFAULT_LANG vParams = VIDEO_DEFAULT_PARAMS + audioStreamIdx = None aParams = AUDIO_DEFAULT_PARAMS aDownmix = False threadsInput = None @@ -232,6 +248,7 @@ def __init__( reEncodeAudio=False, hardCode=False, vParams=VIDEO_DEFAULT_PARAMS, + audioStreamIdx=None, aParams=AUDIO_DEFAULT_PARAMS, aDownmix=False, threadsInput=None, @@ -275,6 +292,7 @@ def __init__( self.hardCode = hardCode self.subsLang = subsLang self.vParams = vParams + self.audioStreamIdx = audioStreamIdx self.aParams = aParams self.aDownmix = aDownmix self.threadsInput = threadsInput @@ -533,10 +551,46 @@ def MultiplexCleanVideo(self): videoArgs = self.vParams else: videoArgs = "-c:v copy" + + audioStreamOnlyIndex = 0 + if audioStreams := GetAudioStreamsInfo(self.inputVidFileSpec).get('streams', []): + if len(audioStreams) > 0: + if self.audioStreamIdx is None: + if len(audioStreams) == 1: + if 'index' in audioStreams[0]: + self.audioStreamIdx = audioStreams[0]['index'] + else: + raise ValueError(f'Could not determine audio stream index for {self.inputVidFileSpec}') + else: + raise ValueError( + f'Multiple audio streams, specify audio stream index with --audio-stream-index' + ) + elif any(stream.get('index', -1) == self.audioStreamIdx for stream in audioStreams): + audioStreamOnlyIndex = next( + ( + i + for i, stream in enumerate(audioStreams) + if stream.get('index', -1) == self.audioStreamIdx + ), + 0, + ) + else: + raise ValueError( + f'Audio stream index {self.audioStreamIdx} is invalid for {self.inputVidFileSpec}' + ) + else: + raise ValueError(f'No audio streams found in {self.inputVidFileSpec}') + else: + raise ValueError(f'Could not determine audio streams in {self.inputVidFileSpec}') + self.aParams = re.sub(r"-c:a(\s+)", rf"-c:a:{str(audioStreamOnlyIndex)}\1", self.aParams) + audioUnchangedMapList = ' '.join( + f'-map 0:a:{i}' if i != audioStreamOnlyIndex else '' for i in range(len(audioStreams)) + ) + if self.aDownmix and HasAudioMoreThanStereo(self.inputVidFileSpec): self.muteTimeList.insert(0, AUDIO_DOWNMIX_FILTER) if (not self.subsOnly) and (len(self.muteTimeList) > 0): - audioFilter = " -af \"" + ",".join(self.muteTimeList) + "\" " + audioFilter = f' -filter_complex "[0:a:{audioStreamOnlyIndex}]{",".join(self.muteTimeList)}[a{audioStreamOnlyIndex}]"' else: audioFilter = " " if self.embedSubs and os.path.isfile(self.cleanSubsFileSpec): @@ -544,14 +598,16 @@ def MultiplexCleanVideo(self): subsArgs = f" -i \"{self.cleanSubsFileSpec}\" -map 0 -map -0:s -map 1 -c:s {'mov_text' if outFileParts[1] == '.mp4' else 'srt'} -disposition:s:0 default -metadata:s:s:0 language={self.subsLang} " else: subsArgs = " -sn " + ffmpegCmd = ( f"ffmpeg -hide_banner -nostats -loglevel error -y {'' if self.threadsInput is None else ('-threads '+ str(int(self.threadsInput)))} -i \"" + self.inputVidFileSpec + "\"" + + audioFilter + + f' -map 0:v -map "[a{audioStreamOnlyIndex}]" {audioUnchangedMapList} ' + subsArgs + videoArgs - + audioFilter - + f"{self.aParams} {'' if self.threadsEncoding is None else ('-threads '+ str(int(self.threadsEncoding)))} \"" + + f" {self.aParams} {'' if self.threadsEncoding is None else ('-threads '+ str(int(self.threadsEncoding)))} \"" + self.outputVidFileSpec + "\"" ) @@ -661,6 +717,20 @@ def RunCleanvid(): parser.add_argument( '-d', '--downmix', help='Downmix to stereo (if not already stereo)', dest='aDownmix', action='store_true' ) + parser.add_argument( + '--audio-stream-index', + help='Index of audio stream to process', + metavar='', + dest="audioStreamIdx", + type=int, + default=None, + ) + parser.add_argument( + '--audio-stream-list', + help='Show list of audio streams (to get index for --audio-stream-index)', + action='store_true', + dest="audioStreamIdxList", + ) parser.add_argument( '--threads-input', help='ffmpeg global options -threads value', @@ -686,62 +756,79 @@ def RunCleanvid(): default=None, ) parser.set_defaults( + audioStreamIdxList=False, + edl=False, embedSubs=False, fullSubs=False, - subsOnly=False, + hardCode=False, offline=False, - reEncodeVideo=False, reEncodeAudio=False, - hardCode=False, - edl=False, + reEncodeVideo=False, + subsOnly=False, ) args = parser.parse_args() - inFile = args.input - outFile = args.output - subsFile = args.subs - lang = args.lang - plexFile = args.plexAutoSkipJson - if inFile: - inFileParts = os.path.splitext(inFile) - if not outFile: - outFile = inFileParts[0] + "_clean" + inFileParts[1] - if not subsFile: - subsFile = GetSubtitles(inFile, lang, args.offline) - if args.plexAutoSkipId and not plexFile: - plexFile = inFileParts[0] + "_PlexAutoSkip_clean.json" - - if plexFile and not args.plexAutoSkipId: - raise ValueError( - f'Content ID must be specified if creating a PlexAutoSkip JSON file (https://github.com/mdhiggins/PlexAutoSkip/wiki/Identifiers)' + if args.audioStreamIdxList: + audioStreamsInfo = GetAudioStreamsInfo(args.input) + # e.g.: + # 1: aac, 44100 Hz, stereo, eng + # 3: opus, 48000 Hz, stereo, jpn + print( + '\n'.join( + [ + f"{x['index']}: {x.get('codec_name', 'unknown codec')}, {x.get('sample_rate', 'unknown')} Hz, {x.get('channel_layout', 'unknown channel layout')}, {x.get('tags', {}).get('language', 'unknown language')}" + for x in audioStreamsInfo.get("streams", []) + ] + ) ) - cleaner = VidCleaner( - inFile, - subsFile, - outFile, - args.subsOut, - args.swears, - args.pad, - args.embedSubs, - args.fullSubs, - args.subsOnly, - args.edl, - args.json, - lang, - args.reEncodeVideo, - args.reEncodeAudio, - args.hardCode, - args.vParams, - args.aParams, - args.aDownmix, - args.threadsInput if args.threadsInput is not None else args.threads, - args.threadsEncoding if args.threadsEncoding is not None else args.threads, - plexFile, - args.plexAutoSkipId, - ) - cleaner.CreateCleanSubAndMuteList() - cleaner.MultiplexCleanVideo() + else: + inFile = args.input + outFile = args.output + subsFile = args.subs + lang = args.lang + plexFile = args.plexAutoSkipJson + if inFile: + inFileParts = os.path.splitext(inFile) + if not outFile: + outFile = inFileParts[0] + "_clean" + inFileParts[1] + if not subsFile: + subsFile = GetSubtitles(inFile, lang, args.offline) + if args.plexAutoSkipId and not plexFile: + plexFile = inFileParts[0] + "_PlexAutoSkip_clean.json" + + if plexFile and not args.plexAutoSkipId: + raise ValueError( + f'Content ID must be specified if creating a PlexAutoSkip JSON file (https://github.com/mdhiggins/PlexAutoSkip/wiki/Identifiers)' + ) + + cleaner = VidCleaner( + inFile, + subsFile, + outFile, + args.subsOut, + args.swears, + args.pad, + args.embedSubs, + args.fullSubs, + args.subsOnly, + args.edl, + args.json, + lang, + args.reEncodeVideo, + args.reEncodeAudio, + args.hardCode, + args.vParams, + args.audioStreamIdx, + args.aParams, + args.aDownmix, + args.threadsInput if args.threadsInput is not None else args.threads, + args.threadsEncoding if args.threadsEncoding is not None else args.threads, + plexFile, + args.plexAutoSkipId, + ) + cleaner.CreateCleanSubAndMuteList() + cleaner.MultiplexCleanVideo() #################################################################################