diff --git a/EmiyaEngine.py b/EmiyaEngine.py deleted file mode 100644 index 7e67a88..0000000 --- a/EmiyaEngine.py +++ /dev/null @@ -1,341 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Emiya Engine -# Version: Alpha.0 Rev.3 -# Author: Sg4Dylan - -# Created: 02/07/2017 -# 真正重要的東西,只用眼睛是看不見的, -# 只要蘊藏著想成為真物的意志,偽物就比真物還要來得真實 - -import os -import argparse -import datetime -import uuid -import random -import math -import numpy as np -import scipy -import librosa -import resampy -import logging -from colorama import Fore, Back, init - -logging.basicConfig( - level=logging.DEBUG, - format='%(asctime)s [line:%(lineno)d] \ - %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S', - filename='EmiyaEngine.log', - filemode='w+' -) -logger = logging.getLogger("EmiyaLog") -# console = logging.StreamHandler() -# console.setLevel(logging.INFO) -# logger.addHandler(console) - - -class EmiyaEngineCore: - - ReadyFilePath = '' # 输入文件名 - BeforeSignal = '' # 输入原始信号 - BeforeSignalSR = 0 # 原始信号采样 - MidSignal = '' # 重采样到96K信号 - MidSignalSR = 0 # 96K - AfterSignalLeft = '' # 处理后的左信号 - AfterSignalRight = '' # 处理后的右信号 - AfterSignal = '' # 处理后的信号 - AfterSignalSR = 0 # 处理后的采样率 - SplitSize = 0 # 倒腾区大小 - AnalysisWindow = False # 分析接续点用的单边FFT是否加窗 - MidSRCFalse = False # SRC开关, 为True时就会取消掉SRC步骤 - MidPrint = False # 打印细节日志开关 - MidPrintProgress = True # 打印进度信息 - - def __init__(self, _InputFilePath, _DebugSwitch, _SplitSize, _WindowSwitch): - # 输入样本: - # 信号被加载为ndarray结构,有两个声道各以一维array形式存在 - self.ReadyFilePath = _InputFilePath - self.SplitSize = _SplitSize - if _WindowSwitch == 1: - self.AnalysisWindow = True - if _DebugSwitch == 0: - MidPrintProgress = False - elif _DebugSwitch == 1: - MidPrintProgress = True - elif _DebugSwitch == 2: - MidPrint = True - else: - pass - self.LoadFile() - self.MidUpSRC() - self.ProcessCore() - - def LoadFile(self): - self.BeforeSignal, self.BeforeSignalSR = librosa.load( - self.ReadyFilePath, sr=None, mono=False) - self.AfterSignalLeft = np.array([()]) 
- self.AfterSignalRight = np.array([()]) - print("Load signal complete. ChannelCount: %s SampleRate: %s Hz" % ( - str(len(self.BeforeSignal)), str(self.BeforeSignalSR))) - logger.info("Load signal complete. ChannelCount: %s SampleRate: %s Hz" % ( - str(len(self.BeforeSignal)), str(self.BeforeSignalSR))) - - def MidUpSRC(self): - # 重采样loss样本到96K - print("Please wait for SRC.") - logger.info("Please wait for SRC.") - self.MidSignalSR = 96000 - self.AfterSignalSR = self.MidSignalSR - if self.MidSRCFalse: - self.MidSignal = self.BeforeSignal - else: - self.MidSignal = resampy.resample( - self.BeforeSignal, self.BeforeSignalSR, self.MidSignalSR, filter='kaiser_best') - print("Signal SRC complete.") - logging.info("Signal SRC complete.") - - def MidFindThresholdPoint(self, _MidFFTResultSingle, _FFTPointCount): - # 鉴定频谱基本参数 - _MidAmpData = abs(_MidFFTResultSingle[range(_FFTPointCount // 2)]) - # Step0. 找出基波幅度 - _MidBaseFreqAmp = _MidAmpData.max() - if self.MidPrint: - print("Signal max AMP -> %s" % _MidBaseFreqAmp) - logger.debug("Signal max AMP -> %s" % _MidBaseFreqAmp) - # Step1. 
找出接续的阈值 - _MidThresholdHit = 1.0e-11 # 方差判定阈值 - _MidThresholdPoint = 0 # 最后的阈值点 - _MidFindRange = int((_FFTPointCount / 2) - 1) # 搜索的范围 - _MidStartFindPos = round(2000 / (self.BeforeSignalSR / (_FFTPointCount / 2))) # 从2K频点附近开始寻找,加快速度 - _MidStartFlag = True # 循环用的启动Flag - _MidLoopCount = 0 # 循环计数器 - _MidLegalFreq = (self.BeforeSignalSR / 2) - 500 # 判定结果合法的阈值频率 - _MidForwardFreq = 3000 # 前向修正频率 - _MidOrderFreq = (self.BeforeSignalSR / 2) - 6000 # 钦定频率 - # Rev.1: 检查接续点是否符合常理 - while _MidStartFlag or _MidThresholdPoint > round(_MidLegalFreq / (self.BeforeSignalSR / (_FFTPointCount / 2))): - _MidStartFlag = False - if (_MidThresholdPoint * (self.BeforeSignalSR / (_FFTPointCount / 2))) > int(self.BeforeSignalSR / 2): - _MidThresholdHit *= 2 - for i in range(_MidStartFindPos, _MidFindRange): - if i + 5 > _MidFindRange: - break - # 计算连续五个采样*3 的方差,与阈值比较,判断频谱消失的位置 - if np.var(_MidAmpData[i:i + 4]) < _MidThresholdHit and \ - np.var(_MidAmpData[i + 1:i + 5]) < _MidThresholdHit: - # 定位到当前位置的前500Hz位置 - _MidThresholdPoint = i - round(_MidForwardFreq / (self.BeforeSignalSR / (_FFTPointCount / 2))) - break - # 错误超过5把就强行钦定频率 - _MidLoopCount += 1 - if _MidLoopCount > 5: - _MidThresholdPoint = round( - _MidOrderFreq / (self.BeforeSignalSR / (_FFTPointCount / 2))) - break - # 打印函数返回信息 - if self.MidPrint: - print("Signal threshold point -> %s @ %sHz Max Amp -> %s" % (_MidThresholdPoint, - _MidThresholdPoint * - (self.BeforeSignalSR / (_MidFindRange + 1)), - _MidBaseFreqAmp)) - logger.debug("Signal threshold point -> %s @ %sHz Max Amp -> %s" % (_MidThresholdPoint, - _MidThresholdPoint * - (self.BeforeSignalSR / (_MidFindRange + 1)), - _MidBaseFreqAmp)) - # _MidThresholdPoint = round(21000/(self.BeforeSignalSR/(_FFTPointCount/2))) - return _MidBaseFreqAmp, _MidThresholdPoint - - def MidInsertJitter(self, _MidFFTResultDouble, _FFTPointCount, _MidThresholdPoint, _MidBaseFreqAmp): - # 构造抖动 - if _MidThresholdPoint <= 0: - return _MidFFTResultDouble - for i in range(_MidThresholdPoint, 
_FFTPointCount - _MidThresholdPoint): - # Rev.0: 调整生成概率,频率越高概率越低 - # Rev.1: 加入幅值判定,幅度越大概率越大 - _GenPossible = abs((_FFTPointCount / 2) - i) / ((_FFTPointCount / - 2) - _MidThresholdPoint) * (_MidBaseFreqAmp / 0.22) - if random.randint(0, 1000000) < 800000 * _GenPossible: # 0<=x<=10 - _MidRealValue = abs(_MidFFTResultDouble.real[i]) - _BaseJitterMin = _MidRealValue * 0.5 * (1 - _GenPossible) - _BaseJitterMax = _MidRealValue * 6 * _GenPossible - _AmpJitterMin = _MidBaseFreqAmp * _MidRealValue * 0.5 - _AmpJitterMax = _MidBaseFreqAmp * _MidRealValue * 2 - _AmpJitterPrefix = - \ - 1 if random.randint(0, 100000) < 50000 else 1 - _MiditterPrefix = - \ - 1 if random.randint(0, 100000) < 50000 else 1 - _MidDeltaJitterValue = random.uniform( - _BaseJitterMin, _BaseJitterMax) + _AmpJitterPrefix * random.uniform(_AmpJitterMin, _AmpJitterMax) - _MidFFTResultDouble.real[ - i] += _MiditterPrefix * _MidDeltaJitterValue - return _MidFFTResultDouble - - def FinSaveFile(self): - init(autoreset=True) - OutputFilePath = os.path.abspath( - os.path.join(self.ReadyFilePath, os.pardir)) + "\\" - OutputFileName = OutputFilePath + 'Output_%s.wav' % uuid.uuid4().hex - librosa.output.write_wav( - OutputFileName, self.AfterSignal, self.AfterSignalSR) - print(Back.GREEN + Fore.WHITE + "SAVE DONE" + - Back.BLACK + " Output path -> " + OutputFileName) - logger.info(Back.GREEN + Fore.WHITE + "SAVE DONE" + - Back.BLACK + " Output path -> " + OutputFileName) - - def ProcessCore(self): - # 初始化彩色命令行 - init(autoreset=True) - # 两个声道 - for ChannelIndex in range(2): - # 记录开始时间 - _MidStartTime = datetime.datetime.now() - # 信号总长度 - _MidSignalLength = len(self.MidSignal[ChannelIndex]) - # FFT分割数量 - _FFTPointCount = 1024 # 至少2048点,避免计算错误 - _MidDivCount = math.floor(_MidSignalLength / _FFTPointCount) - # 实际重叠操作数量 = FFT分割数量 * 分块次数 - _EachLength = 512 - # 补偿标记, 标记有效时, 说明已经到了序列尾部, 因停止继续循环运算 - SuffixFlag = False - SuffixLength = 0 - # Rev.2: 加入临时数组加速Append, 临时数组每Append操作设定次数就倒腾一次 - _TempArrayLeft = np.array([()]) - 
_TempArrayRight = np.array([()]) - _TempAppendCount = 0 - # 除了最后一块,每一块都是取计算结果时域的前512点 - for SamplePointIndex in range(_MidDivCount + 1): - StartPos = SamplePointIndex * _FFTPointCount - EndPos = SamplePointIndex * _FFTPointCount + _FFTPointCount - _EachPieceLeft = np.array([()]) - _EachPieceRight = np.array([()]) - for EachFourPiece in range(int(_FFTPointCount / _EachLength)): - StartPos += EachFourPiece * _EachLength - EndPos += EachFourPiece * _EachLength - # 若超出范围, 需将本次计算完整保留接续有效部分(除去补零部分) - if EndPos > _MidSignalLength: - EndPos = _MidSignalLength - SuffixFlag = True - _TempSignal = self.MidSignal[ChannelIndex][StartPos:EndPos] - # 不足FFT点数的补零 - while len(_TempSignal) != _FFTPointCount: - _TempSignal = np.append(_TempSignal, [0]) - SuffixLength += 1 - # 执行FFT运算, 单边谱用于分析, 双边谱用于处理 - _MidFFTResultDouble = np.fft.fft( - _TempSignal, _FFTPointCount) / (_FFTPointCount) - if self.AnalysisWindow: - _TempSignal *= scipy.signal.hann(_FFTPointCount, sym=0) - _MidFFTResultSingle = np.fft.fft( - _TempSignal, _FFTPointCount) / (_FFTPointCount / 2) - # 获取当前分段最大振幅, 处理阈值点 - _MidBaseFreqAmp, _MidThresholdPoint = self.MidFindThresholdPoint( - _MidFFTResultSingle, _FFTPointCount) - # 构造抖动到当前FFT实际值上 - _MidFFTAfterJitter = self.MidInsertJitter( - _MidFFTResultDouble, _FFTPointCount, _MidThresholdPoint, _MidBaseFreqAmp) - # 逆变换IFFT - _MidTimerDomSignal = np.fft.ifft( - _MidFFTAfterJitter, n=_FFTPointCount) - # 接续到新信号上 - _AppendLength = _EachLength - if SuffixFlag: - _AppendLength = _FFTPointCount - SuffixLength - _MidAppendSignal = _MidTimerDomSignal[0:_AppendLength] - if self.MidPrint: - print("Per each length -> %s" % len(_MidAppendSignal)) - logger.debug("Per each length -> %s" % len(_MidAppendSignal)) - if ChannelIndex == 0: - _EachPieceLeft = np.append( - _EachPieceLeft, _MidAppendSignal) - else: - _EachPieceRight = np.append( - _EachPieceRight, _MidAppendSignal) - # 及时跳出尾部 - if SuffixFlag: - break - # 先倒腾到临时数组,倒腾500次给放回大数组 - if _TempAppendCount < self.SplitSize and not 
SuffixFlag: - # 已消耗时间 - _MidUsedTime = datetime.datetime.now() - _MidStartTime - # 估算剩余时间 - _MidEtaTime = ((datetime.datetime.now() - _MidStartTime) / - ((SamplePointIndex + 1) / _MidDivCount)) - _MidUsedTime - # 构造显示文本 - if ChannelIndex == 0: - _TempArrayLeft = np.append( - _TempArrayLeft, _EachPieceLeft) - if self.MidPrintProgress: - print("Left channel progress rate -> " + Fore.CYAN + str(SamplePointIndex) + " / " + - str(_MidDivCount - 1) + Fore.WHITE + " TIME USED -> " + Fore.YELLOW + - str(_MidUsedTime) + Fore.WHITE + " ETA -> " + Fore.GREEN + str(_MidEtaTime)) - logger.info("Left channel progress rate -> " + str(SamplePointIndex) + " / " + - str(_MidDivCount - 1) + " TIME USED -> " + - str(_MidUsedTime) + " ETA -> " + str(_MidEtaTime)) - else: - _TempArrayRight = np.append( - _TempArrayRight, _EachPieceRight) - if self.MidPrintProgress: - print("Right channel progress rate -> " + Fore.CYAN + str(SamplePointIndex) + " / " + - str(_MidDivCount - 1) + Fore.WHITE + " TIME USED -> " + Fore.YELLOW + - str(_MidUsedTime) + Fore.WHITE + " ETA -> " + Fore.GREEN + str(_MidEtaTime)) - logger.info("Right channel progress rate -> " + str(SamplePointIndex) + " / " + - str(_MidDivCount - 1) + " TIME USED -> " + - str(_MidUsedTime) + " ETA -> " + str(_MidEtaTime)) - else: - _TempAppendCount = 0 - if ChannelIndex == 0: - self.AfterSignalLeft = np.append(self.AfterSignalLeft, _TempArrayLeft) - _TempArrayLeft = np.array([()]) - else: - self.AfterSignalRight = np.append(self.AfterSignalRight, _TempArrayRight) - _TempArrayRight = np.array([()]) - # 倒腾计数器 - _TempAppendCount += 1 - if SuffixFlag: - break - self.AfterSignal = np.array([self.AfterSignalLeft.real, self.AfterSignalRight.real]) - self.FinSaveFile() - - -def Main(_input, _debug, _size, _window): - Processor = EmiyaEngineCore(_input, _debug, _size, _window) - -if __name__ == "__main__": - - parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, - description='Emiya Engine\n' - 'Version: Alpha.0 
Rev.2\n' - 'Author: Sg4Dylan - \n' - '真正重要的東西, 只用眼睛是看不見的, \n' - '只要蘊藏著想成為真物的意志, 偽物就比真物還要來得真實.') - parser.add_argument('-i', '--input', help='待处理文件的绝对路径, 同一路径可直接输入文件名. 例如: \n' - 'Music_ready_test.mp3') - parser.add_argument('-d', '--debug', help='调试等级设定. 默认 1 级. \n' - '设置为 0 时, 只显示任务起始日志; \n' - '设置为 1 时, 额外显示进度日志; \n' - '设置为 2 时, 额外显示处理细节日志') - parser.add_argument('-s', '--size', help='倒腾区大小. 默认 500. \n' - '使用倒腾区是因为 numpy 做大数组 append 速度远低于小数组, \n' - '故加入小数组多倒腾一手, 这个参数就是小数组的尺寸.') - parser.add_argument('-w', '--window', help='分析用汉宁Hann双余弦窗启用开关. 默认不使用.\n' - '输入 0 代表不使用, 1 代表使用.') - - args = parser.parse_args() - _input = args.input - _debug = args.debug - _size = args.size - _window = args.window - - if not _input: - print("缺少输入文件参数,请使用 --help 参考!") - exit(1) - if not _debug: - _debug = 1 - if not _size: - _size = 500 - if not _window: - _window = 0 - - Main(_input, _debug, _size, _window) diff --git a/GUI_EmiyaEngine.py b/GUI_EmiyaEngine.py deleted file mode 100644 index c818ef4..0000000 --- a/GUI_EmiyaEngine.py +++ /dev/null @@ -1,557 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import argparse -import datetime -import uuid -import random -import math -import numpy as np -import scipy -import librosa -import resampy -import logging -from colorama import Fore, Back, init -from PyQt5 import QtCore, QtGui, QtWidgets - -logging.basicConfig( - level=logging.DEBUG, - format='%(asctime)s [line:%(lineno)d] \ - %(levelname)s %(message)s', - datefmt='%a, %d %b %Y %H:%M:%S', - filename='EmiyaEngineGUI.log', - filemode='w+' -) -logger = logging.getLogger("EmiyaLog") -# console = logging.StreamHandler() -# console.setLevel(logging.INFO) -# logger.addHandler(console) - - -class EmiyaEngineCore(QtCore.QThread): - - Update = QtCore.pyqtSignal([str, str, int]) - Finish = QtCore.pyqtSignal() - - ReadyFilePath = '' # 输入文件 - OutputFilePath = '' # 输出文件 - BeforeSignal = '' # 输入原始信号 - BeforeSignalSR = 0 # 原始信号采样 - MidSignal = '' # 重采样到96K信号 - MidSignalSR = 0 # 96K - AfterSignalLeft = '' 
# 处理后的左信号 - AfterSignalRight = '' # 处理后的右信号 - AfterSignal = '' # 处理后的信号 - AfterSignalSR = 0 # 处理后的采样率 - SplitSize = 0 # 倒腾区大小 - AnalysisWindow = False # 分析接续点用的单边FFT是否加窗 - MidSRCFalse = False # SRC开关, 为True时就会取消掉SRC步骤 - MidPrint = False # 打印细节日志开关 - MidPrintProgress = True # 打印进度信息 - - def __init__(self, parent, _InputFilePath, _OutputFilePath, _DebugSwitch, _SplitSize, _WindowSwitch): - super(EmiyaEngineCore, self).__init__(parent) - # QtCore.QThread.__init__(self,parent) - self.ReadyFilePath = _InputFilePath - self.OutputFilePath = _OutputFilePath - self.SplitSize = _SplitSize - self.AnalysisWindow = _WindowSwitch - if _DebugSwitch == 0: - MidPrintProgress = False - elif _DebugSwitch == 1: - MidPrintProgress = True - elif _DebugSwitch == 2: - MidPrint = True - else: - pass - - def LoadFile(self): - self.BeforeSignal, self.BeforeSignalSR = librosa.load(self.ReadyFilePath, sr=None, mono=False) - self.AfterSignalLeft = np.array([()]) - self.AfterSignalRight = np.array([()]) - print("Load signal complete. ChannelCount: %s SampleRate: %s Hz" % (str(len(self.BeforeSignal)), - str(self.BeforeSignalSR))) - logger.info("Load signal complete. ChannelCount: %s SampleRate: %s Hz" % (str(len(self.BeforeSignal)), - str(self.BeforeSignalSR))) - - def MidUpSRC(self): - # 重采样loss样本到96K - print("Please wait for SRC.") - logger.info("Please wait for SRC.") - self.MidSignalSR = 96000 - self.AfterSignalSR = self.MidSignalSR - if self.MidSRCFalse: - self.MidSignal = self.BeforeSignal - else: - self.MidSignal = resampy.resample(self.BeforeSignal, self.BeforeSignalSR, - self.MidSignalSR, filter='kaiser_best') - print("Signal SRC complete.") - logger.info("Signal SRC complete.") - - def MidFindThresholdPoint(self, _MidFFTResultSingle, _FFTPointCount): - # 鉴定频谱基本参数 - _MidAmpData = abs(_MidFFTResultSingle[range(_FFTPointCount//2)]) - # Step0. 
找出基波幅度 - _MidBaseFreqAmp = _MidAmpData.max() - if self.MidPrint: - print("Signal max AMP -> %s" % _MidBaseFreqAmp) - logger.debug("Signal max AMP -> %s" % _MidBaseFreqAmp) - # Step1. 找出接续的阈值 - _MidThresholdHit = 1.0e-11 # 方差判定阈值 - _MidThresholdPoint = 0 # 最后的阈值点 - _MidFindRange = int((_FFTPointCount/2)-1) # 搜索的范围 - _MidStartFindPos = round(2000 / (self.BeforeSignalSR / (_FFTPointCount / 2))) # 从2K频点附近开始寻找,加快速度 - _MidStartFlag = True # 循环用的启动Flag - _MidLoopCount = 0 # 循环计数器 - _MidLegalFreq = (self.BeforeSignalSR / 2) - 500 # 判定结果合法的阈值频率 - _MidForwardFreq = 3000 # 前向修正频率 - _MidOrderFreq = (self.BeforeSignalSR / 2) - 6000 # 钦定频率 - # Rev.1: 检查接续点是否符合常理 - while _MidStartFlag or _MidThresholdPoint > round(_MidLegalFreq / (self.BeforeSignalSR / (_FFTPointCount / 2))): - _MidStartFlag = False - if (_MidThresholdPoint * (self.BeforeSignalSR / (_FFTPointCount / 2))) > int(self.BeforeSignalSR / 2): - _MidThresholdHit *= 2 - for i in range(_MidStartFindPos, _MidFindRange): - if i + 5 > _MidFindRange: - break - # 计算连续五个采样*3 的方差,与阈值比较,判断频谱消失的位置 - if np.var(_MidAmpData[i:i + 4]) < _MidThresholdHit and \ - np.var(_MidAmpData[i + 1:i + 5]) < _MidThresholdHit: - # 定位到当前位置的前500Hz位置 - _MidThresholdPoint = i - round(_MidForwardFreq / (self.BeforeSignalSR / (_FFTPointCount / 2))) - break - # 错误超过5把就强行钦定频率 - _MidLoopCount += 1 - if _MidLoopCount > 5: - _MidThresholdPoint = round(_MidOrderFreq / (self.BeforeSignalSR / (_FFTPointCount / 2))) - break - # 打印函数返回信息 - if self.MidPrint: - print("Signal threshold point -> %s @ %sHz Max Amp -> %s" % (_MidThresholdPoint, - _MidThresholdPoint * - (self.BeforeSignalSR / (_MidFindRange + 1)), - _MidBaseFreqAmp)) - logger.debug("Signal threshold point -> %s @ %sHz Max Amp -> %s" % (_MidThresholdPoint, - _MidThresholdPoint * - (self.BeforeSignalSR / (_MidFindRange + 1)), - _MidBaseFreqAmp)) - # _MidThresholdPoint = round(21000/(self.BeforeSignalSR/(_FFTPointCount/2))) - return _MidBaseFreqAmp, _MidThresholdPoint - - def MidInsertJitter(self, 
_MidFFTResultDouble, _FFTPointCount, _MidThresholdPoint, _MidBaseFreqAmp): - # 构造抖动 - if _MidThresholdPoint <= 0: - return _MidFFTResultDouble - for i in range(_MidThresholdPoint, _FFTPointCount - _MidThresholdPoint): - # Rev.0: 调整生成概率,频率越高概率越低 - # Rev.1: 加入幅值判定,幅度越大概率越大 - _GenPossible = abs((_FFTPointCount/2)-i)/((_FFTPointCount/2)-_MidThresholdPoint)*(_MidBaseFreqAmp/0.22) - if random.randint(0, 1000000) < 800000 * _GenPossible: # 0<=x<=10 - _MidRealValue = abs(_MidFFTResultDouble.real[i]) - _BaseJitterMin = _MidRealValue * 0.5 * (1-_GenPossible) - _BaseJitterMax = _MidRealValue * 6 * _GenPossible - _AmpJitterMin = _MidBaseFreqAmp * _MidRealValue * 0.5 - _AmpJitterMax = _MidBaseFreqAmp * _MidRealValue * 2 - _AmpJitterPrefix = -1 if random.randint(0, 100000) < 50000 else 1 - _MiditterPrefix = -1 if random.randint(0, 100000) < 50000 else 1 - _MidDeltaJitterValue = random.uniform(_BaseJitterMin, _BaseJitterMax) + \ - _AmpJitterPrefix * random.uniform(_AmpJitterMin, _AmpJitterMax) - _MidFFTResultDouble.real[i] += _MiditterPrefix * _MidDeltaJitterValue - return _MidFFTResultDouble - - def FinSaveFile(self): - init(autoreset=True) - SaveFilePath = self.OutputFilePath - if self.OutputFilePath == '': - SaveFilePath = os.path.abspath(os.path.join(self.ReadyFilePath, os.pardir)) + "\\" - OutputFileName = SaveFilePath + 'Output_%s.wav' % uuid.uuid4().hex - librosa.output.write_wav(SaveFilePath, self.AfterSignal, self.AfterSignalSR) - print(Back.GREEN + Fore.WHITE + "SAVE DONE" + Back.BLACK + " Output path -> " + SaveFilePath) - logger.info("SAVE DONE Output path -> " + SaveFilePath) - - def run(self): - # 加载文件启动SRC - self.LoadFile() - self.MidUpSRC() - # 初始化彩色命令行 - init(autoreset=True) - # 记录整个任务开始时间 - _MidStartTimeGlobal = datetime.datetime.now() - # 两个声道 - for ChannelIndex in range(2): - # 记录开始时间 - _MidStartTime = datetime.datetime.now() - # 信号总长度 - _MidSignalLength = len(self.MidSignal[ChannelIndex]) - # FFT分割数量 - _FFTPointCount = 1024 # 至少2048点,避免计算错误 - _MidDivCount = 
math.floor(_MidSignalLength/_FFTPointCount) - # 实际重叠操作数量 = FFT分割数量 * 分块次数 - _EachLength = 512 - # 补偿标记, 标记有效时, 说明已经到了序列尾部, 因停止继续循环运算 - SuffixFlag = False - SuffixLength = 0 - # Rev.2: 加入临时数组加速Append, 临时数组每Append操作设定次数就倒腾一次 - _TempArrayLeft = np.array([()]) - _TempArrayRight = np.array([()]) - _TempAppendCount = 0 - # 除了最后一块,每一块都是取计算结果时域的前512点 - for SamplePointIndex in range(_MidDivCount+1): - StartPos = SamplePointIndex*_FFTPointCount - EndPos = SamplePointIndex*_FFTPointCount+_FFTPointCount - _EachPieceLeft = np.array([()]) - _EachPieceRight = np.array([()]) - for EachFourPiece in range(int(_FFTPointCount/_EachLength)): - StartPos += EachFourPiece * _EachLength - EndPos += EachFourPiece * _EachLength - # 若超出范围, 需将本次计算完整保留接续有效部分(除去补零部分) - if EndPos > _MidSignalLength: - EndPos = _MidSignalLength - SuffixFlag = True - _TempSignal = self.MidSignal[ChannelIndex][StartPos:EndPos] - # 不足FFT点数的补零 - while len(_TempSignal) != _FFTPointCount: - _TempSignal = np.append(_TempSignal, [0]) - SuffixLength += 1 - # 执行FFT运算, 单边谱用于分析, 双边谱用于处理 - _MidFFTResultDouble = np.fft.fft(_TempSignal, _FFTPointCount) / (_FFTPointCount) - if self.AnalysisWindow: - _TempSignal *= scipy.signal.hann(_FFTPointCount, sym=0) - _MidFFTResultSingle = np.fft.fft(_TempSignal, _FFTPointCount) / (_FFTPointCount / 2) - # 获取当前分段最大振幅, 处理阈值点 - _MidBaseFreqAmp, _MidThresholdPoint = self.MidFindThresholdPoint(_MidFFTResultSingle, - _FFTPointCount) - # 构造抖动到当前FFT实际值上 - _MidFFTAfterJitter = self.MidInsertJitter(_MidFFTResultDouble, _FFTPointCount, - _MidThresholdPoint, _MidBaseFreqAmp) - # 逆变换IFFT - _MidTimerDomSignal = np.fft.ifft(_MidFFTAfterJitter, n=_FFTPointCount) - # 接续到新信号上 - _AppendLength = _EachLength - if SuffixFlag: - _AppendLength = _FFTPointCount-SuffixLength - _MidAppendSignal = _MidTimerDomSignal[0:_AppendLength] - if self.MidPrint: - print("Per each length -> %s" % len(_MidAppendSignal)) - logger.debug("Per each length -> %s" % len(_MidAppendSignal)) - if ChannelIndex == 0: - _EachPieceLeft = 
np.append(_EachPieceLeft, _MidAppendSignal) - else: - _EachPieceRight = np.append(_EachPieceRight, _MidAppendSignal) - # 及时跳出尾部 - if SuffixFlag: - break - # 先倒腾到临时数组,倒腾500次给放回大数组 - if _TempAppendCount < self.SplitSize and not SuffixFlag: - # 已消耗时间 - _MidUsedTime = datetime.datetime.now()-_MidStartTime - # 估算剩余时间 - _MidEtaTime = (_MidUsedTime/((SamplePointIndex+1)/_MidDivCount))-_MidUsedTime - # 完整任务消耗时间 - _MidTotalUsedTime = datetime.datetime.now()-_MidStartTimeGlobal - # 预计总消耗时间 - if ChannelIndex == 0: - _MidTotalEtaTime = (_MidTotalUsedTime/((SamplePointIndex+1)/_MidDivCount))*2-_MidTotalUsedTime - else: - _MidTotalEtaTime = _MidEtaTime - # 进度比例 - _MidProgressRate = round(50 * (SamplePointIndex + 1) / _MidDivCount) + \ - (50 if ChannelIndex == 1 else 0) - self.Update.emit(str(_MidTotalUsedTime)[:-5], str(_MidTotalEtaTime)[:-5], _MidProgressRate) - # 构造显示文本 - if ChannelIndex == 0: - _TempArrayLeft = np.append(_TempArrayLeft, _EachPieceLeft) - if self.MidPrintProgress: - print("Left channel progress rate -> " + Fore.CYAN + str(SamplePointIndex) + " / " + - str(_MidDivCount-1) + Fore.WHITE + " TIME USED -> " + Fore.YELLOW + - str(_MidUsedTime) + Fore.WHITE + " ETA -> " + Fore.GREEN + str(_MidEtaTime)) - logger.info("Left channel progress rate -> " + str(SamplePointIndex) + " / " + - str(_MidDivCount-1) + " TIME USED -> " + - str(_MidUsedTime) + " ETA -> " + str(_MidEtaTime)) - else: - _TempArrayRight = np.append(_TempArrayRight, _EachPieceRight) - if self.MidPrintProgress: - print("Right channel progress rate -> " + Fore.CYAN + str(SamplePointIndex) + " / " + - str(_MidDivCount-1) + Fore.WHITE + " TIME USED -> " + Fore.YELLOW + - str(_MidUsedTime) + Fore.WHITE + " ETA -> " + Fore.GREEN + str(_MidEtaTime)) - logger.info("Right channel progress rate -> " + str(SamplePointIndex) + " / " + - str(_MidDivCount-1) + " TIME USED -> " + - str(_MidUsedTime) + " ETA -> " + str(_MidEtaTime)) - else: - _TempAppendCount = 0 - if ChannelIndex == 0: - self.AfterSignalLeft = 
np.append(self.AfterSignalLeft, _TempArrayLeft) - _TempArrayLeft = np.array([()]) - else: - self.AfterSignalRight = np.append(self.AfterSignalRight, _TempArrayRight) - _TempArrayRight = np.array([()]) - # 倒腾计数器 - _TempAppendCount += 1 - if SuffixFlag: - break - self.AfterSignal = np.array([self.AfterSignalLeft.real, self.AfterSignalRight.real]) - self.FinSaveFile() - self.Finish.emit() - - -class EmiyaEngineGUI(object): - - # Emiya Engine GUI ARGS - - InputFilePath = '' - OutputFilePath = '' - IsUseWindow = False - SplitSize = 500 - IsStarted = False - SubCore = QtCore.pyqtSignal() - - def __init__(self, MainWindow): - - MainWindow.setObjectName("MainWindow") - MainWindow.resize(615, 272) - MainWindow.setMinimumSize(QtCore.QSize(615, 272)) - MainWindow.setMaximumSize(QtCore.QSize(615, 272)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(10) - MainWindow.setFont(font) - MainWindow.setWindowTitle("Emiya Engine - 只要蘊藏著想成為真物的意志, 偽物就比真物還要來得真實") - self.centralwidget = QtWidgets.QWidget(MainWindow) - self.centralwidget.setObjectName("centralwidget") - - self.Input_Label = QtWidgets.QLabel(self.centralwidget) - self.Input_Label.setGeometry(QtCore.QRect(20, 20, 101, 16)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.Input_Label.setFont(font) - self.Input_Label.setObjectName("Input_Label") - - self.InputLineBox = QtWidgets.QLineEdit(self.centralwidget) - self.InputLineBox.setGeometry(QtCore.QRect(130, 20, 401, 21)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(10) - self.InputLineBox.setFont(font) - self.InputLineBox.setObjectName("InputLineBox") - - self.InputButton = QtWidgets.QPushButton(self.centralwidget) - self.InputButton.setGeometry(QtCore.QRect(540, 20, 51, 21)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.InputButton.setFont(font) - self.InputButton.setObjectName("InputButton") - self.InputButton.clicked.connect(self.SetInputFilePath) - - self.OutputLineBox 
= QtWidgets.QLineEdit(self.centralwidget) - self.OutputLineBox.setGeometry(QtCore.QRect(130, 50, 401, 21)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(10) - self.OutputLineBox.setFont(font) - self.OutputLineBox.setObjectName("OutputLineBox") - - self.OutputButton = QtWidgets.QPushButton(self.centralwidget) - self.OutputButton.setGeometry(QtCore.QRect(540, 50, 51, 21)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.OutputButton.setFont(font) - self.OutputButton.setObjectName("OutputButton") - self.OutputButton.clicked.connect(self.SetOutputFilePath) - - self.Output_Label = QtWidgets.QLabel(self.centralwidget) - self.Output_Label.setGeometry(QtCore.QRect(20, 50, 101, 16)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.Output_Label.setFont(font) - self.Output_Label.setObjectName("Output_Label") - - self.IsUseWindowCheck = QtWidgets.QCheckBox(self.centralwidget) - self.IsUseWindowCheck.setGeometry(QtCore.QRect(50, 90, 181, 31)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.IsUseWindowCheck.setFont(font) - self.IsUseWindowCheck.setChecked(False) - self.IsUseWindowCheck.setObjectName("IsUseWindowCheck") - self.IsUseWindowCheck.stateChanged.connect(self.SetIsUseWindowCheck) - - self.SplitSizeSpin = QtWidgets.QSpinBox(self.centralwidget) - self.SplitSizeSpin.setGeometry(QtCore.QRect(350, 90, 71, 31)) - self.SplitSizeSpin.setMinimum(100) - self.SplitSizeSpin.setMaximum(5000) - self.SplitSizeSpin.setSingleStep(100) - self.SplitSizeSpin.setProperty("value", 500) - self.SplitSizeSpin.setObjectName("SplitSizeSpin") - self.SplitSizeSpin.valueChanged.connect(self.SetSplitSize) - - self.SplitSize_Label = QtWidgets.QLabel(self.centralwidget) - self.SplitSize_Label.setGeometry(QtCore.QRect(250, 90, 91, 31)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.SplitSize_Label.setFont(font) - 
self.SplitSize_Label.setObjectName("SplitSize_Label") - - self.StartButton = QtWidgets.QPushButton(self.centralwidget) - self.StartButton.setGeometry(QtCore.QRect(460, 90, 91, 31)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.StartButton.setFont(font) - self.StartButton.setObjectName("StartButton") - self.StartButton.clicked.connect(self.RunProcess) - - self.groupBox = QtWidgets.QGroupBox(self.centralwidget) - self.groupBox.setGeometry(QtCore.QRect(20, 130, 571, 101)) - self.groupBox.setObjectName("groupBox") - - self.UsedTime_Label = QtWidgets.QLabel(self.groupBox) - self.UsedTime_Label.setGeometry(QtCore.QRect(30, 30, 101, 16)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.UsedTime_Label.setFont(font) - self.UsedTime_Label.setObjectName("UsedTime_Label") - - self.GlobalProgressBar = QtWidgets.QProgressBar(self.groupBox) - self.GlobalProgressBar.setGeometry(QtCore.QRect(100, 60, 451, 23)) - self.GlobalProgressBar.setProperty("value", 0) - self.GlobalProgressBar.setObjectName("GlobalProgressBar") - - self.GlobalProgressBar_Label = QtWidgets.QLabel(self.groupBox) - self.GlobalProgressBar_Label.setGeometry(QtCore.QRect(30, 60, 61, 21)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.GlobalProgressBar_Label.setFont(font) - self.GlobalProgressBar_Label.setObjectName("GlobalProgressBar_Label") - - self.UsedTime = QtWidgets.QLabel(self.groupBox) - self.UsedTime.setGeometry(QtCore.QRect(150, 30, 101, 16)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.UsedTime.setFont(font) - self.UsedTime.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignTrailing | QtCore.Qt.AlignVCenter) - self.UsedTime.setObjectName("UsedTime") - - self.EtaTime = QtWidgets.QLabel(self.groupBox) - self.EtaTime.setGeometry(QtCore.QRect(410, 30, 91, 16)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.EtaTime.setFont(font) - 
self.EtaTime.setAlignment(QtCore.Qt.AlignRight | QtCore.Qt.AlignTrailing | QtCore.Qt.AlignVCenter) - self.EtaTime.setObjectName("EtaTime") - - self.EtaTime_Label = QtWidgets.QLabel(self.groupBox) - self.EtaTime_Label.setGeometry(QtCore.QRect(290, 30, 101, 16)) - font = QtGui.QFont() - font.setFamily("微软雅黑") - font.setPointSize(11) - self.EtaTime_Label.setFont(font) - self.EtaTime_Label.setObjectName("EtaTime_Label") - - MainWindow.setCentralWidget(self.centralwidget) - self.menubar = QtWidgets.QMenuBar(MainWindow) - self.menubar.setGeometry(QtCore.QRect(0, 0, 615, 22)) - self.menubar.setObjectName("menubar") - self.menu_F = QtWidgets.QMenu(self.menubar) - self.menu_F.setObjectName("menu_F") - MainWindow.setMenuBar(self.menubar) - self.action_OpenFile = QtWidgets.QAction(MainWindow) - self.action_OpenFile.setObjectName("action_OpenFile") - self.action_OpenFile.triggered.connect(self.SetInputFilePath) - self.action_Exit = QtWidgets.QAction(MainWindow) - self.action_Exit.setObjectName("action_Exit") - self.action_Exit.triggered.connect(self.ExitAll) - self.menu_F.addAction(self.action_OpenFile) - self.menu_F.addAction(self.action_Exit) - self.menubar.addAction(self.menu_F.menuAction()) - - self.TextedUI(MainWindow) - QtCore.QMetaObject.connectSlotsByName(MainWindow) - - def TextedUI(self, MainWindow): - _translate = QtCore.QCoreApplication.translate - self.Input_Label.setText(_translate("MainWindow", "输入文件路径:")) - self.InputButton.setText(_translate("MainWindow", "<<<")) - self.OutputButton.setText(_translate("MainWindow", "<<<")) - self.Output_Label.setText(_translate("MainWindow", "输出文件路径:")) - self.IsUseWindowCheck.setToolTip(_translate("MainWindow", "汉宁Hann双余弦窗可以使FFT分析中的频谱泄漏更少")) - self.IsUseWindowCheck.setText(_translate("MainWindow", "分析音频时使用Hann窗")) - self.SplitSize_Label.setText(_translate("MainWindow", "倒腾区大小:")) - self.StartButton.setText(_translate("MainWindow", "开始处理")) - self.groupBox.setTitle(_translate("MainWindow", "输出状态")) - 
self.UsedTime_Label.setText(_translate("MainWindow", "当前处理耗时:")) - self.GlobalProgressBar_Label.setText(_translate("MainWindow", "当前进度")) - self.UsedTime.setText(_translate("MainWindow", "00:00:00")) - self.EtaTime.setText(_translate("MainWindow", "00:00:00")) - self.EtaTime_Label.setText(_translate("MainWindow", "预计剩余时间:")) - self.menu_F.setTitle(_translate("MainWindow", "文件(&F)")) - self.action_OpenFile.setText(_translate("MainWindow", "打开(&O)")) - self.action_Exit.setText(_translate("MainWindow", "退出(&E)")) - - def SetInputFilePath(self): - self.InputFilePath = str(QtWidgets.QFileDialog.getOpenFileName(None, "选择待处理的文件")[0]) - if self.InputFilePath: - self.InputLineBox.setText(self.InputFilePath) - - def SetOutputFilePath(self): - self.OutputFilePath = str(QtWidgets.QFileDialog.getSaveFileName(None, "设置输出文件的位置及文件名")[0]) - if self.OutputFilePath: - self.OutputLineBox.setText(self.OutputFilePath) - - def SetIsUseWindowCheck(self): - self.IsUseWindow = True if self.IsUseWindowCheck.checkState() == 2 else False - - def SetSplitSize(self): - self.SplitSize = int(self.SplitSizeSpin.value()) - - def ExitAll(self): - QtWidgets.QApplication.quit() - - def RunProcess(self): - if not self.IsStarted: - self.InputFilePath = self.InputLineBox.text() - self.OutputFilePath = self.OutputLineBox.text() - if self.InputFilePath and self.OutputFilePath: - self.IsStarted = True - self.StartButton.setText("停止处理") - self.CoreObject = EmiyaEngineCore(None, self.InputFilePath, self.OutputFilePath, - 1, self.SplitSize, self.IsUseWindow) - self.CoreObject.Update.connect(self.UpdateState) - self.CoreObject.Finish.connect(self.DetectEnd) - self.CoreObject.start() - else: - self.IsStarted = False - self.StartButton.setText("开始处理") - self.CoreObject.terminate() - self.UsedTime.setText("00:00:00") - self.EtaTime.setText("00:00:00") - self.GlobalProgressBar.setValue(0) - print("所有处理已停止") - logger.info("所有处理已停止") - - def DetectEnd(self): - self.IsStarted = False - self.StartButton.setText("开始处理") 
- - def UpdateState(self, _TimeUsed, _EtaTime, _ProgressRate): - self.UsedTime.setText(str(_TimeUsed)) - self.EtaTime.setText(str(_EtaTime)) - self.GlobalProgressBar.setValue(_ProgressRate) - - -if __name__ == "__main__": - import sys - app = QtWidgets.QApplication(sys.argv) - MainWindow = QtWidgets.QMainWindow() - ui = EmiyaEngineGUI(MainWindow) - # ui.setupUi(MainWindow) - MainWindow.show() - sys.exit(app.exec_()) diff --git a/README.md b/README.md index 7cfbdb4..b85f57b 100644 --- a/README.md +++ b/README.md @@ -4,65 +4,124 @@ > "只要蘊藏著想成為真物的意志,偽物就比真物還要來得真實。" Emiya Engine 是一个用来丰富音频频谱的脚本。可以将频谱变得好看那么一点。 -原理是使用 FFT (快速傅立叶变换) 将音频信号采样转到频域,在频域上为空白的频谱加上与时域幅值相称的微小抖动。 - -### 使用须知: - - - 由于 FFT 的栅栏效应,程序的处理过程不可避免地会损失部分采样信号。**故不建议将本程序用于玄学领域。** - - 由于程序缺乏细致调教,当前算法会导致部分采样块未被有效处理。 - - 由于玄学原因,最后生成的文件有一定几率出现时长一百余个采样点(0.0016s)的爆音区域。 - - 由于处理需要消耗大约是音源时长的 2 - 10 倍的时间,故不建议输入较长的音频文件。 - - 鉴于脚本输出为 96KHz 采样 32bit 单精度浮点型 的 WAV 文件,请不要输入比输出精度更高的文件。 - -### 程序依赖: - - - Python 3 - - numpy - - scipy - - librosa - - resampy - - colorama - - PyQt5 (GUI版依赖) - -#### 依赖安装建议 -Windows平台: - -> 实际上 librosa 的依赖非常多,如果使用 `pip`安装,可能会导致出错, -> 建议直接在[这里](http://www.lfd.uci.edu/~gohlke/pythonlibs/)下载二进制包使用`pip`离线安装。 -> 除了以上列表的依赖,还有`Cython` 和 `scikit-learn`,建议一块装了。 -> `librosa` 需要配置 `ffmpeg` 的目录, -> 找到 `Python` 安装目录下 `Lib\site-packages\audioread` 文件夹的 `ffdec.py` 文件。 -> 修改第 32 行,修改为你的 `ffmpeg` 程序路径,比如我的放在 F 盘根目录,设置成这样: -> `COMMANDS = ('F:\\ffmpeg.exe', 'avconv')` - -Linux平台: - -> ArchLinux 上建议用 pacman 一路搞定 numpy scipy scikit-learn Cython,当然使用pip也是OK的。 -> Debian/Ubuntu 上默认源似乎必须用 apt-get 安装,有一些包用 pip 安装有些问题。 -> 与 Windows 平台一样,也需要为 librosa安装后端解码音频文件, -> 直接使用发行版自带的包管理器安装 ffmpeg 就可以。 -> 如果出错了,建议从源编译最新的放在原来的路径下。 - -### 命令行帮助: - - -h, --help 显示帮助信息. - -i INPUT, --input INPUT - 待处理文件的绝对路径, 同一路径可直接输入文件名. 例如: - Music_ready_test.mp3 - -d DEBUG, --debug DEBUG - 调试等级设定. 默认 1 级. - 设置为 0 时, 只显示任务起始日志; - 设置为 1 时, 额外显示进度日志; - 设置为 2 时, 额外显示处理细节日志 - -s SIZE, --size SIZE - 倒腾区大小. 默认 500. 
- 使用倒腾区是因为 numpy 做大数组 append 速度远低于小数组, - 故加入小数组多倒腾一手, 这个参数就是小数组的尺寸. - -w WINDOW, --window WINDOW - 分析用汉宁Hann双余弦窗启用开关. 默认不使用. - 输入 0 代表不使用, 1 代表使用. - -### 效果预览: -音源:44.1KHz@16bit WAV -![enter image description here](https://i.imgur.com/VU9Obqw.jpg) + +--- + +### 当前版本: + +`RC Version 0` + +### 编年史: + + - `Alpha.0 Rev.3` + > 这算是 Emiya Engine 的第一个阶段成果,目标的最小实现 + > 简单说原理就是矩形窗暴力 FFT,移频,乘以乱数,叠加,IFFT + > 大部分的代码是为了处理超大数组拼接速度缓慢的问题 + > 处理后的音频有大量爆音及咔哒声,低电平音频容易看出处理痕迹 + - `Alpha.1 Rev.0` + > 该版本为 Alpha.0 的重构改进,主要工作是改写为多进程执行 + > 为消除频谱图上可见的断层,加入了整数倍时域重采样机制 + > 事实上重采样带来的运算增加远超多进程带来的提升,所以... + > 以及因为多进程,处理需要占用更大的内存,性能消耗巨大 + > 爆音和咔哒声依旧存在,但已大幅减少,处理痕迹依旧能看出来 + - `Alpha.1 Rev.1` + > 这一版中加入了 AkkoMode + > 这一模式原理极其简单,就是给原始信号采样点分别乘以极小的随机数 + > 可以视作信号在有微小热噪声的线路走了一趟 + > 处理后无爆音及咔哒声,但在低电平音频上能听出背景噪声 + > 消除背景噪声就必须暴露处理痕迹 + - `Alpha.3` + > 推翻了之前的所有代码的完全重构,处理结果类似 DSEE HX + > 这一版本质是高通滤波器 + 混频器 + > 将高通滤波后的信号分离为打击乐及弦乐,然后增益后叠加在原始信号上 + > 丢掉了自造的 FFT 轮子,改用库实现的 STFT + > 因此不存在爆音和咔哒声,也不再需要额外多倍重采样,速度极大提升 + > 这一版本参数调节极其重要,需要参照结果反复调整参数 + > 正确调整参数的处理样本完全不增加爆音及咔哒声,加上 EQ 能完全抹平处理痕迹 + +### 当前版本使用说明: + +为了方便使用,特地做了个 GUI 界面, +但实际上还是挺难用的,所以还是说一下。 + +工具: Spek(仅频谱观察用),Audition(频谱观察/频率分析/后期处理用) + +首先需要分析音乐类型,对于以下类型不建议使用 AkkoMode: + - 电子合成纯音乐,背景乐器只有一两样的 + - 人声清唱带一个伴奏乐器 + - 其他频谱图中最高频率不到 18kHz 的音乐 + +例如这样的: +![sample-not-for-akkomode](https://i.imgur.com/Fd4EoGN.jpg?1) + +AkkoMode 适用于大部分时候音量都很大的流行乐(比如 JPOP), +处理时应选用 Apple iTunes 购买的 AAC 格式音频,常见的频谱长这样: +![some-jpop](https://i.imgur.com/swdtDz6.jpg) +因为参数只有俩,调整并不麻烦,此处就不展开说了。 + +CopyBand 模式需要设置六个参数,配置之前观察频谱。 +以某网站下载的音乐为例,以下是其频谱图及频率分析图: +![sample-spec-0](https://i.imgur.com/RzEzmtl.jpg) +![sample-spec-1](https://i.imgur.com/t0ps5iS.png) + +从两张图中可以明显看出频率在 17kHz 不到的地方戛然而止, +如果目标是生成 48kHz 文件,则需要补齐 24-17=7kHz 的部分。 +而 17-7=10kHz,故 HPF 截止频率应设定在 10kHz 以下, +而调制频率则在 HPF 截止频率上加上 7k。 +本例中设定为 9k 及 16k。 +这首歌背景音乐以打击乐为主,因此能量集中在冲击部分, +调参数时,首先将谐波增益设置为 0,可以避免参数过多干扰测试。 +冲击增益可以从 5 开始测试,勾上测试模式,启动输出, +检查输出文件频率分析结果: +![sample-result-0](https://i.imgur.com/gqBmSFy.png) + +很明显,在 17-21kHz 的地方本应该是比 17kHz 
以下的部分“矮”一些的。 +(高频衰减更大,所以高频部分通常增益应低于低频) +因此,根据观察结果,将冲击增益调为 2.5(折半试错),再重新跑一次。 +(此时要在其他软件中关闭文件,否则会发生错误) +调整后的频率分析结果变成了这样: +![sample-result-1](https://i.imgur.com/aDempeR.png) + +此时已经很接近理想的样子了,因为还要加入谐波的部分(前边设定成了 0) +故再将冲击增益降低 0.5,同时给谐波增益改为 1.0 并再次执行。 +结果变成了这样: +![sample-result-2](https://i.imgur.com/cAqZdbQ.png) +看起来不错,直接取消测试模式生成最终结果。 +生成最终结果时可能会很卡,请不要担心并耐心等待,进度条将滚动四次(两声道音频)。 + +接着检查频谱,输出如下: +![sample-result-3](https://i.imgur.com/E5I1fMf.jpg) +![sample-result-4](https://i.imgur.com/pFZDbHB.png) +此时是不是有点失望了,很明显的衔接痕迹对不对。 + +没关系,这时可以打开 Audition 效果中的 FFT 滤波器, +接着拿起刚才的频率分析结果图,照着图调整 FFT 滤波器,比如这样: +![au-fft-filter](https://i.imgur.com/dbHxIKH.png) +应用后,频率分析结果变成了这样: +![final-0](https://i.imgur.com/9eYJs8V.png) +而频谱中的衔接痕迹已经不明显了: +![final-1](https://i.imgur.com/X1cDgcX.jpg) + +放大频谱细节,可以看出雾蒙蒙的部分依然有欠缺, +![final-2](https://i.imgur.com/9AbW9j2.jpg) +这是谐波增益不够的原因,可以继续调整改善。最终得到以下结果: +![final-3](https://i.imgur.com/2UO9OnW.jpg) + +### 其他提示: + +由于 CopyBand 本质是复制粘贴已有的部分, +因此对于超过 48kHz 以上的拉升,需要多次处理达成, +例如以下原始文件不到 16kHz: +![ex-0](https://i.imgur.com/eAui0i7.jpg) +拉升到 48kHz 采样需要的频率片段至少为 24-16=8kHz, +而拉升到 96kHz 采样则需要 48-16=32kHz。 +而原始音频中都没有 32kHz 的容量, +因此在最终拉升到 96kHz 之前需要重复至少三次操作。 +在这一过程中,最大频率由 16kHz 最终变为 48kHz。 +由于事实上 20kHz 以上的听不见,所以你做得再多也无妨(笑)。 +例如上边的例子被拉升到 192kHz 采样率,宛如天籁之声: +![ex-1](https://i.imgur.com/QWKpaHA.jpg) + +### 特别提醒 +~~请不要使用这个脚本制造 `'HiRes'` 逗玄学家玩~~ diff --git a/config.json b/config.json new file mode 100644 index 0000000..eac5d46 --- /dev/null +++ b/config.json @@ -0,0 +1,6 @@ +{ + "ui": "res/window.ui", + "lang": "chs", + "eng": "res/eng.qm", + "chs": "res/chs.ts" +} \ No newline at end of file diff --git a/core/akkomode.py b/core/akkomode.py new file mode 100644 index 0000000..db49015 --- /dev/null +++ b/core/akkomode.py @@ -0,0 +1,61 @@ +import random +import librosa +import resampy +#from tqdm import tqdm + + +def core( + input_path,output_path, + output_sr=48000,inter_sr=1, + test_mode=False, + sv_l=0.02,sv_h=0.55, + update=None +): + + # 加载音频 + y, sr = 
def _shift_bins_up(spec, shift_point):
    """Return a copy of ``spec`` with every frame moved up by ``shift_point`` bins.

    ``spec`` is an STFT matrix indexed ``[frequency_bin, frame]``.  Rows that
    have no source (the lowest ``shift_point`` bins) are zeroed and content
    pushed past the top bin is dropped.

    The previous in-place loop assigned ``column[p] = column[p - shift_point]``
    for every ``p``; for ``p < shift_point`` the negative index wrapped around
    and copied the top of the spectrum into the low bins.
    """
    if shift_point <= 0:
        # Nothing to move; hand back an independent copy so callers may
        # scale it in place.
        return spec.copy()
    shifted = np.zeros_like(spec)
    n_bins = spec.shape[0]
    if shift_point < n_bins:
        shifted[shift_point:] = spec[:n_bins - shift_point]
    return shifted


def _write_wav(path, data, sample_rate):
    """Write ``data`` (``(n,)`` or ``(channels, n)``) to ``path`` as a WAV file.

    Uses ``librosa.output.write_wav`` when the installed librosa still ships
    it; that module was removed in librosa 0.8, so otherwise fall back to
    ``scipy.io.wavfile.write`` with 32-bit float samples.
    """
    try:
        writer = librosa.output.write_wav
    except AttributeError:
        writer = None
    if writer is not None:
        writer(path, data, sample_rate)
        return

    from scipy.io import wavfile

    samples = np.asarray(data, dtype=np.float32)
    if samples.ndim > 1:
        # scipy expects (n_samples, n_channels).
        samples = samples.T
    wavfile.write(path, sample_rate, samples)


def core(
    input_path, output_path,
    output_sr=48000, inter_sr=1,
    test_mode=False,
    harmonic_hpfc=6000, harmonic_sft=16000, harmonic_gain=1.2,
    percussive_hpfc=6000, percussive_stf=16000, percussive_gain=2.5,
    update=None
):
    """Extend the upper spectrum of an audio file by copying an existing band.

    Each channel is resampled to ``output_sr * inter_sr`` and split into
    harmonic and percussive STFT components.  Every component is high-pass
    filtered, moved up in frequency, high-pass filtered again, scaled by its
    gain and added back onto the channel.  The result is resampled to
    ``output_sr`` and written to ``output_path``.

    Args:
        input_path: Audio file to read.
        output_path: WAV file to write.
        output_sr: Sample rate of the written file, in Hz.
        inter_sr: Integer multiplier of ``output_sr`` used while processing.
        test_mode: When true, only a 5 second excerpt starting at the middle
            of the file is processed.
        harmonic_hpfc: High-pass cutoff (Hz) for the harmonic component.
        harmonic_sft: Shift amount for the harmonic component.
        harmonic_gain: Gain applied to the shifted harmonic component.
        percussive_hpfc: High-pass cutoff (Hz) for the percussive component.
        percussive_stf: Shift amount for the percussive component (the
            spelling is part of the public keyword interface).
        percussive_gain: Gain applied to the shifted percussive component.
        update: Optional object with an ``emit(float)`` method that receives
            progress in the range 0..1; ``None`` disables reporting.
    """

    def report(fraction):
        # The default ``update=None`` used to crash on ``update.emit``.
        if update is not None:
            update.emit(fraction)

    # Everything after the first resample runs at this rate.  The filter and
    # the bin shift previously used the *source file's* rate, which is wrong
    # whenever the input rate differs from ``output_sr * inter_sr``.
    work_sr = output_sr * inter_sr

    def hpd_n_shift(data, hpfc, sft, gain):
        """High-pass, shift up, high-pass again and scale one STFT matrix."""
        b, a = signal.butter(3, hpfc / (work_sr / 2), 'high')
        report(0.0)
        data = librosa.stft(signal.filtfilt(b, a, librosa.istft(data)))
        report(1 / 3)
        # NOTE(review): the divisor is the number of STFT rows (n_fft/2 + 1),
        # not n_fft, so the effective shift looks like roughly sft/2 Hz.
        # The formula is kept as-is because the README's tuning advice was
        # derived with it -- confirm before changing.
        shift_point = round(sft / (work_sr / data.shape[0]))
        data = _shift_bins_up(data, shift_point)
        report(2 / 3)
        data = librosa.stft(signal.filtfilt(b, a, librosa.istft(data)))
        data *= gain
        report(1.0)
        return data

    # Load the audio at its native rate, keeping channels separate.
    y, sr = librosa.load(input_path, mono=False, sr=None)
    if test_mode:
        middle = round(y.shape[-1] / sr / 2)
        y, sr = librosa.load(input_path, mono=False, sr=None,
                             offset=middle, duration=5)
    # A mono file loads as a 1-D array; promote it so the per-channel loop
    # below iterates channels rather than individual samples.
    is_mono = y.ndim == 1
    y = resampy.resample(np.atleast_2d(y), sr, work_sr, filter='kaiser_fast')

    # One STFT matrix per channel.
    stft_list = [librosa.stft(channel) for channel in y]

    for spec in stft_list:
        # Both components are derived from the untouched channel before
        # anything is added back onto it.
        harmonic, percussive = librosa.decompose.hpss(spec, margin=4)
        harmonic = hpd_n_shift(harmonic, harmonic_hpfc,
                               harmonic_sft, harmonic_gain)
        percussive = hpd_n_shift(percussive, percussive_hpfc,
                                 percussive_stf, percussive_gain)
        spec += harmonic
        spec += percussive

    # Back to the time domain, down to the requested rate, and out to disk.
    istft_list = [librosa.istft(spec) for spec in stft_list]
    final_data = resampy.resample(np.array(istft_list),
                                  work_sr,
                                  output_sr,
                                  filter='kaiser_fast')
    if is_mono:
        final_data = final_data[0]
    _write_wav(output_path, final_data, output_sr)
class MainUI(QtWidgets.QMainWindow, Ui_MainWindow):
    """Main window: collects parameters and runs a ``Core`` worker thread."""

    def __init__(self):
        QtWidgets.QMainWindow.__init__(self)
        Ui_MainWindow.__init__(self)
        self.setupUi(self)
        self._bind_ui_()
        # Close the file deterministically instead of leaking the handle.
        with open('res/lang.json', 'rb') as lang_file:
            self.lang = json.loads(lang_file.read())[Config['lang']]
        self.input_path, self.output_path = None, None
        self.is_started = False

    def _bind_ui_(self):
        """Wire the file pickers and the execute button to their handlers."""
        self.selectInputFile.clicked.connect(lambda: self.openfile(False))
        self.selectOutputFile.clicked.connect(lambda: self.openfile(True))
        self.globalExec.clicked.connect(self.start)

    def openfile(self, is_output):
        """Ask the user for the output (``is_output`` true) or input file.

        A cancelled dialog leaves the current selection untouched.
        """
        if is_output:
            path, _ = QtWidgets.QFileDialog.getSaveFileName(
                self, self.lang['OutputDialog'], '', 'Audio files(*.wav)')
            if not path:
                return
            self.output_path = path
            self.outputFilePath.setText(path)
        else:
            # The dialog returns ``(path, selected_filter)``.  Storing the
            # whole tuple made a cancelled dialog look like a valid choice,
            # because a non-empty tuple is always truthy.
            path, _ = QtWidgets.QFileDialog.getOpenFileName(
                self, self.lang['InputDialog'], '', 'Audio files(*.*)')
            if not path:
                return
            self.input_path = path
            self.inputFilePath.setText(path)

    def start(self):
        """Start processing, or stop the worker when one is already running."""
        if self.is_started:
            # Stopping must not depend on the path fields still being filled.
            self.CoreObject.terminate()
            self.globalExec.setText(self.lang['ExecBtnTextStart'])
            self.progressBar.setValue(0)
            self.is_started = False
            return

        # Validate exactly what is handed to the worker: the text boxes,
        # which the user may also have typed into directly.
        input_path = self.inputFilePath.text()
        output_path = self.outputFilePath.text()
        if (not input_path) or (not output_path):
            QtWidgets.QMessageBox.warning(
                self, self.lang['MsgBoxW'], self.lang['LackFile'],
                QtWidgets.QMessageBox.Ok)
            return
        self.input_path, self.output_path = input_path, output_path

        # Mode 0 is CopyBand, anything else is AkkoMode.
        mode = 0 if self.useCopyBand.isChecked() else 1

        self.CoreObject = Core(
            None, mode,
            input_path=input_path,
            output_path=output_path,
            # Combo texts look like "48000Hz" and "2x".
            output_sr=int(self.commOutputSr.currentText()[:-2]),
            inter_sr=int(self.commInsertSr.currentText()[:-1]),
            test_mode=self.useSampleOutput.isChecked(),
            harmonic_hpfc=int(self.cbHarmonicHpfCutFreq.value()),
            harmonic_sft=int(self.cbHarmonicShiftFreq.value()),
            harmonic_gain=float(self.cbHarmonicGain.value()),
            percussive_hpfc=int(self.cbPercussiveHpfCutFreq.value()),
            percussive_stf=int(self.cbPercussiveShiftFreq.value()),
            percussive_gain=float(self.cbPercussiveGain.value()),
            sv_l=float(self.akkoJitterDownFactor.value()),
            sv_h=float(self.akkoJitterUpFactor.value()))
        # Connect before starting so no early Update/Finish emission is lost.
        self.CoreObject.Update.connect(self.proc_bar_bind)
        self.CoreObject.Finish.connect(self.proc_end_bind)
        self.CoreObject.start()
        self.globalExec.setText(self.lang['ExecBtnTextStop'])
        self.is_started = True

    def proc_bar_bind(self, rate):
        """Show worker progress; ``rate`` is a fraction in the range 0..1."""
        self.progressBar.setValue(round(rate * 100))

    def proc_end_bind(self):
        """Reset the UI once the worker reports completion."""
        self.is_started = False
        self.progressBar.setValue(0)
        self.globalExec.setText(self.lang['ExecBtnTextStart'])
AkkoMode 参数 + AkkoMode parameters + + + + 抖动下限倍率: + Jitter Lower: + + + + 抖动上限倍率: + Jitter Upper: + + + + CopyBand 参数 + CopyBand paramaters + + + + 谐波HPF截至频率(Hz): + Harmonic HPF (Hz): + + + + 谐波调制频率(Hz): + Harmonic SFT (Hz): + + + + 谐波增益倍率: + Harmonic Gain: + + + + 冲击调制频率(Hz): + PercussiveSFT (Hz): + + + + 冲击HPF截至频率(Hz): + Percussive HPF (Hz): + + + + 冲击增益倍率: + PercussiveGain: + + + + 输出控制 + Output control + + + + 样本输出模式 + TestMode + + + + 执行 + Execute + + + + 当前任务: + Task: + + + + 总进度: + Total: + + + diff --git a/res/eng.qm b/res/eng.qm new file mode 100644 index 0000000..697e3ff Binary files /dev/null and b/res/eng.qm differ diff --git a/res/lang.json b/res/lang.json new file mode 100644 index 0000000..63f9f3e --- /dev/null +++ b/res/lang.json @@ -0,0 +1,18 @@ +{ + "chs": { + "OutputDialog": "选择输出位置", + "InputDialog": "选择待处理的音频文件", + "MsgBoxW": "警告", + "LackFile": "未设置输入或输出音频文件位置", + "ExecBtnTextStart": "执行", + "ExecBtnTextStop": "停止" + }, + "eng": { + "OutputDialog": "Select output path", + "InputDialog": "Select input audio file", + "MsgBoxW": "Warning", + "LackFile": "Please set input audio file and output path.", + "ExecBtnTextStart": "Execute", + "ExecBtnTextStop": "Stop" + } +} \ No newline at end of file diff --git a/res/window.ui b/res/window.ui new file mode 100644 index 0000000..aadb7bc --- /dev/null +++ b/res/window.ui @@ -0,0 +1,637 @@ + + + MainWindow + + + + 0 + 0 + 715 + 415 + + + + + 715 + 415 + + + + + 16777215 + 16777215 + + + + + Segoe UI + + + + EmiyaEngine RC + + + Qt::LeftToRight + + + + + + 20 + 10 + 671 + 61 + + + + + Microsoft YaHei UI + + + + 输入文件 + + + + + 20 + 20 + 571 + 31 + + + + + + + 610 + 20 + 51 + 31 + + + + ··· + + + + + + + 20 + 80 + 671 + 61 + + + + + Microsoft YaHei UI + + + + 输出位置 + + + + + 20 + 20 + 571 + 31 + + + + + + + 610 + 20 + 51 + 31 + + + + ··· + + + + + + + 20 + 150 + 131 + 81 + + + + + Microsoft YaHei UI + + + + 处理模式 + + + + + 20 + 20 + 111 + 21 + + + + CopyBand + + + true + + + + + + 20 + 50 + 101 + 21 + + + + 
AkkoMode + + + + + + + 160 + 150 + 211 + 80 + + + + + Microsoft YaHei UI + + + + 共用参数 + + + + + 20 + 20 + 71 + 21 + + + + 输出采样率: + + + + + + 100 + 20 + 91 + 22 + + + + 1 + + + + 44100Hz + + + + + 48000Hz + + + + + 96000Hz + + + + + 192000Hz + + + + + + + 20 + 50 + 71 + 21 + + + + 内插值倍率: + + + + + + 100 + 50 + 51 + 22 + + + + 0 + + + + 1x + + + + + 2x + + + + + 3x + + + + + 4x + + + + + + + + 500 + 150 + 191 + 80 + + + + + Microsoft YaHei UI + + + + AkkoMode 参数 + + + + + 20 + 20 + 81 + 21 + + + + 抖动下限倍率: + + + + + + 20 + 50 + 81 + 21 + + + + 抖动上限倍率: + + + + + + 110 + 20 + 62 + 22 + + + + 0.990000000000000 + + + 0.001000000000000 + + + 0.020000000000000 + + + + + + 110 + 50 + 62 + 22 + + + + 0.990000000000000 + + + 0.001000000000000 + + + 0.080000000000000 + + + + + + + 20 + 240 + 561 + 101 + + + + + Microsoft YaHei UI + + + + CopyBand 参数 + + + + + 20 + 30 + 131 + 21 + + + + 谐波HPF截至频率(Hz): + + + + + + 210 + 30 + 111 + 21 + + + + 谐波调制频率(Hz): + + + + + + 390 + 30 + 91 + 21 + + + + 谐波增益倍率: + + + + + + 150 + 30 + 51 + 22 + + + + 1 + + + 9999999 + + + 6000 + + + + + + 320 + 30 + 61 + 22 + + + + 1 + + + 9999999 + + + 16000 + + + + + + 480 + 30 + 62 + 22 + + + + 100.000000000000000 + + + 0.010000000000000 + + + 1.200000000000000 + + + + + + 210 + 60 + 111 + 21 + + + + 冲击调制频率(Hz): + + + + + + 20 + 60 + 131 + 21 + + + + 冲击HPF截至频率(Hz): + + + + + + 480 + 60 + 62 + 22 + + + + 100.000000000000000 + + + 0.010000000000000 + + + 2.500000000000000 + + + + + + 320 + 60 + 61 + 22 + + + + 1 + + + 9999999 + + + 16000 + + + + + + 150 + 60 + 51 + 22 + + + + 1 + + + 9999999 + + + 6000 + + + + + + 390 + 60 + 91 + 21 + + + + 冲击增益倍率: + + + + + + + 380 + 150 + 111 + 81 + + + + + Microsoft YaHei UI + + + + 输出控制 + + + + + 10 + 20 + 101 + 16 + + + + 样本输出模式 + + + + + + + 600 + 260 + 81 + 51 + + + + + Microsoft YaHei UI + 14 + 75 + true + + + + 执行 + + + + + + 20 + 360 + 671 + 31 + + + + 0 + + + + + + +