diff --git a/AUTHOR.html b/AUTHOR.html
index c15e867..325fd4e 100644
--- a/AUTHOR.html
+++ b/AUTHOR.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2196,7 +2183,7 @@ 关于作者
如何联系( -w-):
您可以通过 知乎 或 Github 联系到作者,感谢您的帮助。
@@ -2240,7 +2227,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":" =[>> 关于作者© <<]= ","level":"1.1.1","depth":2,"next":{"title":" =[>> 版权申明© <<]= ","level":"1.1.2","depth":2,"path":"COPYRIGHT.md","ref":"COPYRIGHT.md","articles":[]},"previous":{"title":"《音视频开发技术:原理与实践》©","level":"1.1","depth":1,"path":"README.md","ref":"README.md","articles":[{"title":" =[>> 关于作者© <<]= ","level":"1.1.1","depth":2,"path":"AUTHOR.md","ref":"AUTHOR.md","articles":[]},{"title":" =[>> 版权申明© <<]= ","level":"1.1.2","depth":2,"path":"COPYRIGHT.md","ref":"COPYRIGHT.md","articles":[]},{"title":" =[>> 难度向导© <<]= ","level":"1.1.3","depth":2,"path":"GUIDER.md","ref":"GUIDER.md","articles":[]},{"title":" =[>> 赞助本作© <<]= ","level":"1.1.4","depth":2,"path":"DONATE.md","ref":"DONATE.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"AUTHOR.md","mtime":"2024-09-11T06:09:49.840Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":".","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":" =[>> 关于作者© <<]= ","level":"1.1.1","depth":2,"next":{"title":" =[>> 版权申明© <<]= ","level":"1.1.2","depth":2,"path":"COPYRIGHT.md","ref":"COPYRIGHT.md","articles":[]},"previous":{"title":"《音视频开发技术:原理与实践》©","level":"1.1","depth":1,"path":"README.md","ref":"README.md","articles":[{"title":" =[>> 关于作者© <<]= ","level":"1.1.1","depth":2,"path":"AUTHOR.md","ref":"AUTHOR.md","articles":[]},{"title":" =[>> 版权申明© <<]= ","level":"1.1.2","depth":2,"path":"COPYRIGHT.md","ref":"COPYRIGHT.md","articles":[]},{"title":" =[>> 难度向导© <<]= ","level":"1.1.3","depth":2,"path":"GUIDER.md","ref":"GUIDER.md","articles":[]},{"title":" =[>> 赞助本作© <<]= ","level":"1.1.4","depth":2,"path":"DONATE.md","ref":"DONATE.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"AUTHOR.md","mtime":"2024-09-12T04:11:10.450Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":".","book":{"language":""}});
});
diff --git a/COPYRIGHT.html b/COPYRIGHT.html
index 363231e..e9f08f3 100644
--- a/COPYRIGHT.html
+++ b/COPYRIGHT.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2194,7 +2181,7 @@ 版权申明©
本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议 进行许可。
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License .
@@ -2238,7 +2225,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":" =[>> 版权申明© <<]= ","level":"1.1.2","depth":2,"next":{"title":" =[>> 难度向导© <<]= ","level":"1.1.3","depth":2,"path":"GUIDER.md","ref":"GUIDER.md","articles":[]},"previous":{"title":" =[>> 关于作者© <<]= ","level":"1.1.1","depth":2,"path":"AUTHOR.md","ref":"AUTHOR.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"COPYRIGHT.md","mtime":"2024-09-11T06:09:49.870Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":".","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":" =[>> 版权申明© <<]= ","level":"1.1.2","depth":2,"next":{"title":" =[>> 难度向导© <<]= ","level":"1.1.3","depth":2,"path":"GUIDER.md","ref":"GUIDER.md","articles":[]},"previous":{"title":" =[>> 关于作者© <<]= ","level":"1.1.1","depth":2,"path":"AUTHOR.md","ref":"AUTHOR.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"COPYRIGHT.md","mtime":"2024-09-12T04:11:10.470Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":".","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Apex_1_Introduce.html b/Chapter_1/Language/cn/Apex_1_Introduce.html
index 60584b8..4eb7128 100644
--- a/Chapter_1/Language/cn/Apex_1_Introduce.html
+++ b/Chapter_1/Language/cn/Apex_1_Introduce.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2211,7 +2198,7 @@ 目录
【参考文献】
@@ -2255,7 +2242,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"一、音频的保存与还原","level":"1.2","depth":1,"next":{"title":"1.1 音频基础","level":"1.2.1","depth":2,"path":"Chapter_1/Language/cn/Docs_1_1.md","ref":"Chapter_1/Language/cn/Docs_1_1.md","articles":[]},"previous":{"title":" =[>> 赞助本作© <<]= ","level":"1.1.4","depth":2,"path":"DONATE.md","ref":"DONATE.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Apex_1_Introduce.md","mtime":"2024-09-11T06:09:49.910Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"一、音频的保存与还原","level":"1.2","depth":1,"next":{"title":"1.1 音频基础","level":"1.2.1","depth":2,"path":"Chapter_1/Language/cn/Docs_1_1.md","ref":"Chapter_1/Language/cn/Docs_1_1.md","articles":[]},"previous":{"title":" =[>> 赞助本作© <<]= ","level":"1.1.4","depth":2,"path":"DONATE.md","ref":"DONATE.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Apex_1_Introduce.md","mtime":"2024-09-12T04:11:10.510Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_1.html b/Chapter_1/Language/cn/Docs_1_1.html
index b591294..a8e6a64 100644
--- a/Chapter_1/Language/cn/Docs_1_1.html
+++ b/Chapter_1/Language/cn/Docs_1_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2249,7 +2236,7 @@ 数ֵ
在 CD 时代伊始,音频的格式就从传统的纯物理记录方式,演变成了调制解调(PCM)配合格式压缩存储的处理过程。这正是数字时代和以往传统时代相比,最为显著的特征。
因此,想要理解并处理音频,首先需要从如何衡量 声音(Sounds) 开始。
@@ -2293,7 +2280,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.1 音频基础","level":"1.2.1","depth":2,"next":{"title":"1.2 声波三要素(Three Elements of Acoustics)","level":"1.2.2","depth":2,"path":"Chapter_1/Language/cn/Docs_1_2.md","ref":"Chapter_1/Language/cn/Docs_1_2.md","articles":[]},"previous":{"title":"一、音频的保存与还原","level":"1.2","depth":1,"path":"Chapter_1/Language/cn/Apex_1_Introduce.md","ref":"Chapter_1/Language/cn/Apex_1_Introduce.md","articles":[{"title":"1.1 音频基础","level":"1.2.1","depth":2,"path":"Chapter_1/Language/cn/Docs_1_1.md","ref":"Chapter_1/Language/cn/Docs_1_1.md","articles":[]},{"title":"1.2 声波三要素(Three Elements of Acoustics)","level":"1.2.2","depth":2,"path":"Chapter_1/Language/cn/Docs_1_2.md","ref":"Chapter_1/Language/cn/Docs_1_2.md","articles":[]},{"title":"1.3 声音三要素(Three Elements of Sounds)","level":"1.2.3","depth":2,"path":"Chapter_1/Language/cn/Docs_1_3.md","ref":"Chapter_1/Language/cn/Docs_1_3.md","articles":[{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_1.md","ref":"Chapter_1/Language/cn/Docs_1_3_1.md","articles":[]},{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]}]},{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"path":"Chapter_1/Language/cn/Docs_1_4.md","ref":"Chapter_1/Language/cn/Docs_1_4.md","articles":[{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]}]},{"title":"1.5 声音数字化","level":"1.2.5","depth":2,"path":"Chapter_1/Language/cn/Docs_1_5.md","ref":"Chapter_1/Language/cn/Docs_1_5.md","articles":[{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},{"title":"1.5.2 模数转换(A/D [Analog-to-Digital])","level":"1.2.5.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_2.md","ref":"Chapter_1/Language/cn/Docs_1_5_2.md","articles":[]},{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_4.md","ref":"Chapter_1/Language/cn/Docs_1_5_4.md","articles":[]}]},{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"path":"Chapter_1/Language/cn/Docs_1_6.md","ref":"Chapter_1/Language/cn/Docs_1_6.md","articles":[{"title":"1.6.1 音频格式(Audio 
Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]}]},{"title":"【参考文献】","level":"1.2.7","depth":2,"path":"Chapter_1/Language/cn/References_1.md","ref":"Chapter_1/Language/cn/References_1.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_1.md","mtime":"2024-09-11T06:09:49.930Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.1 音频基础","level":"1.2.1","depth":2,"next":{"title":"1.2 声波三要素(Three Elements of Acoustics)","level":"1.2.2","depth":2,"path":"Chapter_1/Language/cn/Docs_1_2.md","ref":"Chapter_1/Language/cn/Docs_1_2.md","articles":[]},"previous":{"title":"一、音频的保存与还原","level":"1.2","depth":1,"path":"Chapter_1/Language/cn/Apex_1_Introduce.md","ref":"Chapter_1/Language/cn/Apex_1_Introduce.md","articles":[{"title":"1.1 音频基础","level":"1.2.1","depth":2,"path":"Chapter_1/Language/cn/Docs_1_1.md","ref":"Chapter_1/Language/cn/Docs_1_1.md","articles":[]},{"title":"1.2 声波三要素(Three Elements of Acoustics)","level":"1.2.2","depth":2,"path":"Chapter_1/Language/cn/Docs_1_2.md","ref":"Chapter_1/Language/cn/Docs_1_2.md","articles":[]},{"title":"1.3 声音三要素(Three Elements of Sounds)","level":"1.2.3","depth":2,"path":"Chapter_1/Language/cn/Docs_1_3.md","ref":"Chapter_1/Language/cn/Docs_1_3.md","articles":[{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_1.md","ref":"Chapter_1/Language/cn/Docs_1_3_1.md","articles":[]},{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]}]},{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"path":"Chapter_1/Language/cn/Docs_1_4.md","ref":"Chapter_1/Language/cn/Docs_1_4.md","articles":[{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]}]},{"title":"1.5 声音数字化","level":"1.2.5","depth":2,"path":"Chapter_1/Language/cn/Docs_1_5.md","ref":"Chapter_1/Language/cn/Docs_1_5.md","articles":[{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},{"title":"1.5.2 模数转换(A/D [Analog-to-Digital])","level":"1.2.5.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_2.md","ref":"Chapter_1/Language/cn/Docs_1_5_2.md","articles":[]},{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_4.md","ref":"Chapter_1/Language/cn/Docs_1_5_4.md","articles":[]}]},{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"path":"Chapter_1/Language/cn/Docs_1_6.md","ref":"Chapter_1/Language/cn/Docs_1_6.md","articles":[{"title":"1.6.1 音频格式(Audio 
Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]}]},{"title":"【参考文献】","level":"1.2.7","depth":2,"path":"Chapter_1/Language/cn/References_1.md","ref":"Chapter_1/Language/cn/References_1.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_1.md","mtime":"2024-09-12T04:11:10.510Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_2.html b/Chapter_1/Language/cn/Docs_1_2.html
index 65d1655..b208b28 100644
--- a/Chapter_1/Language/cn/Docs_1_2.html
+++ b/Chapter_1/Language/cn/Docs_1_2.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2209,7 +2196,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2388,7 +2375,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2312,7 +2299,7 @@ 频率ÿ
观察例举的统计结果,会发现直觉上非常吵闹的声音,如飞机发动机的声音,其频率并不一定高。而一些我们生活中感觉难以察觉的声音,如蚊子飞行声,却不一定低频。
显然,频率并不能代表声音的高低 。我们还需要其它参数表示,那就是 响度(Loudness) 。
@@ -2356,7 +2343,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"next":{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},"previous":{"title":"1.3 声音三要素(Three Elements of Sounds)","level":"1.2.3","depth":2,"path":"Chapter_1/Language/cn/Docs_1_3.md","ref":"Chapter_1/Language/cn/Docs_1_3.md","articles":[{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_1.md","ref":"Chapter_1/Language/cn/Docs_1_3_1.md","articles":[]},{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_3_1.md","mtime":"2024-09-11T06:09:49.970Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"next":{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},"previous":{"title":"1.3 声音三要素(Three Elements of Sounds)","level":"1.2.3","depth":2,"path":"Chapter_1/Language/cn/Docs_1_3.md","ref":"Chapter_1/Language/cn/Docs_1_3.md","articles":[{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_1.md","ref":"Chapter_1/Language/cn/Docs_1_3_1.md","articles":[]},{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_3_1.md","mtime":"2024-09-12T04:11:10.530Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_3_2.html b/Chapter_1/Language/cn/Docs_1_3_2.html
index c6a48ed..4791135 100644
--- a/Chapter_1/Language/cn/Docs_1_3_2.html
+++ b/Chapter_1/Language/cn/Docs_1_3_2.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2292,7 +2279,7 @@ $N_{\Sigma} = 10 \cdot \log_{10}\left(\sum_n \left(\frac{p_i}{p_{ref}}\right)^2\right) = 10 \cdot \log_{10}\left(\sum_n \frac{I_i}{I_{ref}}\right) = 10 \cdot \log_{10}\left(10^{\frac{dBL_0}{10}} + 10^{\frac{dBL_1}{10}} + \cdots + 10^{\frac{dBL_n}{10}}\right)$
 至此,两个主客观系统间,达成了转换条件。一般的 $p_{ref} = 20\ \mu Pa$ 时,有 $I_{ref} = 1\ pW/m^2$。我们用声压级表示响度,而以声强计算能量。
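A worked instance of the summation formula quoted above (an illustrative aside, not part of the generated pages being diffed): two incoherent sources of equal sound pressure level combine to a total roughly 3 dB higher, e.g. for two 60 dB sources,

$N_{\Sigma} = 10 \cdot \log_{10}\left(10^{\frac{60}{10}} + 10^{\frac{60}{10}}\right) = 60 + 10 \cdot \log_{10} 2 \approx 63\ dB$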
@@ -2336,7 +2323,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"next":{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]},"previous":{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_1.md","ref":"Chapter_1/Language/cn/Docs_1_3_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_3_2.md","mtime":"2024-09-11T06:09:49.980Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"next":{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]},"previous":{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_1.md","ref":"Chapter_1/Language/cn/Docs_1_3_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_3_2.md","mtime":"2024-09-12T04:11:10.530Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_3_3.html b/Chapter_1/Language/cn/Docs_1_3_3.html
index a589173..568a01d 100644
--- a/Chapter_1/Language/cn/Docs_1_3_3.html
+++ b/Chapter_1/Language/cn/Docs_1_3_3.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2239,7 +2226,7 @@ 谐波
可见,决定整个谐波链的关键,就在于第一谐波,也就是基波上。而在基波响度相同的情况下,产生的第二、第三、... 、第 i 谐波,其 数目 和 各自的响度 ,才确定了声源特色。
至此,声音三要素与工程量映射,就解释清楚了。
@@ -2283,7 +2270,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"next":{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"path":"Chapter_1/Language/cn/Docs_1_4.md","ref":"Chapter_1/Language/cn/Docs_1_4.md","articles":[{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]}]},"previous":{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_3_3.md","mtime":"2024-09-11T06:09:49.990Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"next":{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"path":"Chapter_1/Language/cn/Docs_1_4.md","ref":"Chapter_1/Language/cn/Docs_1_4.md","articles":[{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]}]},"previous":{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_3_3.md","mtime":"2024-09-12T04:11:10.540Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_4.html b/Chapter_1/Language/cn/Docs_1_4.html
index 4395d6a..8ac9994 100644
--- a/Chapter_1/Language/cn/Docs_1_4.html
+++ b/Chapter_1/Language/cn/Docs_1_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2173,7 +2160,7 @@ 1.4 声音的
不同角度观察到的,可以认为是同一声音在各自领域平面的投影。而我们通过这种方式,从不同的视角,拼接出了声音本身。所以,声音也可以被称为是某种程度上的高维信息。 并非 在不考虑传播时,直觉上的仅有时频那么简单。
接下来,我们便分别从这三个不同的视角,去看如何处理。
@@ -2217,7 +2204,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"next":{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},"previous":{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4.md","mtime":"2024-09-11T06:09:50.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"next":{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},"previous":{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4.md","mtime":"2024-09-12T04:11:10.540Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_4_1.html b/Chapter_1/Language/cn/Docs_1_4_1.html
index edf8a35..de33952 100644
--- a/Chapter_1/Language/cn/Docs_1_4_1.html
+++ b/Chapter_1/Language/cn/Docs_1_4_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2538,7 +2525,7 @@ Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by Gitbook
@@ -2582,7 +2569,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"next":{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},"previous":{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"path":"Chapter_1/Language/cn/Docs_1_4.md","ref":"Chapter_1/Language/cn/Docs_1_4.md","articles":[{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4_1.md","mtime":"2024-09-11T06:09:50.010Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"next":{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},"previous":{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"path":"Chapter_1/Language/cn/Docs_1_4.md","ref":"Chapter_1/Language/cn/Docs_1_4.md","articles":[{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4_1.md","mtime":"2024-09-12T04:11:10.540Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_4_2.html b/Chapter_1/Language/cn/Docs_1_4_2.html
index 3d66821..e2b63e5 100644
--- a/Chapter_1/Language/cn/Docs_1_4_2.html
+++ b/Chapter_1/Language/cn/Docs_1_4_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2380,7 +2367,7 @@ 调
那么,其具体是怎样的测量过程,而结果又是怎样体现的呢?
这就需要提到 等响曲线(Equal Loudness Level Contour) 了。
@@ -2424,7 +2411,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"next":{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},"previous":{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4_2.md","mtime":"2024-09-11T06:09:50.010Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"next":{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},"previous":{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4_2.md","mtime":"2024-09-12T04:11:10.550Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_4_3.html b/Chapter_1/Language/cn/Docs_1_4_3.html
index 6693326..326211c 100644
--- a/Chapter_1/Language/cn/Docs_1_4_3.html
+++ b/Chapter_1/Language/cn/Docs_1_4_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2222,7 +2209,7 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2399,7 +2386,7 @@
这就是感官感受和工程测量的不同了。
@@ -2443,7 +2430,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"next":{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]},"previous":{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4_4.md","mtime":"2024-09-11T06:09:50.040Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"next":{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]},"previous":{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4_4.md","mtime":"2024-09-12T04:11:10.550Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_4_5.html b/Chapter_1/Language/cn/Docs_1_4_5.html
index 3888402..87d510b 100644
--- a/Chapter_1/Language/cn/Docs_1_4_5.html
+++ b/Chapter_1/Language/cn/Docs_1_4_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2250,7 +2237,7 @@ Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by Gitbook
@@ -2294,7 +2281,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"next":{"title":"1.5 声音数字化","level":"1.2.5","depth":2,"path":"Chapter_1/Language/cn/Docs_1_5.md","ref":"Chapter_1/Language/cn/Docs_1_5.md","articles":[{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},{"title":"1.5.2 模数转换(A/D [Analog-to-Digital])","level":"1.2.5.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_2.md","ref":"Chapter_1/Language/cn/Docs_1_5_2.md","articles":[]},{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_4.md","ref":"Chapter_1/Language/cn/Docs_1_5_4.md","articles":[]}]},"previous":{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4_5.md","mtime":"2024-09-11T06:10:00.300Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"next":{"title":"1.5 声音数字化","level":"1.2.5","depth":2,"path":"Chapter_1/Language/cn/Docs_1_5.md","ref":"Chapter_1/Language/cn/Docs_1_5.md","articles":[{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},{"title":"1.5.2 模数转换(A/D [Analog-to-Digital])","level":"1.2.5.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_2.md","ref":"Chapter_1/Language/cn/Docs_1_5_2.md","articles":[]},{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_4.md","ref":"Chapter_1/Language/cn/Docs_1_5_4.md","articles":[]}]},"previous":{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_4_5.md","mtime":"2024-09-12T04:11:10.570Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_5.html b/Chapter_1/Language/cn/Docs_1_5.html
index badfe5d..927c6ba 100644
--- a/Chapter_1/Language/cn/Docs_1_5.html
+++ b/Chapter_1/Language/cn/Docs_1_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2173,7 +2160,7 @@ 1.5 声音数
而将声音从物理波转为数字保存,并在需要时提供还原能力的技术,就是 调制解调(Modulation & Demodulation) 技术。
这即是本节讨论的内容。
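As a rough illustration of the modulation/demodulation round trip described above (storing a physical wave as numbers, then restoring it on demand), here is a toy PCM-style A/D and D/A pair; the function names, bit depth and reference voltage are assumptions for the sketch, not anything defined in the book:

import numpy as np

def adc(x, n_bits=16, v_ref=1.0):
    # Toy A/D: clip to +/- v_ref, then round to signed n-bit integer codes.
    step = 2 * v_ref / (2 ** n_bits)
    lo, hi = -(2 ** (n_bits - 1)), 2 ** (n_bits - 1) - 1
    return np.clip(np.round(x / step), lo, hi).astype(np.int32)

def dac(codes, n_bits=16, v_ref=1.0):
    # Toy D/A: map the integer codes back onto voltages.
    step = 2 * v_ref / (2 ** n_bits)
    return codes.astype(np.float64) * step

sr = 8_000
t = np.arange(sr) / sr
analog = 0.8 * np.sin(2 * np.pi * 440.0 * t)   # stand-in for the analog waveform
restored = dac(adc(analog))                    # digitize, then reconstruct
print(np.max(np.abs(analog - restored)))       # error stays within one quantisation step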
@@ -2217,7 +2204,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.5 声音数字化","level":"1.2.5","depth":2,"next":{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},"previous":{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_5.md","mtime":"2024-09-11T06:09:50.050Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.5 声音数字化","level":"1.2.5","depth":2,"next":{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},"previous":{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_5.md","mtime":"2024-09-12T04:11:10.580Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_5_1.html b/Chapter_1/Language/cn/Docs_1_5_1.html
index 4b5b0d6..83d9d19 100644
--- a/Chapter_1/Language/cn/Docs_1_5_1.html
+++ b/Chapter_1/Language/cn/Docs_1_5_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2207,7 +2194,7 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2228,7 +2215,7 @@ N 为多少,就代表着单个 ADC 上,有多少个以参考输入电压按 2 的幂次逐级缩小的电压信号所组成的门后电压单元。
由于参考电压一般要求稳定,所以至少需要以内部元件提供稳定三相电来作为基准。不过,对于精度要求极低的设备,为了电子组件复用和电路板的简化,会采用把采样时钟信号的电压作为参考输入的非常规做法。但对于高精度设备(包括麦克风等),时钟信号为高频信号,是严格不能作为参考输入的。
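A quick back-of-the-envelope check of the 2^N relationship mentioned above: one step (LSB) is the reference input divided by 2^N, so each extra bit halves the smallest distinguishable voltage. The 3.3 V reference below is an assumed value for illustration only:

# LSB size for an N-bit converter relative to its reference input (illustrative values).
V_REF = 3.3   # assumed reference voltage in volts
for n_bits in (8, 12, 16, 24):
    levels = 2 ** n_bits
    lsb = V_REF / levels
    print(f"{n_bits:2d}-bit: {levels:>9} levels, 1 LSB = {lsb * 1e6:.3f} uV")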
@@ -2272,7 +2259,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.5.2 模数转换(A/D [Analog-to-Digital])","level":"1.2.5.2","depth":3,"next":{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},"previous":{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_5_2.md","mtime":"2024-09-11T06:09:50.070Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.5.2 模数转换(A/D [Analog-to-Digital])","level":"1.2.5.2","depth":3,"next":{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},"previous":{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_5_2.md","mtime":"2024-09-12T04:11:10.580Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_5_3.html b/Chapter_1/Language/cn/Docs_1_5_3.html
index 28b21cd..dbba27e 100644
--- a/Chapter_1/Language/cn/Docs_1_5_3.html
+++ b/Chapter_1/Language/cn/Docs_1_5_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2205,7 +2192,7 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2221,7 +2208,7 @@ PCM & PDM 异
In audio/video engineering, the audio signals we routinely handle are digital signals acquired via PCM. A PDM digital signal that needs to be adjusted is normally converted to PCM first, and the work is then carried out using PCM's more convenient direct-editing approach. The fundamental digital-signal type used for audio storage inside computer systems is likewise PCM.
This makes the importance of the PCM digital signal evident.
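As a rough illustration of why PDM is usually converted before editing, here is a deliberately crude sketch of a PDM-to-PCM conversion: low-pass filter the one-bit stream (here with a simple block average) and decimate it into PCM samples. The function name, the one-byte-per-bit input layout, and the decimation ratio are assumptions for the example; production converters use proper multi-stage decimation filters (e.g. CIC plus FIR), not a plain block average.

    #include <stdint.h>
    #include <stdio.h>

    /* Crude PDM -> PCM conversion: average each block of `ratio` one-bit PDM
     * samples (a boxcar low-pass filter) and emit one 16-bit PCM sample.
     * One PDM bit is stored per byte purely for simplicity of the example. */
    size_t pdm_to_pcm(const uint8_t *pdm_bits, size_t n_bits,
                      unsigned ratio, int16_t *pcm_out)
    {
        size_t out = 0;
        for (size_t i = 0; i + ratio <= n_bits; i += ratio) {
            unsigned ones = 0;
            for (unsigned k = 0; k < ratio; ++k)
                ones += pdm_bits[i + k] ? 1u : 0u;        /* pulse density  */
            double level = (double)ones / (double)ratio;  /* 0.0 .. 1.0     */
            pcm_out[out++] = (int16_t)((level * 2.0 - 1.0) * 32767.0);
        }
        return out;
    }

    int main(void)
    {
        uint8_t pdm[256];
        int16_t pcm[4];
        for (size_t i = 0; i < sizeof pdm; ++i)
            pdm[i] = (i % 4u != 0u);                 /* constant density 0.75    */
        size_t n = pdm_to_pcm(pdm, sizeof pdm, 64u, pcm);
        for (size_t i = 0; i < n; ++i)
            printf("pcm[%zu] = %d\n", i, pcm[i]);    /* ~16383 for density 0.75  */
        return 0;
    }

Once the data is in PCM form, each sample is an independent amplitude value, which is what makes the direct editing mentioned above straightforward.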
@@ -2265,7 +2252,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"next":{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"path":"Chapter_1/Language/cn/Docs_1_6.md","ref":"Chapter_1/Language/cn/Docs_1_6.md","articles":[{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]}]},"previous":{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 
30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_5_4.md","mtime":"2024-09-11T06:09:50.080Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"next":{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"path":"Chapter_1/Language/cn/Docs_1_6.md","ref":"Chapter_1/Language/cn/Docs_1_6.md","articles":[{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]}]},"previous":{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 
30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_5_4.md","mtime":"2024-09-12T04:11:10.590Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_6.html b/Chapter_1/Language/cn/Docs_1_6.html
index 4f96753..4506b9d 100644
--- a/Chapter_1/Language/cn/Docs_1_6.html
+++ b/Chapter_1/Language/cn/Docs_1_6.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2173,7 +2160,7 @@ 1.6 音频的
This is the foundational format of audio storage: the PCM audio format.
So what exactly is an audio format?
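As a point of reference before the question is answered (my own illustration, not the book's), a raw PCM stream is completely characterized by three parameters — sample rate, bit depth, and channel count — and those three numbers already determine its data rate:

    #include <stdio.h>

    int main(void)
    {
        /* Typical CD-quality PCM parameters, assumed purely for illustration. */
        unsigned sample_rate = 44100;  /* samples per second, per channel */
        unsigned bit_depth   = 16;     /* bits per sample                 */
        unsigned channels    = 2;      /* stereo                          */

        unsigned long bps  = (unsigned long)sample_rate * bit_depth * channels;
        double mib_per_min = bps / 8.0 * 60.0 / (1024.0 * 1024.0);

        printf("data rate : %lu bit/s (~%.1f kbit/s)\n", bps, bps / 1000.0);
        printf("one minute: ~%.2f MiB of raw PCM\n", mib_per_min);
        return 0;
    }

An audio format, in essence, is a convention for how these parameters and the sample data they describe are laid out, wrapped, and possibly compressed in a file.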
@@ -2217,7 +2204,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"next":{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},"previous":{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_4.md","ref":"Chapter_1/Language/cn/Docs_1_5_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6.md","mtime":"2024-09-11T06:09:50.080Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"next":{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},"previous":{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_4.md","ref":"Chapter_1/Language/cn/Docs_1_5_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6.md","mtime":"2024-09-12T04:11:10.590Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_6_1.html b/Chapter_1/Language/cn/Docs_1_6_1.html
index 2cd0634..910627e 100644
--- a/Chapter_1/Language/cn/Docs_1_6_1.html
+++ b/Chapter_1/Language/cn/Docs_1_6_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2190,7 +2177,7 @@ Clearly, the compression algorithm is a good entry point for understanding how these categories are divided.
@@ -2234,7 +2221,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"next":{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},"previous":{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"path":"Chapter_1/Language/cn/Docs_1_6.md","ref":"Chapter_1/Language/cn/Docs_1_6.md","articles":[{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; 
min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6_1.md","mtime":"2024-09-11T06:09:50.090Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"next":{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},"previous":{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"path":"Chapter_1/Language/cn/Docs_1_6.md","ref":"Chapter_1/Language/cn/Docs_1_6.md","articles":[{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; 
min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6_1.md","mtime":"2024-09-12T04:11:10.590Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_6_2.html b/Chapter_1/Language/cn/Docs_1_6_2.html
index feb0605..688f4c8 100644
--- a/Chapter_1/Language/cn/Docs_1_6_2.html
+++ b/Chapter_1/Language/cn/Docs_1_6_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -3067,7 +3054,7 @@ AIFF 音频
With that, the commonly used uncompressed encoding formats — three of them, or arguably two, namely WAV and AIFF — together with the PCM base format form the foundation on which audio formats rest.
Yet storing PCM data this directly (or almost directly) still wastes a great deal of space. To further shrink audio data for persistent storage in computer systems, engineers turned to compression algorithms to improve space efficiency. This gave rise to the compression-bearing lossless encoding formats (Lossless Compression Audio Format) and lossy encoding formats (Lossy Compression Audio Format).
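To show just how "direct" this storage is, here is a sketch of the canonical 44-byte header that a minimal PCM WAV file places in front of its raw sample data; the field layout follows the common RIFF/WAVE convention, and the struct name is my own.

    #include <stdint.h>

    /* Canonical 44-byte header of a minimal PCM WAV file (RIFF/WAVE layout).
     * Everything after these fields is raw little-endian PCM sample data.  */
    #pragma pack(push, 1)
    typedef struct {
        char     riff_id[4];      /* "RIFF"                                */
        uint32_t riff_size;       /* total file size minus 8               */
        char     wave_id[4];      /* "WAVE"                                */
        char     fmt_id[4];       /* "fmt "                                */
        uint32_t fmt_size;        /* 16 for plain PCM                      */
        uint16_t audio_format;    /* 1 = uncompressed PCM                  */
        uint16_t num_channels;    /* 1 = mono, 2 = stereo, ...             */
        uint32_t sample_rate;     /* e.g. 44100                            */
        uint32_t byte_rate;       /* sample_rate * num_channels * bits / 8 */
        uint16_t block_align;     /* num_channels * bits / 8               */
        uint16_t bits_per_sample; /* e.g. 16                               */
        char     data_id[4];      /* "data"                                */
        uint32_t data_size;       /* number of raw PCM bytes that follow   */
    } wav_pcm_header;
    #pragma pack(pop)

Because this header is only a few dozen bytes, virtually the entire file is the PCM payload itself — which is exactly the space cost that the compressed formats set out to reduce.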
@@ -3111,7 +3098,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"next":{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},"previous":{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6_2.md","mtime":"2024-09-11T06:09:50.110Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"next":{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},"previous":{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6_2.md","mtime":"2024-09-12T04:11:10.590Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_6_3.html b/Chapter_1/Language/cn/Docs_1_6_3.html
index 07b4e18..8bbdef2 100644
--- a/Chapter_1/Language/cn/Docs_1_6_3.html
+++ b/Chapter_1/Language/cn/Docs_1_6_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2814,7 +2801,7 @@ FLAC 音频
However, although lossless compression such as FLAC offers the highest audio fidelity, its files remain comparatively large. In many scenarios — streaming, or storage on portable devices (especially in the portable-player era, with its limited early storage) — that is still not convenient enough. Lossy compressed audio formats with higher compression ratios, such as MP3 and AAC, therefore became an acceptable alternative. These formats discard audio information that the human ear barely perceives, shrinking files further while balancing sound quality against compression ratio.
Even if, as a result, the listening experience they deliver is somewhat diminished.
@@ -2858,7 +2845,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"next":{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]},"previous":{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6_3.md","mtime":"2024-09-11T06:09:50.130Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"next":{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]},"previous":{"title":"1.6.2 无压缩编码格式(Uncompressed Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6_3.md","mtime":"2024-09-12T04:11:10.600Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/Docs_1_6_4.html b/Chapter_1/Language/cn/Docs_1_6_4.html
index 9a31393..f61b32d 100644
--- a/Chapter_1/Language/cn/Docs_1_6_4.html
+++ b/Chapter_1/Language/cn/Docs_1_6_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2705,7 +2692,7 @@ MP3 音频
BASS (Basic Audio Stream System). C/C++. http://www.un4seen.com/
@@ -2749,7 +2736,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"next":{"title":"【参考文献】","level":"1.2.7","depth":2,"path":"Chapter_1/Language/cn/References_1.md","ref":"Chapter_1/Language/cn/References_1.md","articles":[]},"previous":{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6_4.md","mtime":"2024-09-11T06:09:50.130Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"next":{"title":"【参考文献】","level":"1.2.7","depth":2,"path":"Chapter_1/Language/cn/References_1.md","ref":"Chapter_1/Language/cn/References_1.md","articles":[]},"previous":{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_1/Language/cn/Docs_1_6_4.md","mtime":"2024-09-12T04:11:10.600Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_1/Language/cn/References_1.html b/Chapter_1/Language/cn/References_1.html
index 23d5f95..e49094b 100644
--- a/Chapter_1/Language/cn/References_1.html
+++ b/Chapter_1/Language/cn/References_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2197,7 +2184,7 @@
[26] Roberts Family. FLAC Metadata Structure [EB/OL]. [2023-10-23]. https://www.the-roberts-family.net/metadata/flac.html .
[27] Theile, Günther; Stolle, Gerhard; 1992; MUSICAM-Surround: A Universal Multichannel Coding System Compatible with ISO 11172-3 PDF ; Institut fur Rundfunktechnik, Munich, Germany; Paper 3403; Available from: https://aes2.org/publications/elibrary-page/?id=6731
diff --git a/Chapter_2/Language/cn/Apex_2_Introduce.html b/Chapter_2/Language/cn/Apex_2_Introduce.html
index 7cb4e07..f049b1e 100644
--- a/Chapter_2/Language/cn/Apex_2_Introduce.html
+++ b/Chapter_2/Language/cn/Apex_2_Introduce.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Setup
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2221,7 +2208,7 @@ Table of Contents
[References]
diff --git a/Chapter_2/Language/cn/Docs_2_1.html b/Chapter_2/Language/cn/Docs_2_1.html
index d42add0..8cf5e28 100644
--- a/Chapter_2/Language/cn/Docs_2_1.html
+++ b/Chapter_2/Language/cn/Docs_2_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Setup
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2196,7 +2183,7 @@ 2.1 Color Fundamentals
Limited by the research instruments of his day, Helmholtz regrettably had no reliable way to measure the exact wavelength ranges that the three classes of cone cells can perceive. Modern medical research, however, has arrived at fairly accurate answers. Through the superposed combination of these three cone responses, our eyes achieve an observable wavelength range of roughly 312 nm - 1050 nm, covering the violet-to-red band (360 nm - 780 nm) [6].
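To make the three-cone superposition concrete, the following small sketch (illustrative only, not drawn from the measured data cited above) models the S/M/L cone sensitivities as Gaussians centered on commonly cited peak wavelengths (about 420 nm, 534 nm, and 564 nm; the widths are assumed) and reports the band over which at least one cone still responds noticeably. Real cone curves are asymmetric, so the toy band it prints is narrower than the measured limits quoted above.

import math

# Assumed Gaussian approximations of the S/M/L cone sensitivities:
# (peak wavelength in nm, assumed standard deviation in nm)
CONES = {
    "S": (420.0, 35.0),
    "M": (534.0, 45.0),
    "L": (564.0, 50.0),
}

def cone_response(wavelength_nm: float) -> dict:
    """Relative response of each cone type at the given wavelength (0..1)."""
    return {
        name: math.exp(-((wavelength_nm - peak) ** 2) / (2.0 * sigma ** 2))
        for name, (peak, sigma) in CONES.items()
    }

def is_perceivable(wavelength_nm: float, threshold: float = 1e-3) -> bool:
    """Crude visibility test: at least one cone responds above the threshold."""
    return max(cone_response(wavelength_nm).values()) > threshold

if __name__ == "__main__":
    band = [nm for nm in range(300, 1100, 5) if is_perceivable(nm)]
    print(f"toy visible band: {band[0]} nm - {band[-1]} nm")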
diff --git a/Chapter_2/Language/cn/Docs_2_2.html b/Chapter_2/Language/cn/Docs_2_2.html
index 49c477d..e6a891a 100644
--- a/Chapter_2/Language/cn/Docs_2_2.html
+++ b/Chapter_2/Language/cn/Docs_2_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Setup
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2190,7 +2177,7 @@ [9] 。
diff --git a/Chapter_2/Language/cn/Docs_2_2_1.html b/Chapter_2/Language/cn/Docs_2_2_1.html
index 01a8c5c..b6031fa 100644
--- a/Chapter_2/Language/cn/Docs_2_2_1.html
+++ b/Chapter_2/Language/cn/Docs_2_2_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Setup
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2195,7 +2182,7 @@ 2.2.1 色调(H
The mixing law was a successful extension of the additive theory of color mixing, and in it one can already glimpse the theoretical outline of the first chromaticity diagram. At that stage, however, the indexing of colors was still fairly rudimentary. Modern academia and industry now generally describe color categories with Chromaticity together with a Color Space instead, while hue is used mainly in art and design.
On the other hand, as the Modern Color System gradually branched into specialized fields, the design goals of some color-space specifications also thoroughly abstracted the concepts behind Hue and Saturation, yielding classical color-space schemes such as LAB, LUV, and the three-element (HSL) family. This has greatly helped connect art and design, data transmission, and engineering computation within the modern computing industry (see 2.5 Classical Color Spaces below).
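As a concrete illustration of how hue survives as a derived quantity inside the HSL-family spaces mentioned above, the sketch below (a minimal example, not taken from the book) computes the hue angle of a normalized RGB triple with the standard max/min formulation used by HSV/HSL conversions.

def rgb_to_hue(r: float, g: float, b: float) -> float:
    """Hue angle in degrees [0, 360) for RGB components normalized to [0, 1]."""
    mx, mn = max(r, g, b), min(r, g, b)
    delta = mx - mn
    if delta == 0:           # achromatic (gray): hue is undefined, 0 by convention
        return 0.0
    if mx == r:
        h = ((g - b) / delta) % 6
    elif mx == g:
        h = (b - r) / delta + 2
    else:                    # mx == b
        h = (r - g) / delta + 4
    return h * 60.0

print(rgb_to_hue(1.0, 0.5, 0.0))   # orange -> 30.0 degrees

The same max/min bookkeeping also yields HSL lightness and saturation, which is why these spaces are convenient for art and design tooling even though chromaticity coordinates are preferred for engineering exchange.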
diff --git a/Chapter_2/Language/cn/Docs_2_2_2.html b/Chapter_2/Language/cn/Docs_2_2_2.html
index 8a77150..0a3f9aa 100644
--- a/Chapter_2/Language/cn/Docs_2_2_2.html
+++ b/Chapter_2/Language/cn/Docs_2_2_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2213,7 +2200,7 @@ 2.2.2 饱
Given a known white point and a selected color, the Grassmann saturation value lets us compute the expected gradation of shades, which makes rapid color adjustment possible.
Like Hue, Saturation also lives in a simple descriptive system that is awkward to quantify within a rigorous framework. For this reason, in modern academia and industry the concept of saturation is likewise commonly replaced by Chromaticity together with a Color Space, which is easier to quantify for engineering purposes. Some schemes within the Modern Color System make effective use of the saturation concept through such conversions (see Section 2.5.7, color spaces built on the three elements of color, later in this book).
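To make the interpolation idea above concrete, here is a minimal sketch (not taken from the book) of one common way a saturation-style blend is approximated in practice: linearly interpolating between a reference white point and a chosen color, with the blend factor playing the role of saturation. The function name and the assumption of linear-light RGB in [0, 1] are purely illustrative.

```python
def desaturate_toward_white(color_rgb, white_rgb, saturation):
    """Blend a chosen color toward a reference white point.

    saturation = 1.0 keeps the original color, 0.0 collapses to the white
    point. Assumes linear-light RGB components in [0, 1] (illustrative only;
    a perceptually accurate blend would work in a chromaticity-based space).
    """
    return tuple(
        w + saturation * (c - w)  # lerp from the white point toward the color
        for c, w in zip(color_rgb, white_rgb)
    )

# Example: a five-step ramp of shades from the white point to a saturated red
white = (1.0, 1.0, 1.0)
red = (1.0, 0.0, 0.0)
ramp = [desaturate_toward_white(red, white, s / 4) for s in range(5)]
```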
@@ -2257,7 +2244,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.2.2 饱和度(Saturation)","level":"1.3.2.2","depth":3,"next":{"title":"2.2.3 光亮度(Luminance)","level":"1.3.2.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_2_3.md","ref":"Chapter_2/Language/cn/Docs_2_2_3.md","articles":[]},"previous":{"title":"2.2.1 色调(Hue)","level":"1.3.2.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_2_1.md","ref":"Chapter_2/Language/cn/Docs_2_2_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_2_2.md","mtime":"2024-09-11T06:09:50.170Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.2.2 饱和度(Saturation)","level":"1.3.2.2","depth":3,"next":{"title":"2.2.3 光亮度(Luminance)","level":"1.3.2.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_2_3.md","ref":"Chapter_2/Language/cn/Docs_2_2_3.md","articles":[]},"previous":{"title":"2.2.1 色调(Hue)","level":"1.3.2.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_2_1.md","ref":"Chapter_2/Language/cn/Docs_2_2_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_2_2.md","mtime":"2024-09-12T04:11:10.630Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_2_3.html b/Chapter_2/Language/cn/Docs_2_2_3.html
index 20b2794..8db9565 100644
--- a/Chapter_2/Language/cn/Docs_2_2_3.html
+++ b/Chapter_2/Language/cn/Docs_2_2_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2251,7 +2238,7 @@ 2.2.3 光
Can we apply a reference standard that describes a natural phenomenon to real-world production activities, which are subject to practical constraints?
@@ -2295,7 +2282,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.2.3 光亮度(Luminance)","level":"1.3.2.3","depth":3,"next":{"title":"2.3 色彩的衡量","level":"1.3.3","depth":2,"path":"Chapter_2/Language/cn/Docs_2_3.md","ref":"Chapter_2/Language/cn/Docs_2_3.md","articles":[{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_1.md","ref":"Chapter_2/Language/cn/Docs_2_3_1.md","articles":[]},{"title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","level":"1.3.3.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_2.md","ref":"Chapter_2/Language/cn/Docs_2_3_2.md","articles":[]},{"title":"2.3.3 经典三原色函数(Trichromatic Primaries Functions)","level":"1.3.3.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_3.md","ref":"Chapter_2/Language/cn/Docs_2_3_3.md","articles":[]},{"title":"2.3.4 经典三刺激函数(Tristimulus Values Functions)","level":"1.3.3.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_4.md","ref":"Chapter_2/Language/cn/Docs_2_3_4.md","articles":[]},{"title":"2.3.5 现代色彩体系(Modern Color System)","level":"1.3.3.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_5.md","ref":"Chapter_2/Language/cn/Docs_2_3_5.md","articles":[]}]},"previous":{"title":"2.2.2 饱和度(Saturation)","level":"1.3.2.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_2_2.md","ref":"Chapter_2/Language/cn/Docs_2_2_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_2_3.md","mtime":"2024-09-11T06:09:50.170Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.2.3 光亮度(Luminance)","level":"1.3.2.3","depth":3,"next":{"title":"2.3 色彩的衡量","level":"1.3.3","depth":2,"path":"Chapter_2/Language/cn/Docs_2_3.md","ref":"Chapter_2/Language/cn/Docs_2_3.md","articles":[{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_1.md","ref":"Chapter_2/Language/cn/Docs_2_3_1.md","articles":[]},{"title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","level":"1.3.3.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_2.md","ref":"Chapter_2/Language/cn/Docs_2_3_2.md","articles":[]},{"title":"2.3.3 经典三原色函数(Trichromatic Primaries Functions)","level":"1.3.3.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_3.md","ref":"Chapter_2/Language/cn/Docs_2_3_3.md","articles":[]},{"title":"2.3.4 经典三刺激函数(Tristimulus Values Functions)","level":"1.3.3.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_4.md","ref":"Chapter_2/Language/cn/Docs_2_3_4.md","articles":[]},{"title":"2.3.5 现代色彩体系(Modern Color System)","level":"1.3.3.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_5.md","ref":"Chapter_2/Language/cn/Docs_2_3_5.md","articles":[]}]},"previous":{"title":"2.2.2 饱和度(Saturation)","level":"1.3.2.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_2_2.md","ref":"Chapter_2/Language/cn/Docs_2_2_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_2_3.md","mtime":"2024-09-12T04:11:10.630Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_3.html b/Chapter_2/Language/cn/Docs_2_3.html
index 50182a0..8c0a061 100644
--- a/Chapter_2/Language/cn/Docs_2_3.html
+++ b/Chapter_2/Language/cn/Docs_2_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2173,7 +2160,7 @@ 2.3 色彩的
If a direct, quantifiable conversion relationship can be established between light waves themselves and colors, the representation problem is solved. This is how the color-matching functions came about.
The first thing to do, then, is to obtain scientific proof that provides the theoretical support for constructing such functions.
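For reference, the quantifiable conversion that was eventually standardized takes the form of weighted integrals of a stimulus's spectral power distribution S(λ) against three color-matching functions; the formulation below is the standard CIE 1931 one rather than notation specific to this book:

```latex
X = \int_{380}^{780} S(\lambda)\,\bar{x}(\lambda)\,d\lambda ,\qquad
Y = \int_{380}^{780} S(\lambda)\,\bar{y}(\lambda)\,d\lambda ,\qquad
Z = \int_{380}^{780} S(\lambda)\,\bar{z}(\lambda)\,d\lambda
```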
@@ -2217,7 +2204,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.3 色彩的衡量","level":"1.3.3","depth":2,"next":{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_1.md","ref":"Chapter_2/Language/cn/Docs_2_3_1.md","articles":[]},"previous":{"title":"2.2.3 光亮度(Luminance)","level":"1.3.2.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_2_3.md","ref":"Chapter_2/Language/cn/Docs_2_2_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_3.md","mtime":"2024-09-11T06:09:50.170Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.3 色彩的衡量","level":"1.3.3","depth":2,"next":{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_1.md","ref":"Chapter_2/Language/cn/Docs_2_3_1.md","articles":[]},"previous":{"title":"2.2.3 光亮度(Luminance)","level":"1.3.2.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_2_3.md","ref":"Chapter_2/Language/cn/Docs_2_2_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_3.md","mtime":"2024-09-12T04:11:10.630Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_3_1.html b/Chapter_2/Language/cn/Docs_2_3_1.html
index 9d6fab3..4b8d26b 100644
--- a/Chapter_2/Language/cn/Docs_2_3_1.html
+++ b/Chapter_2/Language/cn/Docs_2_3_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2292,7 +2279,7 @@ F(C), we can then express the color corresponding to color temperature T_0, in the form F(C_0, L_0), within the reference frame in which the function is defined. This conversion function F(C), used for matching colors, is therefore called the Color-Matching Functions.
As long as a suitable F(C) can be found so that colors can be measured in a unified way, industrial standards can be drawn up and modern engineering practice can formally begin.
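As a concrete illustration of "the color corresponding to color temperature T_0" (a sketch, not code from the book): the physical input is the blackbody spectrum given by Planck's law, which a color-matching function F(C) would then map into tristimulus coordinates. The sketch below only computes that spectrum; the final mapping requires tabulated CMF data and is omitted, and the function names are illustrative.

```python
import math

# Physical constants (SI units)
H = 6.62607015e-34    # Planck constant, J*s
C = 2.99792458e8      # speed of light, m/s
K_B = 1.380649e-23    # Boltzmann constant, J/K

def planck_radiance(wavelength_m, temperature_k):
    """Spectral radiance of a blackbody radiator at the given temperature."""
    numerator = 2.0 * H * C ** 2 / wavelength_m ** 5
    return numerator / (math.exp(H * C / (wavelength_m * K_B * temperature_k)) - 1.0)

# Example: sample the visible-range spectrum of a 6500 K radiator at 10 nm steps;
# feeding these samples through the color-matching functions would yield F(C_0, L_0).
spectrum_t0 = [(wl, planck_radiance(wl * 1e-9, 6500.0)) for wl in range(380, 781, 10)]
```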
@@ -2336,7 +2323,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"next":{"title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","level":"1.3.3.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_2.md","ref":"Chapter_2/Language/cn/Docs_2_3_2.md","articles":[]},"previous":{"title":"2.3 色彩的衡量","level":"1.3.3","depth":2,"path":"Chapter_2/Language/cn/Docs_2_3.md","ref":"Chapter_2/Language/cn/Docs_2_3.md","articles":[{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_1.md","ref":"Chapter_2/Language/cn/Docs_2_3_1.md","articles":[]},{"title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","level":"1.3.3.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_2.md","ref":"Chapter_2/Language/cn/Docs_2_3_2.md","articles":[]},{"title":"2.3.3 经典三原色函数(Trichromatic Primaries Functions)","level":"1.3.3.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_3.md","ref":"Chapter_2/Language/cn/Docs_2_3_3.md","articles":[]},{"title":"2.3.4 经典三刺激函数(Tristimulus Values Functions)","level":"1.3.3.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_4.md","ref":"Chapter_2/Language/cn/Docs_2_3_4.md","articles":[]},{"title":"2.3.5 现代色彩体系(Modern Color System)","level":"1.3.3.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_5.md","ref":"Chapter_2/Language/cn/Docs_2_3_5.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_3_1.md","mtime":"2024-09-11T06:09:50.190Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"next":{"title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","level":"1.3.3.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_2.md","ref":"Chapter_2/Language/cn/Docs_2_3_2.md","articles":[]},"previous":{"title":"2.3 色彩的衡量","level":"1.3.3","depth":2,"path":"Chapter_2/Language/cn/Docs_2_3.md","ref":"Chapter_2/Language/cn/Docs_2_3.md","articles":[{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_1.md","ref":"Chapter_2/Language/cn/Docs_2_3_1.md","articles":[]},{"title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","level":"1.3.3.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_2.md","ref":"Chapter_2/Language/cn/Docs_2_3_2.md","articles":[]},{"title":"2.3.3 经典三原色函数(Trichromatic Primaries Functions)","level":"1.3.3.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_3.md","ref":"Chapter_2/Language/cn/Docs_2_3_3.md","articles":[]},{"title":"2.3.4 经典三刺激函数(Tristimulus Values Functions)","level":"1.3.3.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_4.md","ref":"Chapter_2/Language/cn/Docs_2_3_4.md","articles":[]},{"title":"2.3.5 现代色彩体系(Modern Color System)","level":"1.3.3.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_5.md","ref":"Chapter_2/Language/cn/Docs_2_3_5.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_3_1.md","mtime":"2024-09-12T04:11:10.640Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_3_2.html b/Chapter_2/Language/cn/Docs_2_3_2.html
index 432510c..cc141b1 100644
--- a/Chapter_2/Language/cn/Docs_2_3_2.html
+++ b/Chapter_2/Language/cn/Docs_2_3_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2183,7 +2170,7 @@ Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved
@@ -2227,7 +2214,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","level":"1.3.3.2","depth":3,"next":{"title":"2.3.3 经典三原色函数(Trichromatic Primaries Functions)","level":"1.3.3.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_3.md","ref":"Chapter_2/Language/cn/Docs_2_3_3.md","articles":[]},"previous":{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_1.md","ref":"Chapter_2/Language/cn/Docs_2_3_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_3_2.md","mtime":"2024-09-11T06:09:50.190Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","level":"1.3.3.2","depth":3,"next":{"title":"2.3.3 经典三原色函数(Trichromatic Primaries Functions)","level":"1.3.3.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_3.md","ref":"Chapter_2/Language/cn/Docs_2_3_3.md","articles":[]},"previous":{"title":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","level":"1.3.3.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_1.md","ref":"Chapter_2/Language/cn/Docs_2_3_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_3_2.md","mtime":"2024-09-12T04:11:10.640Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_3_3.html b/Chapter_2/Language/cn/Docs_2_3_3.html
index d5864af..0012408 100644
--- a/Chapter_2/Language/cn/Docs_2_3_3.html
+++ b/Chapter_2/Language/cn/Docs_2_3_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2215,7 +2202,7 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2226,7 +2213,7 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2186,7 +2173,7 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2172,7 +2159,7 @@ 2.4 色彩的
Since the CIE RGB & CIE XYZ color spaces [12] were proposed in 1931, the standard for comparing colors in engineering has been unified under a series of specifications progressively adopted, organized, and defined by the CIE. Because the CIE XYZ color space is intuitive, objective, and spans the full visible gamut with non-negative coordinates, it is better suited to serve as the reference system for industrial applications. Consequently, we usually convert the color data we need to process into CIE XYZ before weighing it.
Of course, the CIE color-space system did not arrive at its present, dominant position in one step; it went through repeated proposals and iterations. Here we first introduce the key concepts standardized by the CIE for engineering use, so that in the following chapters we can better understand the background behind each color space and the specific problems it was designed to solve.
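Because later chapters repeatedly convert colors into CIE XYZ for comparison, a minimal sketch of one such conversion may help (this is the widely published sRGB-to-XYZ transform under the D65 white point, not a formula defined by this book):

```python
def srgb_to_xyz(r, g, b):
    """Convert a normalized sRGB color (components in 0..1) to CIE XYZ (D65)."""
    def linearize(c):
        # Undo the sRGB gamma encoding to obtain linear-light values
        return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4

    rl, gl, bl = linearize(r), linearize(g), linearize(b)
    x = 0.4124 * rl + 0.3576 * gl + 0.1805 * bl
    y = 0.2126 * rl + 0.7152 * gl + 0.0722 * bl
    z = 0.0193 * rl + 0.1192 * gl + 0.9505 * bl
    return x, y, z

# Example: sRGB white maps (approximately) to the D65 white point
print(srgb_to_xyz(1.0, 1.0, 1.0))   # ~ (0.9505, 1.0000, 1.0890)
```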
@@ -2216,7 +2203,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.4 色彩的对比","level":"1.3.4","depth":2,"next":{"title":"2.4.1 色域(Color Gamut )","level":"1.3.4.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_1.md","ref":"Chapter_2/Language/cn/Docs_2_4_1.md","articles":[]},"previous":{"title":"2.3.5 现代色彩体系(Modern Color System)","level":"1.3.3.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_5.md","ref":"Chapter_2/Language/cn/Docs_2_3_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_4.md","mtime":"2024-09-11T06:09:50.200Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.4 色彩的对比","level":"1.3.4","depth":2,"next":{"title":"2.4.1 色域(Color Gamut )","level":"1.3.4.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_1.md","ref":"Chapter_2/Language/cn/Docs_2_4_1.md","articles":[]},"previous":{"title":"2.3.5 现代色彩体系(Modern Color System)","level":"1.3.3.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_3_5.md","ref":"Chapter_2/Language/cn/Docs_2_3_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_4.md","mtime":"2024-09-12T04:11:10.650Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_4_1.html b/Chapter_2/Language/cn/Docs_2_4_1.html
index fb7d286..a0435af 100644
--- a/Chapter_2/Language/cn/Docs_2_4_1.html
+++ b/Chapter_2/Language/cn/Docs_2_4_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Preparing the Analysis Environment
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2182,7 +2169,7 @@ 2.4.1 色域&
Because the most fundamental definitions of CIE RGB & XYZ are based on the human visual response measured over a 2° field of view at the fovea centralis, the gamut and its related concepts (such as chromaticity) are, unless the viewing angle of the foveal field is stated explicitly, assumed to refer to measurements made at the 2° angle (besides 2°, the 10° angle is also relatively common).
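To connect the gamut concept to something computable (an illustrative sketch, not the book's own code): a display gamut is often summarized as the triangle spanned by its primaries' (x, y) chromaticities under the 2° observer, and the areas of such triangles can be compared with the shoelace formula. The sRGB/Rec.709 primary coordinates below are the standard published values.

```python
def triangle_area(p1, p2, p3):
    """Area of a gamut triangle in the xy chromaticity plane (shoelace formula)."""
    (x1, y1), (x2, y2), (x3, y3) = p1, p2, p3
    return abs(x1 * (y2 - y3) + x2 * (y3 - y1) + x3 * (y1 - y2)) / 2.0

# sRGB / Rec.709 primaries as CIE 1931 2-degree observer chromaticities
srgb_primaries = [(0.64, 0.33), (0.30, 0.60), (0.15, 0.06)]
print(triangle_area(*srgb_primaries))   # ~0.112, a rough "size" of the sRGB gamut
```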
@@ -2226,7 +2213,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.4.1 色域(Color Gamut )","level":"1.3.4.1","depth":3,"next":{"title":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram)","level":"1.3.4.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_2.md","ref":"Chapter_2/Language/cn/Docs_2_4_2.md","articles":[]},"previous":{"title":"2.4 色彩的对比","level":"1.3.4","depth":2,"path":"Chapter_2/Language/cn/Docs_2_4.md","ref":"Chapter_2/Language/cn/Docs_2_4.md","articles":[{"title":"2.4.1 色域(Color Gamut )","level":"1.3.4.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_1.md","ref":"Chapter_2/Language/cn/Docs_2_4_1.md","articles":[]},{"title":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram)","level":"1.3.4.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_2.md","ref":"Chapter_2/Language/cn/Docs_2_4_2.md","articles":[]},{"title":"2.4.3 色差(Chromatic Aberration)","level":"1.3.4.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_3.md","ref":"Chapter_2/Language/cn/Docs_2_4_3.md","articles":[]},{"title":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature)","level":"1.3.4.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_4.md","ref":"Chapter_2/Language/cn/Docs_2_4_4.md","articles":[]},{"title":"2.4.5 标准光源(Standard Illuminants)& 白点(White Point)","level":"1.3.4.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_5.md","ref":"Chapter_2/Language/cn/Docs_2_4_5.md","articles":[]},{"title":"2.4.6 显色指数(Color Rendering Index)","level":"1.3.4.6","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_6.md","ref":"Chapter_2/Language/cn/Docs_2_4_6.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_4_1.md","mtime":"2024-09-11T06:09:50.200Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.4.1 色域(Color Gamut )","level":"1.3.4.1","depth":3,"next":{"title":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram)","level":"1.3.4.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_2.md","ref":"Chapter_2/Language/cn/Docs_2_4_2.md","articles":[]},"previous":{"title":"2.4 色彩的对比","level":"1.3.4","depth":2,"path":"Chapter_2/Language/cn/Docs_2_4.md","ref":"Chapter_2/Language/cn/Docs_2_4.md","articles":[{"title":"2.4.1 色域(Color Gamut )","level":"1.3.4.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_1.md","ref":"Chapter_2/Language/cn/Docs_2_4_1.md","articles":[]},{"title":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram)","level":"1.3.4.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_2.md","ref":"Chapter_2/Language/cn/Docs_2_4_2.md","articles":[]},{"title":"2.4.3 色差(Chromatic Aberration)","level":"1.3.4.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_3.md","ref":"Chapter_2/Language/cn/Docs_2_4_3.md","articles":[]},{"title":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature)","level":"1.3.4.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_4.md","ref":"Chapter_2/Language/cn/Docs_2_4_4.md","articles":[]},{"title":"2.4.5 标准光源(Standard Illuminants)& 白点(White Point)","level":"1.3.4.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_5.md","ref":"Chapter_2/Language/cn/Docs_2_4_5.md","articles":[]},{"title":"2.4.6 显色指数(Color Rendering Index)","level":"1.3.4.6","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_6.md","ref":"Chapter_2/Language/cn/Docs_2_4_6.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_4_1.md","mtime":"2024-09-12T04:11:10.650Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_4_2.html b/Chapter_2/Language/cn/Docs_2_4_2.html
index fdcbc1c..9c643e9 100644
--- a/Chapter_2/Language/cn/Docs_2_4_2.html
+++ b/Chapter_2/Language/cn/Docs_2_4_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2196,7 +2183,7 @@ 2.4.3 色
$C = \sqrt{\Delta x^{2} + \Delta y^{2}}$
This replaces the hue and saturation parameters, unifying the broad and narrow senses of the term at the formula level.
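A minimal sketch of the formula above, assuming $\Delta x$ and $\Delta y$ are the offsets of a color's chromaticity coordinates from a reference white point; the D65 default and the sample chromaticities are illustrative, not from the original page:

    import math

    def chroma_xy(x, y, white_x=0.3127, white_y=0.3290):
        """Distance of a chromaticity (x, y) from a reference white point.

        Illustrative sketch of C = sqrt(dx^2 + dy^2); the D65 white point
        (0.3127, 0.3290) is only an assumed default.
        """
        dx = x - white_x
        dy = y - white_y
        return math.sqrt(dx * dx + dy * dy)

    # A saturated red chromaticity lies much farther from white than a near-neutral one.
    print(chroma_xy(0.64, 0.33))   # ~0.327
    print(chroma_xy(0.32, 0.33))   # ~0.007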
@@ -2240,7 +2227,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.4.3 色差(Chromatic Aberration)","level":"1.3.4.3","depth":3,"next":{"title":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature)","level":"1.3.4.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_4.md","ref":"Chapter_2/Language/cn/Docs_2_4_4.md","articles":[]},"previous":{"title":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram)","level":"1.3.4.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_2.md","ref":"Chapter_2/Language/cn/Docs_2_4_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_4_3.md","mtime":"2024-09-11T06:09:50.200Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.4.3 色差(Chromatic Aberration)","level":"1.3.4.3","depth":3,"next":{"title":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature)","level":"1.3.4.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_4.md","ref":"Chapter_2/Language/cn/Docs_2_4_4.md","articles":[]},"previous":{"title":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram)","level":"1.3.4.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_2.md","ref":"Chapter_2/Language/cn/Docs_2_4_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_4_3.md","mtime":"2024-09-12T04:11:10.660Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_4_4.html b/Chapter_2/Language/cn/Docs_2_4_4.html
index 32f10e5..1e4af34 100644
--- a/Chapter_2/Language/cn/Docs_2_4_4.html
+++ b/Chapter_2/Language/cn/Docs_2_4_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2362,7 +2349,7 @@
$$x = \begin{cases} -0.2661239\,\dfrac{10^9}{T_c^3} - 0.2343589\,\dfrac{10^6}{T_c^2} + 0.8776956\,\dfrac{10^3}{T_c} + 0.179910 & 1667\,K \le T_c \le 4000\,K \\ -3.0258469\,\dfrac{10^9}{T_c^3} + 2.1070379\,\dfrac{10^6}{T_c^2} + 0.2226347\,\dfrac{10^3}{T_c} + 0.240390 & 4000\,K \le T_c \le 25000\,K \end{cases}$$
$$y = \begin{cases} -1.1063814\,x^3 - 1.34811020\,x^2 + 2.18555832\,x - 0.20219683 & 1667\,K \le T_c \le 2222\,K \\ -0.9549476\,x^3 - 1.37418593\,x^2 + 2.09137015\,x - 0.16748867 & 2222\,K \le T_c \le 4000\,K \\ +3.0817580\,x^3 - 5.87338670\,x^2 + 3.75112997\,x - 0.37001483 & 4000\,K \le T_c \le 25000\,K \end{cases}$$
However, this set of formulas is still less practical, in scenarios that do not demand precision, than directly computing the chromaticity on the Planckian locus from the corresponding physical color temperature. So, as with the McCamy-index approximation, the CIE lists it only as a supplement for cases where a precise correlated color temperature value is required. A passing familiarity is enough here.
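A minimal sketch of the piecewise cubic approximation quoted above; the function name and the 6500 K test value are illustrative, not from the original page:

    def cct_to_xy(tc):
        """Approximate CIE 1931 (x, y) chromaticity of a Planckian radiator
        from its color temperature tc (kelvin), using the cubic fits above.
        Only defined for 1667 K <= tc <= 25000 K."""
        if not (1667 <= tc <= 25000):
            raise ValueError("approximation only defined for 1667 K .. 25000 K")
        t = 1000.0 / tc                     # 10^3 / Tc, so t**2 = 10^6/Tc^2, t**3 = 10^9/Tc^3
        if tc <= 4000:
            x = -0.2661239 * t**3 - 0.2343589 * t**2 + 0.8776956 * t + 0.179910
        else:
            x = -3.0258469 * t**3 + 2.1070379 * t**2 + 0.2226347 * t + 0.240390
        if tc <= 2222:
            y = -1.1063814 * x**3 - 1.34811020 * x**2 + 2.18555832 * x - 0.20219683
        elif tc <= 4000:
            y = -0.9549476 * x**3 - 1.37418593 * x**2 + 2.09137015 * x - 0.16748867
        else:
            y = 3.0817580 * x**3 - 5.87338670 * x**2 + 3.75112997 * x - 0.37001483
        return x, y

    # Roughly (0.313, 0.324): near, but not identical to, the D65 white point,
    # since this follows the Planckian locus rather than the daylight locus.
    print(cct_to_xy(6500))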
@@ -2406,7 +2393,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature)","level":"1.3.4.4","depth":3,"next":{"title":"2.4.5 标准光源(Standard Illuminants)& 白点(White Point)","level":"1.3.4.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_5.md","ref":"Chapter_2/Language/cn/Docs_2_4_5.md","articles":[]},"previous":{"title":"2.4.3 色差(Chromatic Aberration)","level":"1.3.4.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_3.md","ref":"Chapter_2/Language/cn/Docs_2_4_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_4_4.md","mtime":"2024-09-11T06:09:50.200Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature)","level":"1.3.4.4","depth":3,"next":{"title":"2.4.5 标准光源(Standard Illuminants)& 白点(White Point)","level":"1.3.4.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_5.md","ref":"Chapter_2/Language/cn/Docs_2_4_5.md","articles":[]},"previous":{"title":"2.4.3 色差(Chromatic Aberration)","level":"1.3.4.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_4_3.md","ref":"Chapter_2/Language/cn/Docs_2_4_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_4_4.md","mtime":"2024-09-12T04:11:10.660Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_4_5.html b/Chapter_2/Language/cn/Docs_2_4_5.html
index 0e245f1..bfca4c9 100644
--- a/Chapter_2/Language/cn/Docs_2_4_5.html
+++ b/Chapter_2/Language/cn/Docs_2_4_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2204,7 +2191,7 @@
$$\begin{bmatrix} C' \\ M' \\ Y' \end{bmatrix} = \begin{bmatrix} (1-K)\cdot C + K \\ (1-K)\cdot M + K \\ (1-K)\cdot Y + K \end{bmatrix}$$
For converting between the CMYK and RGB color spaces, the CMY color space has to serve as a bridge: depending on the direction of conversion, first go through CMY with $C_{RGB} \rightarrow C_{CMY}$ or $C_{CMYK} \rightarrow C_{CMY}$, and then use the relationships among CMY, RGB, and CMYK to complete the conversion indirectly.
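A minimal sketch of this indirect conversion, assuming channels normalized to [0, 1], the simple non-ICC relation $C_{CMY} = 1 - C_{RGB}$, and the bracketed CMYK-to-CMY relation above; function names and sample values are illustrative:

    def rgb_to_cmyk(r, g, b):
        """RGB -> CMY -> CMYK with channels in [0, 1].
        Simple device-independent sketch, not an ICC-managed conversion."""
        c, m, y = 1.0 - r, 1.0 - g, 1.0 - b      # C_RGB -> C_CMY
        k = min(c, m, y)                         # pull the shared black component out
        if k >= 1.0:                             # pure black: avoid dividing by zero
            return 0.0, 0.0, 0.0, 1.0
        return (c - k) / (1 - k), (m - k) / (1 - k), (y - k) / (1 - k), k

    def cmyk_to_rgb(c, m, y, k):
        """CMYK -> CMY -> RGB, applying the bracketed relation above:
        C' = (1 - K) * C + K, then RGB = 1 - CMY."""
        c2 = (1 - k) * c + k
        m2 = (1 - k) * m + k
        y2 = (1 - k) * y + k
        return 1.0 - c2, 1.0 - m2, 1.0 - y2

    print(rgb_to_cmyk(0.2, 0.4, 0.6))                 # -> (0.666..., 0.333..., 0.0, 0.4)
    print(cmyk_to_rgb(0.6666666, 0.3333333, 0.0, 0.4))  # -> back to ~(0.2, 0.4, 0.6)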
@@ -2248,7 +2235,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.5.2 颜料三原色色彩空间(CMY / CMYK )","level":"1.3.5.2","depth":3,"next":{"title":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space)","level":"1.3.5.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_3.md","ref":"Chapter_2/Language/cn/Docs_2_5_3.md","articles":[]},"previous":{"title":"2.5.1 光学三原色色彩空间(RGB)","level":"1.3.5.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_1.md","ref":"Chapter_2/Language/cn/Docs_2_5_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_5_2.md","mtime":"2024-09-11T06:09:50.230Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.5.2 颜料三原色色彩空间(CMY / CMYK )","level":"1.3.5.2","depth":3,"next":{"title":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space)","level":"1.3.5.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_3.md","ref":"Chapter_2/Language/cn/Docs_2_5_3.md","articles":[]},"previous":{"title":"2.5.1 光学三原色色彩空间(RGB)","level":"1.3.5.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_1.md","ref":"Chapter_2/Language/cn/Docs_2_5_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_5_2.md","mtime":"2024-09-12T04:11:10.680Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_5_3.html b/Chapter_2/Language/cn/Docs_2_5_3.html
index 94d3f5b..c64d861 100644
--- a/Chapter_2/Language/cn/Docs_2_5_3.html
+++ b/Chapter_2/Language/cn/Docs_2_5_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2187,7 +2174,7 @@
$C_{RGB} = R \cdot Red_{700} + G \cdot Green_{546.1} + B \cdot Blue_{435.8} = \mathrm{Vector}[R, G, B]$
As a result, CIE RGB also unavoidably inherits the negative color matching problem of the optical three-primary (RGB) color space.
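To make that point concrete, a small sketch (type name and numbers are purely illustrative) that treats a CIE RGB color as the coefficient vector of the three monochromatic primaries and flags matches that would need a negative amount of a primary:

    from dataclasses import dataclass

    @dataclass
    class CieRgb:
        """A color expressed as amounts of the three CIE RGB primaries
        (monochromatic 700 nm, 546.1 nm, 435.8 nm lights)."""
        r: float
        g: float
        b: float

        def is_directly_matchable(self) -> bool:
            """True if the match needs only non-negative amounts of the primaries.
            A negative component means that primary had to be mixed into the
            *target* light instead -- the negative color matching case."""
            return self.r >= 0 and self.g >= 0 and self.b >= 0

    # Hypothetical tristimulus values for illustration only: spectral colors
    # around ~500 nm need a negative red coefficient in CIE RGB.
    cyan_like = CieRgb(r=-0.05, g=0.08, b=0.05)
    print(cyan_like.is_directly_matchable())   # False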
@@ -2231,7 +2218,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space)","level":"1.3.5.3","depth":3,"next":{"title":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space)","level":"1.3.5.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_4.md","ref":"Chapter_2/Language/cn/Docs_2_5_4.md","articles":[]},"previous":{"title":"2.5.2 颜料三原色色彩空间(CMY / CMYK )","level":"1.3.5.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_2.md","ref":"Chapter_2/Language/cn/Docs_2_5_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_5_3.md","mtime":"2024-09-11T06:09:50.230Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space)","level":"1.3.5.3","depth":3,"next":{"title":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space)","level":"1.3.5.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_4.md","ref":"Chapter_2/Language/cn/Docs_2_5_4.md","articles":[]},"previous":{"title":"2.5.2 颜料三原色色彩空间(CMY / CMYK )","level":"1.3.5.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_2.md","ref":"Chapter_2/Language/cn/Docs_2_5_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_5_3.md","mtime":"2024-09-12T04:11:10.680Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_5_4.html b/Chapter_2/Language/cn/Docs_2_5_4.html
index 306c363..3d66092 100644
--- a/Chapter_2/Language/cn/Docs_2_5_4.html
+++ b/Chapter_2/Language/cn/Docs_2_5_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2211,7 +2198,7 @@ ... measured with $\Delta C$, then in the XYZ color space the color change corresponding to one unit of $\Delta C$ turns out to be far from uniform. This is the average color difference problem.
How is the average color difference problem handled? The CIE and the American standards took different approaches. The CIE split the color difference problem into two separately considered sub-problems: making the chromaticity diagram uniform, and normalizing the influence of the white point. This led to the CIE LAB color space standard, which focuses on fine color difference variations, and the CIE LUV color space standard, which emphasizes linear normalization against standard illuminants. The American standards, starting from commercial considerations, pursued color reproduction closer to human visual perception while also meeting the precision requirements of industrial color workflows, which in turn drove the definition of the three-color-attribute color spaces (HSV / HSI / HSL).
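For reference, a minimal sketch of the standard XYZ-to-LAB mapping (stated from general colorimetry, not taken from this page's omitted text): dividing by the reference white is the white point normalization step, and the cube-root mapping is the uniformization step. The D65 white and the sample input are assumptions for illustration:

    def xyz_to_lab(x, y, z, white=(95.047, 100.0, 108.883)):
        """CIE XYZ -> CIE LAB. `white` is the reference white (D65 assumed here);
        dividing by it normalizes the white point, and the cube-root mapping
        makes equal steps closer to equal perceived differences."""
        xn, yn, zn = white
        def f(t, d=6.0 / 29.0):
            return t ** (1.0 / 3.0) if t > d ** 3 else t / (3 * d * d) + 4.0 / 29.0
        fx, fy, fz = f(x / xn), f(y / yn), f(z / zn)
        return 116 * fy - 16, 500 * (fx - fy), 200 * (fy - fz)

    # sRGB pure red expressed in XYZ under D65, illustrative input:
    print(xyz_to_lab(41.24, 21.26, 1.93))   # ~ (53.2, 80.1, 67.2)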
@@ -2255,7 +2242,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space)","level":"1.3.5.4","depth":3,"next":{"title":"2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space)","level":"1.3.5.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_5.md","ref":"Chapter_2/Language/cn/Docs_2_5_5.md","articles":[]},"previous":{"title":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space)","level":"1.3.5.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_3.md","ref":"Chapter_2/Language/cn/Docs_2_5_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_5_4.md","mtime":"2024-09-11T06:09:50.230Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space)","level":"1.3.5.4","depth":3,"next":{"title":"2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space)","level":"1.3.5.5","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_5.md","ref":"Chapter_2/Language/cn/Docs_2_5_5.md","articles":[]},"previous":{"title":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space)","level":"1.3.5.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_3.md","ref":"Chapter_2/Language/cn/Docs_2_5_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_5_4.md","mtime":"2024-09-12T04:11:10.680Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_5_5.html b/Chapter_2/Language/cn/Docs_2_5_5.html
index 679b8bf..5b0d2da 100644
--- a/Chapter_2/Language/cn/Docs_2_5_5.html
+++ b/Chapter_2/Language/cn/Docs_2_5_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2249,7 +2236,7 @@ $\Delta C = \sqrt{(\Delta a^{\star})^2 + (\Delta b^{\star})^2}$ ... one finds that, across the whole human-visible gamut partitioned this way, the uniformity of the color difference depends on the white point and is still not perfectly uniform: the closer to the white point of the chromaticity diagram, the smaller the color change per unit difference; the closer to the edge, the larger, although this is already a big improvement over XYZ.
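A minimal sketch of this chroma-plane difference, together with the full CIE76 $\Delta E^{\star}_{ab}$ that also includes the lightness term; the sample values are illustrative:

    import math

    def delta_c_ab(a1, b1, a2, b2):
        """Chroma-plane difference between two LAB colors:
        delta_C = sqrt((delta_a*)^2 + (delta_b*)^2)."""
        return math.hypot(a2 - a1, b2 - b1)

    def delta_e76(l1, a1, b1, l2, a2, b2):
        """Full CIE76 color difference, adding the lightness term:
        delta_E*ab = sqrt((delta_L*)^2 + (delta_a*)^2 + (delta_b*)^2)."""
        return math.sqrt((l2 - l1) ** 2 + (a2 - a1) ** 2 + (b2 - b1) ** 2)

    # Illustrative values: two mid-lightness colors a few LAB units apart.
    print(delta_c_ab(20.0, 30.0, 23.0, 34.0))             # 5.0
    print(delta_e76(50.0, 20.0, 30.0, 52.0, 23.0, 34.0))  # ~5.385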
@@ -2293,7 +2280,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space)","level":"1.3.5.5","depth":3,"next":{"title":"2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* Color Space)","level":"1.3.5.6","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_6.md","ref":"Chapter_2/Language/cn/Docs_2_5_6.md","articles":[]},"previous":{"title":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space)","level":"1.3.5.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_4.md","ref":"Chapter_2/Language/cn/Docs_2_5_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_5_5.md","mtime":"2024-09-11T06:09:50.230Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space)","level":"1.3.5.5","depth":3,"next":{"title":"2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* Color Space)","level":"1.3.5.6","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_6.md","ref":"Chapter_2/Language/cn/Docs_2_5_6.md","articles":[]},"previous":{"title":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space)","level":"1.3.5.4","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_4.md","ref":"Chapter_2/Language/cn/Docs_2_5_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_5_5.md","mtime":"2024-09-12T04:11:10.690Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_5_6.html b/Chapter_2/Language/cn/Docs_2_5_6.html
index 78d9cb9..835ce63 100644
--- a/Chapter_2/Language/cn/Docs_2_5_6.html
+++ b/Chapter_2/Language/cn/Docs_2_5_6.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2175,7 +2162,7 @@ 2.6 色彩的
With the rapid development of personal computers in the 1980s, grayscale formats evolved from 1-bit encodings, through the 2-bit format of IBM's Monochrome Display Adapter (MDA), and the 4-bit format provided by the 8563 Video Display Controller (VDC) in the Commodore 128, to the 8-bit monochrome formats of the Apple II and the IBM 5150.
In 1981, drawing on the CIE 1976 UCS extensions to the RGB color space, IBM developed and released the IBM 5153 together with the IBM Color Graphics Adapter (CGA), which carried color data encoding and decoding. This marked the computer's formal entry into the color era and opened the door to modern color formats on computers.
@@ -2219,7 +2206,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.6 色彩的存储","level":"1.3.6","depth":2,"next":{"title":"2.6.1 色彩格式(Color Format)与色彩存储","level":"1.3.6.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_1.md","ref":"Chapter_2/Language/cn/Docs_2_6_1.md","articles":[]},"previous":{"title":"2.5.7 颜色三要素色彩空间(HSV / HSI / HSL)","level":"1.3.5.7","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_7.md","ref":"Chapter_2/Language/cn/Docs_2_5_7.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_6.md","mtime":"2024-09-11T06:09:50.240Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.6 色彩的存储","level":"1.3.6","depth":2,"next":{"title":"2.6.1 色彩格式(Color Format)与色彩存储","level":"1.3.6.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_1.md","ref":"Chapter_2/Language/cn/Docs_2_6_1.md","articles":[]},"previous":{"title":"2.5.7 颜色三要素色彩空间(HSV / HSI / HSL)","level":"1.3.5.7","depth":3,"path":"Chapter_2/Language/cn/Docs_2_5_7.md","ref":"Chapter_2/Language/cn/Docs_2_5_7.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_6.md","mtime":"2024-09-12T04:11:10.690Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_6_1.html b/Chapter_2/Language/cn/Docs_2_6_1.html
index 34889e5..2d2820e 100644
--- a/Chapter_2/Language/cn/Docs_2_6_1.html
+++ b/Chapter_2/Language/cn/Docs_2_6_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2190,7 +2177,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2380,7 +2367,7 @@ 24-bit RGB & 32-bit RGBA8888 Clearly, the growth of RGB color formats is tightly coupled to physical storage: every expansion of the representable color depth has implied a significant jump in storage media and capacity.
This characteristic means that, as long as the storage and processing capability of most graphics cards on the market does not advance, finer-grained RGB color formats are unlikely to gain wide adoption. By the same logic, the YUV color formats widely used for image transmission are specification-driven: they depend primarily on the evolution of transport protocols and on successive upgrades in data bandwidth.
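To make the storage argument concrete, here is a minimal sketch (illustrative only; the 1920x1080 resolution and the helper names are assumptions, not taken from the book) that compares the raw per-frame footprint of 24-bit RGB, 32-bit RGBA8888, and 8-bit YUV 4:2:0:

#include <stdio.h>
#include <stdint.h>

/* Raw (uncompressed) frame sizes in bytes for a width x height image. */
static uint64_t rgb24_bytes(uint64_t w, uint64_t h)    { return w * h * 3; }      /* 3 channels x 8 bits            */
static uint64_t rgba8888_bytes(uint64_t w, uint64_t h) { return w * h * 4; }      /* adds an 8-bit alpha channel    */
static uint64_t yuv420_bytes(uint64_t w, uint64_t h)   { return w * h * 3 / 2; }  /* full-res Y, quarter-res U and V */

int main(void) {
    uint64_t w = 1920, h = 1080;  /* assumed example resolution */
    printf("RGB24   : %llu bytes\n", (unsigned long long)rgb24_bytes(w, h));     /* 6,220,800 */
    printf("RGBA8888: %llu bytes\n", (unsigned long long)rgba8888_bytes(w, h));  /* 8,294,400 */
    printf("YUV420  : %llu bytes\n", (unsigned long long)yuv420_bytes(w, h));    /* 3,110,400 */
    return 0;
}

The same arithmetic is why a finer RGB depth shows up directly as a larger framebuffer, whereas YUV 4:2:0 trades chroma resolution for roughly a 50% saving relative to RGB24.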
@@ -2424,7 +2411,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.6.2 RGB 体系色彩格式","level":"1.3.6.2","depth":3,"next":{"title":"2.6.3 YUV 体系色彩格式","level":"1.3.6.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_3.md","ref":"Chapter_2/Language/cn/Docs_2_6_3.md","articles":[]},"previous":{"title":"2.6.1 色彩格式(Color Format)与色彩存储","level":"1.3.6.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_1.md","ref":"Chapter_2/Language/cn/Docs_2_6_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_6_2.md","mtime":"2024-09-11T06:09:50.240Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.6.2 RGB 体系色彩格式","level":"1.3.6.2","depth":3,"next":{"title":"2.6.3 YUV 体系色彩格式","level":"1.3.6.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_3.md","ref":"Chapter_2/Language/cn/Docs_2_6_3.md","articles":[]},"previous":{"title":"2.6.1 色彩格式(Color Format)与色彩存储","level":"1.3.6.1","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_1.md","ref":"Chapter_2/Language/cn/Docs_2_6_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_6_2.md","mtime":"2024-09-12T04:11:10.700Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/Docs_2_6_3.html b/Chapter_2/Language/cn/Docs_2_6_3.html
index 3b54203..4ccd281 100644
--- a/Chapter_2/Language/cn/Docs_2_6_3.html
+++ b/Chapter_2/Language/cn/Docs_2_6_3.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2338,7 +2325,7 @@ YUV 的
At this point, the image color processing portion of audio/video engineering has essentially been covered. In the next chapter, we will apply the audio/video knowledge acquired so far to a hands-on exercise: a basic analysis of one audio clip and one image.
@@ -2382,7 +2369,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"2.6.3 YUV 体系色彩格式","level":"1.3.6.3","depth":3,"next":{"title":"【参考文献】","level":"1.3.7","depth":2,"path":"Chapter_2/Language/cn/References_2.md","ref":"Chapter_2/Language/cn/References_2.md","articles":[]},"previous":{"title":"2.6.2 RGB 体系色彩格式","level":"1.3.6.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_2.md","ref":"Chapter_2/Language/cn/Docs_2_6_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_6_3.md","mtime":"2024-09-11T06:09:50.240Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"2.6.3 YUV 体系色彩格式","level":"1.3.6.3","depth":3,"next":{"title":"【参考文献】","level":"1.3.7","depth":2,"path":"Chapter_2/Language/cn/References_2.md","ref":"Chapter_2/Language/cn/References_2.md","articles":[]},"previous":{"title":"2.6.2 RGB 体系色彩格式","level":"1.3.6.2","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_2.md","ref":"Chapter_2/Language/cn/Docs_2_6_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/Docs_2_6_3.md","mtime":"2024-09-12T04:11:10.700Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_2/Language/cn/References_2.html b/Chapter_2/Language/cn/References_2.html
index 1347b03..4e82e2b 100644
--- a/Chapter_2/Language/cn/References_2.html
+++ b/Chapter_2/Language/cn/References_2.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2219,7 +2206,7 @@
[48] ITU-R, Rec. ITU-R BT.2020-2, "BT.2020 : Parameter values for ultra-high definition television systems for production and international programme exchange", Article Number E 70000, archived from the original on 2015-10-14
[49] 雷霄骅, "Color format conversion: The simplest example of libswscale based on FFmpeg (YUV to RGB)", archived (Web: https://blog.csdn.net/leixiaohua1020/article/details/42134965 ) from the original on 2014-12-28
@@ -2263,7 +2250,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"【参考文献】","level":"1.3.7","depth":2,"next":{"title":"三、音视频常用基础算法","level":"1.4","depth":1,"path":"Chapter_3/Language/cn/Apex_3_Introduce.md","ref":"Chapter_3/Language/cn/Apex_3_Introduce.md","articles":[{"title":"3.1 信号分析的核心算法 - 傅立叶变换","level":"1.4.1","depth":2,"path":"Chapter_3/Language/cn/Docs_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_1.md","articles":[{"title":"3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT)","level":"1.4.1.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_1.md","ref":"Chapter_3/Language/cn/Docs_3_1_1.md","articles":[]},{"title":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT)","level":"1.4.1.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_2.md","ref":"Chapter_3/Language/cn/Docs_3_1_2.md","articles":[]},{"title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","level":"1.4.1.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_3.md","ref":"Chapter_3/Language/cn/Docs_3_1_3.md","articles":[]},{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_4.md","ref":"Chapter_3/Language/cn/Docs_3_1_4.md","articles":[]}]},{"title":"3.2 频率信息提取 - 常用滤波算法","level":"1.4.2","depth":2,"path":"Chapter_3/Language/cn/Docs_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_2.md","articles":[{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_2.md","ref":"Chapter_3/Language/cn/Docs_3_2_2.md","articles":[]},{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},{"title":"3.2.4 马尔滤波(Marr Filter)","level":"1.4.2.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_4.md","ref":"Chapter_3/Language/cn/Docs_3_2_4.md","articles":[]},{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_6.md","ref":"Chapter_3/Language/cn/Docs_3_2_6.md","articles":[]}]},{"title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","level":"1.4.3","depth":2,"path":"Chapter_3/Language/cn/Docs_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3.md","articles":[{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_3_1.md","articles":[]},{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3_3.md","articles":[]}]},{"title":"3.4 空域冗余控制 - 基础光流算法与色度压缩","level":"1.4.4","depth":2,"path":"Chapter_3/Language/cn/Docs_3_4.md","ref":"Chapter_3/Language/cn/Docs_3_4.md","articles":[{"title":"3.4.1 传统光流法(Classic Optical Flow Methods)","level":"1.4.4.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_1.md","ref":"Chapter_3/Language/cn/Docs_3_4_1.md","articles":[]},{"title":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical 
Flow])","level":"1.4.4.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_2.md","ref":"Chapter_3/Language/cn/Docs_3_4_2.md","articles":[]},{"title":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow])","level":"1.4.4.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_3.md","ref":"Chapter_3/Language/cn/Docs_3_4_3.md","articles":[]},{"title":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling])","level":"1.4.4.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_4.md","ref":"Chapter_3/Language/cn/Docs_3_4_4.md","articles":[]}]},{"title":"3.5 频域冗余控制 - 基础变换编码","level":"1.4.5","depth":2,"path":"Chapter_3/Language/cn/Docs_3_5.md","ref":"Chapter_3/Language/cn/Docs_3_5.md","articles":[{"title":"3.5.1 整数离散正余弦变换(DST/DCT)","level":"1.4.5.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_1.md","ref":"Chapter_3/Language/cn/Docs_3_5_1.md","articles":[]},{"title":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform])","level":"1.4.5.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_2.md","ref":"Chapter_3/Language/cn/Docs_3_5_2.md","articles":[]},{"title":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform])","level":"1.4.5.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_3.md","ref":"Chapter_3/Language/cn/Docs_3_5_3.md","articles":[]}]},{"title":"【在线展示】","level":"1.4.6","depth":2,"path":"Chapter_3/Language/cn/Playground_3.md","ref":"Chapter_3/Language/cn/Playground_3.md","articles":[]},{"title":"【参考文献】","level":"1.4.7","depth":2,"path":"Chapter_3/Language/cn/References_3.md","ref":"Chapter_3/Language/cn/References_3.md","articles":[]}]},"previous":{"title":"2.6.3 YUV 体系色彩格式","level":"1.3.6.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_3.md","ref":"Chapter_2/Language/cn/Docs_2_6_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/References_2.md","mtime":"2024-09-11T06:09:50.250Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"【参考文献】","level":"1.3.7","depth":2,"next":{"title":"三、音视频常用基础算法","level":"1.4","depth":1,"path":"Chapter_3/Language/cn/Apex_3_Introduce.md","ref":"Chapter_3/Language/cn/Apex_3_Introduce.md","articles":[{"title":"3.1 信号分析的核心算法 - 傅立叶变换","level":"1.4.1","depth":2,"path":"Chapter_3/Language/cn/Docs_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_1.md","articles":[{"title":"3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT)","level":"1.4.1.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_1.md","ref":"Chapter_3/Language/cn/Docs_3_1_1.md","articles":[]},{"title":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT)","level":"1.4.1.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_2.md","ref":"Chapter_3/Language/cn/Docs_3_1_2.md","articles":[]},{"title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","level":"1.4.1.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_3.md","ref":"Chapter_3/Language/cn/Docs_3_1_3.md","articles":[]},{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_4.md","ref":"Chapter_3/Language/cn/Docs_3_1_4.md","articles":[]}]},{"title":"3.2 频率信息提取 - 常用滤波算法","level":"1.4.2","depth":2,"path":"Chapter_3/Language/cn/Docs_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_2.md","articles":[{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_2.md","ref":"Chapter_3/Language/cn/Docs_3_2_2.md","articles":[]},{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},{"title":"3.2.4 马尔滤波(Marr Filter)","level":"1.4.2.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_4.md","ref":"Chapter_3/Language/cn/Docs_3_2_4.md","articles":[]},{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_6.md","ref":"Chapter_3/Language/cn/Docs_3_2_6.md","articles":[]}]},{"title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","level":"1.4.3","depth":2,"path":"Chapter_3/Language/cn/Docs_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3.md","articles":[{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_3_1.md","articles":[]},{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3_3.md","articles":[]}]},{"title":"3.4 空域冗余控制 - 基础光流算法与色度压缩","level":"1.4.4","depth":2,"path":"Chapter_3/Language/cn/Docs_3_4.md","ref":"Chapter_3/Language/cn/Docs_3_4.md","articles":[{"title":"3.4.1 传统光流法(Classic Optical Flow Methods)","level":"1.4.4.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_1.md","ref":"Chapter_3/Language/cn/Docs_3_4_1.md","articles":[]},{"title":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical 
Flow])","level":"1.4.4.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_2.md","ref":"Chapter_3/Language/cn/Docs_3_4_2.md","articles":[]},{"title":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow])","level":"1.4.4.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_3.md","ref":"Chapter_3/Language/cn/Docs_3_4_3.md","articles":[]},{"title":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling])","level":"1.4.4.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_4.md","ref":"Chapter_3/Language/cn/Docs_3_4_4.md","articles":[]}]},{"title":"3.5 频域冗余控制 - 基础变换编码","level":"1.4.5","depth":2,"path":"Chapter_3/Language/cn/Docs_3_5.md","ref":"Chapter_3/Language/cn/Docs_3_5.md","articles":[{"title":"3.5.1 整数离散正余弦变换(DST/DCT)","level":"1.4.5.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_1.md","ref":"Chapter_3/Language/cn/Docs_3_5_1.md","articles":[]},{"title":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform])","level":"1.4.5.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_2.md","ref":"Chapter_3/Language/cn/Docs_3_5_2.md","articles":[]},{"title":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform])","level":"1.4.5.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_3.md","ref":"Chapter_3/Language/cn/Docs_3_5_3.md","articles":[]}]},{"title":"【在线展示】","level":"1.4.6","depth":2,"path":"Chapter_3/Language/cn/Playground_3.md","ref":"Chapter_3/Language/cn/Playground_3.md","articles":[]},{"title":"【参考文献】","level":"1.4.7","depth":2,"path":"Chapter_3/Language/cn/References_3.md","ref":"Chapter_3/Language/cn/References_3.md","articles":[]}]},"previous":{"title":"2.6.3 YUV 体系色彩格式","level":"1.3.6.3","depth":3,"path":"Chapter_2/Language/cn/Docs_2_6_3.md","ref":"Chapter_2/Language/cn/Docs_2_6_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_2/Language/cn/References_2.md","mtime":"2024-09-12T04:11:10.700Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Apex_3_Introduce.html b/Chapter_3/Language/cn/Apex_3_Introduce.html
index acc6955..8d52758 100644
--- a/Chapter_3/Language/cn/Apex_3_Introduce.html
+++ b/Chapter_3/Language/cn/Apex_3_Introduce.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2218,7 +2205,7 @@ 目录
【参考文献】
@@ -2262,7 +2249,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"三、音视频常用基础算法","level":"1.4","depth":1,"next":{"title":"3.1 信号分析的核心算法 - 傅立叶变换","level":"1.4.1","depth":2,"path":"Chapter_3/Language/cn/Docs_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_1.md","articles":[{"title":"3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT)","level":"1.4.1.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_1.md","ref":"Chapter_3/Language/cn/Docs_3_1_1.md","articles":[]},{"title":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT)","level":"1.4.1.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_2.md","ref":"Chapter_3/Language/cn/Docs_3_1_2.md","articles":[]},{"title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","level":"1.4.1.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_3.md","ref":"Chapter_3/Language/cn/Docs_3_1_3.md","articles":[]},{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_4.md","ref":"Chapter_3/Language/cn/Docs_3_1_4.md","articles":[]}]},"previous":{"title":"【参考文献】","level":"1.3.7","depth":2,"path":"Chapter_2/Language/cn/References_2.md","ref":"Chapter_2/Language/cn/References_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; 
min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Apex_3_Introduce.md","mtime":"2024-09-11T06:09:50.270Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"三、音视频常用基础算法","level":"1.4","depth":1,"next":{"title":"3.1 信号分析的核心算法 - 傅立叶变换","level":"1.4.1","depth":2,"path":"Chapter_3/Language/cn/Docs_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_1.md","articles":[{"title":"3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT)","level":"1.4.1.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_1.md","ref":"Chapter_3/Language/cn/Docs_3_1_1.md","articles":[]},{"title":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT)","level":"1.4.1.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_2.md","ref":"Chapter_3/Language/cn/Docs_3_1_2.md","articles":[]},{"title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","level":"1.4.1.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_3.md","ref":"Chapter_3/Language/cn/Docs_3_1_3.md","articles":[]},{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_4.md","ref":"Chapter_3/Language/cn/Docs_3_1_4.md","articles":[]}]},"previous":{"title":"【参考文献】","level":"1.3.7","depth":2,"path":"Chapter_2/Language/cn/References_2.md","ref":"Chapter_2/Language/cn/References_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; 
min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Apex_3_Introduce.md","mtime":"2024-09-12T04:11:10.720Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_1.html b/Chapter_3/Language/cn/Docs_3_1.html
index a9145d8..84e7bdd 100644
--- a/Chapter_3/Language/cn/Docs_3_1.html
+++ b/Chapter_3/Language/cn/Docs_3_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2189,7 +2176,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2589,7 +2576,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2485,7 +2472,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2779,7 +2766,7 @@ FFTW: Fastest Fourier Transform in the West. by Matteo Frigo and Steven G. Johnson. at MIT.
@@ -2823,7 +2810,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","level":"1.4.1.3","depth":3,"next":{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_4.md","ref":"Chapter_3/Language/cn/Docs_3_1_4.md","articles":[]},"previous":{"title":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT)","level":"1.4.1.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_2.md","ref":"Chapter_3/Language/cn/Docs_3_1_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_1_3.md","mtime":"2024-09-11T06:09:50.280Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","level":"1.4.1.3","depth":3,"next":{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_4.md","ref":"Chapter_3/Language/cn/Docs_3_1_4.md","articles":[]},"previous":{"title":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT)","level":"1.4.1.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_2.md","ref":"Chapter_3/Language/cn/Docs_3_1_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_1_3.md","mtime":"2024-09-12T04:11:10.760Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_1_4.html b/Chapter_3/Language/cn/Docs_3_1_4.html
index c7cf227..2001ce1 100644
--- a/Chapter_3/Language/cn/Docs_3_1_4.html
+++ b/Chapter_3/Language/cn/Docs_3_1_4.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2171,7 +2158,7 @@
3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)
In 2011, [12]. [IEEE reprint authorization pending]
@@ -2215,7 +2202,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"next":{"title":"3.2 频率信息提取 - 常用滤波算法","level":"1.4.2","depth":2,"path":"Chapter_3/Language/cn/Docs_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_2.md","articles":[{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_2.md","ref":"Chapter_3/Language/cn/Docs_3_2_2.md","articles":[]},{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},{"title":"3.2.4 马尔滤波(Marr Filter)","level":"1.4.2.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_4.md","ref":"Chapter_3/Language/cn/Docs_3_2_4.md","articles":[]},{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_6.md","ref":"Chapter_3/Language/cn/Docs_3_2_6.md","articles":[]}]},"previous":{"title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","level":"1.4.1.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_3.md","ref":"Chapter_3/Language/cn/Docs_3_1_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_1_4.md","mtime":"2024-09-11T06:09:50.280Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"next":{"title":"3.2 频率信息提取 - 常用滤波算法","level":"1.4.2","depth":2,"path":"Chapter_3/Language/cn/Docs_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_2.md","articles":[{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_2.md","ref":"Chapter_3/Language/cn/Docs_3_2_2.md","articles":[]},{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},{"title":"3.2.4 马尔滤波(Marr Filter)","level":"1.4.2.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_4.md","ref":"Chapter_3/Language/cn/Docs_3_2_4.md","articles":[]},{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_6.md","ref":"Chapter_3/Language/cn/Docs_3_2_6.md","articles":[]}]},"previous":{"title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","level":"1.4.1.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_3.md","ref":"Chapter_3/Language/cn/Docs_3_1_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_1_4.md","mtime":"2024-09-12T04:11:10.760Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_2.html b/Chapter_3/Language/cn/Docs_3_2.html
index deb314e..a489a6f 100644
--- a/Chapter_3/Language/cn/Docs_3_2.html
+++ b/Chapter_3/Language/cn/Docs_3_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2180,7 +2167,7 @@ 在线演示
@@ -2224,7 +2211,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.2 频率信息提取 - 常用滤波算法","level":"1.4.2","depth":2,"next":{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},"previous":{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_4.md","ref":"Chapter_3/Language/cn/Docs_3_1_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2.md","mtime":"2024-09-11T06:09:50.280Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.2 频率信息提取 - 常用滤波算法","level":"1.4.2","depth":2,"next":{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},"previous":{"title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","level":"1.4.1.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_1_4.md","ref":"Chapter_3/Language/cn/Docs_3_1_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2.md","mtime":"2024-09-12T04:11:10.760Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_2_1.html b/Chapter_3/Language/cn/Docs_3_2_1.html
index 538a8ca..c977190 100644
--- a/Chapter_3/Language/cn/Docs_3_2_1.html
+++ b/Chapter_3/Language/cn/Docs_3_2_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2491,7 +2478,7 @@ Since the problem stems mainly from the isotropy of the Gaussian filter, introducing a filter term that distinguishes high- from low-frequency content may be enough to meet the requirement. This approach is known as Edge Preserving.
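As a small aside on the isotropy mentioned above (a minimal NumPy sketch of my own, not code from the book): an isotropic Gaussian kernel weights neighbours purely by distance, so it cannot tell an edge apart from flat texture.

```python
import numpy as np

def gaussian_kernel(radius: int, sigma: float) -> np.ndarray:
    """Isotropic 2D Gaussian kernel: the weight depends only on the
    distance from the centre, never on the image content itself."""
    ax = np.arange(-radius, radius + 1)
    xx, yy = np.meshgrid(ax, ax)
    k = np.exp(-(xx ** 2 + yy ** 2) / (2.0 * sigma ** 2))
    return k / k.sum()

k = gaussian_kernel(2, 1.0)
# Offsets (0, 1) and (1, 0) receive identical weight: no notion of edges.
assert np.isclose(k[2, 3], k[3, 2])
```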
@@ -2535,7 +2522,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"next":{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_2.md","ref":"Chapter_3/Language/cn/Docs_3_2_2.md","articles":[]},"previous":{"title":"3.2 频率信息提取 - 常用滤波算法","level":"1.4.2","depth":2,"path":"Chapter_3/Language/cn/Docs_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_2.md","articles":[{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_2.md","ref":"Chapter_3/Language/cn/Docs_3_2_2.md","articles":[]},{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},{"title":"3.2.4 马尔滤波(Marr Filter)","level":"1.4.2.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_4.md","ref":"Chapter_3/Language/cn/Docs_3_2_4.md","articles":[]},{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_6.md","ref":"Chapter_3/Language/cn/Docs_3_2_6.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2_1.md","mtime":"2024-09-11T06:09:50.280Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"next":{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_2.md","ref":"Chapter_3/Language/cn/Docs_3_2_2.md","articles":[]},"previous":{"title":"3.2 频率信息提取 - 常用滤波算法","level":"1.4.2","depth":2,"path":"Chapter_3/Language/cn/Docs_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_2.md","articles":[{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_2.md","ref":"Chapter_3/Language/cn/Docs_3_2_2.md","articles":[]},{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},{"title":"3.2.4 马尔滤波(Marr Filter)","level":"1.4.2.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_4.md","ref":"Chapter_3/Language/cn/Docs_3_2_4.md","articles":[]},{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_6.md","ref":"Chapter_3/Language/cn/Docs_3_2_6.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2_1.md","mtime":"2024-09-12T04:11:10.760Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_2_2.html b/Chapter_3/Language/cn/Docs_3_2_2.html
index a514775..cfa6ca6 100644
--- a/Chapter_3/Language/cn/Docs_3_2_2.html
+++ b/Chapter_3/Language/cn/Docs_3_2_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2354,7 +2341,7 @@ To handle this problem, we relax the constraint on computing power somewhat. One feasible approach is to start from the standard Gaussian filter and, by letting several directional gradients act together, rebuild a filtering unit that is no longer isotropic (Not Isotropic) (gradient differences that do not cover every direction still fall short of true anisotropy), so that pixel motion within the kernel and the way frequency waves propagate are preserved and brought into the kernel. This lets the weights the kernel pixels contribute to the mean be distributed more sensibly, which alleviates the issue.
This multi-gradient approach strengthens the algorithm's treatment of image edges, preserving the edges while enhancing detail; it is therefore also known as Edge Sharpening.
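The page this hunk belongs to covers the bilateral filter; as one concrete way to make the kernel respond to local structure, here is a minimal sketch of the standard bilateral weighting at a single pixel (my own illustration, with assumed parameter names sigma_s / sigma_r, not the book's code): the spatial Gaussian is multiplied by a range Gaussian on intensity difference, so pixels on the far side of an edge contribute almost nothing.

```python
import numpy as np

def bilateral_at(img: np.ndarray, y: int, x: int,
                 radius: int = 2, sigma_s: float = 1.5, sigma_r: float = 0.1) -> float:
    """Bilateral-filtered value of img[y, x] (img is a float grayscale array).

    Each neighbour's weight = spatial Gaussian (distance from the centre)
    * range Gaussian (intensity difference), so the kernel adapts to edges."""
    h, w = img.shape
    acc = norm = 0.0
    for dy in range(-radius, radius + 1):
        for dx in range(-radius, radius + 1):
            yy = min(max(y + dy, 0), h - 1)   # clamp at the image border
            xx = min(max(x + dx, 0), w - 1)
            w_s = np.exp(-(dx * dx + dy * dy) / (2 * sigma_s ** 2))
            w_r = np.exp(-(img[yy, xx] - img[y, x]) ** 2 / (2 * sigma_r ** 2))
            acc += w_s * w_r * img[yy, xx]
            norm += w_s * w_r
    return acc / norm
```

The pure-Python loop is only for clarity; a real implementation would vectorize the neighbourhood or call a library routine.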
@@ -2398,7 +2385,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"next":{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},"previous":{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2_2.md","mtime":"2024-09-11T06:09:50.280Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.2.2 双边滤波(Bilateral Filter)","level":"1.4.2.2","depth":3,"next":{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},"previous":{"title":"3.2.1 高斯滤波(Gauss Filter)","level":"1.4.2.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_1.md","ref":"Chapter_3/Language/cn/Docs_3_2_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2_2.md","mtime":"2024-09-12T04:11:10.770Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_2_3.html b/Chapter_3/Language/cn/Docs_3_2_3.html
index 84b1e4d..258810b 100644
--- a/Chapter_3/Language/cn/Docs_3_2_3.html
+++ b/Chapter_3/Language/cn/Docs_3_2_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2348,7 +2335,7 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2423,7 +2410,7 @@ Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by Gitbook
@@ -2467,7 +2454,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.2.4 马尔滤波(Marr Filter)","level":"1.4.2.4","depth":3,"next":{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},"previous":{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2_4.md","mtime":"2024-09-11T06:09:50.290Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.2.4 马尔滤波(Marr Filter)","level":"1.4.2.4","depth":3,"next":{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},"previous":{"title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","level":"1.4.2.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_3.md","ref":"Chapter_3/Language/cn/Docs_3_2_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2_4.md","mtime":"2024-09-12T04:11:10.770Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_2_5.html b/Chapter_3/Language/cn/Docs_3_2_5.html
index 0f6e55b..754d7fb 100644
--- a/Chapter_3/Language/cn/Docs_3_2_5.html
+++ b/Chapter_3/Language/cn/Docs_3_2_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2309,7 +2296,7 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2171,7 +2158,7 @@
3.2.6 各向异性扩散(Anisotropic Diffusion)
【待补充】
@@ -2215,7 +2202,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"next":{"title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","level":"1.4.3","depth":2,"path":"Chapter_3/Language/cn/Docs_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3.md","articles":[{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_3_1.md","articles":[]},{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3_3.md","articles":[]}]},"previous":{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2_6.md","mtime":"2024-09-11T06:09:50.290Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"next":{"title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","level":"1.4.3","depth":2,"path":"Chapter_3/Language/cn/Docs_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3.md","articles":[{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_3_1.md","articles":[]},{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3_3.md","articles":[]}]},"previous":{"title":"3.2.5 索贝尔滤波(Sobel Filter)","level":"1.4.2.5","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_5.md","ref":"Chapter_3/Language/cn/Docs_3_2_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_2_6.md","mtime":"2024-09-12T04:11:10.770Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_3.html b/Chapter_3/Language/cn/Docs_3_3.html
index d4d40bd..e992283 100644
--- a/Chapter_3/Language/cn/Docs_3_3.html
+++ b/Chapter_3/Language/cn/Docs_3_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2177,7 +2164,7 @@ 在线演示
@@ -2221,7 +2208,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","level":"1.4.3","depth":2,"next":{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_3_1.md","articles":[]},"previous":{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_6.md","ref":"Chapter_3/Language/cn/Docs_3_2_6.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_3.md","mtime":"2024-09-11T06:09:50.290Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","level":"1.4.3","depth":2,"next":{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_3_1.md","articles":[]},"previous":{"title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","level":"1.4.2.6","depth":3,"path":"Chapter_3/Language/cn/Docs_3_2_6.md","ref":"Chapter_3/Language/cn/Docs_3_2_6.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_3.md","mtime":"2024-09-12T04:11:10.770Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_3_1.html b/Chapter_3/Language/cn/Docs_3_3_1.html
index 08fad53..fa170be 100644
--- a/Chapter_3/Language/cn/Docs_3_3_1.html
+++ b/Chapter_3/Language/cn/Docs_3_3_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2891,7 +2878,7 @@ HO
HOG frames (HOG Frame) are mostly used as feature-extracted, preprocessed input that is fed into AI computer-vision models such as object detectors. Once the model has produced its recognition results, a trained tracking model or a classical tracking algorithm (such as the Kernelized Correlation Filter (KCF) [18] or the MOSSE algorithm) is used to recover the temporal association of moving objects across the video stream.
So, what method is used to judge whether a detection result is accurate, that is, what is the object-detection model's Loss Function?
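The following subsection answers this with IoU and GIoU; as a quick preview, here is a minimal IoU sketch for two axis-aligned boxes (my own illustration, not the book's code):

```python
def iou(box_a, box_b):
    """Intersection over Union of two boxes given as (x1, y1, x2, y2)."""
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0

# Two 2x2 boxes overlapping in a 1x1 square: IoU = 1 / 7 ≈ 0.143
print(iou((0, 0, 2, 2), (1, 1, 3, 3)))
```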
@@ -2935,7 +2922,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"next":{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},"previous":{"title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","level":"1.4.3","depth":2,"path":"Chapter_3/Language/cn/Docs_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3.md","articles":[{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_3_1.md","articles":[]},{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 
30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_3_1.md","mtime":"2024-09-11T06:09:50.290Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"next":{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},"previous":{"title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","level":"1.4.3","depth":2,"path":"Chapter_3/Language/cn/Docs_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3.md","articles":[{"title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","level":"1.4.3.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_1.md","ref":"Chapter_3/Language/cn/Docs_3_3_1.md","articles":[]},{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 
30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_3_1.md","mtime":"2024-09-12T04:11:10.780Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_3_2.html b/Chapter_3/Language/cn/Docs_3_3_2.html
index bf529f3..893317d 100644
--- a/Chapter_3/Language/cn/Docs_3_3_2.html
+++ b/Chapter_3/Language/cn/Docs_3_3_2.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2481,7 +2468,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2206,7 +2193,7 @@ $W_{Params} = \begin{cases} Size = (W,\ H) = \left(\left\lfloor \frac{Img\_W}{2} \right\rfloor + 1,\ \left\lfloor \frac{Img\_H}{2} \right\rfloor + 1\right) \\ Step = (u,\ v) = \left(\frac{Img\_W}{lv \cdot W},\ \frac{Img\_H}{lv \cdot H}\right) \\ Level = (lv),\quad lv \in [1,\ 3] \end{cases}$
Substituting the image size yields the configuration, which quickly produces a closure that fully contains the object being detected; this makes it easier for the model to derive the object's actual region and, from an engineering standpoint, shrinks the model's input.
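For illustration only, here is a minimal Python sketch of that configuration; the helper name `window_params` is hypothetical, and it assumes the $W_{Params}$ formula above is read literally, with $Step$ interpreted as the sliding stride in pixels:

```python
from math import floor

def window_params(img_w: int, img_h: int, lv: int):
    """Derive (Size, Step, Level) from the image size, reading W_Params literally.

    Assumption: Step is interpreted as the pixel stride of the sliding window.
    """
    assert 1 <= lv <= 3, "Level lv is restricted to [1, 3] in the text"
    win_w = floor(img_w / 2) + 1          # Size = (W, H)
    win_h = floor(img_h / 2) + 1
    step_u = img_w / (lv * win_w)         # Step = (u, v)
    step_v = img_h / (lv * win_h)
    return (win_w, win_h), (step_u, step_v), lv

if __name__ == "__main__":
    # Example: a 640x480 frame at level 2.
    size, step, level = window_params(640, 480, lv=2)
    print(size, step, level)              # ((321, 241), (~0.997, ~0.996), 2)
```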
@@ -2250,7 +2237,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"next":{"title":"3.4 空域冗余控制 - 基础光流算法与色度压缩","level":"1.4.4","depth":2,"path":"Chapter_3/Language/cn/Docs_3_4.md","ref":"Chapter_3/Language/cn/Docs_3_4.md","articles":[{"title":"3.4.1 传统光流法(Classic Optical Flow Methods)","level":"1.4.4.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_1.md","ref":"Chapter_3/Language/cn/Docs_3_4_1.md","articles":[]},{"title":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow])","level":"1.4.4.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_2.md","ref":"Chapter_3/Language/cn/Docs_3_4_2.md","articles":[]},{"title":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow])","level":"1.4.4.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_3.md","ref":"Chapter_3/Language/cn/Docs_3_4_3.md","articles":[]},{"title":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling])","level":"1.4.4.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_4.md","ref":"Chapter_3/Language/cn/Docs_3_4_4.md","articles":[]}]},"previous":{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_3_3.md","mtime":"2024-09-11T06:09:50.300Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"next":{"title":"3.4 空域冗余控制 - 基础光流算法与色度压缩","level":"1.4.4","depth":2,"path":"Chapter_3/Language/cn/Docs_3_4.md","ref":"Chapter_3/Language/cn/Docs_3_4.md","articles":[{"title":"3.4.1 传统光流法(Classic Optical Flow Methods)","level":"1.4.4.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_1.md","ref":"Chapter_3/Language/cn/Docs_3_4_1.md","articles":[]},{"title":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow])","level":"1.4.4.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_2.md","ref":"Chapter_3/Language/cn/Docs_3_4_2.md","articles":[]},{"title":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow])","level":"1.4.4.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_3.md","ref":"Chapter_3/Language/cn/Docs_3_4_3.md","articles":[]},{"title":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling])","level":"1.4.4.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_4.md","ref":"Chapter_3/Language/cn/Docs_3_4_4.md","articles":[]}]},"previous":{"title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","level":"1.4.3.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_2.md","ref":"Chapter_3/Language/cn/Docs_3_3_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_3_3.md","mtime":"2024-09-12T04:11:10.780Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_4.html b/Chapter_3/Language/cn/Docs_3_4.html
index 0b72a1f..12f686b 100644
--- a/Chapter_3/Language/cn/Docs_3_4.html
+++ b/Chapter_3/Language/cn/Docs_3_4.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2178,7 +2165,7 @@
@@ -2222,7 +2209,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.4 空域冗余控制 - 基础光流算法与色度压缩","level":"1.4.4","depth":2,"next":{"title":"3.4.1 传统光流法(Classic Optical Flow Methods)","level":"1.4.4.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_1.md","ref":"Chapter_3/Language/cn/Docs_3_4_1.md","articles":[]},"previous":{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_4.md","mtime":"2024-09-11T06:09:50.300Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.4 空域冗余控制 - 基础光流算法与色度压缩","level":"1.4.4","depth":2,"next":{"title":"3.4.1 传统光流法(Classic Optical Flow Methods)","level":"1.4.4.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_1.md","ref":"Chapter_3/Language/cn/Docs_3_4_1.md","articles":[]},"previous":{"title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","level":"1.4.3.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_3_3.md","ref":"Chapter_3/Language/cn/Docs_3_3_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_4.md","mtime":"2024-09-12T04:11:10.780Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_4_1.html b/Chapter_3/Language/cn/Docs_3_4_1.html
index ac79150..9949d68 100644
--- a/Chapter_3/Language/cn/Docs_3_4_1.html
+++ b/Chapter_3/Language/cn/Docs_3_4_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2462,7 +2449,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2309,7 +2296,7 @@ solving $(V_x,\ V_y)$. So in engineering practice, a sub-block smaller than the current partition is used as the convolution kernel, and an approximate solution is computed quickly. Alternatively, when the precision requirements are met, it can be handled with a model, following an idea similar to the data preprocessing used in optical-flow frame interpolation. Since this involves quite a few engineering tricks from the specification, we leave the standardized BDOF details to the in-depth coverage of the H.266 specification.
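As a rough sketch of the per-sub-block approximation mentioned above (not the standardized H.266/VVC BDOF derivation), a single $(V_x, V_y)$ can be estimated for one sub-block by least squares on the optical-flow constraint; the function name `subblock_flow` and the use of NumPy here are illustrative assumptions:

```python
import numpy as np

def subblock_flow(I0: np.ndarray, I1: np.ndarray) -> tuple[float, float]:
    """Least-squares estimate of a single (Vx, Vy) for one sub-block.

    Illustrative approximation only (solve per sub-block instead of per full
    partition); this is NOT the standardized H.266/VVC BDOF derivation.
    """
    # Spatial gradients (averaged over the two predictions) and temporal difference.
    gx = 0.5 * (np.gradient(I0, axis=1) + np.gradient(I1, axis=1))
    gy = 0.5 * (np.gradient(I0, axis=0) + np.gradient(I1, axis=0))
    gt = I1 - I0

    # Normal equations of the constraint gx*Vx + gy*Vy + gt = 0, summed over the block.
    A = np.array([[np.sum(gx * gx), np.sum(gx * gy)],
                  [np.sum(gx * gy), np.sum(gy * gy)]])
    b = -np.array([np.sum(gx * gt), np.sum(gy * gt)])
    vx, vy = np.linalg.lstsq(A, b, rcond=None)[0]
    return float(vx), float(vy)
```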
@@ -2353,7 +2340,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow])","level":"1.4.4.2","depth":3,"next":{"title":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow])","level":"1.4.4.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_3.md","ref":"Chapter_3/Language/cn/Docs_3_4_3.md","articles":[]},"previous":{"title":"3.4.1 传统光流法(Classic Optical Flow Methods)","level":"1.4.4.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_1.md","ref":"Chapter_3/Language/cn/Docs_3_4_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_4_2.md","mtime":"2024-09-11T06:09:50.300Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow])","level":"1.4.4.2","depth":3,"next":{"title":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow])","level":"1.4.4.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_3.md","ref":"Chapter_3/Language/cn/Docs_3_4_3.md","articles":[]},"previous":{"title":"3.4.1 传统光流法(Classic Optical Flow Methods)","level":"1.4.4.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_1.md","ref":"Chapter_3/Language/cn/Docs_3_4_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_4_2.md","mtime":"2024-09-12T04:11:10.790Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_4_3.html b/Chapter_3/Language/cn/Docs_3_4_3.html
index cb44407..669257a 100644
--- a/Chapter_3/Language/cn/Docs_3_4_3.html
+++ b/Chapter_3/Language/cn/Docs_3_4_3.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2246,7 +2233,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2349,7 +2336,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2174,7 +2161,7 @@
@@ -2218,7 +2205,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.5 频域冗余控制 - 基础变换编码","level":"1.4.5","depth":2,"next":{"title":"3.5.1 整数离散正余弦变换(DST/DCT)","level":"1.4.5.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_1.md","ref":"Chapter_3/Language/cn/Docs_3_5_1.md","articles":[]},"previous":{"title":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling])","level":"1.4.4.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_4.md","ref":"Chapter_3/Language/cn/Docs_3_4_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_5.md","mtime":"2024-09-11T06:09:50.310Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.5 频域冗余控制 - 基础变换编码","level":"1.4.5","depth":2,"next":{"title":"3.5.1 整数离散正余弦变换(DST/DCT)","level":"1.4.5.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_1.md","ref":"Chapter_3/Language/cn/Docs_3_5_1.md","articles":[]},"previous":{"title":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling])","level":"1.4.4.4","depth":3,"path":"Chapter_3/Language/cn/Docs_3_4_4.md","ref":"Chapter_3/Language/cn/Docs_3_4_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_5.md","mtime":"2024-09-12T04:11:10.790Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_5_1.html b/Chapter_3/Language/cn/Docs_3_5_1.html
index 82adb2f..58c2d95 100644
--- a/Chapter_3/Language/cn/Docs_3_5_1.html
+++ b/Chapter_3/Language/cn/Docs_3_5_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2562,7 +2549,7 @@ it suffices to compute $\hat{K} = D \cdot K$; and since $K$ does not change while the window size (i.e., the family of basis functions) stays fixed, it can be regarded as a constant matrix.
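A minimal sketch (not taken from the book) of why the kernel behaves as a constant matrix: with the window size fixed, the basis matrix is built once and cached, and every block transform reduces to matrix multiplication. The names `dct_basis` and `transform_block` are hypothetical, and an orthonormal DCT-II basis is assumed:

```python
import numpy as np
from functools import lru_cache

@lru_cache(maxsize=None)
def dct_basis(n: int) -> np.ndarray:
    """Orthonormal DCT-II basis for a fixed window size n.

    Because the window size (hence the basis family) is fixed, this matrix is
    effectively a constant: computed once, then reused for every block.
    """
    k = np.arange(n).reshape(-1, 1)
    i = np.arange(n).reshape(1, -1)
    basis = np.cos(np.pi * (2 * i + 1) * k / (2 * n))
    basis[0, :] *= 1.0 / np.sqrt(2.0)
    return basis * np.sqrt(2.0 / n)

def transform_block(block: np.ndarray) -> np.ndarray:
    """2-D DCT of a square block via two multiplies with the constant basis."""
    K = dct_basis(block.shape[0])
    return K @ block @ K.T
```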
@@ -2606,7 +2593,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"3.5.1 整数离散正余弦变换(DST/DCT)","level":"1.4.5.1","depth":3,"next":{"title":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform])","level":"1.4.5.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_2.md","ref":"Chapter_3/Language/cn/Docs_3_5_2.md","articles":[]},"previous":{"title":"3.5 频域冗余控制 - 基础变换编码","level":"1.4.5","depth":2,"path":"Chapter_3/Language/cn/Docs_3_5.md","ref":"Chapter_3/Language/cn/Docs_3_5.md","articles":[{"title":"3.5.1 整数离散正余弦变换(DST/DCT)","level":"1.4.5.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_1.md","ref":"Chapter_3/Language/cn/Docs_3_5_1.md","articles":[]},{"title":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform])","level":"1.4.5.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_2.md","ref":"Chapter_3/Language/cn/Docs_3_5_2.md","articles":[]},{"title":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform])","level":"1.4.5.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_3.md","ref":"Chapter_3/Language/cn/Docs_3_5_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_5_1.md","mtime":"2024-09-11T06:09:50.310Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"3.5.1 整数离散正余弦变换(DST/DCT)","level":"1.4.5.1","depth":3,"next":{"title":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform])","level":"1.4.5.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_2.md","ref":"Chapter_3/Language/cn/Docs_3_5_2.md","articles":[]},"previous":{"title":"3.5 频域冗余控制 - 基础变换编码","level":"1.4.5","depth":2,"path":"Chapter_3/Language/cn/Docs_3_5.md","ref":"Chapter_3/Language/cn/Docs_3_5.md","articles":[{"title":"3.5.1 整数离散正余弦变换(DST/DCT)","level":"1.4.5.1","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_1.md","ref":"Chapter_3/Language/cn/Docs_3_5_1.md","articles":[]},{"title":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform])","level":"1.4.5.2","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_2.md","ref":"Chapter_3/Language/cn/Docs_3_5_2.md","articles":[]},{"title":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform])","level":"1.4.5.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_3.md","ref":"Chapter_3/Language/cn/Docs_3_5_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Docs_3_5_1.md","mtime":"2024-09-12T04:11:10.790Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/Docs_3_5_2.html b/Chapter_3/Language/cn/Docs_3_5_2.html
index 998b36b..d53aa6b 100644
--- a/Chapter_3/Language/cn/Docs_3_5_2.html
+++ b/Chapter_3/Language/cn/Docs_3_5_2.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2360,7 +2347,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2475,7 +2462,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2173,7 +2160,7 @@
@@ -2217,7 +2204,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"【在线展示】","level":"1.4.6","depth":2,"next":{"title":"【参考文献】","level":"1.4.7","depth":2,"path":"Chapter_3/Language/cn/References_3.md","ref":"Chapter_3/Language/cn/References_3.md","articles":[]},"previous":{"title":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform])","level":"1.4.5.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_3.md","ref":"Chapter_3/Language/cn/Docs_3_5_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Playground_3.md","mtime":"2024-09-11T06:09:50.310Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"【在线展示】","level":"1.4.6","depth":2,"next":{"title":"【参考文献】","level":"1.4.7","depth":2,"path":"Chapter_3/Language/cn/References_3.md","ref":"Chapter_3/Language/cn/References_3.md","articles":[]},"previous":{"title":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform])","level":"1.4.5.3","depth":3,"path":"Chapter_3/Language/cn/Docs_3_5_3.md","ref":"Chapter_3/Language/cn/Docs_3_5_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/Playground_3.md","mtime":"2024-09-12T04:11:10.800Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_3/Language/cn/References_3.html b/Chapter_3/Language/cn/References_3.html
index 1e850d9..4d7029f 100644
--- a/Chapter_3/Language/cn/References_3.html
+++ b/Chapter_3/Language/cn/References_3.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2205,7 +2192,7 @@
@@ -2249,7 +2236,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"【参考文献】","level":"1.4.7","depth":2,"next":{"title":"四、音视频机器学习基础","level":"1.5","depth":1,"path":"Chapter_4/Language/cn/Apex_4_Introduce.md","ref":"Chapter_4/Language/cn/Apex_4_Introduce.md","articles":[{"title":"4.1 发展概览","level":"1.5.1","depth":2,"path":"Chapter_4/Language/cn/Docs_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_1.md","articles":[]},{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"path":"Chapter_4/Language/cn/Docs_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_2.md","articles":[{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]}]},{"title":"4.3 经典激活函数(Classic Activation Function)","level":"1.5.3","depth":2,"path":"Chapter_4/Language/cn/Docs_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_3.md","articles":[{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]}]},{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"path":"Chapter_4/Language/cn/Docs_4_4.md","ref":"Chapter_4/Language/cn/Docs_4_4.md","articles":[{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]}]},{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"path":"Chapter_4/Language/cn/Docs_4_5.md","ref":"Chapter_4/Language/cn/Docs_4_5.md","articles":[{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared 
Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]}]},{"title":"4.6 常用最优化算法(Optimizer Operator)","level":"1.5.6","depth":2,"path":"Chapter_4/Language/cn/Docs_4_6.md","ref":"Chapter_4/Language/cn/Docs_4_6.md","articles":[{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_1.md","ref":"Chapter_4/Language/cn/Docs_4_6_1.md","articles":[]},{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]}]},{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"path":"Chapter_4/Language/cn/Docs_4_7.md","ref":"Chapter_4/Language/cn/Docs_4_7.md","articles":[{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},{"title":"4.7.3 
自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]}]},{"title":"【参考文献】","level":"1.5.8","depth":2,"path":"Chapter_4/Language/cn/References_4.md","ref":"Chapter_4/Language/cn/References_4.md","articles":[]}]},"previous":{"title":"【在线展示】","level":"1.4.6","depth":2,"path":"Chapter_3/Language/cn/Playground_3.md","ref":"Chapter_3/Language/cn/Playground_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/References_3.md","mtime":"2024-09-11T06:09:50.320Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"【参考文献】","level":"1.4.7","depth":2,"next":{"title":"四、音视频机器学习基础","level":"1.5","depth":1,"path":"Chapter_4/Language/cn/Apex_4_Introduce.md","ref":"Chapter_4/Language/cn/Apex_4_Introduce.md","articles":[{"title":"4.1 发展概览","level":"1.5.1","depth":2,"path":"Chapter_4/Language/cn/Docs_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_1.md","articles":[]},{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"path":"Chapter_4/Language/cn/Docs_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_2.md","articles":[{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]}]},{"title":"4.3 经典激活函数(Classic Activation Function)","level":"1.5.3","depth":2,"path":"Chapter_4/Language/cn/Docs_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_3.md","articles":[{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]}]},{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"path":"Chapter_4/Language/cn/Docs_4_4.md","ref":"Chapter_4/Language/cn/Docs_4_4.md","articles":[{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]}]},{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"path":"Chapter_4/Language/cn/Docs_4_5.md","ref":"Chapter_4/Language/cn/Docs_4_5.md","articles":[{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared 
Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]}]},{"title":"4.6 常用最优化算法(Optimizer Operator)","level":"1.5.6","depth":2,"path":"Chapter_4/Language/cn/Docs_4_6.md","ref":"Chapter_4/Language/cn/Docs_4_6.md","articles":[{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_1.md","ref":"Chapter_4/Language/cn/Docs_4_6_1.md","articles":[]},{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]}]},{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"path":"Chapter_4/Language/cn/Docs_4_7.md","ref":"Chapter_4/Language/cn/Docs_4_7.md","articles":[{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},{"title":"4.7.3 
自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]}]},{"title":"【参考文献】","level":"1.5.8","depth":2,"path":"Chapter_4/Language/cn/References_4.md","ref":"Chapter_4/Language/cn/References_4.md","articles":[]}]},"previous":{"title":"【在线展示】","level":"1.4.6","depth":2,"path":"Chapter_3/Language/cn/Playground_3.md","ref":"Chapter_3/Language/cn/Playground_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_3/Language/cn/References_3.md","mtime":"2024-09-12T04:11:10.800Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Apex_4_Introduce.html b/Chapter_4/Language/cn/Apex_4_Introduce.html
index f3fa8c2..1ce6e49 100644
--- a/Chapter_4/Language/cn/Apex_4_Introduce.html
+++ b/Chapter_4/Language/cn/Apex_4_Introduce.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2236,7 +2223,7 @@ 目录
【参考文献】
@@ -2280,7 +2267,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"四、音视频机器学习基础","level":"1.5","depth":1,"next":{"title":"4.1 发展概览","level":"1.5.1","depth":2,"path":"Chapter_4/Language/cn/Docs_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_1.md","articles":[]},"previous":{"title":"【参考文献】","level":"1.4.7","depth":2,"path":"Chapter_3/Language/cn/References_3.md","ref":"Chapter_3/Language/cn/References_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Apex_4_Introduce.md","mtime":"2024-09-11T06:09:50.460Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"四、音视频机器学习基础","level":"1.5","depth":1,"next":{"title":"4.1 发展概览","level":"1.5.1","depth":2,"path":"Chapter_4/Language/cn/Docs_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_1.md","articles":[]},"previous":{"title":"【参考文献】","level":"1.4.7","depth":2,"path":"Chapter_3/Language/cn/References_3.md","ref":"Chapter_3/Language/cn/References_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Apex_4_Introduce.md","mtime":"2024-09-12T04:11:10.890Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_1.html b/Chapter_4/Language/cn/Docs_4_1.html
index 80ac8ec..bfba0cb 100644
--- a/Chapter_4/Language/cn/Docs_4_1.html
+++ b/Chapter_4/Language/cn/Docs_4_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2242,7 +2229,7 @@ Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by Gitbook
@@ -2286,7 +2273,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.1 发展概览","level":"1.5.1","depth":2,"next":{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"path":"Chapter_4/Language/cn/Docs_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_2.md","articles":[{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]}]},"previous":{"title":"四、音视频机器学习基础","level":"1.5","depth":1,"path":"Chapter_4/Language/cn/Apex_4_Introduce.md","ref":"Chapter_4/Language/cn/Apex_4_Introduce.md","articles":[{"title":"4.1 发展概览","level":"1.5.1","depth":2,"path":"Chapter_4/Language/cn/Docs_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_1.md","articles":[]},{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"path":"Chapter_4/Language/cn/Docs_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_2.md","articles":[{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]}]},{"title":"4.3 经典激活函数(Classic Activation Function)","level":"1.5.3","depth":2,"path":"Chapter_4/Language/cn/Docs_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_3.md","articles":[{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]}]},{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"path":"Chapter_4/Language/cn/Docs_4_4.md","ref":"Chapter_4/Language/cn/Docs_4_4.md","articles":[{"title":"4.4.1 
Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]}]},{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"path":"Chapter_4/Language/cn/Docs_4_5.md","ref":"Chapter_4/Language/cn/Docs_4_5.md","articles":[{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]}]},{"title":"4.6 常用最优化算法(Optimizer Operator)","level":"1.5.6","depth":2,"path":"Chapter_4/Language/cn/Docs_4_6.md","ref":"Chapter_4/Language/cn/Docs_4_6.md","articles":[{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_1.md","ref":"Chapter_4/Language/cn/Docs_4_6_1.md","articles":[]},{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment 
Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]}]},{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"path":"Chapter_4/Language/cn/Docs_4_7.md","ref":"Chapter_4/Language/cn/Docs_4_7.md","articles":[{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]}]},{"title":"【参考文献】","level":"1.5.8","depth":2,"path":"Chapter_4/Language/cn/References_4.md","ref":"Chapter_4/Language/cn/References_4.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; 
max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_1.md","mtime":"2024-09-11T06:09:50.460Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.1 发展概览","level":"1.5.1","depth":2,"next":{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"path":"Chapter_4/Language/cn/Docs_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_2.md","articles":[{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]}]},"previous":{"title":"四、音视频机器学习基础","level":"1.5","depth":1,"path":"Chapter_4/Language/cn/Apex_4_Introduce.md","ref":"Chapter_4/Language/cn/Apex_4_Introduce.md","articles":[{"title":"4.1 发展概览","level":"1.5.1","depth":2,"path":"Chapter_4/Language/cn/Docs_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_1.md","articles":[]},{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"path":"Chapter_4/Language/cn/Docs_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_2.md","articles":[{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]}]},{"title":"4.3 经典激活函数(Classic Activation Function)","level":"1.5.3","depth":2,"path":"Chapter_4/Language/cn/Docs_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_3.md","articles":[{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]}]},{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"path":"Chapter_4/Language/cn/Docs_4_4.md","ref":"Chapter_4/Language/cn/Docs_4_4.md","articles":[{"title":"4.4.1 
Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]}]},{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"path":"Chapter_4/Language/cn/Docs_4_5.md","ref":"Chapter_4/Language/cn/Docs_4_5.md","articles":[{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]}]},{"title":"4.6 常用最优化算法(Optimizer Operator)","level":"1.5.6","depth":2,"path":"Chapter_4/Language/cn/Docs_4_6.md","ref":"Chapter_4/Language/cn/Docs_4_6.md","articles":[{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_1.md","ref":"Chapter_4/Language/cn/Docs_4_6_1.md","articles":[]},{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment 
Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]}]},{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"path":"Chapter_4/Language/cn/Docs_4_7.md","ref":"Chapter_4/Language/cn/Docs_4_7.md","articles":[{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]}]},{"title":"【参考文献】","level":"1.5.8","depth":2,"path":"Chapter_4/Language/cn/References_4.md","ref":"Chapter_4/Language/cn/References_4.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; 
max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_1.md","mtime":"2024-09-12T04:11:10.890Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_2.html b/Chapter_4/Language/cn/Docs_4_2.html
index 0ce806d..aaaf2d6 100644
--- a/Chapter_4/Language/cn/Docs_4_2.html
+++ b/Chapter_4/Language/cn/Docs_4_2.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2186,7 +2173,7 @@ 4.2 模型
皆为训练过程中的 样本量级参数 。
那么,除去这部分变量,实际进行运算的基本单元是什么呢?
@@ -2230,7 +2217,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"next":{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},"previous":{"title":"4.1 发展概览","level":"1.5.1","depth":2,"path":"Chapter_4/Language/cn/Docs_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2.md","mtime":"2024-09-11T06:09:50.470Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"next":{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},"previous":{"title":"4.1 发展概览","level":"1.5.1","depth":2,"path":"Chapter_4/Language/cn/Docs_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2.md","mtime":"2024-09-12T04:11:10.900Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_2_1.html b/Chapter_4/Language/cn/Docs_4_2_1.html
index 44748a1..3369a99 100644
--- a/Chapter_4/Language/cn/Docs_4_2_1.html
+++ b/Chapter_4/Language/cn/Docs_4_2_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2202,7 +2189,7 @@ 2 × 1 的结果向量,向量的维度依赖于对比集的标注。此时,输出层就需要采用 2 × 1 个节点,来接收前一级隐藏层的输入(例子只有一层隐藏层)。
所以综合而言,在工程上,算子常常是以最小的 方法单元(Method Unit) 而存在,层中节点相当于最小 执行单元(Operation Unit) 。层则相当于由一系列算子按照一定的处理顺序,组成的 任务单元(Task Unit) 。而模型(Model)则是由一系列层按照既定目标排列组合,形成的 作业流水线(Process Pipeline) 。
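As a rough illustration of the hierarchy named in the context line above (operator as the smallest method unit, a layer node as the smallest execution unit, a layer as a task unit built from operators, and a model as a pipeline of layers), the following minimal Python sketch maps those terms onto code. It is a hypothetical example added for clarity: the weights, sizes, and names are invented and are not taken from the book or from the diffed pages.

    # Minimal sketch, assuming a toy fully-connected setup with invented numbers.
    from typing import List

    def weighted_sum(inputs: List[float], weights: List[float], bias: float) -> float:
        # Operator: the smallest method unit (here, an affine combination).
        return sum(w * x for w, x in zip(weights, inputs)) + bias

    def relu(x: float) -> float:
        # Another operator: the activation applied by each node.
        return max(0.0, x)

    class Layer:
        """Task unit: every node executes the same operators in a fixed order."""
        def __init__(self, weights: List[List[float]], biases: List[float]):
            self.weights = weights
            self.biases = biases

        def forward(self, inputs: List[float]) -> List[float]:
            # Each (weights, bias) pair is one node, i.e. one execution unit.
            return [relu(weighted_sum(inputs, w, b))
                    for w, b in zip(self.weights, self.biases)]

    class Model:
        """Process pipeline: layers applied one after another toward a target output."""
        def __init__(self, layers: List[Layer]):
            self.layers = layers

        def forward(self, inputs: List[float]) -> List[float]:
            for layer in self.layers:
                inputs = layer.forward(inputs)
            return inputs

    # Usage: a 3-input hidden layer feeding a 2-node output layer,
    # loosely mirroring the 2 × 1 output vector mentioned above.
    hidden = Layer(weights=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], biases=[0.0, 0.0])
    output = Layer(weights=[[1.0, -1.0], [-1.0, 1.0]], biases=[0.0, 0.0])
    print(Model([hidden, output]).forward([1.0, 2.0, 3.0]))  # -> [0.0, 1.8]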
@@ -2246,7 +2233,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"next":{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},"previous":{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"path":"Chapter_4/Language/cn/Docs_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_2.md","articles":[{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 
30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2_1.md","mtime":"2024-09-11T06:09:50.470Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"next":{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},"previous":{"title":"4.2 模型工程基础","level":"1.5.2","depth":2,"path":"Chapter_4/Language/cn/Docs_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_2.md","articles":[{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 
30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2_1.md","mtime":"2024-09-12T04:11:10.900Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_2_2.html b/Chapter_4/Language/cn/Docs_4_2_2.html
index 236e03a..c75504e 100644
--- a/Chapter_4/Language/cn/Docs_4_2_2.html
+++ b/Chapter_4/Language/cn/Docs_4_2_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Preparation
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2212,7 +2199,7 @@ 神经元
$z_i = w_i \cdot \delta(x_i) + b_i$
For this reason, it can also be referred to as the Hidden Layer Function.
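As a rough illustration (not code from the book), the per-unit computation above can be sketched in C, taking the previous layer's activation δ to be a sigmoid; the function names and the weight, input, and bias values are placeholders chosen for the example:

#include <math.h>
#include <stdio.h>

/* Illustrative sketch only: one hidden-layer unit computing
   z_i = w_i * delta(x_i) + b_i, with delta chosen here as a sigmoid.
   The names hidden_unit/delta and the constants are placeholders. */
static double delta(double x) {
    return 1.0 / (1.0 + exp(-x));          /* previous layer's activation */
}

static double hidden_unit(double w, double x, double b) {
    return w * delta(x) + b;               /* weighted activation plus bias */
}

int main(void) {
    printf("z = %f\n", hidden_unit(0.8, 0.5, 0.1));
    return 0;
}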
@@ -2256,7 +2243,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"next":{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},"previous":{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2_2.md","mtime":"2024-09-11T06:09:50.480Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"next":{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},"previous":{"title":"4.2.1 算子(Operator)& 层(Layer)","level":"1.5.2.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_1.md","ref":"Chapter_4/Language/cn/Docs_4_2_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2_2.md","mtime":"2024-09-12T04:11:10.900Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_2_3.html b/Chapter_4/Language/cn/Docs_4_2_3.html
index 00a6d74..1abe43c 100644
--- a/Chapter_4/Language/cn/Docs_4_2_3.html
+++ b/Chapter_4/Language/cn/Docs_4_2_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Preparation
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2188,7 +2175,7 @@ 4
Beyond this, many of today's model-fusion techniques, including Large Models, collectively referred to as multi-modal (Multi Model) approaches, have begun to adopt implementations that mix multiple models.
For example, in the Self-Supervised Large Model theory proposed by Yann LeCun, complete continuous time-aware prediction is achieved by cross-fusing Short Term Prediction with Joint Embedding of longer-range predictions. The functional layers or layer groups of traditional deep learning (meaning a deep-learning model with a single purpose) are split into several task-specific models (Specific Models), including a Cost Module (roughly a complex substitute for a loss function that does not act at a single point), a Perception Module, a Policy Module, an Action Model, and a World Model. These are then combined into a complex continuous network, with the aim of building a self-learning processing system for the model.
@@ -2232,7 +2219,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"next":{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]},"previous":{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2_3.md","mtime":"2024-09-11T06:09:50.480Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"next":{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_4.md","ref":"Chapter_4/Language/cn/Docs_4_2_4.md","articles":[]},"previous":{"title":"4.2.2 神经元(Neuron)","level":"1.5.2.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_2.md","ref":"Chapter_4/Language/cn/Docs_4_2_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2_3.md","mtime":"2024-09-12T04:11:10.900Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_2_4.html b/Chapter_4/Language/cn/Docs_4_2_4.html
index 30f1af3..b2ba631 100644
--- a/Chapter_4/Language/cn/Docs_4_2_4.html
+++ b/Chapter_4/Language/cn/Docs_4_2_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Preparation
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2197,7 +2184,7 @@ 嵌
That is, there is no particular constraint on the form of the feature vectors that make up the embedding set; in practice, however, the final format usually has to be chosen to match the loss function in use, which matters a great deal. Because the evaluation data is often fed into linear regression, it is called the prediction set (Predictions) to set it apart.
At this point we have covered the basic introductory concepts of deep learning. Let us now walk step by step through the concrete details of a neural network.
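To make the point about the loss function fixing the format concrete, here is a minimal sketch (not from the book) in which a mean-squared-error loss, a common choice when the evaluation step is a linear regression, forces the prediction set and the labels to share the same length; the length N and all values are invented for illustration:

#include <stdio.h>

#define N 4   /* illustrative vector length */

/* Mean squared error over a prediction vector and matching labels. */
static double mse(const double pred[N], const double label[N]) {
    double sum = 0.0;
    for (int i = 0; i < N; ++i) {
        double d = pred[i] - label[i];
        sum += d * d;
    }
    return sum / N;
}

int main(void) {
    double predictions[N] = {0.9, 0.1, 0.4, 0.7};  /* "prediction set" */
    double labels[N]      = {1.0, 0.0, 0.5, 0.8};
    printf("MSE = %f\n", mse(predictions, labels));
    return 0;
}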
@@ -2241,7 +2228,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"next":{"title":"4.3 经典激活函数(Classic Activation Function)","level":"1.5.3","depth":2,"path":"Chapter_4/Language/cn/Docs_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_3.md","articles":[{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]}]},"previous":{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2_4.md","mtime":"2024-09-11T06:09:50.480Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.2.4 特征选择(Feature Selection)","level":"1.5.2.4","depth":3,"next":{"title":"4.3 经典激活函数(Classic Activation Function)","level":"1.5.3","depth":2,"path":"Chapter_4/Language/cn/Docs_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_3.md","articles":[{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]}]},"previous":{"title":"4.2.3 神经网络(NN [Neural Network])","level":"1.5.2.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_2_3.md","ref":"Chapter_4/Language/cn/Docs_4_2_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_2_4.md","mtime":"2024-09-12T04:11:10.910Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_3.html b/Chapter_4/Language/cn/Docs_4_3.html
index c3143e2..adfaaeb 100644
--- a/Chapter_4/Language/cn/Docs_4_3.html
+++ b/Chapter_4/Language/cn/Docs_4_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Preparation
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2194,7 +2181,7 @@
- 5.1.4 Analysis Environment Preparation
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2217,7 +2204,7 @@ Sigmoid 算子
The sigmoid of 0.500000 is 0.622459
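The book's source for this example is not included in the diff; a plausible reconstruction that reproduces the quoted line is:

#include <math.h>
#include <stdio.h>

/* sigmoid(x) = 1 / (1 + e^-x); at x = 0.5 this is about 0.622459. */
static double sigmoid(double x) {
    return 1.0 / (1.0 + exp(-x));
}

int main(void) {
    double x = 0.5;
    printf("The sigmoid of %f is %f\n", x, sigmoid(x));
    return 0;
}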
@@ -2261,7 +2248,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"next":{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},"previous":{"title":"4.3 经典激活函数(Classic Activation Function)","level":"1.5.3","depth":2,"path":"Chapter_4/Language/cn/Docs_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_3.md","articles":[{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_1.md","mtime":"2024-09-11T06:09:50.480Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"next":{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},"previous":{"title":"4.3 经典激活函数(Classic Activation Function)","level":"1.5.3","depth":2,"path":"Chapter_4/Language/cn/Docs_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_3.md","articles":[{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_1.md","mtime":"2024-09-12T04:11:10.910Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_3_2.html b/Chapter_4/Language/cn/Docs_4_3_2.html
index 021a423..cf5de22 100644
--- a/Chapter_4/Language/cn/Docs_4_3_2.html
+++ b/Chapter_4/Language/cn/Docs_4_3_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Preparation
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2217,7 +2204,7 @@ Tanh 算子
The tanh of 0.500000 is 0.462117
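Again the underlying snippet is not part of the diff; a plausible reconstruction that yields the quoted output is:

#include <math.h>
#include <stdio.h>

/* tanh(0.5) is about 0.462117; tanh() comes from the C math library. */
int main(void) {
    double x = 0.5;
    printf("The tanh of %f is %f\n", x, tanh(x));
    return 0;
}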
@@ -2261,7 +2248,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"next":{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},"previous":{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_2.md","mtime":"2024-09-11T06:09:50.480Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"next":{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},"previous":{"title":"4.3.1 Sigmoid","level":"1.5.3.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_1.md","ref":"Chapter_4/Language/cn/Docs_4_3_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_2.md","mtime":"2024-09-12T04:11:10.910Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_3_3.html b/Chapter_4/Language/cn/Docs_4_3_3.html
index c9f1294..dea2fab 100644
--- a/Chapter_4/Language/cn/Docs_4_3_3.html
+++ b/Chapter_4/Language/cn/Docs_4_3_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Preparation
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2217,7 +2204,7 @@ Softplus 算子
The softplus of 0.500000 is 0.648721
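For reference, the standard softplus is ln(1 + e^x), which at x = 0.5 evaluates to about 0.974077; the 0.648721 quoted above equals e^0.5 - 1, so the book's snippet (not shown in this diff) may be using a different expression. A sketch of the standard form:

#include <math.h>
#include <stdio.h>

/* Standard softplus, computed as log1p(exp(x)) = ln(1 + e^x). */
static double softplus(double x) {
    return log1p(exp(x));
}

int main(void) {
    double x = 0.5;
    printf("The softplus of %f is %f\n", x, softplus(x));   /* ~0.974077 */
    return 0;
}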
@@ -2261,7 +2248,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"next":{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},"previous":{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_3.md","mtime":"2024-09-11T06:09:50.490Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"next":{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},"previous":{"title":"4.3.2 Tanh","level":"1.5.3.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_2.md","ref":"Chapter_4/Language/cn/Docs_4_3_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_3.md","mtime":"2024-09-12T04:11:10.910Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_3_4.html b/Chapter_4/Language/cn/Docs_4_3_4.html
index df135e7..b490baa 100644
--- a/Chapter_4/Language/cn/Docs_4_3_4.html
+++ b/Chapter_4/Language/cn/Docs_4_3_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 Analysis Environment Preparation
- 5.1.5 Other Analysis Software
+ 5.1.4 Other Analysis Software
@@ -2322,7 +2309,7 @@ ReLU 族算子
The RReLU of -0.500000 with alpha=0.100000 , lower=0.000000 , and upper=1.000000 is -0.019595
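The value quoted above reflects a single random draw: in an RReLU the negative-side slope is sampled uniformly from [lower, upper] during training (here a slope of roughly 0.039190 was evidently drawn, since -0.5 * 0.039190 ≈ -0.019595), while a fixed slope is used at inference. The sketch below makes those assumptions explicit; it is not the book's implementation, and the fixed inference slope (midpoint of the range) is a guess:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* RReLU sketch: identity for x >= 0; for x < 0 multiply by a slope drawn
   uniformly from [lower, upper] when training, or by a fixed slope
   (midpoint here, by assumption) when not. Output varies run to run. */
static double rrelu(double x, double lower, double upper, int training) {
    if (x >= 0.0) return x;
    double a = training
        ? lower + (upper - lower) * ((double)rand() / RAND_MAX)
        : 0.5 * (lower + upper);
    return a * x;
}

int main(void) {
    srand((unsigned)time(NULL));
    double x = -0.5;
    printf("The RReLU of %f is %f\n", x, rrelu(x, 0.0, 1.0, 1));
    return 0;
}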
@@ -2366,7 +2353,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"next":{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},"previous":{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_4.md","mtime":"2024-09-11T06:09:50.490Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"next":{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},"previous":{"title":"4.3.3 Softplus","level":"1.5.3.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_3.md","ref":"Chapter_4/Language/cn/Docs_4_3_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_4.md","mtime":"2024-09-12T04:11:10.920Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_3_5.html b/Chapter_4/Language/cn/Docs_4_3_5.html
index 3dd8b7f..ea787f2 100644
--- a/Chapter_4/Language/cn/Docs_4_3_5.html
+++ b/Chapter_4/Language/cn/Docs_4_3_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2244,7 +2231,7 @@ ELU & SELU 算[
The SELU of -0.500000 with alpha=1.673263 and lambda=1.050701 is -0.428348
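For reference, a minimal C sketch assuming the commonly cited SELU definition selu(x) = λ·x for x > 0 and λ·α·(eˣ − 1) otherwise, with the α and λ constants printed above; whether it reproduces the exact demo value depends on the book's own implementation.

    #include <stdio.h>
    #include <math.h>

    /* SELU sketch with the usual fixed self-normalizing constants. */
    static double selu(double x) {
        const double alpha  = 1.673263;
        const double lambda = 1.050701;
        return lambda * (x > 0.0 ? x : alpha * (exp(x) - 1.0));
    }

    int main(void) {
        printf("selu(-0.5) = %f\n", selu(-0.5));
        return 0;
    }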
@@ -2288,7 +2275,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"next":{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},"previous":{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_5.md","mtime":"2024-09-11T06:09:50.490Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"next":{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},"previous":{"title":"4.3.4 ReLU 族 ","level":"1.5.3.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_4.md","ref":"Chapter_4/Language/cn/Docs_4_3_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_5.md","mtime":"2024-09-12T04:11:10.920Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_3_6.html b/Chapter_4/Language/cn/Docs_4_3_6.html
index 9547625..b29d8c9 100644
--- a/Chapter_4/Language/cn/Docs_4_3_6.html
+++ b/Chapter_4/Language/cn/Docs_4_3_6.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2232,7 +2219,7 @@ Mish 算子化
The mish of 0.500000 is 0.462117
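For reference, a minimal C sketch assuming the usual Mish definition mish(x) = x · tanh(softplus(x)) with softplus(x) = ln(1 + eˣ); this is a reference formulation rather than the demo code that produced the value above.

    #include <stdio.h>
    #include <math.h>

    /* Mish sketch: x * tanh(ln(1 + e^x)); log1p keeps the softplus accurate. */
    static double mish(double x) {
        return x * tanh(log1p(exp(x)));
    }

    int main(void) {
        printf("mish(0.5) = %f\n", mish(0.5));
        return 0;
    }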
@@ -2276,7 +2263,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"next":{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]},"previous":{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_6.md","mtime":"2024-09-11T06:09:50.490Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"next":{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]},"previous":{"title":"4.3.5 ELU & SELU","level":"1.5.3.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_5.md","ref":"Chapter_4/Language/cn/Docs_4_3_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_6.md","mtime":"2024-09-12T04:11:10.920Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_3_7.html b/Chapter_4/Language/cn/Docs_4_3_7.html
index a5bd0f4..20799c2 100644
--- a/Chapter_4/Language/cn/Docs_4_3_7.html
+++ b/Chapter_4/Language/cn/Docs_4_3_7.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2298,7 +2285,7 @@ Swish 族算
Beyond that, non-monotonicity has recently become a new focus of activation-function research; industry studies show that introducing a moderate amount of non-monotonicity markedly improves how much of the source data's information an activation function preserves when producing its non-linear output.
Overall, when choosing an activation function today, consider ReLU, LReLU, ReLU-N, and h-Swish first; depending on whether it needs to cooperate with the optimization algorithm (exploiting the smooth property), decide whether to adopt Softplus or Swish instead; and, given current hardware, be measured about activation functions that involve exponential computation.
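To make the hardware trade-off above concrete, here is a small C sketch contrasting Swish (x · sigmoid(x), which needs an exponential) with the MobileNetV3-style h-Swish approximation (x · ReLU6(x + 3) / 6, which is purely piecewise linear); the comparison is illustrative and not taken from the book.

    #include <stdio.h>
    #include <math.h>

    static double swish(double x)   { return x / (1.0 + exp(-x)); }       /* x * sigmoid(x) */
    static double relu6(double x)   { return fmin(fmax(x, 0.0), 6.0); }
    static double h_swish(double x) { return x * relu6(x + 3.0) / 6.0; }  /* no exp() needed */

    int main(void) {
        for (double x = -3.0; x <= 3.0; x += 1.5)
            printf("x=%5.2f  swish=%8.5f  h-swish=%8.5f\n", x, swish(x), h_swish(x));
        return 0;
    }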
@@ -2342,7 +2329,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"next":{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"path":"Chapter_4/Language/cn/Docs_4_4.md","ref":"Chapter_4/Language/cn/Docs_4_4.md","articles":[{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]}]},"previous":{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_7.md","mtime":"2024-09-11T06:09:50.490Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"next":{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"path":"Chapter_4/Language/cn/Docs_4_4.md","ref":"Chapter_4/Language/cn/Docs_4_4.md","articles":[{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]}]},"previous":{"title":"4.3.6 Mish","level":"1.5.3.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_6.md","ref":"Chapter_4/Language/cn/Docs_4_3_6.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_3_7.md","mtime":"2024-09-12T04:11:10.920Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_4.html b/Chapter_4/Language/cn/Docs_4_4.html
index cda8123..8cb4170 100644
--- a/Chapter_4/Language/cn/Docs_4_4.html
+++ b/Chapter_4/Language/cn/Docs_4_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2209,7 +2196,7 @@ Σ(x⃗) = ∑_j h_j(x)
With these premises in place, let us look at the three classic connection functions.
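As a rough illustration of the Σ(x⃗) = ∑_j h_j(x) view, the C sketch below shows how the classic connection functions treat the sub-responses h_j differently: Maxout keeps only the largest and Softmax normalizes them into a distribution (Dropout, which randomly zeroes them, is sketched under 4.4.1 below); values and dimensions are made up.

    #include <stdio.h>
    #include <math.h>

    #define J 4

    int main(void) {
        double h[J] = {0.3, -1.2, 2.0, 0.5};    /* sub-function responses h_j(x) */
        double sum = 0.0, mx = h[0], z = 0.0;

        for (int j = 0; j < J; ++j) {
            sum += h[j];                        /* plain combination: sum_j h_j(x) */
            if (h[j] > mx) mx = h[j];           /* Maxout keeps max_j h_j(x)       */
            z += exp(h[j]);                     /* Softmax normalizer              */
        }
        printf("sum = %f, maxout = %f\n", sum, mx);
        for (int j = 0; j < J; ++j)
            printf("softmax[%d] = %f\n", j, exp(h[j]) / z);
        return 0;
    }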
@@ -2253,7 +2240,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"next":{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},"previous":{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_4.md","mtime":"2024-09-11T06:09:50.490Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"next":{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},"previous":{"title":"4.3.7 Swish 族 ","level":"1.5.3.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_3_7.md","ref":"Chapter_4/Language/cn/Docs_4_3_7.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_4.md","mtime":"2024-09-12T04:11:10.920Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_4_1.html b/Chapter_4/Language/cn/Docs_4_4_1.html
index e6fbeb4..d7a16a1 100644
--- a/Chapter_4/Language/cn/Docs_4_4_1.html
+++ b/Chapter_4/Language/cn/Docs_4_4_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2236,7 +2223,7 @@ Dropout 算子
This is consistent with the theoretical behavior.
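A minimal sketch, assuming the standard inverted-Dropout formulation, of why the observed behavior should track the theory: kept activations are rescaled by 1/p, so the expected layer output is unchanged. The helper below is illustrative, not the book's demo code.

    #include <stdio.h>
    #include <stdlib.h>

    /* Inverted Dropout: keep an activation with probability keep_prob and
     * rescale it by 1/keep_prob so the expectation is preserved. */
    static double dropout(double x, double keep_prob) {
        int keep = ((double)rand() / RAND_MAX) < keep_prob;
        return keep ? x / keep_prob : 0.0;
    }

    int main(void) {
        const double keep_prob = 0.8;
        const int n = 100000;
        double acc = 0.0;
        for (int i = 0; i < n; ++i)
            acc += dropout(1.0, keep_prob);
        printf("empirical mean = %f (expected 1.0)\n", acc / n);
        return 0;
    }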
@@ -2280,7 +2267,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"next":{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},"previous":{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"path":"Chapter_4/Language/cn/Docs_4_4.md","ref":"Chapter_4/Language/cn/Docs_4_4.md","articles":[{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_4_1.md","mtime":"2024-09-11T06:09:50.490Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"next":{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},"previous":{"title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","level":"1.5.4","depth":2,"path":"Chapter_4/Language/cn/Docs_4_4.md","ref":"Chapter_4/Language/cn/Docs_4_4.md","articles":[{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_4_1.md","mtime":"2024-09-12T04:11:10.920Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_4_2.html b/Chapter_4/Language/cn/Docs_4_4_2.html
index f951f93..0141031 100644
--- a/Chapter_4/Language/cn/Docs_4_4_2.html
+++ b/Chapter_4/Language/cn/Docs_4_4_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2242,7 +2229,7 @@ Maxout 算子化
This is consistent with the theoretical behavior.
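For reference, a minimal C sketch of a Maxout unit under the usual definition max_j(w_j·x + b_j): the non-linearity is learned through k affine pieces rather than fixed in advance. The weights below are illustrative placeholders.

    #include <stdio.h>

    #define K 3

    /* Maxout sketch: evaluate k affine pieces and return the largest. */
    static double maxout(double x, const double w[K], const double b[K]) {
        double best = w[0] * x + b[0];
        for (int j = 1; j < K; ++j) {
            double z = w[j] * x + b[j];
            if (z > best) best = z;
        }
        return best;
    }

    int main(void) {
        const double w[K] = {-1.0, 0.0, 1.0};
        const double b[K] = { 0.0, 0.5, 0.0};
        for (double x = -2.0; x <= 2.0; x += 1.0)
            printf("maxout(%.1f) = %f\n", x, maxout(x, w, b));
        return 0;
    }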
@@ -2286,7 +2273,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"next":{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]},"previous":{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_4_2.md","mtime":"2024-09-11T06:09:50.500Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"next":{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]},"previous":{"title":"4.4.1 Dropout","level":"1.5.4.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_1.md","ref":"Chapter_4/Language/cn/Docs_4_4_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_4_2.md","mtime":"2024-09-12T04:11:10.930Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_4_3.html b/Chapter_4/Language/cn/Docs_4_4_3.html
index 886c946..b1ccad4 100644
--- a/Chapter_4/Language/cn/Docs_4_4_3.html
+++ b/Chapter_4/Language/cn/Docs_4_4_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2364,7 +2351,7 @@ Softmax 算子
Of course, connection functions are not limited to the three types listed here; every year a large body of research proposes new variants. What the discussion above also shows, however, is that unless they are sufficiently general, connection functions rarely exist as independent pieces detached from the model, as the comparison of Maxout with Dropout and Softmax makes clear. This is something to keep in mind during training.
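As a reference for the comparison above, a minimal C sketch of Softmax with the customary max-subtraction trick for numerical stability; it turns arbitrary scores into a probability distribution, which is also why it is normally tied to a model's output layer rather than used as a standalone piece. Illustrative only.

    #include <stdio.h>
    #include <math.h>

    /* Numerically stable Softmax: subtract the max score before exponentiating. */
    static void softmax(const double *x, double *y, int n) {
        double mx = x[0], z = 0.0;
        for (int i = 1; i < n; ++i) if (x[i] > mx) mx = x[i];
        for (int i = 0; i < n; ++i) { y[i] = exp(x[i] - mx); z += y[i]; }
        for (int i = 0; i < n; ++i) y[i] /= z;
    }

    int main(void) {
        double scores[3] = {2.0, 1.0, 0.1}, p[3];
        softmax(scores, p, 3);
        printf("p = {%f, %f, %f}\n", p[0], p[1], p[2]);
        return 0;
    }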
At this point we have covered the basic means of distilling samples; next we need to consider how the weights are iterated.
@@ -2408,7 +2395,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"next":{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"path":"Chapter_4/Language/cn/Docs_4_5.md","ref":"Chapter_4/Language/cn/Docs_4_5.md","articles":[{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]}]},"previous":{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD 
HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_4_3.md","mtime":"2024-09-11T06:09:50.500Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"next":{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"path":"Chapter_4/Language/cn/Docs_4_5.md","ref":"Chapter_4/Language/cn/Docs_4_5.md","articles":[{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]}]},"previous":{"title":"4.4.2 Maxout","level":"1.5.4.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_2.md","ref":"Chapter_4/Language/cn/Docs_4_4_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD 
HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_4_3.md","mtime":"2024-09-12T04:11:10.930Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5.html b/Chapter_4/Language/cn/Docs_4_5.html
index 230ce42..e77280c 100644
--- a/Chapter_4/Language/cn/Docs_4_5.html
+++ b/Chapter_4/Language/cn/Docs_4_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2195,7 +2182,7 @@
The regularization term (Regularities) is the component of a loss function that measures model complexity. There are many ways to measure complexity; most of them judge a weight by its influence on the model as a whole, i.e. they use the magnitude of a parameter to gauge how much it affects the overall model.
Next, we will look at how loss functions are used through these three kinds of terms: regression terms (Regression), classification terms (Classification), and regularization terms (Regularities).
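To make that composition concrete, here is a minimal, self-contained C++ sketch (not taken from the book's sources; the predictions, targets, weights, and the lambda value are assumed purely for illustration). It assembles a total training loss from a regression data term (MSE) plus a weighted regularization term (L2):

    // Minimal sketch: total loss = data term + lambda * regularization term.
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Data term: mean squared error between predictions and targets.
    double mse(const std::vector<double>& pred, const std::vector<double>& target) {
        double sum = 0.0;
        for (std::size_t i = 0; i < pred.size(); ++i) {
            double d = pred[i] - target[i];
            sum += d * d;
        }
        return sum / pred.size();
    }

    // Regularization term: L2 penalty, the sum of squared weights.
    double l2_penalty(const std::vector<double>& weights) {
        double sum = 0.0;
        for (double w : weights) sum += w * w;
        return sum;
    }

    int main() {
        std::vector<double> pred    = {0.9, 0.2, 0.8};   // assumed model outputs
        std::vector<double> target  = {1.0, 0.0, 1.0};   // assumed ground truth
        std::vector<double> weights = {0.5, -1.2, 0.3};  // assumed model weights
        double lambda = 0.01;                            // assumed regularization strength
        double total  = mse(pred, target) + lambda * l2_penalty(weights);
        std::printf("The total loss is %f\n", total);
        return 0;
    }

Swapping mse for a classification term, or l2_penalty for an L1 sum of absolute values, changes the training behaviour but not the overall structure of the loss.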
@@ -2239,7 +2226,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"next":{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},"previous":{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5.md","mtime":"2024-09-11T06:09:50.500Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"next":{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},"previous":{"title":"4.4.3 SoftMax","level":"1.5.4.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_4_3.md","ref":"Chapter_4/Language/cn/Docs_4_4_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5.md","mtime":"2024-09-12T04:11:10.930Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_1.html b/Chapter_4/Language/cn/Docs_4_5_1.html
index 5f4cfbd..e747758 100644
--- a/Chapter_4/Language/cn/Docs_4_5_1.html
+++ b/Chapter_4/Language/cn/Docs_4_5_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2222,7 +2209,7 @@ MAE 算子化 The MAE is 0.100000
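As a reading aid for the MAE hunk above, here is a minimal C++ sketch of the operator. The inputs are assumed illustrative values, chosen so that every residual is 0.1 and the printed result matches the 0.100000 shown in the page output:

    #include <cmath>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Mean absolute error: the average of |prediction - target| over all samples.
    double mae(const std::vector<double>& pred, const std::vector<double>& target) {
        double sum = 0.0;
        for (std::size_t i = 0; i < pred.size(); ++i)
            sum += std::fabs(pred[i] - target[i]);
        return sum / pred.size();
    }

    int main() {
        std::vector<double> pred   = {1.1, 2.1, 2.9};  // assumed predictions
        std::vector<double> target = {1.0, 2.0, 3.0};  // assumed targets
        std::printf("The MAE is %f\n", mae(pred, target));
        return 0;
    }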
@@ -2266,7 +2253,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"next":{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},"previous":{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"path":"Chapter_4/Language/cn/Docs_4_5.md","ref":"Chapter_4/Language/cn/Docs_4_5.md","articles":[{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD 
HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_1.md","mtime":"2024-09-11T06:09:50.500Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"next":{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},"previous":{"title":"4.5 损失函数(Loss Function)","level":"1.5.5","depth":2,"path":"Chapter_4/Language/cn/Docs_4_5.md","ref":"Chapter_4/Language/cn/Docs_4_5.md","articles":[{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD 
HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_1.md","mtime":"2024-09-12T04:11:10.930Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_10.html b/Chapter_4/Language/cn/Docs_4_5_10.html
index cc244a0..017a4f2 100644
--- a/Chapter_4/Language/cn/Docs_4_5_10.html
+++ b/Chapter_4/Language/cn/Docs_4_5_10.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2358,7 +2345,7 @@ N-Pair Loss 算子
The N-Pair loss unifies the notions of positive and negative samples at the level of the whole sample set: a negative sample with respect to the current class can equally be treated as a positive sample of the class it actually points to. As a result, for an N-class problem the time complexity of evaluating the loss is reduced to only 2N, which is quite efficient.
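For reference, the following is a minimal C++ sketch of the (N+1)-tuple form of the N-Pair loss, L = log(1 + sum_k exp(a·n_k - a·p)), where a is the anchor embedding, p its positive, and each n_k is the positive of another class reused as a negative. The embedding values below are assumed for illustration only:

    #include <cmath>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Inner product of two embedding vectors.
    double dot(const std::vector<double>& x, const std::vector<double>& y) {
        double s = 0.0;
        for (std::size_t i = 0; i < x.size(); ++i) s += x[i] * y[i];
        return s;
    }

    // N-Pair loss for one anchor against one positive and N-1 reused negatives.
    double n_pair_loss(const std::vector<double>& anchor,
                       const std::vector<double>& positive,
                       const std::vector<std::vector<double>>& negatives) {
        double pos = dot(anchor, positive);
        double sum = 0.0;
        for (const auto& neg : negatives) sum += std::exp(dot(anchor, neg) - pos);
        return std::log(1.0 + sum);
    }

    int main() {
        std::vector<double> anchor   = {1.0, 0.0};   // assumed anchor embedding
        std::vector<double> positive = {0.9, 0.1};   // assumed same-class embedding
        std::vector<std::vector<double>> negatives = {{0.1, 0.9}, {-0.8, 0.2}};
        std::printf("The N-Pair loss is %f\n", n_pair_loss(anchor, positive, negatives));
        return 0;
    }

Because the positives of the other classes double as negatives, each anchor needs only its own embedding plus one positive per class, which is where the 2N figure above comes from.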
@@ -2402,7 +2389,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"next":{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},"previous":{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_10.md","mtime":"2024-09-11T06:09:50.500Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"next":{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},"previous":{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_10.md","mtime":"2024-09-12T04:11:10.930Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_11.html b/Chapter_4/Language/cn/Docs_4_5_11.html
index cd1b7c9..d06fe55 100644
--- a/Chapter_4/Language/cn/Docs_4_5_11.html
+++ b/Chapter_4/Language/cn/Docs_4_5_11.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2188,7 +2175,7 @@ 4.5.11 正则项-L1 惩罚
Because of its properties, the L1 penalty (L1 Regularity) is often used to prune the number of parameters and shrink model width. Seen from another angle, the idea behind L1 is somewhat similar to that of the Maxout activation function: both use linear pieces to compose the actual feature curve, L1 from the perspective of model complexity and Maxout from the perspective of non-linear features.
The L1 penalty has been shown to be very effective for optimizing sparse models.
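A minimal C++ sketch of the L1 term itself (the weights and the lambda strength are assumed example values). It simply sums absolute weight values; since its gradient has constant magnitude regardless of weight size, it keeps pushing small weights toward exactly zero, which is the sparsity effect described above:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // L1 penalty: the sum of the absolute values of the weights.
    double l1_penalty(const std::vector<double>& weights) {
        double sum = 0.0;
        for (double w : weights) sum += std::fabs(w);
        return sum;
    }

    int main() {
        std::vector<double> weights = {0.5, -1.2, 0.0, 0.3};  // assumed model weights
        double lambda = 0.01;                                  // assumed regularization strength
        std::printf("The L1 penalty term is %f\n", lambda * l1_penalty(weights));
        return 0;
    }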
@@ -2232,7 +2219,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"next":{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]},"previous":{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_11.md","mtime":"2024-09-11T06:09:50.500Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"next":{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_12.md","ref":"Chapter_4/Language/cn/Docs_4_5_12.md","articles":[]},"previous":{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_11.md","mtime":"2024-09-12T04:11:10.930Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_12.html b/Chapter_4/Language/cn/Docs_4_5_12.html
index fdd712a..68e07e7 100644
--- a/Chapter_4/Language/cn/Docs_4_5_12.html
+++ b/Chapter_4/Language/cn/Docs_4_5_12.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2190,7 +2177,7 @@ 4.5.12 正则项-L2 惩罚
This completes our look at the three kinds of terms that make up a loss function. We have only given a shallow introduction; in real use there are many more subdivisions and purpose-built designs. Apart from the few classics introduced here, such as MAE and MSE, every new loss function may come with its own dedicated neural network structure.
The underlying reason is the scope at which a loss function acts: it measures the iteration of the entire network, so it is unlikely to exist detached from that network. Use it with care.
@@ -2234,7 +2221,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"next":{"title":"4.6 常用最优化算法(Optimizer Operator)","level":"1.5.6","depth":2,"path":"Chapter_4/Language/cn/Docs_4_6.md","ref":"Chapter_4/Language/cn/Docs_4_6.md","articles":[{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_1.md","ref":"Chapter_4/Language/cn/Docs_4_6_1.md","articles":[]},{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]}]},"previous":{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_12.md","mtime":"2024-09-11T06:09:50.510Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.12 正则项-L2 惩罚","level":"1.5.5.12","depth":3,"next":{"title":"4.6 常用最优化算法(Optimizer Operator)","level":"1.5.6","depth":2,"path":"Chapter_4/Language/cn/Docs_4_6.md","ref":"Chapter_4/Language/cn/Docs_4_6.md","articles":[{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_1.md","ref":"Chapter_4/Language/cn/Docs_4_6_1.md","articles":[]},{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]}]},"previous":{"title":"4.5.11 正则项-L1 惩罚","level":"1.5.5.11","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_11.md","ref":"Chapter_4/Language/cn/Docs_4_5_11.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_12.md","mtime":"2024-09-12T04:11:10.940Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_2.html b/Chapter_4/Language/cn/Docs_4_5_2.html
index a8a00be..f7cdfb8 100644
--- a/Chapter_4/Language/cn/Docs_4_5_2.html
+++ b/Chapter_4/Language/cn/Docs_4_5_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2223,7 +2210,7 @@ MSE 算子化 The MSE is 0.033333
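Matching the MAE example, here is a minimal C++ sketch of the MSE operator. The inputs are assumed illustrative values, chosen so that the printed result equals the 0.033333 shown in the page output:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Mean squared error: the average of (prediction - target)^2 over all samples.
    double mse(const std::vector<double>& pred, const std::vector<double>& target) {
        double sum = 0.0;
        for (std::size_t i = 0; i < pred.size(); ++i) {
            double d = pred[i] - target[i];
            sum += d * d;
        }
        return sum / pred.size();
    }

    int main() {
        std::vector<double> pred   = {1.1, 2.3, 3.0};  // assumed predictions
        std::vector<double> target = {1.0, 2.0, 3.0};  // assumed targets
        std::printf("The MSE is %f\n", mse(pred, target));
        return 0;
    }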
@@ -2267,7 +2254,7 @@
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"next":{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},"previous":{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_2.md","mtime":"2024-09-11T06:09:50.510Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"next":{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},"previous":{"title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","level":"1.5.5.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_1.md","ref":"Chapter_4/Language/cn/Docs_4_5_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_2.md","mtime":"2024-09-12T04:11:10.940Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_3.html b/Chapter_4/Language/cn/Docs_4_5_3.html
index 008b8d3..d0ea659 100644
--- a/Chapter_4/Language/cn/Docs_4_5_3.html
+++ b/Chapter_4/Language/cn/Docs_4_5_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2234,7 +2221,7 @@ Huber Loss 算子
The Huber loss is 0.033333
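The Huber-loss figure quoted above comes from the page's example, whose source is not visible in this diff. For orientation only, here is a minimal C++ sketch of the operator; the sample values and the delta threshold are illustrative assumptions, so it will not reproduce the 0.033333 shown.

```cpp
// Minimal Huber-loss sketch (illustrative inputs, not the book's example data).
#include <cstdio>
#include <cmath>
#include <vector>

// Huber loss of a single residual e with transition threshold delta:
// quadratic for |e| <= delta, linear beyond it.
static double huber(double e, double delta) {
    double a = std::fabs(e);
    return a <= delta ? 0.5 * e * e : delta * (a - 0.5 * delta);
}

int main() {
    std::vector<double> y_true = {1.0, 2.0, 3.0};   // assumed ground truth
    std::vector<double> y_pred = {1.1, 1.8, 3.3};   // assumed predictions
    const double delta = 1.0;                       // assumed threshold
    double sum = 0.0;
    for (std::size_t i = 0; i < y_true.size(); ++i)
        sum += huber(y_true[i] - y_pred[i], delta);
    std::printf("The huber loss is %f\n", sum / y_true.size());
    return 0;
}
```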
@@ -2278,7 +2265,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"next":{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},"previous":{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_3.md","mtime":"2024-09-11T06:09:50.510Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"next":{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},"previous":{"title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","level":"1.5.5.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_2.md","ref":"Chapter_4/Language/cn/Docs_4_5_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_3.md","mtime":"2024-09-12T04:11:10.940Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_4.html b/Chapter_4/Language/cn/Docs_4_5_4.html
index 7ec71cb..ab85660 100644
--- a/Chapter_4/Language/cn/Docs_4_5_4.html
+++ b/Chapter_4/Language/cn/Docs_4_5_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2244,7 +2231,7 @@ Quantile Loss 算
The quantile loss is 0.083333
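As with the previous page, only the printed result survives in this diff. A minimal sketch of the quantile (pinball) loss follows; the quantile level tau and the samples are illustrative assumptions rather than the book's example data.

```cpp
// Minimal quantile (pinball) loss sketch with assumed inputs.
#include <cstdio>
#include <vector>

// Pinball loss for residual e = y_true - y_pred at quantile level tau:
// under- and over-prediction are penalised asymmetrically.
static double pinball(double e, double tau) {
    return e >= 0.0 ? tau * e : (tau - 1.0) * e;
}

int main() {
    std::vector<double> y_true = {1.0, 2.0, 3.0};
    std::vector<double> y_pred = {1.2, 1.7, 3.1};
    const double tau = 0.5;   // tau = 0.5 reduces to half the mean absolute error
    double sum = 0.0;
    for (std::size_t i = 0; i < y_true.size(); ++i)
        sum += pinball(y_true[i] - y_pred[i], tau);
    std::printf("The quantile loss is %f\n", sum / y_true.size());
    return 0;
}
```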
@@ -2288,7 +2275,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"next":{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},"previous":{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_4.md","mtime":"2024-09-11T06:09:50.510Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"next":{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},"previous":{"title":"4.5.3 回归项-休伯损失(Huber Loss)","level":"1.5.5.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_3.md","ref":"Chapter_4/Language/cn/Docs_4_5_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_4.md","mtime":"2024-09-12T04:11:10.940Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_5.html b/Chapter_4/Language/cn/Docs_4_5_5.html
index a63a909..ced8e6c 100644
--- a/Chapter_4/Language/cn/Docs_4_5_5.html
+++ b/Chapter_4/Language/cn/Docs_4_5_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2226,7 +2213,7 @@ Log Loss 算子
The log loss is -0.056644, for object class 'apple'
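For reference, a minimal single-sample log-loss sketch follows. The predicted probability below is an illustrative assumption; note that the conventional log loss is -ln(p) and therefore non-negative, whereas the page's output prints a negative value, which suggests its example prints ln(p) directly.

```cpp
// Minimal log-loss sketch for one sample of the true class. The label string
// "apple" is taken from the page's output; the probability is assumed.
#include <cstdio>
#include <cmath>

int main() {
    const char* true_class = "apple";
    double p = 0.945;                // assumed predicted probability of the true class
    double log_loss = -std::log(p);  // conventional (non-negative) definition
    std::printf("The log loss is %f, for object class '%s'\n", log_loss, true_class);
    return 0;
}
```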
@@ -2270,7 +2257,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"next":{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},"previous":{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_5.md","mtime":"2024-09-11T06:09:50.520Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"next":{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},"previous":{"title":"4.5.4 回归项-分位数损失(Quantile Loss)","level":"1.5.5.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_4.md","ref":"Chapter_4/Language/cn/Docs_4_5_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_5.md","mtime":"2024-09-12T04:11:10.940Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_6.html b/Chapter_4/Language/cn/Docs_4_5_6.html
index dff374c..c6ba32f 100644
--- a/Chapter_4/Language/cn/Docs_4_5_6.html
+++ b/Chapter_4/Language/cn/Docs_4_5_6.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2230,7 +2217,7 @@ Cross Entropy Loss
The code above shows, for a three-class case, what happens when neither the sample's input class vector nor the predicted feature vector has been normalized. The cross-entropy loss can still be used, but the result is no longer exact.
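To illustrate the normalization point, here is a minimal three-class cross-entropy sketch in which the raw label vector is normalized by its sum and the raw prediction scores by softmax before the loss is taken. All numbers are illustrative assumptions, not the book's example data.

```cpp
// Cross entropy H(y, p) = -sum_i y_i * log(p_i), with y and p normalised first.
#include <cstdio>
#include <cmath>
#include <algorithm>
#include <vector>

static std::vector<double> softmax(const std::vector<double>& z) {
    double m = z[0];
    for (double v : z) m = std::max(m, v);          // subtract max for stability
    std::vector<double> p(z.size());
    double sum = 0.0;
    for (std::size_t i = 0; i < z.size(); ++i) { p[i] = std::exp(z[i] - m); sum += p[i]; }
    for (double& v : p) v /= sum;
    return p;
}

int main() {
    std::vector<double> label  = {0.0, 2.0, 0.0};   // unnormalised one-hot-like label
    std::vector<double> logits = {0.5, 2.0, 0.1};   // unnormalised prediction scores
    double lsum = 0.0;
    for (double v : label) lsum += v;
    std::vector<double> p = softmax(logits);
    double ce = 0.0;
    for (std::size_t i = 0; i < label.size(); ++i)
        ce -= (label[i] / lsum) * std::log(p[i]);   // normalise the label on the fly
    std::printf("The cross entropy loss is %f\n", ce);
    return 0;
}
```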
@@ -2274,7 +2261,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"next":{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},"previous":{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_6.md","mtime":"2024-09-11T06:09:50.520Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"next":{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},"previous":{"title":"4.5.5 分类项-对数损失(Log Loss)","level":"1.5.5.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_5.md","ref":"Chapter_4/Language/cn/Docs_4_5_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_6.md","mtime":"2024-09-12T04:11:10.940Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_7.html b/Chapter_4/Language/cn/Docs_4_5_7.html
index 447591e..28f2a63 100644
--- a/Chapter_4/Language/cn/Docs_4_5_7.html
+++ b/Chapter_4/Language/cn/Docs_4_5_7.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2228,7 +2215,7 @@ Hinge Loss 算子
The hinge loss is 0.250000
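The hinge-loss example itself is not visible in this diff; a minimal binary-classification sketch follows, with labels in {-1, +1} and per-sample loss max(0, 1 - y*s). The labels and scores are illustrative assumptions.

```cpp
// Minimal binary hinge-loss sketch with assumed labels and classifier scores.
#include <cstdio>
#include <algorithm>
#include <vector>

int main() {
    std::vector<double> y = {+1.0, -1.0, +1.0, -1.0};  // ground-truth labels
    std::vector<double> s = {+0.9, -1.2, +0.3, -2.0};  // raw classifier scores
    double sum = 0.0;
    for (std::size_t i = 0; i < y.size(); ++i)
        sum += std::max(0.0, 1.0 - y[i] * s[i]);        // only violated margins contribute
    std::printf("The hinge loss is %f\n", sum / y.size());
    return 0;
}
```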
@@ -2272,7 +2259,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"next":{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},"previous":{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_7.md","mtime":"2024-09-11T06:09:50.520Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"next":{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},"previous":{"title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","level":"1.5.5.6","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_6.md","ref":"Chapter_4/Language/cn/Docs_4_5_6.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_7.md","mtime":"2024-09-12T04:11:10.950Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_8.html b/Chapter_4/Language/cn/Docs_4_5_8.html
index bb8052d..6047aa3 100644
--- a/Chapter_4/Language/cn/Docs_4_5_8.html
+++ b/Chapter_4/Language/cn/Docs_4_5_8.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2234,7 +2221,7 @@ Contrastive Loss
The contrastive loss is 0.1250000
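For orientation, a minimal contrastive-loss sketch for a single pair of embeddings follows: similar pairs are pulled together, dissimilar pairs are pushed apart up to a margin. The embeddings, pair label, and margin are illustrative assumptions.

```cpp
// Minimal contrastive-loss sketch (Hadsell-style form) for one embedding pair.
#include <cstdio>
#include <cmath>
#include <algorithm>
#include <vector>

static double euclidean(const std::vector<double>& a, const std::vector<double>& b) {
    double s = 0.0;
    for (std::size_t i = 0; i < a.size(); ++i) s += (a[i] - b[i]) * (a[i] - b[i]);
    return std::sqrt(s);
}

int main() {
    std::vector<double> e1 = {0.2, 0.4};
    std::vector<double> e2 = {0.1, 0.1};
    int similar = 1;          // 1: same class (pull together), 0: different (push apart)
    double m = 1.0;           // margin applied to dissimilar pairs
    double d = euclidean(e1, e2);
    double loss = similar ? 0.5 * d * d
                          : 0.5 * std::pow(std::max(0.0, m - d), 2.0);
    std::printf("The contrastive loss is %f\n", loss);
    return 0;
}
```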
@@ -2278,7 +2265,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"next":{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},"previous":{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_8.md","mtime":"2024-09-11T06:09:50.520Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"next":{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_9.md","ref":"Chapter_4/Language/cn/Docs_4_5_9.md","articles":[]},"previous":{"title":"4.5.7 分类项-合页损失(Hinge Loss)","level":"1.5.5.7","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_7.md","ref":"Chapter_4/Language/cn/Docs_4_5_7.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_8.md","mtime":"2024-09-12T04:11:10.950Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_5_9.html b/Chapter_4/Language/cn/Docs_4_5_9.html
index 77f96a1..7ada69b 100644
--- a/Chapter_4/Language/cn/Docs_4_5_9.html
+++ b/Chapter_4/Language/cn/Docs_4_5_9.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2379,7 +2366,7 @@ Triplet Loss 算
As this example shows, effective engineering can greatly improve an algorithm's training efficiency and cut its running time.
That is precisely where the engineer's key contribution lies.
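For reference, here is a minimal triplet-loss sketch for a single (anchor, positive, negative) triple, loss = max(0, d(a,p) - d(a,n) + margin). The embeddings and margin are illustrative assumptions; the batching and mining optimizations alluded to above are the engineering side and are not shown here.

```cpp
// Minimal triplet-loss sketch for one (anchor, positive, negative) triple.
#include <cstdio>
#include <cmath>
#include <algorithm>
#include <vector>

static double euclidean(const std::vector<double>& a, const std::vector<double>& b) {
    double s = 0.0;
    for (std::size_t i = 0; i < a.size(); ++i) s += (a[i] - b[i]) * (a[i] - b[i]);
    return std::sqrt(s);
}

int main() {
    std::vector<double> anchor   = {0.0, 0.0};
    std::vector<double> positive = {0.1, 0.2};   // same identity as the anchor
    std::vector<double> negative = {0.3, 0.2};   // different identity
    const double margin = 0.2;
    double loss = std::max(0.0, euclidean(anchor, positive)
                              - euclidean(anchor, negative) + margin);
    std::printf("The triplet loss is %f\n", loss);
    return 0;
}
```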
@@ -2423,7 +2410,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"next":{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},"previous":{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_9.md","mtime":"2024-09-11T06:09:50.520Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.5.9 分类项-三元损失(Triplet Loss)","level":"1.5.5.9","depth":3,"next":{"title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","level":"1.5.5.10","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_10.md","ref":"Chapter_4/Language/cn/Docs_4_5_10.md","articles":[]},"previous":{"title":"4.5.8 分类项-对比损失(Contrastive Loss)","level":"1.5.5.8","depth":3,"path":"Chapter_4/Language/cn/Docs_4_5_8.md","ref":"Chapter_4/Language/cn/Docs_4_5_8.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_5_9.md","mtime":"2024-09-12T04:11:10.950Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_6.html b/Chapter_4/Language/cn/Docs_4_6.html
index 2b2f099..942c753 100644
--- a/Chapter_4/Language/cn/Docs_4_6.html
+++ b/Chapter_4/Language/cn/Docs_4_6.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2181,7 +2168,7 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2220,7 +2207,7 @@
That said, now that the question has been raised, there is naturally a way to address it.
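This context line belongs to the "4.6.1 基础优化算法" page; its own example is not visible in this diff. Purely for orientation, here is a minimal vanilla gradient-descent update on an assumed toy objective f(w) = (w - 3)^2; nothing in it is taken from the page's example.

```cpp
// Minimal vanilla gradient-descent sketch on an assumed quadratic objective.
#include <cstdio>

int main() {
    double w = 0.0;                      // parameter being optimised
    const double lr = 0.1;               // learning rate
    for (int step = 0; step < 50; ++step) {
        double grad = 2.0 * (w - 3.0);   // df/dw for f(w) = (w - 3)^2
        w -= lr * grad;                  // basic update rule: w <- w - lr * grad
    }
    std::printf("w converged to %f (optimum is 3)\n", w);
    return 0;
}
```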
@@ -2264,7 +2251,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"next":{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},"previous":{"title":"4.6 常用最优化算法(Optimizer Operator)","level":"1.5.6","depth":2,"path":"Chapter_4/Language/cn/Docs_4_6.md","ref":"Chapter_4/Language/cn/Docs_4_6.md","articles":[{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_1.md","ref":"Chapter_4/Language/cn/Docs_4_6_1.md","articles":[]},{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_6_1.md","mtime":"2024-09-11T06:09:50.520Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"next":{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},"previous":{"title":"4.6 常用最优化算法(Optimizer Operator)","level":"1.5.6","depth":2,"path":"Chapter_4/Language/cn/Docs_4_6.md","ref":"Chapter_4/Language/cn/Docs_4_6.md","articles":[{"title":"4.6.1 基础优化算法","level":"1.5.6.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_1.md","ref":"Chapter_4/Language/cn/Docs_4_6_1.md","articles":[]},{"title":"4.6.2 优化算法的优化-应对震荡","level":"1.5.6.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_2.md","ref":"Chapter_4/Language/cn/Docs_4_6_2.md","articles":[]},{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_6_1.md","mtime":"2024-09-12T04:11:10.950Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_6_2.html b/Chapter_4/Language/cn/Docs_4_6_2.html
index 6de46d1..5cdba68 100644
--- a/Chapter_4/Language/cn/Docs_4_6_2.html
+++ b/Chapter_4/Language/cn/Docs_4_6_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2208,7 +2195,7 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2250,7 +2237,7 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2195,7 +2182,7 @@ $\eta$ … $\epsilon$: the anti-explosion (numerical-stability) factor; $\epsilon = 10^{-8}$ is recommended so that it does not disturb the rest of the computation
Adam combines the optimization techniques of its predecessors very effectively, making it a culmination of these methods. As a result, Adam is one of the mainstream optimizers in common use today.
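To make the update rule concrete, here is a minimal NumPy sketch of a single Adam step under the usual defaults (learning rate lr, decay rates beta1/beta2, and the stability term eps = 1e-8 discussed above); the function and variable names are illustrative assumptions, not code from the book.

    import numpy as np

    def adam_step(theta, grad, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        # First moment: momentum-style running average of the gradient.
        m = beta1 * m + (1 - beta1) * grad
        # Second moment: RMSProp-style running average of the squared gradient.
        v = beta2 * v + (1 - beta2) * grad ** 2
        # Bias correction for the early steps (t starts at 1).
        m_hat = m / (1 - beta1 ** t)
        v_hat = v / (1 - beta2 ** t)
        # eps (the anti-explosion factor) keeps the division well-behaved.
        theta = theta - lr * m_hat / (np.sqrt(v_hat) + eps)
        return theta, m, v

    # Toy usage: minimize f(x) = x^2 starting from x = 3.
    theta, m, v = np.array([3.0]), np.zeros(1), np.zeros(1)
    for t in range(1, 201):
        grad = 2 * theta
        theta, m, v = adam_step(theta, grad, m, v, t)
    print(theta)  # converges toward 0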
@@ -2239,7 +2226,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"next":{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]},"previous":{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_6_4.md","mtime":"2024-09-11T06:09:50.530Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"next":{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]},"previous":{"title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","level":"1.5.6.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_3.md","ref":"Chapter_4/Language/cn/Docs_4_6_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_6_4.md","mtime":"2024-09-12T04:11:10.960Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_6_5.html b/Chapter_4/Language/cn/Docs_4_6_5.html
index e2979a8..e7d09b0 100644
--- a/Chapter_4/Language/cn/Docs_4_6_5.html
+++ b/Chapter_4/Language/cn/Docs_4_6_5.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2218,7 +2205,7 @@ Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved
@@ -2262,7 +2249,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"next":{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"path":"Chapter_4/Language/cn/Docs_4_7.md","ref":"Chapter_4/Language/cn/Docs_4_7.md","articles":[{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]}]},"previous":{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_6_5.md","mtime":"2024-09-11T06:09:50.530Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"next":{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"path":"Chapter_4/Language/cn/Docs_4_7.md","ref":"Chapter_4/Language/cn/Docs_4_7.md","articles":[{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]}]},"previous":{"title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","level":"1.5.6.4","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_4.md","ref":"Chapter_4/Language/cn/Docs_4_6_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_6_5.md","mtime":"2024-09-12T04:11:10.960Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_7.html b/Chapter_4/Language/cn/Docs_4_7.html
index 5f34d62..a5ac8c7 100644
--- a/Chapter_4/Language/cn/Docs_4_7.html
+++ b/Chapter_4/Language/cn/Docs_4_7.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2173,7 +2160,7 @@ 4.7 模型
At this starting stage, however, we still need to decide which specific model type to use to build a neural network aimed at our goal. The candidate types were in fact already introduced at the start of this chapter, namely the categories of deep neural networks (DNN [Deep Neural Network]) (see 4.1).
Here we mainly discuss the currently mainstream categories: CNN, RNN, GAN and Transformer.
@@ -2217,7 +2204,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"next":{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},"previous":{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_7.md","mtime":"2024-09-11T06:09:50.530Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"next":{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},"previous":{"title":"4.6.5 优化算法对比与使用建议","level":"1.5.6.5","depth":3,"path":"Chapter_4/Language/cn/Docs_4_6_5.md","ref":"Chapter_4/Language/cn/Docs_4_6_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_7.md","mtime":"2024-09-12T04:11:10.960Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_7_1.html b/Chapter_4/Language/cn/Docs_4_7_1.html
index 4e3615f..fc40b56 100644
--- a/Chapter_4/Language/cn/Docs_4_7_1.html
+++ b/Chapter_4/Language/cn/Docs_4_7_1.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2314,7 +2301,7 @@ CNN 的
It is reasonable to believe that, in the future, we will again see CNNs return to the stage of multimodal large language models (LLMs).
With this, the basic concepts and network structure of CNNs have essentially been covered. With this background, you can also get a rough grasp of the design of the various types of CNN networks; for the full picture, the remaining details require a careful study of the papers.
@@ -2358,7 +2345,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"next":{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},"previous":{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"path":"Chapter_4/Language/cn/Docs_4_7.md","ref":"Chapter_4/Language/cn/Docs_4_7.md","articles":[{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_7_1.md","mtime":"2024-09-11T06:09:50.530Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"next":{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},"previous":{"title":"4.7 模型结构速览","level":"1.5.7","depth":2,"path":"Chapter_4/Language/cn/Docs_4_7.md","ref":"Chapter_4/Language/cn/Docs_4_7.md","articles":[{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_7_1.md","mtime":"2024-09-12T04:11:10.960Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_7_2.html b/Chapter_4/Language/cn/Docs_4_7_2.html
index 3c37eaf..da4d599 100644
--- a/Chapter_4/Language/cn/Docs_4_7_2.html
+++ b/Chapter_4/Language/cn/Docs_4_7_2.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2304,7 +2291,7 @@ RNN 的
If RNNs see no further breakthroughs in the short term, it is likely that Transformers will gradually take their place. That does not mean RNNs will leave the stage of history, however: technology is always a contest, and until the ultimate questions of artificial intelligence are settled, no one can say for certain.
Note that what RNNs have always aimed to solve is the "memory" problem, not the "extraction" problem that CNNs address. The two do not conflict, and can even be combined to a degree, forming a CNN+RNN hybrid model: the CNN's feature-extraction (FE) sub-network produces high-level features, and an RNN then takes the place of the original CNN's feature-selection (FS) and result-output (RO) sub-networks, enabling time-sensitive training on those high-level features, as sketched below.
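As a rough illustration of that hybrid idea (a sketch under stated assumptions, not code from the book), the following PyTorch snippet lets a small convolutional front end extract a feature vector from each video frame, and an LSTM consume the per-frame features over time before a final classification layer; all layer sizes and names are illustrative.

    import torch
    import torch.nn as nn

    class CNNRNNHybrid(nn.Module):
        # CNN front end (feature extraction) + LSTM back end (time-sensitive modelling).
        def __init__(self, num_classes=10, feat_dim=128, hidden_dim=64):
            super().__init__()
            # FE sub-network: per-frame convolutional feature extractor.
            self.cnn = nn.Sequential(
                nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
                nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(),
                nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(32, feat_dim),
            )
            # The RNN stands in for the FS/RO sub-networks, processing features over time.
            self.rnn = nn.LSTM(feat_dim, hidden_dim, batch_first=True)
            self.head = nn.Linear(hidden_dim, num_classes)

        def forward(self, clips):
            # clips: (batch, time, channels, height, width)
            b, t, c, h, w = clips.shape
            feats = self.cnn(clips.reshape(b * t, c, h, w)).reshape(b, t, -1)
            out, _ = self.rnn(feats)      # (batch, time, hidden_dim)
            return self.head(out[:, -1])  # classify from the last time step

    # Toy usage: 2 clips of 8 RGB frames at 32x32 resolution.
    model = CNNRNNHybrid()
    print(model(torch.randn(2, 8, 3, 32, 32)).shape)  # torch.Size([2, 10])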
@@ -2348,7 +2335,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"next":{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]},"previous":{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_7_2.md","mtime":"2024-09-11T06:09:50.540Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"next":{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]},"previous":{"title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","level":"1.5.7.1","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_1.md","ref":"Chapter_4/Language/cn/Docs_4_7_1.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_7_2.md","mtime":"2024-09-12T04:11:10.970Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/Docs_4_7_3.html b/Chapter_4/Language/cn/Docs_4_7_3.html
index 00db092..f98c97e 100644
--- a/Chapter_4/Language/cn/Docs_4_7_3.html
+++ b/Chapter_4/Language/cn/Docs_4_7_3.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2469,7 +2456,7 @@ Transforme
With the classic model structure, the self-attention network (Transformer), now covered, our initial pass over the basic theory is also complete.
Starting from the next chapter, we formally step into the practical engineering side of audio and video processing.
@@ -2513,7 +2500,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"next":{"title":"【参考文献】","level":"1.5.8","depth":2,"path":"Chapter_4/Language/cn/References_4.md","ref":"Chapter_4/Language/cn/References_4.md","articles":[]},"previous":{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_7_3.md","mtime":"2024-09-11T06:09:50.540Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"next":{"title":"【参考文献】","level":"1.5.8","depth":2,"path":"Chapter_4/Language/cn/References_4.md","ref":"Chapter_4/Language/cn/References_4.md","articles":[]},"previous":{"title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","level":"1.5.7.2","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_2.md","ref":"Chapter_4/Language/cn/Docs_4_7_2.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/Docs_4_7_3.md","mtime":"2024-09-12T04:11:10.970Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_4/Language/cn/References_4.html b/Chapter_4/Language/cn/References_4.html
index 6dcc75d..e302e1a 100644
--- a/Chapter_4/Language/cn/References_4.html
+++ b/Chapter_4/Language/cn/References_4.html
@@ -2074,25 +2074,12 @@
- 5.1.4 分析环境准备
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2193,7 +2180,7 @@
[22] Bahdanau D, Cho K, Bengio Y. Neural machine translation by jointly learning to align and translate[J]. arXiv preprint arXiv:1409.0473, 2014.
[23] Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need[J]. Advances in neural information processing systems, 2017, 30.
@@ -2237,7 +2224,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"【参考文献】","level":"1.5.8","depth":2,"next":{"title":"五、音视频帧分析与数据处理","level":"1.6","depth":1,"path":"Chapter_5/Language/cn/Apex_5_Introduce.md","ref":"Chapter_5/Language/cn/Apex_5_Introduce.md","articles":[{"title":"5.1 音视频帧与环境准备","level":"1.6.1","depth":2,"path":"Chapter_5/Language/cn/Docs_5_1.md","ref":"Chapter_5/Language/cn/Docs_5_1.md","articles":[{"title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","level":"1.6.1.1","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_1.md","ref":"Chapter_5/Language/cn/Docs_5_1_1.md","articles":[]},{"title":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)","level":"1.6.1.2","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_2.md","ref":"Chapter_5/Language/cn/Docs_5_1_2.md","articles":[]},{"title":"5.1.3 视频分析库(PyOpenCV、Color-Science)","level":"1.6.1.3","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_3.md","ref":"Chapter_5/Language/cn/Docs_5_1_3.md","articles":[]},{"title":"5.1.4 分析环境准备","level":"1.6.1.4","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_4.md","ref":"Chapter_5/Language/cn/Docs_5_1_4.md","articles":[]},{"title":"5.1.5 其他分析软件","level":"1.6.1.5","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_5.md","ref":"Chapter_5/Language/cn/Docs_5_1_5.md","articles":[]}]},{"title":"【参考文献】","level":"1.6.2","depth":2,"path":"Chapter_5/Language/cn/References_5.md","ref":"Chapter_5/Language/cn/References_5.md","articles":[]}]},"previous":{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/References_4.md","mtime":"2024-09-11T06:09:50.540Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"【参考文献】","level":"1.5.8","depth":2,"next":{"title":"五、音视频帧分析与数据处理","level":"1.6","depth":1,"path":"Chapter_5/Language/cn/Apex_5_Introduce.md","ref":"Chapter_5/Language/cn/Apex_5_Introduce.md","articles":[{"title":"5.1 音视频帧与环境准备","level":"1.6.1","depth":2,"path":"Chapter_5/Language/cn/Docs_5_1.md","ref":"Chapter_5/Language/cn/Docs_5_1.md","articles":[{"title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","level":"1.6.1.1","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_1.md","ref":"Chapter_5/Language/cn/Docs_5_1_1.md","articles":[]},{"title":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)","level":"1.6.1.2","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_2.md","ref":"Chapter_5/Language/cn/Docs_5_1_2.md","articles":[]},{"title":"5.1.3 视频分析库(PyOpenCV、Color-Science)","level":"1.6.1.3","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_3.md","ref":"Chapter_5/Language/cn/Docs_5_1_3.md","articles":[]},{"title":"5.1.4 其他分析软件","level":"1.6.1.4","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_4.md","ref":"Chapter_5/Language/cn/Docs_5_1_4.md","articles":[]}]},{"title":"【参考文献】","level":"1.6.2","depth":2,"path":"Chapter_5/Language/cn/References_5.md","ref":"Chapter_5/Language/cn/References_5.md","articles":[]}]},"previous":{"title":"4.7.3 自注意力网络(Transformer)","level":"1.5.7.3","depth":3,"path":"Chapter_4/Language/cn/Docs_4_7_3.md","ref":"Chapter_4/Language/cn/Docs_4_7_3.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_4/Language/cn/References_4.md","mtime":"2024-09-12T04:11:10.970Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_5/Language/cn/Apex_5_Introduce.html b/Chapter_5/Language/cn/Apex_5_Introduce.html
index e22d88b..b89b170 100644
--- a/Chapter_5/Language/cn/Apex_5_Introduce.html
+++ b/Chapter_5/Language/cn/Apex_5_Introduce.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2183,14 +2170,13 @@ 目录
5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)
5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)
5.1.3 视频分析库(PyOpenCV、Color-Science)
-5.1.4 分析环境准备
-5.1.5 其他分析软件
+5.1.4 其他分析软件
【参考文献】
@@ -2234,7 +2220,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"五、音视频帧分析与数据处理","level":"1.6","depth":1,"next":{"title":"5.1 音视频帧与环境准备","level":"1.6.1","depth":2,"path":"Chapter_5/Language/cn/Docs_5_1.md","ref":"Chapter_5/Language/cn/Docs_5_1.md","articles":[{"title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","level":"1.6.1.1","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_1.md","ref":"Chapter_5/Language/cn/Docs_5_1_1.md","articles":[]},{"title":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)","level":"1.6.1.2","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_2.md","ref":"Chapter_5/Language/cn/Docs_5_1_2.md","articles":[]},{"title":"5.1.3 视频分析库(PyOpenCV、Color-Science)","level":"1.6.1.3","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_3.md","ref":"Chapter_5/Language/cn/Docs_5_1_3.md","articles":[]},{"title":"5.1.4 分析环境准备","level":"1.6.1.4","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_4.md","ref":"Chapter_5/Language/cn/Docs_5_1_4.md","articles":[]},{"title":"5.1.5 其他分析软件","level":"1.6.1.5","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_5.md","ref":"Chapter_5/Language/cn/Docs_5_1_5.md","articles":[]}]},"previous":{"title":"【参考文献】","level":"1.5.8","depth":2,"path":"Chapter_4/Language/cn/References_4.md","ref":"Chapter_4/Language/cn/References_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_5/Language/cn/Apex_5_Introduce.md","mtime":"2024-09-11T06:10:00.490Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"五、音视频帧分析与数据处理","level":"1.6","depth":1,"next":{"title":"5.1 音视频帧与环境准备","level":"1.6.1","depth":2,"path":"Chapter_5/Language/cn/Docs_5_1.md","ref":"Chapter_5/Language/cn/Docs_5_1.md","articles":[{"title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","level":"1.6.1.1","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_1.md","ref":"Chapter_5/Language/cn/Docs_5_1_1.md","articles":[]},{"title":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)","level":"1.6.1.2","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_2.md","ref":"Chapter_5/Language/cn/Docs_5_1_2.md","articles":[]},{"title":"5.1.3 视频分析库(PyOpenCV、Color-Science)","level":"1.6.1.3","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_3.md","ref":"Chapter_5/Language/cn/Docs_5_1_3.md","articles":[]},{"title":"5.1.4 其他分析软件","level":"1.6.1.4","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_4.md","ref":"Chapter_5/Language/cn/Docs_5_1_4.md","articles":[]}]},"previous":{"title":"【参考文献】","level":"1.5.8","depth":2,"path":"Chapter_4/Language/cn/References_4.md","ref":"Chapter_4/Language/cn/References_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 
30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_5/Language/cn/Apex_5_Introduce.md","mtime":"2024-09-14T03:29:06.950Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_5/Language/cn/Docs_5_1.html b/Chapter_5/Language/cn/Docs_5_1.html
index ba8cd86..dd2adbd 100644
--- a/Chapter_5/Language/cn/Docs_5_1.html
+++ b/Chapter_5/Language/cn/Docs_5_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2179,7 +2166,7 @@ 常用
在开始搭建分析环境之前,还需要对常用的工具库进行简单的介绍。由于分析所采用的工程手段,多为以 Python 为脚本语言编写的简单处理流,因此,我们需要使用到的基本库,皆为 Python 工具库。
于是为方便后续索引、使用、总结,从库功能性上做简单归类,可以分为:常用数学库 、视频分析库 和 音频分析库 。
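Since the analysis flows described here are plain Python scripts built on the library groups just listed, one quick way to confirm such an environment is in place is to import the libraries and print their versions. The package set below is an assumption drawn from the section titles (NumPy, Pandas, Matplotlib, SoundFile, Librosa, PyOpenCV), not a list prescribed by the book:

# Hypothetical environment check: imports follow the library groups named above.
import numpy
import pandas
import matplotlib
import soundfile
import librosa
import cv2  # the opencv-python / opencv-contrib-python binding

for mod in (numpy, pandas, matplotlib, soundfile, librosa, cv2):
    print(mod.__name__, getattr(mod, "__version__", "unknown"))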
@@ -2223,7 +2210,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"5.1 音视频帧与环境准备","level":"1.6.1","depth":2,"next":{"title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","level":"1.6.1.1","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_1.md","ref":"Chapter_5/Language/cn/Docs_5_1_1.md","articles":[]},"previous":{"title":"五、音视频帧分析与数据处理","level":"1.6","depth":1,"path":"Chapter_5/Language/cn/Apex_5_Introduce.md","ref":"Chapter_5/Language/cn/Apex_5_Introduce.md","articles":[{"title":"5.1 音视频帧与环境准备","level":"1.6.1","depth":2,"path":"Chapter_5/Language/cn/Docs_5_1.md","ref":"Chapter_5/Language/cn/Docs_5_1.md","articles":[{"title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","level":"1.6.1.1","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_1.md","ref":"Chapter_5/Language/cn/Docs_5_1_1.md","articles":[]},{"title":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)","level":"1.6.1.2","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_2.md","ref":"Chapter_5/Language/cn/Docs_5_1_2.md","articles":[]},{"title":"5.1.3 视频分析库(PyOpenCV、Color-Science)","level":"1.6.1.3","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_3.md","ref":"Chapter_5/Language/cn/Docs_5_1_3.md","articles":[]},{"title":"5.1.4 分析环境准备","level":"1.6.1.4","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_4.md","ref":"Chapter_5/Language/cn/Docs_5_1_4.md","articles":[]},{"title":"5.1.5 其他分析软件","level":"1.6.1.5","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_5.md","ref":"Chapter_5/Language/cn/Docs_5_1_5.md","articles":[]}]},{"title":"【参考文献】","level":"1.6.2","depth":2,"path":"Chapter_5/Language/cn/References_5.md","ref":"Chapter_5/Language/cn/References_5.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_5/Language/cn/Docs_5_1.md","mtime":"2024-09-11T06:10:00.510Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"5.1 音视频帧与环境准备","level":"1.6.1","depth":2,"next":{"title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","level":"1.6.1.1","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_1.md","ref":"Chapter_5/Language/cn/Docs_5_1_1.md","articles":[]},"previous":{"title":"五、音视频帧分析与数据处理","level":"1.6","depth":1,"path":"Chapter_5/Language/cn/Apex_5_Introduce.md","ref":"Chapter_5/Language/cn/Apex_5_Introduce.md","articles":[{"title":"5.1 音视频帧与环境准备","level":"1.6.1","depth":2,"path":"Chapter_5/Language/cn/Docs_5_1.md","ref":"Chapter_5/Language/cn/Docs_5_1.md","articles":[{"title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","level":"1.6.1.1","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_1.md","ref":"Chapter_5/Language/cn/Docs_5_1_1.md","articles":[]},{"title":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)","level":"1.6.1.2","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_2.md","ref":"Chapter_5/Language/cn/Docs_5_1_2.md","articles":[]},{"title":"5.1.3 视频分析库(PyOpenCV、Color-Science)","level":"1.6.1.3","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_3.md","ref":"Chapter_5/Language/cn/Docs_5_1_3.md","articles":[]},{"title":"5.1.4 其他分析软件","level":"1.6.1.4","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_4.md","ref":"Chapter_5/Language/cn/Docs_5_1_4.md","articles":[]}]},{"title":"【参考文献】","level":"1.6.2","depth":2,"path":"Chapter_5/Language/cn/References_5.md","ref":"Chapter_5/Language/cn/References_5.md","articles":[]}]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 
(Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_5/Language/cn/Docs_5_1.md","mtime":"2024-09-12T04:11:11.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_5/Language/cn/Docs_5_1_1.html b/Chapter_5/Language/cn/Docs_5_1_1.html
index 438da48..1dc390d 100644
--- a/Chapter_5/Language/cn/Docs_5_1_1.html
+++ b/Chapter_5/Language/cn/Docs_5_1_1.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -3009,7 +2996,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -3085,7 +3072,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2652,8 +2639,8 @@ <BackgroundSubtractorMOG2>,
<BackgroundSubtractorKNN>
光流计算:
-calcOpticalFlowFarneback (HS 法),
-calcOpticalFlowPyrLK (LK 法)
+calcOpticalFlowFarneback (HS 法 ),
+calcOpticalFlowPyrLK (LK 法 )
运动检测:
CamShift ,
meanShift
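As a rough sketch of how the background-subtraction, optical-flow and motion-tracking entries above are typically combined with opencv-python (the video path and the Farneback parameter values are illustrative assumptions, not values taken from this book):

import cv2

cap = cv2.VideoCapture("sample.mp4")                 # hypothetical input clip
subtractor = cv2.createBackgroundSubtractorMOG2()    # background subtraction (MOG2)
ok, prev = cap.read()
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # dense optical flow (Farneback): returns an HxWx2 array of per-pixel displacements
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    fg_mask = subtractor.apply(frame)                # foreground mask for this frame
    prev_gray = gray

cap.release()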
@@ -2688,7 +2675,7 @@ 其次,是扩展库(opencv-contrib-python)所包含的额外模块。
扩展库涵盖了较多 传统计算机视觉(CV)高级算法 ,部分使用配参会较核心库更为复杂。同时,其中涉及 3D 匹配 的功能,大部分会用到 空间位姿计算(Spatial Posture Calculation) 来表示物体 在场景中的定位情况 。而对于此类涉及具有实际意义 3D 场景或物体的算法,想要展示其处理结果,一般都需要构建空间化的渲染管线来完成 ,而无法再直接使用 Matplotlib 做快速绘制(除非引入外部位姿库,或自实现)。鉴于此,有关 3D 绘制的部分,我们于未来再行讨论。
现在,让我们来看都有哪些 功能扩展 。
-生物识别扩展模块(cv2.bioinspired)的常用函数(简,仅列出名称),用于感知模拟(重要):
+生物识别扩展模块(cv2.bioinspired)的常用函数(简),用于感知模拟(重要):
视网膜模型(需 opencv-contrib-python 扩展的 cv2.bioinspired_Retina 模块),通过(cv2.)bioinspired_Retina.create 创建实例:
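A minimal sketch of driving that retina model from Python, assuming an opencv-contrib-python build that exposes the cv2.bioinspired module (the input file name is a placeholder, and the exact binding spelling can vary between OpenCV versions):

import cv2

img = cv2.imread("frame.png")                                  # hypothetical input frame
# create a Retina instance sized (width, height) to the input image
retina = cv2.bioinspired_Retina.create((img.shape[1], img.shape[0]))
retina.run(img)                                                # push one frame through the retinal filter
parvo = retina.getParvo()                                      # detail/colour (parvocellular) channel
magno = retina.getMagno()                                      # motion/transient (magnocellular) channel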
-表面检测点对特征匹配(PPF)扩展模块(cv2.ppf_match_3d)的常用函数(简,仅列出名称):
+表面检测点对特征匹配(PPF)扩展模块(cv2.ppf_match_3d)的常用函数,简:
点云模型(需 opencv-contrib-python 扩展的 cv2.ppf_match_3d 模块),通过(cv2.) ppf_match_3d.loadPLYSimple 加载 多边形点云格式(PLY [Polygon File Format])文件(.ply) ,来创建点云模型实例:
<Mat> 模型被加载 PLY 文件的光栅化与法线等信息,以 OpenCV 的 Mat 格式储存
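A short sketch of that loading step, assuming the opencv-contrib-python binding; the file name is a placeholder, and the second argument asks loadPLYSimple to read per-vertex normals alongside the coordinates:

import cv2

# load an Nx6 array of x, y, z, nx, ny, nz from a hypothetical model.ply
model = cv2.ppf_match_3d.loadPLYSimple("model.ply", 1)
print(model.shape)   # rows = points, columns = coordinates plus normals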
@@ -2842,7 +2829,7 @@ <Entity>.forward
-GPU 加速扩展模块(cv2.cuda)的常用函数,是同名基础模块算法 CUDA 加速版,仅列出名称:
+GPU 加速扩展模块(cv2.cuda)的常用函数,是同名基础模块算法 CUDA 加速版,简:
GPU 信息:
cuda.getCudaEnabledDeviceCount ,
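For the CUDA-accelerated twins of those functions, a minimal sanity check could look like the following, assuming OpenCV was built with CUDA support (otherwise the device count is simply 0) and using bilateralFilter only as one representative same-named GPU routine:

import cv2

if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    gpu_frame = cv2.cuda_GpuMat()                    # device-side image container
    gpu_frame.upload(cv2.imread("frame.png"))        # hypothetical input frame
    blurred = cv2.cuda.bilateralFilter(gpu_frame, 9, 75, 75)   # GPU version of the CPU filter
    result = blurred.download()                      # copy the result back to host memory
else:
    print("No CUDA-capable device visible to OpenCV")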
@@ -3164,7 +3151,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2178,7 +2165,7 @@
[9] Bradski, Gary, and Adrian Kaehler. "OpenCV." Dr. Dobb’s journal of software tools 3.2 (2000).
[10] P. J. Besl and N. D. McKay, "A method for registration of 3-D shapes," in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 14, no. 2, pp. 239-256, Feb. 1992, doi: 10.1109/34.121791.
@@ -2206,7 +2193,7 @@ No results matching "
+
@@ -2218,7 +2205,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"【参考文献】","level":"1.6.2","depth":2,"previous":{"title":"5.1.5 其他分析软件","level":"1.6.1.5","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_5.md","ref":"Chapter_5/Language/cn/Docs_5_1_5.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_5/Language/cn/References_5.md","mtime":"2024-09-11T06:10:00.570Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":"../../..","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"【参考文献】","level":"1.6.2","depth":2,"previous":{"title":"5.1.4 其他分析软件","level":"1.6.1.4","depth":3,"path":"Chapter_5/Language/cn/Docs_5_1_4.md","ref":"Chapter_5/Language/cn/Docs_5_1_4.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"Chapter_5/Language/cn/References_5.md","mtime":"2024-09-12T04:11:11.040Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":"../../..","book":{"language":""}});
});
diff --git a/Chapter_5/Pictures/tools_Audiacity.png b/Chapter_5/Pictures/tools_Audiacity.png
new file mode 100644
index 0000000..79c9a50
Binary files /dev/null and b/Chapter_5/Pictures/tools_Audiacity.png differ
diff --git a/Chapter_5/Pictures/tools_SonicVisualiser_3.0.png b/Chapter_5/Pictures/tools_SonicVisualiser_3.0.png
new file mode 100644
index 0000000..a086710
Binary files /dev/null and b/Chapter_5/Pictures/tools_SonicVisualiser_3.0.png differ
diff --git a/Chapter_5/Pictures/tools_StreamEye.png b/Chapter_5/Pictures/tools_StreamEye.png
new file mode 100644
index 0000000..1b3458a
Binary files /dev/null and b/Chapter_5/Pictures/tools_StreamEye.png differ
diff --git a/DONATE.html b/DONATE.html
index 3e3ed8a..53f5470 100644
--- a/DONATE.html
+++ b/DONATE.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2184,7 +2171,7 @@ 买杯咖啡
Best Wish!💗
@@ -2228,7 +2215,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":" =[>> 赞助本作© <<]= ","level":"1.1.4","depth":2,"next":{"title":"一、音频的保存与还原","level":"1.2","depth":1,"path":"Chapter_1/Language/cn/Apex_1_Introduce.md","ref":"Chapter_1/Language/cn/Apex_1_Introduce.md","articles":[{"title":"1.1 音频基础","level":"1.2.1","depth":2,"path":"Chapter_1/Language/cn/Docs_1_1.md","ref":"Chapter_1/Language/cn/Docs_1_1.md","articles":[]},{"title":"1.2 声波三要素(Three Elements of Acoustics)","level":"1.2.2","depth":2,"path":"Chapter_1/Language/cn/Docs_1_2.md","ref":"Chapter_1/Language/cn/Docs_1_2.md","articles":[]},{"title":"1.3 声音三要素(Three Elements of Sounds)","level":"1.2.3","depth":2,"path":"Chapter_1/Language/cn/Docs_1_3.md","ref":"Chapter_1/Language/cn/Docs_1_3.md","articles":[{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_1.md","ref":"Chapter_1/Language/cn/Docs_1_3_1.md","articles":[]},{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]}]},{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"path":"Chapter_1/Language/cn/Docs_1_4.md","ref":"Chapter_1/Language/cn/Docs_1_4.md","articles":[{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]}]},{"title":"1.5 声音数字化","level":"1.2.5","depth":2,"path":"Chapter_1/Language/cn/Docs_1_5.md","ref":"Chapter_1/Language/cn/Docs_1_5.md","articles":[{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},{"title":"1.5.2 模数转换(A/D [Analog-to-Digital])","level":"1.2.5.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_2.md","ref":"Chapter_1/Language/cn/Docs_1_5_2.md","articles":[]},{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_4.md","ref":"Chapter_1/Language/cn/Docs_1_5_4.md","articles":[]}]},{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"path":"Chapter_1/Language/cn/Docs_1_6.md","ref":"Chapter_1/Language/cn/Docs_1_6.md","articles":[{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},{"title":"1.6.2 无压缩编码格式(Uncompressed 
Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]}]},{"title":"【参考文献】","level":"1.2.7","depth":2,"path":"Chapter_1/Language/cn/References_1.md","ref":"Chapter_1/Language/cn/References_1.md","articles":[]}]},"previous":{"title":" =[>> 难度向导© <<]= ","level":"1.1.3","depth":2,"path":"GUIDER.md","ref":"GUIDER.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"DONATE.md","mtime":"2024-09-11T06:09:50.540Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":".","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":" =[>> 赞助本作© <<]= ","level":"1.1.4","depth":2,"next":{"title":"一、音频的保存与还原","level":"1.2","depth":1,"path":"Chapter_1/Language/cn/Apex_1_Introduce.md","ref":"Chapter_1/Language/cn/Apex_1_Introduce.md","articles":[{"title":"1.1 音频基础","level":"1.2.1","depth":2,"path":"Chapter_1/Language/cn/Docs_1_1.md","ref":"Chapter_1/Language/cn/Docs_1_1.md","articles":[]},{"title":"1.2 声波三要素(Three Elements of Acoustics)","level":"1.2.2","depth":2,"path":"Chapter_1/Language/cn/Docs_1_2.md","ref":"Chapter_1/Language/cn/Docs_1_2.md","articles":[]},{"title":"1.3 声音三要素(Three Elements of Sounds)","level":"1.2.3","depth":2,"path":"Chapter_1/Language/cn/Docs_1_3.md","ref":"Chapter_1/Language/cn/Docs_1_3.md","articles":[{"title":"1.3.1 音高(Pitch)","level":"1.2.3.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_1.md","ref":"Chapter_1/Language/cn/Docs_1_3_1.md","articles":[]},{"title":"1.3.2 响度(Loudness)","level":"1.2.3.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_2.md","ref":"Chapter_1/Language/cn/Docs_1_3_2.md","articles":[]},{"title":"1.3.3 音色(Timbre)","level":"1.2.3.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_3_3.md","ref":"Chapter_1/Language/cn/Docs_1_3_3.md","articles":[]}]},{"title":"1.4 声音的解构","level":"1.2.4","depth":2,"path":"Chapter_1/Language/cn/Docs_1_4.md","ref":"Chapter_1/Language/cn/Docs_1_4.md","articles":[{"title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","level":"1.2.4.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_1.md","ref":"Chapter_1/Language/cn/Docs_1_4_1.md","articles":[]},{"title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","level":"1.2.4.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_2.md","ref":"Chapter_1/Language/cn/Docs_1_4_2.md","articles":[]},{"title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","level":"1.2.4.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_3.md","ref":"Chapter_1/Language/cn/Docs_1_4_3.md","articles":[]},{"title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","level":"1.2.4.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_4.md","ref":"Chapter_1/Language/cn/Docs_1_4_4.md","articles":[]},{"title":"1.4.5 工程:频谱图(Spectrum)","level":"1.2.4.5","depth":3,"path":"Chapter_1/Language/cn/Docs_1_4_5.md","ref":"Chapter_1/Language/cn/Docs_1_4_5.md","articles":[]}]},{"title":"1.5 声音数字化","level":"1.2.5","depth":2,"path":"Chapter_1/Language/cn/Docs_1_5.md","ref":"Chapter_1/Language/cn/Docs_1_5.md","articles":[{"title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","level":"1.2.5.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_1.md","ref":"Chapter_1/Language/cn/Docs_1_5_1.md","articles":[]},{"title":"1.5.2 模数转换(A/D [Analog-to-Digital])","level":"1.2.5.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_2.md","ref":"Chapter_1/Language/cn/Docs_1_5_2.md","articles":[]},{"title":"1.5.3 数模转换(D/A [Digital-to-Analog])","level":"1.2.5.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_3.md","ref":"Chapter_1/Language/cn/Docs_1_5_3.md","articles":[]},{"title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","level":"1.2.5.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_5_4.md","ref":"Chapter_1/Language/cn/Docs_1_5_4.md","articles":[]}]},{"title":"1.6 音频的存储","level":"1.2.6","depth":2,"path":"Chapter_1/Language/cn/Docs_1_6.md","ref":"Chapter_1/Language/cn/Docs_1_6.md","articles":[{"title":"1.6.1 音频格式(Audio Format)","level":"1.2.6.1","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_1.md","ref":"Chapter_1/Language/cn/Docs_1_6_1.md","articles":[]},{"title":"1.6.2 无压缩编码格式(Uncompressed 
Encode)","level":"1.2.6.2","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_2.md","ref":"Chapter_1/Language/cn/Docs_1_6_2.md","articles":[]},{"title":"1.6.3 无损压缩编码格式(Lossless Encode)","level":"1.2.6.3","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_3.md","ref":"Chapter_1/Language/cn/Docs_1_6_3.md","articles":[]},{"title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","level":"1.2.6.4","depth":3,"path":"Chapter_1/Language/cn/Docs_1_6_4.md","ref":"Chapter_1/Language/cn/Docs_1_6_4.md","articles":[]}]},{"title":"【参考文献】","level":"1.2.7","depth":2,"path":"Chapter_1/Language/cn/References_1.md","ref":"Chapter_1/Language/cn/References_1.md","articles":[]}]},"previous":{"title":" =[>> 难度向导© <<]= ","level":"1.1.3","depth":2,"path":"GUIDER.md","ref":"GUIDER.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 
(Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"DONATE.md","mtime":"2024-09-12T04:11:11.050Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":".","book":{"language":""}});
});
diff --git a/GUIDER.html b/GUIDER.html
index fbe6918..ec00994 100644
--- a/GUIDER.html
+++ b/GUIDER.html
@@ -2074,25 +2074,12 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2216,7 +2203,7 @@
-
+
-
+
- 5.1.4 分析环境准备
-
-
-
-
-
-
-
-
-
-
-
-
- 5.1.5 其他分析软件
+ 5.1.4 其他分析软件
@@ -2403,8 +2390,7 @@ 音࢜
5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)
5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)
5.1.3 视频分析库(PyOpenCV、Color-Science)
-5.1.4 分析环境准备
-5.1.5 其他分析软件
+5.1.4 其他分析软件
【参考文献】
@@ -2416,7 +2402,7 @@ 音࢜
本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议 进行许可。
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License .
@@ -2456,7 +2442,7 @@ No results matching "
var gitbook = gitbook || [];
gitbook.push(function() {
- gitbook.page.hasChanged({"page":{"title":"《音视频开发技术:原理与实践》©","level":"1.1","depth":1,"next":{"title":" =[>> 关于作者© <<]= ","level":"1.1.1","depth":2,"path":"AUTHOR.md","ref":"AUTHOR.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"README.md","mtime":"2024-09-11T06:10:00.740Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-12T02:19:31.084Z"},"basePath":".","book":{"language":""}});
+ gitbook.page.hasChanged({"page":{"title":"《音视频开发技术:原理与实践》©","level":"1.1","depth":1,"next":{"title":" =[>> 关于作者© <<]= ","level":"1.1.1","depth":2,"path":"AUTHOR.md","ref":"AUTHOR.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-default","highlight","fontsettings","-lunr","-search","search-pro","insert-logo","custom-favicon","rss","github","expandable-chapters","chapter-fold","back-to-top-button","tbfed-pagefooter","simple-page-toc","graph","chart","advanced-emoji","splitter","code","katex","-sharing","sharing-plus","alerts","url-embed","livereload"],"root":".","styles":{"website":"styles/common.css","ebook":"styles/common.css","pdf":"styles/common.css","mobi":"styles/common.css","epub":"styles/common.css"},"pluginsConfig":{"tbfed-pagefooter":{"copyright":"Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved","modify_label":"Last Updated:","modify_format":"YYYY-MM-DD HH:mm:ss"},"chapter-fold":{},"github":{"url":"https://github.com/Windsander"},"livereload":{},"simple-page-toc":{"maxDepth":3,"skipFirstH1":true},"splitter":{},"search-pro":{},"sharing-plus":{"qq":false,"all":["facebook","google","twitter","instapaper","linkedin","pocket","stumbleupon"],"douban":false,"facebook":true,"weibo":false,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":true,"messenger":false,"line":false,"vk":false,"pocket":true,"google":false,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":false},"code":{"copyButtons":true},"graph":{},"hide-element":{"elements":[".gitbook-link"]},"katex":{},"fontsettings":{"family":"sans","size":1,"theme":"white"},"rss":{"categories":["gitbook"],"title":"《音视频开发技术:原理与实践》©","description":"音视频开发资料查询好帮手","author":"李述博 (Arikan.Li)","feed_url":"https://arikanli.cyberfederal.io/rss","site_url":"https://arikanli.cyberfederal.io/","managingEditor":"me@arikanli.com","webMaster":"me@arikanli.com"},"highlight":{},"favicon":"/Cover/book_favicon.ico","back-to-top-button":{},"alerts":{},"custom-favicon":{},"url-embed":{},"advanced-emoji":{"embedEmojis":false},"sharing":{"qq":false,"all":[],"douban":false,"facebook":false,"weibo":true,"instapaper":false,"whatsapp":false,"hatenaBookmark":false,"twitter":false,"messenger":false,"line":false,"vk":false,"pocket":false,"google":true,"viber":false,"stumbleupon":false,"qzone":false,"linkedin":true},"theme-default":{"showLevel":false,"styles":{"ebook":"styles/ebook.css","epub":"styles/epub.css","mobi":"styles/mobi.css","pdf":"styles/pdf.css","print":"styles/print.css","website":"styles/website.css"}},"chart":{"type":"c3"},"insert-logo":{"style":"background: none; max-height: 30px; min-height: 30px","url":"/Cover/book_logo.png"},"expandable-chapters":{}},"theme":"default","author":"李述博 (Arikan.Li)","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"《音视频开发技术:原理与实践》©","language":"zh-hans","gitbook":"*","description":"音视频开发资料查询好帮手"},"file":{"path":"README.md","mtime":"2024-09-14T03:28:51.170Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2024-09-14T03:29:12.787Z"},"basePath":".","book":{"language":""}});
});
diff --git a/rss b/rss
index 3e8c5c4..b1a79fe 100644
--- a/rss
+++ b/rss
@@ -4,7 +4,7 @@
https://arikanli.cyberfederal.io/
RSS for Node
- Thu, 12 Sep 2024 02:20:15 GMT
+ Sat, 14 Sep 2024 03:29:57 GMT
@@ -971,6 +971,13 @@
https://arikanli.cyberfederal.io/Chapter_5/Language/cn/Docs_5_1_3.html
+ -
+
+
+ https://arikanli.cyberfederal.io/Chapter_5/Language/cn/Docs_5_1_4.html
+ https://arikanli.cyberfederal.io/Chapter_5/Language/cn/Docs_5_1_4.html
+
+
-
diff --git a/search_plus_index.json b/search_plus_index.json
index 6c7bb8f..2323674 100644
--- a/search_plus_index.json
+++ b/search_plus_index.json
@@ -1 +1 @@
-{"./":{"url":"./","title":"《音视频开发技术:原理与实践》©","keywords":"","body":"《音视频开发技术:原理与实践》© =[>> 关于作者© =[>> 赞助本作© =[>> 版权申明© 目标 对于音视频工程师/架构师来说,日常工作长中总会有大量的知识技术积累,亟待梳理以期望能够被快速检索查阅。但由于工程技术所处领域的复合特征,往往针对一个工程问题所需要的专业知识,不论深浅程度,都会横跨几门学科。而想要获取有效的处理问题所能使用的信息,都需要依次回顾、搜集和关联。这样必不可少会花费大量时间查阅各类大部头资料和文献。而这么做往往是因为,对于待解答问题非常重要的知识点,分布碎片化导致的。 音视频规格的跨度构成了本身技术的多个维度,使得我们并不能按照以往的工程思维,从单一角度来考虑涉及此类型的复合问题。 因此,本书的目的旨在以工程解决方案的实践思路过程,对相关联的各学科核心知识进行串联。以求用一套完整且关联的技术栈模板,来贯穿当下多媒体技术的所有核心技术模块。从而 为读者提供针对多媒体(音视频)分析/处理/整合/架构方面,有效技术指导与学习路线。 特色 本书结合作者工作实践,对架构师日常工作工程中涉及使用到的:数字信号处理、计算机图形学、色彩学、相关工程规格规范、驱动特征及软件框架设计等,领域的专业学科知识进行了梳理和提炼。从音视频工程师不同的技术阶段需要面临的问题为出发点,将 全书分为,音视频基础与音视分析、流媒体规格与简易编解码播放框架设计、通用统一化音视频编辑框架与渲染驱动设计,三大阶段。每一阶段,统一采用知识图谱串联工程规格与编码实践,全面讲解对应技术阶段下需要掌握的,多媒体(音视频)技术之简史、原理、算法、设计及相关推导、制定、架构与应用。 基于此,全书按照技术逐级递进的关系,构成了整体音视频从数据分析、编解码器开发、播放器开发到图形化与图像处理、特效与特效引擎的 完整技术栈。使得全书每个章节内部自成一体但确相互关联,从而便于做技术字典、工程手册和整体学习之用。 面向 书中原理与技术面向全平台,因此主要开发语言为 C/C++。部分平台化及数据分析场景,会一定程度的应用到 C#、Java、Python 等其他语言。本书适合: 初入音视频开发的新手: 本书为您提供了完整学习路径,对于打算初入本行业的开发者,本书能够帮您梳理完整的音视频开发技术路线。协助您成功入行。 有基础的音视频工程师: 本书为您提供了知识技术字典,对于日常开发工作中涉及到的相关问题分析,本书能够帮您快速定位到所需要的核心知识点,进而方便您进一步根据所给信息来做出判断,或根据提示方向来进行深度资料查阅。 多媒体编解开发者友好: 本书为您提供了ITU-T的编解码协议技术索引和讲解,您可以快速通过本书查阅常用 H.264、H.265、H.266 的关键资料和技术对比。 流媒体协议开发者友好: 本书为您提供了常用流协议的拆分解析,您可以快速通过本书查阅常用 RTP/RTCP、RTMP、HLS 的规格设定和消息类型。 学研成果转向生产部署: 本书为您提供了理论转实践的事例方案,对于将研究成果转换到实际工业生产活动的老师,本书能够为您介绍一些现已有成功实践的多媒体方面学转产探索。协助您梳理思路。 硬核的多媒体技术大咖: 若您是深耕此领域多年的老师,您不妨将本书当作一次有趣的思维之旅,从不同的视角感受音视频工程魅力,希望本书能为您提供一些帮助。当然,也更希望获得您的交流。 为方便您定位章节难度,此处提供 =[>> 难度向导 建议。 受限于作者,本书难免存在一些不足,您可以 Book-issues 进行反馈,感谢您的帮助! 目录 音视频工程基础 一、音频的保存与还原 1.1 音频基础 1.2 声波三要素(Three Elements of Acoustics) 1.3 声音三要素(Three Elements of Sounds) 1.3.1 音高(Pitch) 1.3.2 响度(Loudness) 1.3.3 音色(Timbre) 1.4 声音的解构 1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths) 1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz) 1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour]) 1.4.4 感观:频响曲线(FRC [Frequency Response Contour]) 1.4.5 工程:频谱图(Spectrum) 1.5 声音数字化 1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source) 1.5.2 模数转换(A/D [Analog-to-Digital]) 1.5.3 数模转换(D/A [Digital-to-Analog]) 1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM) 1.6 音频的存储 1.6.1 音频格式(Audio Format) 1.6.2 无压缩编码格式(Uncompressed Encode) 1.6.3 无损压缩编码格式(Lossless Encode) 1.6.4 有损压缩编码格式(Uncompressed Encode) 【参考文献】 二、色彩的运用与存储 2.1 色彩基础 2.2 颜色三要素(Three Elements of Color) 2.2.1 色调(Hue) 2.2.2 饱和度(Saturation) 2.2.3 光亮度(Luminance) 2.3 色彩的衡量 2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化 2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space) 2.3.3 经典三原色函数(Trichromatic Primaries Functions) 2.3.4 经典三刺激函数(Tristimulus Values Functions) 2.3.5 现代色彩体系(Modern Color System) 2.4 色彩的对比 2.4.1 色域(Color Gamut ) 2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram) 2.4.3 色差(Chromatic Aberration) 2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature) 2.4.5 标准光源(Standard Illuminants)& 白点(White Point) 2.4.6 显色指数(Color Rendering Index) 2.5 经典色彩空间(Classical Color Space) 2.5.1 光学三原色色彩空间(RGB) 2.5.2 颜料三原色色彩空间(CMY / CMYK ) 2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space) 2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space) 2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space) 2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* Color Space) 2.5.7 颜色三要素色彩空间(HSV / HSI / HSL) 2.6 色彩的存储 2.6.1 色彩格式(Color Format)与色彩存储 2.6.2 RGB 体系色彩格式 2.6.3 YUV 体系色彩格式 【参考文献】 三、音视频常用基础算法 3.1 信号分析的核心算法 - 傅立叶变换 3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT) 3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT) 3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT) 3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach) 3.2 频率信息提取 - 常用滤波算法 3.2.1 高斯滤波(Gauss Filter) 3.2.2 双边滤波(Bilateral Filter) 3.2.3 拉普拉斯滤波(Laplacian 
Filter) 3.2.4 马尔滤波(Marr Filter) 3.2.5 索贝尔滤波(Sobel Filter) 3.2.6 各向异性扩散(Anisotropic Diffusion) 3.3 时间冗余控制 - 常用特征提取与朴素阈值处理 3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 3.3.2 朴素目标检测结果度量 - IoU & GIoU 3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window) 3.4 空域冗余控制 - 基础光流算法与色度压缩 3.4.1 传统光流法(Classic Optical Flow Methods) 3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow]) 3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow]) 3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling]) 3.5 频域冗余控制 - 基础变换编码 3.5.1 整数离散正余弦变换(DST/DCT) 3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform]) 3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform]) 【参考文献】 四、音视频机器学习基础 4.1 发展概览 4.2 模型工程基础 4.2.1 算子(Operator)& 层(Layer) 4.2.2 神经元(Neuron) 4.2.3 神经网络(NN [Neural Network]) 4.2.4 特征选择(Feature Selection) 4.3 经典激活函数(Classic Activation Function) 4.3.1 Sigmoid 4.3.2 Tanh 4.3.3 Softplus 4.3.4 ReLU 族 4.3.5 ELU & SELU 4.3.6 Mish 4.3.7 Swish 族 4.4 连接函数/衰减函数(Connection/Attenuation Function) 4.4.1 Dropout 4.4.2 Maxout 4.4.3 SoftMax 4.5 损失函数(Loss Function) 4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error]) 4.5.2 回归项-均方误差(MSE [Mean Squared Error]) 4.5.3 回归项-休伯损失(Huber Loss) 4.5.4 回归项-分位数损失(Quantile Loss) 4.5.5 分类项-对数损失(Log Loss) 4.5.6 分类项-交叉熵损失(Cross Entropy Loss) 4.5.7 分类项-合页损失(Hinge Loss) 4.5.8 分类项-对比损失(Contrastive Loss) 4.5.9 分类项-三元损失(Triplet Loss) 4.5.10 分类项-对组排异损失(N-Pair Loss) 4.5.11 正则项-L1 惩罚 4.5.12 正则项-L2 惩罚 4.6 常用最优化算法(Optimizer Operator) 4.6.1 基础优化算法 4.6.2 优化算法的优化-应对震荡 4.6.3 优化算法的优化-应对重点强(弱)化更新 4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation]) 4.6.5 优化算法对比与使用建议 4.7 模型结构速览 4.7.1 卷积神经网络(CNN [Convolutional Neural Network]) 4.7.2 循环神经网络(RNN [Recurrent Neural Network]) 4.7.3 自注意力网络(Transformer) 【参考文献】 五、音视频帧分析与数据处理 5.1 音视频帧与环境准备 5.1.1 常用数学库(Numpy、Pandas、Mateplotlib) 5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio) 5.1.3 视频分析库(PyOpenCV、Color-Science) 5.1.4 分析环境准备 5.1.5 其他分析软件 【参考文献】 本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。 This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 "},"AUTHOR.html":{"url":"AUTHOR.html","title":" =[>> 关于作者© <<]= ","keywords":"","body":"关于作者 本书由 李述博©(Arikan.Li©)独立完成 李述博(Arikan.Li)👇(This guy!)👇 CV工程师 & 架构师 & Baker 借用名言( 0_0): “名字仅是代号,知识才是真理。” 编写有感( )_T): 写作难度较大,各种资料查阅、相关知识点梳理以及辅助Demo和配套项目开发,带来了极大的压力。因此,您的您宝贵支持与意见,将是作者的重要的力量之源。 如何联系( -w-): 您可以通过 知乎 或 Github 联系到作者,感谢您的帮助。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"COPYRIGHT.html":{"url":"COPYRIGHT.html","title":" =[>> 版权申明© <<]= ","keywords":"","body":"版权申明© 本书由 李述博©(Arikan.Li©)独立完成 本人的所有作品,包括但不限于文字,图片等内容受《著作权》法的保护,凡未经权利人明确书面授权,转载上述内容,本人有权追究侵权行为。 本人关于图片作品版权的声明: 本人在此刊载的原创作品,其版权归属本人所有。 任何传统媒体、商业公司或其他网站未经本人的授权许可,不得擅自从本人转载、转贴或者以任何其他方式复制、使用上述作品。 传统媒体、商业公司或其他网站对上述作品的任何使用,均须事先与本人联系。 对于侵犯本人的合法权益的公司、媒体、网站和人员,本人聘请的律师受本人的委托,将采取必要的措施,通过包括法律诉讼在内的途径来维护本人的合法权益。 特此声明,敬请合作。 通常情况下,允许 个人及非商业使用转载,但是请标注 作者和链接。 本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。 This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"GUIDER.html":{"url":"GUIDER.html","title":" =[>> 难度向导© <<]= ","keywords":"","body":"难度向导 《入门基础》 阶段:音视频基础与音视分析,入门必需掌握之基础 入门的五章也是概念与基础理论最多的章节了。这几张的工程实践较少,但非常重要的原理、规格、定义及多。是后续更为复杂的工程实践中,被音视频工程师们做为根基般的存在。因此非常重要。 第一章 数字音频的保存与还原 本章从声学和心理声学角度对音频的相关工程量, 以及数模转换和分析对比的关键概念进行了阐述。 结合发展与规格演进,提供整体音频工程概念的梳理。 第二章 图像色彩的运用与存储 从色彩学发展史到工业体系对色彩的规格定义,章节大章以工程概念的递进关系进行介绍, 并在小章节中按照相关规格原理的发现提出时间顺序进行了由浅入深的推导说明。从而保证前后逻辑和发展上的连贯性。 第三章 音视频常用基础算法 属于纯数理基础,对音视频开发过程中常用的 图像/音频 的 分析/处理 算法,进行了梳理和讲解。 本章列出的部分,是作者在筛选掉大量非必需算法后的最小知识集合。 第四章 音视频机器学习基础 本章介绍了机器学习特别是深度学习在音视频处理中的基础知识背景。 通过对机器学习发展简史、部分关键算法和经典模型的阐述,帮助读者理解机器学习技术的一些基本运用。 第五章 音视频帧分析与简单处理 本章将音视频帧的基本概念、分析方法和简单处理技术进行了整理说明。 通过对音视频帧的深入理解和操作,读者可以掌握音视频处理的核心技术,为后续的复杂应用打下坚实的基础。 入门五章完成后,读者将有一定的音视频图像工程分析能力。并能够使用当前掌握的知识来处理音视频基本问题。 《编解传输》 阶段:流媒体规格与简易编解码播放框架设计,流的编解码与网络传输,音视频工程实践 第五章 音视频解码与流传输,是一个综合性较强的章节。这一章将对当前编解码规格进行详细的拆分与解析。通过对 H.264、H.265、H.266 的规格分析,详细的阐述当今音视频工程中,如何对视频保质保量的进行数据压缩和处理。并通过对 主流三协议:RTMP、RTP/RTCP、HLS 的分析,从协议的封装、信号设计、传输过程、规格规定上全面说明了音视频传输过程的各个方面细节。完成本章,将会使读者较为深度的理解编解码与传输,并使其能够有一定程度的规格定制与改进能力。 第六章 音视频的编解播与流分析,结合了第五章与入门四章的知识要领做工程实践。本章节将注重工程能力建设,从软件工程设计角度剖析音视频的编解播三大经典工程方案,并引导读者建立架构师思维与匹配的动手能力。 中级四章完成后,读者将能够胜任大部分业界的音视频项目工作需求,和一定程度的音视频架构师要求。 《渲染进阶》 阶段:通用统一化音视频编辑框架与渲染驱动设计,图像处理技术与特效引擎 第七章 图形驱动统一化的理论基础,是为后续章节开始进行的计算机图形处理,进行相关的理论基础铺垫与解析。中级/高级架构师,在工作内容上已不可避免会涉及到音视频2D、3D特效的处理与实践,并会较多的参与到 AI 技术工程化的框架设计工作中。因此,对于计算机图形学的了解是必要且必须的。 第八章 图形驱动与渲染引擎技术,则是一个较为复杂的复合章节。本章结合作者开源工程实践(UltraDriver),在前面几章铺垫的基础上,深入驱动底层逻辑,剖析了常见渲染引擎的核心元素,并完整的讲解了从GPU通信管线建立到实际场景渲染的完整过程。完成本章,将会使读者对整个渲染驱动有详尽的理解,并能够独立运用GPU驱动特性完成复杂的 3D 渲染工作。 第九章 音视频播放与特效编辑,结合作者开源工程实践(UltraTimeline),讲解了音视频编辑中的最为关键的技术系统:UTT 统一时间轴系统,通过此系统,读者将能够独立完成一系列复杂音视频的编辑过程。从而在音视频特效处理方面正式的进入工程大门。 高级三章完成后,后续的继续学习提升将脱离工程范畴。因此,更进一步的探索,就要求深入了解算法和硬件驱动,从而衔接到 AI-CV 等方面的相关研究工作,或游戏引擎物理引擎的开发架设。此两个方向的经典文献与著作较多,且已有成熟体系,因此本书既到此为止。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 "},"DONATE.html":{"url":"DONATE.html","title":" =[>> 赞助本作© <<]= ","keywords":"","body":"买杯咖啡 如果您愿意为本书爆肝的作者买一些精神食粮,来让他当一名 24H 狼灭的话... Buy Me Espresso 👇( ✨w✨)👇 WeChat Best Wish!💗 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Apex_1_Introduce.html":{"url":"Chapter_1/Language/cn/Apex_1_Introduce.html","title":"一、音频的保存与还原","keywords":"","body":"一、音频的保存与还原 引言 声音是音视频的重要组成部分。当代计算机体系中对声音的一系列处理,被统称为数字音频技术(Digital Audio Tech)。什么是音频?音频是如何被数字化表示和重现的? 
本章节主要整理说明了,部分数字音频的构成、调制和保存。通过对当代计算机图像有关音频处理发展史的梳理,以期为工程上对音频进行操作和分析,提供必要知识图谱。 声波是音频的载体,因此对音频的讨论,也就是对声波特性的讨论。 关键字:音频基础、音频三要素、音频频谱图、音频调制、音频压缩、音频格式 目录 1.1 音频基础 1.2 声波三要素(Three Elements of Acoustics) 1.3 声音三要素(Three Elements of Sounds) 1.3.1 音高(Pitch) 1.3.2 响度(Loudness) 1.3.3 音色(Timbre) 1.4 声音的解构 1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths) 1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz) 1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour]) 1.4.4 感观:频响曲线(FRC [Frequency Response Contour]) 1.4.5 工程:频谱图(Spectrum) 1.5 声音数字化 1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source) 1.5.2 模数转换(A/D [Analog-to-Digital]) 1.5.3 数模转换(D/A [Digital-to-Analog]) 1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM) 1.6 音频的存储 1.6.1 音频格式(Audio Format) 1.6.2 无压缩编码格式(Uncompressed Encode) 1.6.3 无损压缩编码格式(Lossless Encode) 1.6.4 有损压缩编码格式(Uncompressed Encode) 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"Chapter_1/Language/cn/Docs_1_1.html":{"url":"Chapter_1/Language/cn/Docs_1_1.html","title":"1.1 音频基础","keywords":"","body":"1.1 音频基础 数字音频技术(DAT [Digital Audio Tech]) 是当代计算机音像学的基础综合科学。其代指一系列,以 电-力-声类比(Electrical-Mechanical-Acoustic Analogy) 、 心理声学模型(Psychoacoustics Model) 等数学工具对声音进行记录、转换、存储、编解为可由挂载数字音频设备处理、播放、操作的数据,的方法论。它是一门包含了 心理声学(Psychoacoustics) 、 电声学(Electroacoustics) 、 数字信号处理(DSP [Digital Signal Processing]) 等领域知识的复合学科。 早期的探索与积累 人类对声音的探索从诞生伊始就伴随着文化和科技的发展,贯穿于历史长河之中。但以可持续化存储保存为目的,并系统性的总结为科学体系,还要从 音频录制(Audio Recording) 技术的出现开始,一直持续至今。由于历史跨度和分界相对明显,学界公认采用以音频存储介质的更替,来作为不同时代的划分。依此,当下总共经历了 4 个大的时期: 唱筒时代【物理介质】(The Phonograph era,1877 ~ 1925) 唱片时代【物理介质】(The Gramophone era,1925 ~ 1945) 磁带时代【磁力介质】(The Magnetic era,1945 ~ 1975) 数字处理时代(The Digital era,1975 ~ Present) 唱筒时代(物理介质) 1857 年,由法国科学家 斯科特· 德· 马丁维尔(Scott de Martinville) 发明声波记振仪,实现了将自然界中物理声音保存到留声媒介上,使人们首次完成了对声音信号的长时间保存。开启了物理媒介时代。当时声波记振仪的主要应用是用来辅助声学研究,通过仪器虽然能够绘制被录制声音的声波线,却无法再将声波线还原为音频回放。不过,这样的探索却为了后续提供了前置物理基础。 图 1-1 声波记振仪记录的首个音频振幅信号 在 1877年,托马斯·爱迪生基于声波记振仪原理,和与贝尔竞争发明电话过程中的启发,发明了第一台 留声机(Phonograph)。逆向思维解决了回放的问题。 留声机通过类似声波记振仪的运行方式,通过摆动式金属唱针,将录制的音频刻画在配套裹有锡箔纸的金属圆桶上。通过收音喇叭录制声音的同时,转动手摇把,推动锡纸移动,来记录保存声音到桶上对应螺纹声波线里。因此,回放时,只需要将录制用的金属探针更换为轻压弹簧探针后,从开始录制位置,手摇转动把推动锡箔纸,按录制方向旋转推移,就能够得到保存的音频回放了。不过第一代原型机,存在留声时间短,且音量不足,声音不清晰的问题。因此,在 1897 年发布的第二代留声机中,爱迪生使用了蜡筒代替了锡箔纸,让录制变为了可重复的过程。并通过增加了发条传动机制,剔除了人为摇动传动杆不匀速,引入的失真问题。二代在扩音器上使用了耳蜗结构大喇叭,物理提高了收扩音效果。 即便如此,二代留声机也因为无法复制拷贝留音,而最终以无法普及的失败告终。 这让人们逐渐意识到,单纯的录制,是无法满足人们对音频的需求的。人们开始寻找一种能够便于拷贝,且能高质量保存声音的手段。 唱片时代(物理介质) 其实早在初代留声机蓬勃发展的后几年,唱片就已经开始流传了。1887 年德裔美国工程师艾米利·伯林纳,发明了 圆盘式留声机(Gramophone) 和 唱片(Gramophone Record)。但是由于早期唱片先是面临了复刻问题,后虽然通过涂蜡锌板和镀金母版解决了量产问题,可却因为成本问题并不能被大众接受。直到 1891 年,伯林纳发现通过虫胶作为原材料,能达到和植物橡胶等同的保存水平,并具有高可塑性的特点,才最终解决了量产和成本问题。 图 1-2 首个手摇式圆盘唱片机(Gramophone) 于是,1893年,伯林纳和其合作伙伴共同于美国、英国、德国,先后成立了 留声机公司(Gramophone Company),制作销售圆盘式留声机并灌录唱片。这就是后来业界顶顶大名(中古黑胶铁烧)的 RCA、EMI(Electric&Musical Industries Ltd.)、DG 公司的前身。从此正式开启了与唱筒留声机的竞争。 图 1-3 早期黑胶唱片(Gramophone/Vinyl Record)示意图 直到 1929 年,随着爱迪生停止了最后一条唱筒生产线,唱片类型留声机因其高效、高保真(在当时看来)、高性价比和高量产的特性,彻底的击败了唱筒时代。 但看似已然立于不败之地的唱片,也仍然存在各种各样的问题。其中最致命的莫过于,唱片本身保存所需要占用的物理空间,有些过大了。换一种更为科学的表述就是,唱片本身的物理信息密度仍然不够小。这便是它的阿喀琉斯之踵。 磁带时代(磁力介质) 20世纪30年代,德国“法尔本”和“无线电信”两家公司的工程师们发明了一种有氧化铁涂层的塑料带,创造出了 磁带(Magnetic Type) 的雏形。但由于三氧化二铁本身化学特性,使得录制的声音因材质均匀程度偏差导致了部分失真问题,而无法与唱片抗衡。直到1947年,美国人马文·卡姆拉斯对原有的三氧化二铁磁带进行了改善。卡姆拉斯采用了一个完整的磁性线圈来代替录制磁头,使用一根钢丝(后来工程优化成为了磁针)嵌入到磁线圈中。利用空气作为缓冲,以磁场频震间接录制音频,从而保证了线圈磁力不会干扰到信号的录制。这使得磁带记录所得音频数据,较改进之前的响度有了4倍左右的提升。不过,随之而来的是复杂工艺带来的成本问题。 图 1-4 早期磁带机(Tape Drive)示意图(糟糕的大小) 这个邻人绝望的情况,直到1963年,才由来自飞利浦公司的荷兰工程师 劳德维克·卢·奥滕斯(Lou Ottens) 解决。奥滕斯受到了 RCA 的 “音频盒子”(Sound Tape 
Cartridge) 启发,通过缩减存储时长和采用多年迭代而来的更先进磁性塑料软带材质,推出了 卡式磁带(Cassette Tape)。极大的缩减了磁带保存音频的空间体积,使得一般客户有了更大的意愿来使用这种便携的音频存储媒介。 图 1-5 卡式磁带(Cassette Type)示意图 而随着 1964 年,察觉到卡式磁带革命性成果的飞利浦,乘势而为的推出了可自由录音的便携式磁带录音机后,由黑胶唱片所主导的最后一片大众音频阵地,也正式宣告被磁带所取代。 值得庆幸的是,因为黑胶唱片的物理录入和存储特性,使得它再音质还原上,能够较好的稳定保存录制时的音轨特征。从而让众多高质量唱片公司仍然愿意为新颖的歌手专辑,推出黑胶介质。让黑胶唱片在音乐发烧友等群体中,延续了较高的认同,从而避免了像唱筒一般,被彻底淘汰的命运。 之后,磁带登临主流,在 19世纪 70 年代至 80 年代间,得到了快速发展。以索尼(Sony)Walkman 系列为代表,造就了集磁带和便携随身听技术于一身的全世界范围的风靡。 但是,好景不长。数字时代随着 CD 进入大众视野,在短期内极大的抢占了原磁带的消费级音频市场。 数字处理时代 值得一提的是,磁带虽然淡出了主流视线,但是因为其本身的可靠存储特性,仍然被拥有大量数据的团体和公司,作为最为可靠的数据备份方式而使用着。这让磁带仍然在纯粹的数据持续化存储方面,保有了巨大活力。大公司往往也不吝惜对此投入,例如 2015 年 IBM 和富士影视(Fuji Film)就共同研发推出了高达 220TB 存储量的单盒存储磁带,其成本仅为同类型硬盘十五分之一。 近几年,磁带存储保有存储量发展,也仍然保持了每年越 32%~37% 的增速,成为了在摩尔定律逐步失效背景下,仍然满足摩尔定律,并久经考验的信息存储技术。 可见,未来随着大数据模型和各种存储骤增的前提下,长效存储的磁带,短时间内必然是不会退出历史舞台的。 其实,到这里我们能够发现,音频的数字时代似乎并不是一种标的于存储媒介的物理类型的一种分类。数字时代音频,更多的是对音频本身的保存方式抽象手段,进行的一种分代。即介质本身,已经不在被认为是区分时代差异的关键点,而各类压缩算法的突破,则成为了真正的关键。 在 CD 时代伊始,音频的格式就从传统的纯物理记录方式,演变成了调制解调(PCM)配合格式压缩存储的处理过程。这正是数字时代和以往传统时代相比,最为显著的特征。 因此,想要理解并处理音频,首先需要从如何衡量 声音(Sounds) 开始。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"Chapter_1/Language/cn/Docs_1_2.html":{"url":"Chapter_1/Language/cn/Docs_1_2.html","title":"1.2 声波三要素(Three Elements of Acoustics)","keywords":"","body":"1.2 声波三要素(Three Elements of Acoustics) 声音(Sounds) 是对所有由振动产生,可以被人感知并理解的,由固体、液体、气体等介质而传播的一类 声波(Acoustic Wave) 的统称 [1] 。只有被人能够听到的声波,才属于声音。所以,声音也可以被称为狭义声波(Narrow Acoustic Wave)。由于其本质仍是声波。客观对声波的测定量,也是同作用于声音的。 什么是声波呢? 声波(Acoustic Wave) 是指在介质中传播的机械波。其本质是振动中的质点,在介质中以波的形式传播 [2] 。因此,声波既可以是 纵波(Longitudinal Wave),也可以是 横波(Transverse Wave)。 根据两种基本机械波的特性, 横声波(Transverse Acoustic Wave) 只能在固体介质中传播; 纵声波(Longitudinal Acoustic Wave) 则能在固/液/气或介于中间态的介质中传播; 理想声波方程式(Ideal Acoustic Wave Equation) 即然是机械波,那么从波的传播角度,就可以根据机械波的物理性,测量出衡量声波的一维传播方向关系: ∂2p∂t2=c2⋅∂2p∂x2 {\\displaystyle \\begin{aligned} \\frac{\\partial^2 p}{\\partial t^2} = c^2 \\cdot \\frac{\\partial^2 p}{\\partial x^2} \\\\ \\end{aligned} } ∂t2∂2p=c2⋅∂x2∂2p 其中, 以 ccc 代表 声速(Propagation Speed),即声波的传播速度,单位常用 米/秒(m/s) 以 ppp 代表 声压(Acoustic Pressure),即声波的压强,单位为 帕斯卡(Pa) 以 xxx 代表 声位(Spatial Position),即声波的当前空间位置,单位为常用 米(m) 以 ttt 代表 时刻,单位常用 秒(s) 这就是著名的 一维声波恒等式(1D Acoustic Wave Equation)。而以 x⃗\\vec{x}x⃗ 表示当前声位在空间中距离发出点(即原点,我们假设声波从原点产生)的位姿,将传播从一维扩展到 dim(x⃗)=ndim(\\vec{x}) = ndim(x⃗)=n 维空间,则有: ∂2p∂t2=c2⋅∇2p=c2⋅Δpx⃗=c2⋅∑i=0dim(x⃗)(∂2p∂xi2) {\\displaystyle \\begin{aligned} \\frac{\\partial^2 p}{\\partial t^2} &= c^2 \\cdot \\nabla^2 p = c^2 \\cdot \\Delta p_{\\vec{x}} \\\\ &= c^2 \\cdot \\sum_{i=0}^{\\dim(\\vec{x})} \\left( \\frac{\\partial^2 p}{\\partial x_i^2} \\right) \\\\ \\end{aligned} } ∂t2∂2p=c2⋅∇2p=c2⋅Δpx⃗=c2⋅i=0∑dim(x⃗)(∂xi2∂2p) 得到了通用的 理想声波方程(Ideal Acoustic Wave Equation),也称为 费曼三维声波恒等式(Feynman's 3D Acoustic Wave Equation) [3] 。 可见,时间 ttt 时的声压有关时间的二阶偏导数,和该时刻下,声波所处空间位置的二阶导数与声速的平方,成正相关。在介质均匀的理想条件下,已知 声速 ccc 、声压 ppp 、声位 xxx 中的任何两个量,都能推导出时刻 ttt 的另外一个定量取值。 所以,声速(Propagation Speed) 、声压(Acoustic Pressure) 、声位(Spatial Position) 被称为 广义声波三要素,简称 声波三要素(Three Elements of Acoustics)。 但是,人对声音的感知充满了主观因素,纯物理测量值虽然能够描述声音的客观特性,却无法度量声音的主观成分。我们还需要介于主客观之间的兼容标的,来协助对主观感受的量化表示。不过,根据声波三要素,我们却可由此来对新定主观体系下的度量衡,进行客观测定。并用于不同参考系下的转换表示。这点即是声音三要素的底层科学支撑,很重要。 什么是 声音三要素(Three Elements of Sounds) 呢? 
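在回答这个问题之前,这里先补一个直观感受理想声波方程的数值小例子:用最朴素的显式差分(leapfrog)推进一维方程 ∂²p/∂t² = c²·∂²p/∂x²。以下只是本文给出的示意草图,网格尺寸、初始脉冲等参数均为假设取值,并非书中既有实现:

import numpy as np

# 一维声波方程 ∂²p/∂t² = c² ∂²p/∂x² 的显式差分示意(示例参数为假设值)
c = 343.0            # 声速 m/s(空气,近似值)
dx = 0.01            # 空间步长 m
dt = 0.9 * dx / c    # 时间步长,满足 Courant 数 c*dt/dx <= 1 的稳定条件
nx, nt = 400, 600    # 网格点数与时间步数

x = np.arange(nx) * dx
p_prev = np.exp(-((x - x.mean()) / 0.05) ** 2)   # 初始高斯声压脉冲
p_curr = p_prev.copy()                            # 初速度取 0
r2 = (c * dt / dx) ** 2

for _ in range(nt):
    p_next = np.zeros_like(p_curr)
    p_next[1:-1] = (2 * p_curr[1:-1] - p_prev[1:-1]
                    + r2 * (p_curr[2:] - 2 * p_curr[1:-1] + p_curr[:-2]))
    p_prev, p_curr = p_curr, p_next               # 边界固定为 0,仅为简化

print("当前峰值声压:", p_curr.max())

把 p_curr 随时间步画出来,就能看到初始脉冲向两侧等速传播,这正是声速 c 在方程中的角色。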
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"Chapter_1/Language/cn/Docs_1_3.html":{"url":"Chapter_1/Language/cn/Docs_1_3.html","title":"1.3 声音三要素(Three Elements of Sounds)","keywords":"","body":" Hex Data Display .hex-container { text-align: center; } .hex-data { display: inline-block; text-align: left; font-weight: bold; font-family: monospace; white-space: pre; } 1.3 声音三要素(Three Elements of Sounds) 声音三要素(Three Elements of Sounds) 是人们从 心理声学(Psychoacoustics) 角度,对最能影响人对声音本身感官感受的,三个最重要关键参数的归纳。分别是:音高(Pitch) 、 响度(Loudness) 、 音色(Timbre)。 预备乐理(声乐)知识 由于 声音(Sounds) 和 音乐(Musics) 密不可分。而很多知识,尤其是人的主观认知,总是会和先验艺术有关。为了接下来的工程方面理解,这里先非展开的,提前介绍一些关键 声乐(艺术)概念 : 纯音(Pure Tone),是指单一频率的正弦函数波形声波的声音; 音阶(Octave),即倍频程,指半频率增减的八度音阶。属于工程声乐学; 音程(Interval),指两个纯音之间所差的音阶体系下的距离,即度数; 音名(Names),指音阶不变的前提下,相隔八度的纯音的集合。属于声乐术语(艺术); 音级(Steps),指同音名下,从低到高的每个独立纯音的层级。属于声乐术语(艺术); 半音(Semitone),指音程为音阶一半的音名,即(八度下的)四度音; 音调(Notes),全体音名、半音的统称。在欧拉提出 调性网络 后,来自于网络拓扑; 音分(Cent),人为对两个相邻半音间的音程,以 波长比 为 12002^{1200}\\sqrt{2}1200√2 ,作 100 非等分; 音序(Sequence),指顺序排列下,同音级的相邻两个音调的距离; 除了八度音外,还有 七度音(Heptachord) 和 五度音(宫商角徵羽),本文从工程特点出发, 统一用八度音(Octave)代指音阶(音阶英文是 Gamut、Scale,易与其它概念造成混淆)。 理想的音阶是由纯音构成。 下文若无说明,音阶 均采用理想音阶(Ideal Octave)。 而 八度指的是在八度音下,同一个音名两个相临音级差异,即音程,为 八度。 八度音阶包含 7 个音名,5 个半音,8 个音级,即钢琴键位: 图 1-6 八度音钢琴键盘示意图 图中,黑色琴键为半音,白色琴键为纯音(理想)。 而 C4 则是 A4(440 Hz)对应通用 A440 标准下的基准键(Standard Key),有 C4 为 261.63Hz 标准。 Hz 是频率的单位,我们将在后续介绍。 A440 八度音阶又被称为 斯图加特音阶(Stuttgart Octave),属于 ISO 16 标准。根据标准,有音阶频率表如下: StepsNames 0 1 2 3 4 5 6 7 8 C 16.352(−48) 32.703(−36) 65.406(−24) 130.81(−12) 261.63(0) 523.25(+12) 1046.5(+24) 2093.0(+36) 4186.0(+48) C♯/D♭ 17.324(−47) 34.648(−35) 69.296(−23) 138.59(−11) 277.18(+1) 554.37(+13) 1108.7(+25) 2217.5(+37) 4434.9(+49) D 18.354(−46) 36.708(−34) 73.416(−22) 146.83(−10) 293.66(+2) 587.33(+14) 1174.7(+26) 2349.3(+38) 4698.6(+50) D♯/E♭ 19.445(−45) 38.891(−33) 77.782(−21) 155.56(−9) 311.13(+3) 622.25(+15) 1244.5(+27) 2489.0(+39) 4978.0(+51) E 20.602(−44) 41.203(−32) 82.407(−20) 164.81(−8) 329.63(+4) 659.26(+16) 1318.5(+28) 2637.0(+40) 5274.0(+52) F 21.827(−43) 43.654(−31) 87.307(−19) 174.61(−7) 349.23(+5) 698.46(+17) 1396.9(+29) 2793.8(+41) 5587.7(+53) F♯/G♭ 23.125(−42) 46.249(−30) 92.499(−18) 185.00(−6) 369.99(+6) 739.99(+18) 1480.0(+30) 2960.0(+42) 5919.9(+54) G 24.500(−41) 48.999(−29) 97.999(−17) 196.00(−5) 392.00(+7) 783.99(+19) 1568.0(+31) 3136.0(+43) 6271.9(+55) G♯/A♭ 25.957(−40) 51.913(−28) 103.83(−16) 207.65(−4) 415.30(+8) 830.61(+20) 1661.2(+32) 3322.4(+44) 6644.9(+56) A 27.500(−39) 55.000(−27) 110.00(−15) 220.00(−3) 440.00(+9) 880.00(+21) 1760.0(+33) 3520.0(+45) 7040.0(+57) A♯/B♭ 29.135(−38) 58.270(−26) 116.54(−14) 233.08(−2) 466.16(+10) 932.33(+22) 1864.7(+34) 3729.3(+46) 7458.6(+58) B 30.868(−37) 61.735(−25) 123.47(−13) 246.94(−1) 493.88(+11) 987.77(+23) 1975.5(+35) 3951.1(+47) 7902.1(+59) 表格横向为音级,纵向为音名(包含半音)。橙色为标准钢琴,所包含的八度音阶。表中数值格式为: 【对应频率(Hz)】(距离 C4 基准的(+/-)音序) 简单乐理知识准备就绪。现在,读者肯定存在大量思考,比如:什么或为什么是频率?这和声音三元素又有什么关系?乐理和工程又是怎么关联的? 
让我们带着这些知识和疑问,来进入细节。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"Chapter_1/Language/cn/Docs_1_3_1.html":{"url":"Chapter_1/Language/cn/Docs_1_3_1.html","title":"1.3.1 音高(Pitch)","keywords":"","body":"1.3.1 音高(Pitch) 音高(Pitch) 是代表声音振动频率高低的 主观感知量(Subjective Perceptions),是映射自对应声波频率纯客观物理量的心里声学概念。有时,我们会用 音调/声调(Tone)代指音高的工程名称,这其实不够准确。若发生这种情况,我们就 不能 将代指音高的音调,与乐理中关联音阶(Octave)的音调(Tone)等同。两者存在换算但并不是一个概念。 即,音高(Pitch)不是音调/声调(Tone),更不是音阶(Octave)。 美(Mel [Melodies])& 美体系(Mel Scale) 音高的单位是 美(Mel [Melodies]),这是一个主观标定的单位。以 美(Mel)单位来衡量音高的系统,被称为 美体系(Mel Scale)。该体系来自于美国心理学家 史丹利·史密斯·史蒂文斯(Stanley Smith Stevens,1906-1973) 于 1963 年进行的有关心理声学定量的研究 [4] 。所以, 不属于 当前 国际通用的计量体系单位(SI Unit [International System of Units])。 不过,凭借 美体系(Mel Scale) 在人耳感知上相对生理准确的量化,和本身在出发点设定上存在和频率(Frequency)之间的 直接函数映射。所以,美(Mel)常被选定为统一单位,在声学工程上作基础标的。记美体系音高为 PmP_mPm ,频率为 FFF ,有 1963 的早期换算(现已废弃): Pm=2595⋅log10(F) {\\displaystyle \\begin{aligned} P_m &= 2595 \\cdot \\log_{10} \\left(F \\right) \\\\ \\end{aligned} } Pm=2595⋅log10(F) 这是以 1000 Hz 响度为 40 dB(声压级)的纯音(即只包含一个频率)为 1000 Mel ,来测算拟合得到的经验公式。受限于检测设备,会存在一定的误差。 因此,该公式对应拟合方式,在随后的 1968、1976、1990 年,分别经历了三次较大程度的重测。而现在我们采用的主要有 两套转换标准。 一个是由 道格拉斯·奥肖内西(Douglas O'Shaughnessy) 在 1976 年修订的 1000Hz 基准(1000 mel)按 700Hz 分割转换标准 [5] ,被称为 奥肖内西美体系(O'Shaughnessy's Mel Scale) : Pm=2595⋅log10(1+F700) {\\displaystyle \\begin{aligned} P_m &= 2595 \\cdot \\log_{10} \\left(1 + \\frac{F}{700} \\right) \\\\ \\end{aligned} } Pm=2595⋅log10(1+700F) 另一个则是 1999 年由 MATLAB 主导的修订结果 [6] ,被称为 斯莱尼美体系(Slaney's Mel Scale)。这也是 librosa 库采用的算法,有: Pm={3F200, F1000 Hz15+27⋅log6.4(F1000), F≥1000 Hz {\\displaystyle \\begin{aligned} P_m &= \\begin{cases} \\frac{3F}{200} &, \\ F Pm=⎩⎪⎨⎪⎧2003F15+27⋅log6.4(1000F), F1000 Hz, F≥1000 Hz 两者差异,如下图: 图 1-7 两种美体系(Mel Scale)差异对比(0-8000 Hz)示意图 相对来说,在不存在体系换算的条件下,会优先选择 奥肖内西 转换公式。而当存在系统换算,尤其是涉及 librosa 库时,建议优先以统一体系为要求,采用相同体系的转换公式。 需要注意的是,美体系都是对单一频率纯声的转换。而什么是频率呢? 
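上面两套美体系换算,可以用几行 Python 直接验证。下面是一个假设性的最小草图(函数名为本文示例所取);若使用 librosa,也可以用 librosa.hz_to_mel(frequencies, htk=True/False) 做对照,htk=True 对应奥肖内西式,htk=False 对应斯莱尼式:

import numpy as np

def hz_to_mel_oshaughnessy(f):
    # 奥肖内西(1976):Pm = 2595 * log10(1 + F/700)
    return 2595.0 * np.log10(1.0 + np.asarray(f, dtype=float) / 700.0)

def hz_to_mel_slaney(f):
    # 斯莱尼(1999):1000 Hz 以下线性(3F/200),1000 Hz 及以上按 log_6.4 增长
    f = np.atleast_1d(np.asarray(f, dtype=float))
    mel = 3.0 * f / 200.0
    mel[f >= 1000.0] = 15.0 + 27.0 * np.log(f[f >= 1000.0] / 1000.0) / np.log(6.4)
    return mel

freqs = np.array([440.0, 1000.0, 4000.0, 8000.0])
print(hz_to_mel_oshaughnessy(freqs))   # 1000 Hz 处约为 1000 mel
print(hz_to_mel_slaney(freqs))         # 1000 Hz 处恰为 15,两者标度不同,高频段差异可与图 1-7 对照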
频率(Frequency) 频率(Frequency) 是指声音对应机械波属性的源振动频率。是声音三要素中唯一的纯客观物理量。当然,一般我们所称的声音的频率,都是指可被感知的声音频率,即前文提到的 狭义声波(Narrow Acoustic Wave) 范围的 可听频率(AF [Audible Frequency])。 频率的单位是 赫兹(Hz [Hertz]),表示单位时间一秒内,振源发生完整周期性往复运动的次数,即 10Hz=10/s10 Hz = 10/s10Hz=10/s 。假设存在波长为 λ\\lambdaλ ,波速为 ccc 的波,有相应周期为 TTT ,频率为 FFF ,则: F=1T,c=λF {\\displaystyle \\begin{aligned} F &= \\frac{1}{T} \\quad ,\\quad c = \\lambda F \\\\ \\end{aligned} } F=T1,c=λF 在标准大气压的理想空气介质中,人类能够听见并识别大约 20Hz~20000Hz 频率范围的声波。有 AF 属于 20Hz~20000Hz。 以此为基准, 频率小于 20Hz 范围的声波,被我们称为 次声波(Infrasound)。而 频率大于 20000Hz 范围的声波,被我们称为 超声波(Ultrasound)。次声波和超声波都是相对于人而言的 单阈范围域。 图 1-8 三类声波范围示意图(蓝色指狭义声波) [2] 即然被归为声音三要素,就表示人对不同频率声音的听感有不少差异。在假设其它影响量不变的理想情况下,本书查阅了一些基于日常关联心理声学测量的结果,汇总如下表以供参考: Frequency(Median) Object Feelings Description(Subjective) 20Hz 发动机 汽车呼啸而过的轰鸣声 25Hz 大提琴的最低音调 类似低音炮发出的震撼 50Hz 洗衣机的运转时 洗衣机正常工作时的声响 100Hz 柜式冰箱运转时 柜式冰箱压缩机工作的声响 200Hz 剧院环境男低音 低沉浑厚的歌声 500Hz 轮播式电话铃声 是种清脆响亮的声音 1000Hz 钢琴中音C大调 更为清脆明亮的声音 2000Hz 剧院环境女高音 高亢嘹亮的歌声 4000Hz 蚊子飞行时 嗡嗡且恼人的脆鸣声 8000Hz 发光二极管示波器 实验室示波器工作的声响 12000Hz 成熟家犬 狗吠叫警示时的吼声 18000Hz 超声波清洗器 清洗器工作时的吱吱声 从上表可知,以听感角度考虑会十分的主观。但请不要忘记,频率本身是客观的。上述统计中采用的,是由选定样本声音中,所包含的所有频率声波的 复合频率中值(Median)。自然界中大部分声音 并非 由 单一 频率波构成。这也是产生不同音色(Timbre)的原因之一。 观察例举的统计结果,会发现直觉上非常吵闹的声音,如飞机发动机的声音,其频率并不一定高。而一些我们生活中感觉难以察觉的声音,如蚊子飞行声,却不一定低频。 显然,频率并不能代表声音的高低。我们还需要其它参数表示,那就是 响度(Loudness)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"Chapter_1/Language/cn/Docs_1_3_2.html":{"url":"Chapter_1/Language/cn/Docs_1_3_2.html","title":"1.3.2 响度(Loudness)","keywords":"","body":"1.3.2 响度(Loudness) 响度(Loudness),有时虽不准确但也会被称为 音量(Volume),是指人对声音大小的 主观感知量(Subjective Perceptions),是对声波的 声压(Acoustic Pressure) 物理量的感观描述。响度是根据人对不同声压反应,而人为测量出的一种 非客观(Non-Objective) 的量化值。 响度的早期单位是 宋(Sone),这是一个 主观标定的单位。同 音高 一样,来自于 史丹利·史密斯·史蒂文斯(S. S. Stevens) 于 1963 年的实验结果 [4] 。 由于主观成分因素,宋 同样不属于当前 国际通用的计量体系单位(SI Unit [International System of Units]),而且因相对粗粒度而不太经常被采用。工程通用对响度进行衡量的单位,是声压级。 声压级(SPL [Sound Pressure Level]) 声压级(SPL [Sound Pressure Level]) 是由 美国国家标准学会(ANSI [American National Standards Institute]) 测定,同样为 主观标定的 响度单位。但由于相对精确的度量水平,在通常非实验误差情况下,可以作为稳定的工程单位使用。声压级单位为 分贝(dB),常用 NNN 表示代指。 我们有当前最新一次实验室精确测量的 ANSI/ASA S1.1-2013 规格为基准 [7] 。修正锚定 以 1000Hz 纯音,在人耳能听见的最小阈限压强 pref=20μPap_{ref} = 20 \\mu Papref=20μPa 为 1 dB1 \\ dB1 dB 值,由此推导得声压级公式: N=20⋅log10(ppref) {\\displaystyle \\begin{aligned} N &= 20 \\cdot \\log_{10} \\left( \\frac{p}{p_{ref}} \\right) \\\\ \\end{aligned} } N=20⋅log10(prefp) 其中, 以 ppp 代表当前目标声音,对应声波的 声压(Acoustic Pressure); 以 prefp_{ref}pref 代表 参考声压(Reference Acoustic Pressure),为规格固定量, pref=20μPap_{ref} = 20 \\mu Papref=20μPa ; 而在 ANSI 的声压级单位系统下,记宋体系响度为 LNL_NLN ,则分贝(dB)与 宋(Sone)存在换算关系: LN=2N−4010 {\\displaystyle \\begin{aligned} L_N &= 2^{\\tfrac{N - 40}{10}} \\\\ \\end{aligned} } LN=210N−40 即: N=40+log2(LN) {\\displaystyle \\begin{aligned} N &= 40 + \\log_2 \\left( L_N \\right) \\\\ \\end{aligned} } N=40+log2(LN) 除 宋(Sone) 以外,另一个常见的体系是 方(Phon)。在该修订里,规定: 40 dB=40 Phon=1 Sone {\\displaystyle \\begin{aligned} 40 \\ dB = 40 \\ Phon = 1 \\ Sone \\\\ \\end{aligned} } 40 dB=40 Phon=1 Sone 一般情况下,宋(Sone)和方(Phon)用于常量标记,而 SPL分贝(dB)用于响度值。 但是,从前文我们得知,自然界中的大部分声音,其本身就是复合的。这种情况下怎么评估它的响度呢? 
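在进入复合情况之前,先把单频纯音下的 声压→声压级(SPL)与 分贝↔宋(Sone)换算写成代码。以下为最小示意草图,示例数值为假设输入:

import numpy as np

P_REF = 20e-6  # 参考声压 20 µPa

def pressure_to_spl(p):
    # N = 20 * log10(p / p_ref)
    return 20.0 * np.log10(np.asarray(p, dtype=float) / P_REF)

def spl_to_sone(n_db):
    # L_N = 2 ** ((N - 40) / 10)
    return 2.0 ** ((np.asarray(n_db, dtype=float) - 40.0) / 10.0)

def sone_to_spl(l_n):
    # N = 40 + 10 * log2(L_N),由上式反解
    return 40.0 + 10.0 * np.log2(np.asarray(l_n, dtype=float))

print(pressure_to_spl(0.02))   # 0.02 Pa 约为 60 dB SPL
print(spl_to_sone(60.0))       # 60 dB 约为 4 sone
print(sone_to_spl(4.0))        # 反向换算回 60 dB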
此时,就需要使用 复合频率下 的声音计算公式了。 复合响度公式(Multi-Source Loudness Formula) 假设一个 单一的自然声(Natural Sound),记为 N∑N_{\\sum}N∑ 由一组声压为 p=[p0, p1, ⋯ , pn]p = [p_0,\\ p_1,\\ \\cdots \\ ,\\ p_n]p=[p0, p1, ⋯ , pn] 的单频率声波组成,有: N∑=10⋅log10(p02+p12+⋯+pn2pref2)=10⋅log10(∑n(pipref)2) {\\displaystyle \\begin{aligned} N_{\\sum} &= 10 \\cdot \\log_{10} \\left( \\frac{ {p_0}^2 + {p_1}^2 + \\cdots +{p_n}^2 }{ {p_{ref}}^2 } \\right) \\\\ &= 10 \\cdot \\log_{10} \\left( \\sum^n \\left( \\frac{p_i}{ p_{ref}} \\right)^2 \\right) \\\\ \\end{aligned} } N∑=10⋅log10(pref2p02+p12+⋯+pn2)=10⋅log10(∑n(prefpi)2) 而工程中的单频率声波代表参数,可能直接为响度,如 L=[L0, L1, ⋯ , Ln]L = [L_0,\\ L_1,\\ \\cdots \\ ,\\ L_n]L=[L0, L1, ⋯ , Ln] 。则带入单频率响度公式,上式可写为: N∑=10⋅log10(∑n(pipref)2)=10⋅log10(∑n10Li10 dB)=10⋅log10(10L010 dB+10L110 dB+⋯+10Ln10 dB) {\\displaystyle \\begin{aligned} N_{\\sum} &= 10 \\cdot \\log_{10} \\left( \\sum^n \\left( \\frac{p_i}{ p_{ref}} \\right)^2 \\right) = 10 \\cdot \\log_{10} \\left( \\sum^n 10^{\\frac{L_i}{10\\ dB}} \\right) \\\\ &= 10 \\cdot \\log_{10} \\left( 10^{\\frac{L_0}{10\\ dB}} + 10^{\\frac{L_1}{10\\ dB}} + \\cdots + 10^{\\frac{L_n}{10\\ dB}} \\right) \\\\ \\end{aligned} } N∑=10⋅log10(∑n(prefpi)2)=10⋅log10(∑n1010 dBLi)=10⋅log10(1010 dBL0+1010 dBL1+⋯+1010 dBLn) 这就是 声音(复合声波)的响度公式。 可虽然 分贝(dB)系统最为广泛且常常被使用,但却 仍然不属于 国际通用的计量体系单位(SI Unit)。 真正被作为科学的单位,是声强(Sound Intensity)。 声强(Sound Intensity) 声强(Sound Intensity) 是对单个声波强度的科学表示,指声波在单位面积下所具有的声压(Acoustic Pressure),对外功率之和。 声强单位为 瓦每平方( W/m2W/m^2W/m2 ),一般被记为 III 表示。有: I=p⋅v⃗ {\\displaystyle \\begin{aligned} I &= p \\cdot \\vec{v} \\\\ \\end{aligned} } I=p⋅v⃗ 其中, 以 ppp 代表当前目标声音,对应声波的 声压(Acoustic Pressure) ; 以 v⃗\\vec{v}v⃗ 代表机械波的做功方向,是个 速度量,每个维度分量单位都为 米每秒(m/s) ; 由于一般我们用声强来计算,理想状态的当前声波能量值。为了简化计算,通常会选择均匀介质情况的理想单点声源,作为背景条件。这种情况下,做工方向 v⃗\\vec{v}v⃗ 就可以被认为是 球面坐标中,单位平方点的向外法向量了。 于是,声强 III 即可转为,由声压 ppp ,传播介质密度 ρ\\rhoρ ,和声速 ccc ,计算表示: I=p2ρc {\\displaystyle \\begin{aligned} I &= \\frac{p^2} {\\rho c} \\\\ \\end{aligned} } I=ρcp2 因为一般都是在空气介质中进行衡量,所以有 ρ≈1.293 kg/m3\\rho \\approx 1.293 \\ kg/m^3ρ≈1.293 kg/m3 ,而 c≈343 m/sc \\approx 343 \\ m/sc≈343 m/s 。 所以,根据声压快速获取对应声音在空气中的声强公式为: I≈p2443.499 W/m2 {\\displaystyle \\begin{aligned} I &\\approx \\frac{p^2} {443.499} \\ W/m^2\\\\ \\end{aligned} } I≈443.499p2 W/m2 使用上式速算时,压强取 帕斯卡(Pa)数量级下的数值即可。 那么,声强和响度是什么关系呢? 
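复合响度公式与空气中声强的速算,也可以落成两个示意函数(介质密度、声速取文中近似值,示例输入为假设数据):

import numpy as np

def combine_spl(levels_db):
    # N_sum = 10 * log10( sum( 10 ** (Li / 10) ) ):多个单频分量的能量叠加
    levels_db = np.asarray(levels_db, dtype=float)
    return 10.0 * np.log10(np.sum(10.0 ** (levels_db / 10.0)))

def intensity_from_pressure(p, rho=1.293, c=343.0):
    # I = p^2 / (rho * c),空气中近似 I ≈ p^2 / 443.5  (W/m^2)
    return np.asarray(p, dtype=float) ** 2 / (rho * c)

print(combine_spl([60.0, 60.0]))      # 两个 60 dB 分量叠加约为 63 dB
print(intensity_from_pressure(0.02))  # 0.02 Pa 约为 9.0e-7 W/m^2

由 combine_spl 可见,两个同为 60 dB 的分量叠加后约为 63 dB 而非 120 dB,这正是对数叠加的直观体现。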
很遗憾,两者分属不同系统,并不存在 直接上的 关联。但存在 间接换算 关系。 声强级(SIL)与 声压级(SPL)的换算 声强级(SIL [Sound Intensity Level]) 类似于 声压级(SPL [Sound Pressure Level]) 的定义,皆是用来 主观标定 响度的单位系统/系统单位。SIL 的单位沿用了 SPL 的 分贝(dB),甚至两者换算公式,都可基本等同。所以,仍然以 NNN 表示声音响度,有: IIref=p2pref2=(ppref)2N=20⋅log10(ppref)=10⋅log10(IIref) {\\displaystyle \\begin{aligned} \\frac{I}{ I_{ref} } = \\frac{p^2}{ {p_{ref}}^2 } &= \\left( \\frac{p}{ p_{ref} } \\right)^2 \\\\ N = 20 \\cdot \\log_{10} \\left( \\frac{p}{ p_{ref} } \\right) &= 10 \\cdot \\log_{10} \\left( \\frac{I}{ I_{ref} } \\right) \\\\ \\end{aligned} } IrefI=pref2p2N=20⋅log10(prefp)=(prefp)2=10⋅log10(IrefI) 以 N∑N_{\\sum}N∑ 表复合,取声压 p=[p0, p1, ⋯ , pn]p = [p_0,\\ p_1,\\ \\cdots \\ ,\\ p_n]p=[p0, p1, ⋯ , pn] ,声强 I=[I0, I1, ⋯ , In]I = [I_0,\\ I_1,\\ \\cdots \\ ,\\ I_n]I=[I0, I1, ⋯ , In] ,则复合响度公式有: N∑=10⋅log10(∑n(pipref)2)=10⋅log10(∑n(IiIref))=10⋅log10(10L010 dB+10L110 dB+⋯+10Ln10 dB) {\\displaystyle \\begin{aligned} N_{\\sum} &= 10 \\cdot \\log_{10} \\left( \\sum^n \\left( \\frac{p_i}{ p_{ref} } \\right)^2 \\right) = 10 \\cdot \\log_{10} \\left( \\sum^n \\left( \\frac{I_i}{I_{ref}} \\right) \\right) \\\\ &= 10 \\cdot \\log_{10} \\left( 10^{\\frac{L_0}{10\\ dB}} + 10^{\\frac{L_1}{10\\ dB}} + \\cdots + 10^{\\frac{L_n}{10\\ dB}} \\right) \\\\ \\end{aligned} } N∑=10⋅log10(∑n(prefpi)2)=10⋅log10(∑n(IrefIi))=10⋅log10(1010 dBL0+1010 dBL1+⋯+1010 dBLn) 至此,两个主客观系统间,达成了转换条件。一般的 pref=20μPap_{ref} = 20 \\mu Papref=20μPa 时,有 Iref=1 pW/m2I_{ref} = 1 \\ pW/m^2Iref=1 pW/m2 。我们用声压级表示响度,而以声强计算能量。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"Chapter_1/Language/cn/Docs_1_3_3.html":{"url":"Chapter_1/Language/cn/Docs_1_3_3.html","title":"1.3.3 音色(Timbre)","keywords":"","body":"1.3.3 音色(Timbre) 音色(Timbre),是指声音的主观成色。本身更偏向于乐理而非工程。 注意一个关键的理解偏差:音色不是音质! 
心理声学(Psychoacoustics)有时称其为 音调色(Tone Color) 或 音调质(Tone Quality),转译到中文若用音质来代替,实则是不准确的。音质(Sound Quality),是由信噪比(SNR [Signal to Noise Ratio])决定的工程量,后面章节会有介绍。之所以强调,是因为该点非常容易造成初学者混淆,从而提高学习门槛。 如果说音高(Pitch)和响度(Loudness)都是单一影响参数的代称,那么音色则是一种 复合影响因子 带来的,基于传统声乐经验和历史因素,对体感的 弱标准化 规定描述。也就是说,音色是三要素中,最为主观的一个了。 而不同的音色到底有何种区别呢?这需要从音色本质说起。 音色的频率链(Frequency Series)/ 谐波链(Harmonic Series) 考虑一个来自 ISO 16 标准音阶(Octave)的 纯音 A4(乐理 la,440 Hz),理想情况是 只有一个 频率,即 440Hz 的。但是,实际生活中,假设完美调校的钢琴和其他乐器,演奏的 A4 虽然能够听出来是对应音调,但也能明显可区分来自于不同乐器。甚至不同品牌厂家的同一种乐器,演奏同一音调时,也会有不同听感。 这种似是而非、若即若离的情况,是怎么一回事呢?想要回答这个问题,需要解释两个方面:“似是”的一面,和“若离”的一面。 我们知道,声音都是复合的一组频率。因此,从声波物理性出发: 将决定声音基准音调的单一频率,称为 基波(Fundamental) ; 将决定声音本身特征的衍生频率,称为 谐波(Harmonic) ; “似是”来自于相同的基波,基波决定音调(Note),即标志着声音本身的指向。“若离”来自于 谐波,这是决定一个 声音具体特征 的主要因素。不同声源弹奏同一 乐理音调(Music Note) 时,相同音调理想情况下,基波都是完全一致的。而组成声音的所有谐波差异,才导致了不同听感。 一般的,我们将由 一个基波(Single Fundamental) 和 一组谐波(Multi Harmonics) 共同叠加而成的声音,为 复合音(Complex Sounds)。 如下图,就是来自于ISO 16 标准音阶(Octave)调音的,单一音调 B3 在实际某钢琴上的表现。此钢琴 B3 复合音的组成中,最左侧蜂刺状 246Hz 频率位置 即为 B3 基波,而基波右侧其余蜂刺位则为该复合音谐波。 图 1-9 某钢琴标准 B3 调音的频率响度特征(响度归一化) [8] 而一个复合音中,从低频到高频所有纯音的频率,所构成的数组,就是 频率链(Frequency Series)/ 谐波链(Harmonic Series)。即从 工程角度 所理解的,声音的 音色(Timbre)。 由此,我们可知基波、谐波、音色三者的关系了。 基波(Fundamental) 基波(Fundamental),也称为 第一谐波(First Harmonic),指感观音色对应某指定 基准纯音(Standard Pure Tone) 的频率。基波决定某标准音阶在器乐设备上的准确性。 同一规范(如 A440)下,调校准确的各类声音设备,基波频率完全相等。 基波和频率间的换算为: Note(n)=(122)n−9×440Hz {\\displaystyle \\begin{aligned} Note(n) = \\left( ^{12}\\sqrt{2} \\right)^{n - 9} \\times 440 Hz \\\\ \\end{aligned} } Note(n)=(12√2)n−9×440Hz 其中, 以 nnn 表示 当前音名(Names)距离 C4 的音序(Sequence) ; 而 440Hz 即 A4 标定值,A4 与 C4 标准键(Standard Key) 的音序为 +9 ; 而同样的,当我们已知对应基波的频率,则可以计算出它与 C4 的音序,从而反向查表得到它在乐理上的音调。记目标基波频率为 F(n)F(n)F(n) ,则: n=12 log2(F(n)440 Hz)+9 {\\displaystyle \\begin{aligned} n = 12 \\ \\log_2\\left( \\frac{F(n)}{440\\ Hz} \\right)+9\\\\ \\end{aligned} } n=12 log2(440 HzF(n))+9 此公式,即为工程上常用的 A440 频率音序公式(Frequency Sequence Formula)。 谐波(Harmonic) 谐波(Harmonic) 指自指定基波以 整数倍频率 衍生的纯音声波。基波衍生的谐波一般不会仅有一个。假设基波位于谐波链(Harmonic Series)的第一位,有 i=1i = 1i=1 ,频率为 F(n)=F1F(n) = F_1F(n)=F1 。则位于顺序第 iii 位的谐波频率 F(i)F(i)F(i) 有: F(i)=i×F1 {\\displaystyle \\begin{aligned} F(i) = i \\times F_1 \\\\ \\end{aligned} } F(i)=i×F1 所以,仍然以之前的 钢琴 B3 为例,有: 图 1-10 某钢琴标准 B3 调音的谐波链(响度归一化)示意图 [8] 可见,决定整个谐波链的关键,就在于第一谐波,也就是基波上。而在基波响度相同的情况下,产生的第二、第三、... 
、第 i 谐波,其 数目 和 各自的响度,才确定了声源特色。 至此,声音三要素与工程量映射,就解释清楚了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:49 "},"Chapter_1/Language/cn/Docs_1_4.html":{"url":"Chapter_1/Language/cn/Docs_1_4.html","title":"1.4 声音的解构","keywords":"","body":"1.4 声音的解构 声音,大多为复合声,从前文的介绍中我们可以发现,至少能够从三个角度去构建参考系。即,乐理角度(艺术) 、 心理声学(感观) 、 声乐工程(声音三要素)。 不同角度观察到的,可以认为是同一声音在各自领域平面的投影。而我们通过这种方式,从不同的视角,拼接出了声音本身。所以,声音也可以被称为是某种程度上的高维信息。 并非 在不考虑传播时,直觉上的仅有时频那么简单。 接下来,我们便分别从这三个不同的视角,去看如何处理。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_4_1.html":{"url":"Chapter_1/Language/cn/Docs_1_4_1.html","title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","keywords":"","body":"1.4.1 乐理:音调(Notes)& 五度圈(Circle of Fifths) 在声音三要素开始的部分,我们已经简单介绍了一些乐理基础概念。而乐理对声音的描述,都是基于 音调(Note) 为出发点的。通过音调指向基音,建立主观参考系下的客观不变量,从而构造统一的联系。结合 系统的 记录方式,完成对一定时间段下,音乐的保存。 所以,乐谱(Musical Notation) 就是一种,以手动编排抄录的方式,进行声音持续化存储的早期人工手段。而 乐理音调(Music Note),以下我们简称为 音调(Note),就是一种粗粒度(相对于数字时代编码调制而言)的固定采样。 依旧采用 八度音(Octave)的音阶体系,首先需要建立乐理(艺术)心理(感观)转换。 音调(Notes)的 音程尺度描述(ISN [Interval Scale Name]) 这个我们已经介绍过。 八度音阶(Octave) 以钢琴音级为 4 时为准,包括音名、半音在内,共有 12 个,即 C、C♯/D♭、D、D♯/E♭、E、F、F♯/G♭、G、G♯/A♭、A、A♯/B♭、B 。为方便说明,我们补充下一级的 C5 到表中,有: C C♯D♭ D D♯E♭ E F F♯G♭ G G♯A♭ A A♯B♭ B C5 261.63(0) 277.18(+1) 293.66(+2) 311.13(+3) 329.63(+4) 349.23(+5) 369.99(+6) 392.00(+7) 415.30(+8) 440.00(+9) 466.16(+10) 493.88(+11) 523.25(+12) 明明是 八度音,却有包扩 C5 在内的 13 个音调。尺度不一太尴尬了,怎么办呢? 音乐艺术先贤们也遇到了同样的问题。于是,根据 两两相邻音调间的音程(Interval),在同音级下,有了不同的 音程尺度描述(ISN [Interval Scale Name])。正好作为转换起点。 在 C4 所在第 4 音级取 ISN。所有音调与 C4 相比音程为: Notes Frequency(Sequence) Interval Scale Name Interval as Notes C4 261.63 (0) 纯一度(P1 [Perfect Unison]) 0 C♯/D♭ 277.18 (+1) 小二度(m2 [Minor Second]) 0.5 D 293.66 (+2) 大二度(M2 [Major Second]) 1 D♯/E♭ 311.13 (+3) 小三度(m3 [Minor Third]) 1.5 E 329.63 (+4) 大三度(M3 [Major Third]) 2 F 349.23 (+5) 纯四度(P4 [Perfect Fourth]) 2.5 F♯/G♭ 369.99 (+6) 增四度(A4)/减五度(d5) 3 G 392.00 (+7) 纯五度(P5 [Perfect Fifth]) 3.5 G♯/A♭ 415.30 (+8) 小六度(m6 [Minor Sixth]) 4 A 440.00 (+9) 大六度(M6 [Major Sixth]) 4.5 A♯/B♭ 466.16 (+10) 小七度(m7 [Minor Seventh]) 5 B 493.88 (+11) 大七度(M7 [Major Seventh]) 5.5 C5 523.25 (+12) 纯八度(P8 [Perfect Octave]) 6 表内抽象的音程名中,出现了一些 非精确量词(Inaccurate Quantifiers) 被使用其中。确切的来说,基础量词有五种,由小到大分别是(注意简写时的 大小写区分 ): 减(d [Diminished]) 小(m [Minor])、纯(P [Perfect])、大(M [Major]) 增(A [Augmented]) 上述量词怎么来的呢?直接意义上,这是两套体系。一套基于 相对音程,一套基于 绝对音程。简单来说,取整数 k∈Zk \\in \\mathbb{Z}k∈Z 表示通用数字级。则 绝对音程(n.AI [Absolute Interval]),采用 减(d [Diminished]) 、 增(A [Augmented]),是取用第 4 级的纯一度(P1)就是 C4 261.63 Hz 作为原点。记原点音调音序为 norin_{ori}nori ,目标音调音序为 ntagn_{tag}ntag ,有: 减 k 度(dk [Diminished k]),意味着 Δn=ntag−nori=2(k−2)\\Delta n = n_{tag} - n_{ori} = 2(k - 2)Δn=ntag−nori=2(k−2) ; 增 k 度(Ak [Augmented k]),意味着 Δn=ntag−nori=2(k−0.5)\\Delta n = n_{tag} - n_{ori}= 2(k - 0.5)Δn=ntag−nori=2(k−0.5) ; 相对音程(n.RI [Relative Interval]),采用 小(m [Minor]) 、 纯(P [Perfect]) 、 大(M [Major]),是一种差值概念。记被比较的音调音序为 ncomn_{com}ncom ,而目标音调音序为 ntagn_{tag}ntag ,有: 小 k 度(mk [Minor k]),指 Δn=(ntag−ncom)%12∈{1, 3, 8, 10}\\Delta n = ({n_{tag} - n_{com}})\\%12 \\in \\{1,\\ 3,\\ 8,\\ 10 \\}Δn=(ntag−ncom)%12∈{1, 3, 8, 10} 时对应 k∈{2, 3, 6, 7}k\\in \\{2,\\ 3,\\ 6,\\ 7 \\}k∈{2, 3, 6, 7} ; 纯 k 度(Pk [Perfect k]),指 Δn=(ntag−ncom)%12∈{0, 5, 7, 12}\\Delta n = ({n_{tag} - n_{com}})\\%12 \\in \\{0,\\ 5,\\ 7,\\ 12 \\}Δn=(ntag−ncom)%12∈{0, 5, 7, 12} 时对应 k∈{1, 4, 5, 8}k\\in \\{1,\\ 4,\\ 5,\\ 8 \\}k∈{1, 4, 5, 8} ; 大 k 度(Mk [Major k]),指 
Δn=(ntag−ncom)%12∈{2, 4, 9, 11}\\Delta n = ({n_{tag} - n_{com}})\\%12 \\in \\{2,\\ 4,\\ 9,\\ 11 \\}Δn=(ntag−ncom)%12∈{2, 4, 9, 11} 时对应 k∈{2, 3, 6, 7}k\\in \\{2,\\ 3,\\ 6,\\ 7 \\}k∈{2, 3, 6, 7} ; 而在 所有音调与 C4 相比的音程表 中,之所以出现了 F♯/G♭ 用绝对音程(Absolute Interval)与 A440 440 Hz 相比,其它采用相对音程(Relative Interval)与 C4 261.63 Hz 相比的原因,正是在 Δn=ntag−ncom=6\\Delta n = n_{tag} - n_{com} = 6Δn=ntag−ncom=6 时,用相对音程的 小(m)、纯(P)、大(M) 无法描述 该音程。所以,不得已 才借用了绝对音程的 增(A)、减(d) 描述法。 现在,我们已知同音级下的音程表示了。不过实际使用中,往往会出现两个参与计算的音调是跨级的情况。虽然两方法都适用于跨越多音级(跨级)的音程计算,但 绝对音程(n.AI)和 相对音程(n.RI)在对此的表达上,还是存在较大差异的。 绝对音程(n.AI)的 跨级计算 绝对音程(n.AI) 因为存在原点而且不区分范围,因此可以在单一方向上持续增,或持续减。不过因为往低频方向持续运动,会可能有负值。 所以,除 Δn=6\\Delta n = 6Δn=6 情况外,我们一般只用它来像高频方向计数。而这种处理使得以 n.AI 公式计算出来是多少 k ,就应该称为增减多少 k 度(AK/dK)。 例如, 从 D4->C6 的音序差 Δn=24−2=2×11→k=13\\Delta n = 24 - 2 = 2 \\times 11 \\rightarrow k=13Δn=24−2=2×11→k=13 ,为 减十三度(d13) ; 从 D4->F6 的音序差 Δn=29−2=2×13.5→k=14\\Delta n = 29 - 2 = 2 \\times 13.5 \\rightarrow k=14Δn=29−2=2×13.5→k=14 ,为 增十四度(A14) ; 从 C4->F6♯/G6♭ 的音序差 Δn=30−0=2×15→k=17\\Delta n = 30 - 0 = 2 \\times 15 \\rightarrow k=17Δn=30−0=2×15→k=17 ,为 减十七度(d17) ; 相对音程(n.RI)的 跨级计算 相对音程(n.RI) 的跨级计算就要麻烦一些。这个麻烦主要体现在相对音程的音程尺度描述(ISN)在带上 Δn=6\\Delta n = 6Δn=6 从绝对音程中借用的 增四度(A4)/减五度(d5)后,也仅有 13 个。 所以,在跨级描述上,相对音程情况需要引入其它的量词用以记录级数差。一个简单的方法就是 在公式基础上,根据跨越的级数,在称为中增加 级数 x 七度 的大小。 例如, 从 D4->C6 的音序差 Δn=(24−2)%12=10→k=7(+7×1)\\Delta n = (24 - 2)\\%12 = 10 \\rightarrow k= 7 \\left(+ 7 \\times 1 \\right)Δn=(24−2)%12=10→k=7(+7×1) ,为 小十四度(m14) ; 从 D4->F6 的音序差 Δn=(29−2)%12=3→k=3(+7×2)\\Delta n = (29 - 2)\\%12 = 3 \\rightarrow k= 3 \\left(+ 7 \\times 2 \\right)Δn=(29−2)%12=3→k=3(+7×2) ,为 小十七度(m17) ; 但当 Δn=6m, m∈Z\\Delta n = 6m, \\ m \\in \\mathbb{Z}Δn=6m, m∈Z 时, 借用 的 增四度(A4)/减五度(d5) 又不能 换回绝对音程来重新计算,该怎么办呢? 相对音程针对这种情况,引入了 倍数(Multiples)来辅助标记。即 m倍增/m倍减。 例如, 从 C4->F6♯/G6♭的音序差 Δn=(30−0)%12=6→A4/d5(×2)\\Delta n = (30 - 0)\\%12 = 6 \\rightarrow A4/d5 \\left(\\times 2 \\right)Δn=(30−0)%12=6→A4/d5(×2) ,有 m=2m = 2m=2 的值,称为 二倍增四度(AAA4)/二倍减五度(ddd5)。即多出来的倍数 m=2m = 2m=2 ,就代表着需要 多写 几个 增(A)或 减(d)。 至此,结合 Δn=6\\Delta n = 6Δn=6 时的倍数描述 和 “±7” 度法,我们就能够从乐理(艺术)上形容跨多音级(Steps)的相对音程了。 不过,这样的算法要求我们知道当前音调的音序。但因为一般情况下,乐谱中采用的都是确认 大/小调 主音(Keytone) 后,对包含音调距离主音音程的符号化记录。所以, 必须要能获取主音的音序才能相对计算出,乐谱中的实际乐符的音程,进而推得音序和标的频率。 大/小调(Major Scale/Minor Scale) 什么是大/小调?大/小调(Major Scale/Minor Scale) 是古典音乐中,对一组参与演奏音调韵律的总结。不同 大/小调所采用的音调是不同的。 这里有相当多的乐理(艺术)细分,为了便于说明,除非特别声明,否则都认为 未指明类型的 大/小调 皆属于 自然音阶(Diatonic Scale)。 其中 大/小调 中的 大/小,虽同名于 相对音程 的 大/小,但两者却 并不是一个概念。大/小调 对大/小 的定义,并不是指音程差,而是指组成 大/小调 的自然音阶(Diatonic Scale)中 包含的一系列古典音调(Classical Tone)。 例如,C 大调(Major C)的主音(Keytone)就是 C4 261.63 Hz 。但总共包含: C4→D4→E4→F4→G4→A4→B4 C4 \\rightarrow D4 \\rightarrow E4 \\rightarrow F4 \\rightarrow G4 \\rightarrow A4 \\rightarrow B4 C4→D4→E4→F4→G4→A4→B4 所以,如果直接算。 从乐谱到我们可以使用的音程尺度名称间,还需要进行一次大/小调到实际音调组间的转换。 之后,才能够利用相对音程公式,完成快速反向计算来得到换算音序值。再用得到的音序值,查询基音频率。 因此,必须要依赖于 快速确定 大/小调 的手段。该手段就是 五度圈查询法。 五度圈(Circle of Fifths)查寻法 回到音乐(艺术)史早期,人们制定了诸如:古典五律、十二平均律等非精确度量衡。而在 十二平均律(12-TET [12-Tone Equal Temperament]) 中,将属于自然音阶(Diatonic Scale)的自然大调(Major Scale)第 4 音级 C–D–E–F–G–A–B 取为标准(此处取现代声学标准,明朝皇族世子朱载堉发明时,近代物理才刚起步,还未有机械波概念,所以仍是依赖于古筝琴律),而对 C-B(12-TET 采用的实际是 等效到同间隔的 C-F ) 间音调进行了 比例分割。 此举启发了人们对古典五律的划分,从而有了 自然大调(Major Scale),即 C大调, 的五度圈(Circle of Fifths)查寻法。这是一种将上文 12 音调以圆圈的形式串联的表示方式。当然,人们创造出该方法的时候,是凭借着历史经验总结而来的。不得令人感叹其中的智慧。 有速查图如下: 图 1-11 五度圈音调表示意图 [9] 此即为最早且被应用至今(如吉他等)的快速跨级查表法。 图中,大写字母代表自然大调(Major), 小写字母代表自然小调(Minor)。 音调(Note)所带的升降号( ♯/♭\\sharp/\\flat♯/♭ ),在音乐(艺术)中,被称作 调号(Key Signatures)。 以此为出发点,转换到同音级处理。就有, 同圈层 的音调,相邻两个音调间的音程(Interval),顺时针时差值为 Δn=(ntag−ncom)=7→k=5\\Delta n = ({n_{tag} - 
n_{com}}) = 7 \\rightarrow k=5Δn=(ntag−ncom)=7→k=5 纯五度(P5),逆时针时为 Δn=(ntag−ncom)=5→k=4\\Delta n = ({n_{tag} - n_{com}}) = 5 \\rightarrow k=4Δn=(ntag−ncom)=5→k=4 纯四度(P4)。称为 相邻调(Adjacent Key)。 五度圈中位于 同位置内外圈 的大小调,两者间的 主音(Keytone) 音程为 小三度(m3),且 主音调号(Key Signatures)相同。称为关系调(Relative Key)。 当我们从内圈向外查找,有音程: a->C(如 A4->C5)有 Δn=(12−9)%12=3(+0)→k=3\\Delta n = (12 - 9)\\%12 = 3(+0) \\rightarrow k=3Δn=(12−9)%12=3(+0)→k=3 ,为 小三度(m3) ; d->C(如 D4->C5)有 Δn=(12−2)%12=10(+0)→k=7\\Delta n = (12 - 2)\\%12 = 10(+0) \\rightarrow k=7Δn=(12−2)%12=10(+0)→k=7 ,为 小七度(m7) ; d->E(如 D4->E5)有 Δn=(16−2)%12=2→k=2(+1×7)\\Delta n = (16 - 2)\\%12 = 2 \\rightarrow k=2(+1\\times7)Δn=(16−2)%12=2→k=2(+1×7) ,为 大九度(M9) ; 所以, 以升/降序来看,五度圈是螺旋上升/下降的。 我们以表中 C♯C\\sharpC♯ 代表着进入了 更上层高音级,而 C♭C\\flatC♭ 代表降至 更下层低音级。则跨越三层的升序大调五度圈,就如下所示: 图 1-12 三层五度圈(升调方向)音调表示意图 自然大/小调,均包含 7 个音调。分别是主音前 1 个音调,和包括主音在内的后 6 个音调。在此基础上,结合五度圈的维度特点,只需要以 滑动窗口来标记对应调位,即可速查大小调中的各个音调: 【,主音,】 例如, 查表得 CCC 大调的基础音调,组成为 [F, C, G, D, A, E, B][F,\\ C,\\ G,\\ D,\\ A,\\ E,\\ B][F, C, G, D, A, E, B] 查表得 F♯F\\sharpF♯ 大调的基础音调,组成为 [B, F♯, C♯, G♯, D♯, A♯, E♯][B,\\ F\\sharp,\\ C\\sharp,\\ G\\sharp,\\ D\\sharp,\\ A\\sharp,\\ E\\sharp][B, F♯, C♯, G♯, D♯, A♯, E♯] 查表得 E♭E\\flatE♭ 大调的基础音调,组成为 [A♭, E♭, B♭, F, C, G, D][A\\flat,\\ E\\flat,\\ B\\flat,\\ F,\\ C,\\ G,\\ D][A♭, E♭, B♭, F, C, G, D] 查表得 C♯C\\sharpC♯ 大调的基础音调,组成为 [F♯, C♯, G♯, D♯, A♯, E♯, B♯][F\\sharp,\\ C\\sharp,\\ G\\sharp,\\ D\\sharp,\\ A\\sharp,\\ E\\sharp,\\ B\\sharp ][F♯, C♯, G♯, D♯, A♯, E♯, B♯] 五度图中一圈(不升降),就是音级为 4 时 ISO 16 标准的 A440 八度音阶 C4 子表。 C C♯D♭ D D♯E♭ E F F♯G♭ G G♯A♭ A A♯B♭ B C5 261.63(0) 277.18(+1) 293.66(+2) 311.13(+3) 329.63(+4) 349.23(+5) 369.99(+6) 392.00(+7) 415.30(+8) 440.00(+9) 466.16(+10) 493.88(+11) 523.25(+12) 而当发生升降时,对于在表中没有对应的额外 Δn\\Delta nΔn 个 ♯/♭\\sharp/\\flat♯/♭ 标志,提升或降低 Δn/2\\Delta n / 2Δn/2 个音级再次查对应 4±Δn24 \\pm \\tfrac{\\Delta n}{2}4±2Δn 音级,对应的 音阶频率子表。 例如, 对于五度圈更上一层的 C♯♯=C5=253.25(+12)C\\sharp \\sharp = C5 = 253.25 (+12)C♯♯=C5=253.25(+12) ,而 C♯♯♯=D♭♯=C♯5=554.37(+13)C\\sharp \\sharp \\sharp = D \\flat \\sharp = C\\sharp 5 = 554.37 (+13)C♯♯♯=D♭♯=C♯5=554.37(+13) 。 至此达成,利用音调在乐理上的音程尺度描述(ISN),以两种参考系的关联,利用公式或搜图,来转换到工程音序频率关系了。从而方便我们根据 乐理音调(Musical Note)查询它的 基波(Fundamental)频率。 为何频率在分析中,显得格外重要呢?因为频率是贯穿三种分析视角的唯一量。 ISN 本身在乐理(艺术)上,是人为认为的尺度平均的。不过,乐理上的平均,是否意味着实际频率的平均呢?结合 A440 频率音序公式(FSF)判断可知, 乐理平均并不意味着均匀的频率划分。这和人耳的听感息息相关。 经过近代心理声学对人耳感观的样本统计测定后,了解到其中的一些端倪。 音调(Notes)的 频率比(Frequency Ratio) 我们发现,以 C4 261.63 Hz 为标准,人对与 C4 频率呈现一定特殊比例的音调,会有更好的听感反馈(详见 等响曲线)。而以某些相应比例,按照从低到高的非线性变化,会使人产生 聆听时的平滑感(Smoothly)。根据这样的研究结果,可见古人间接以 非线性频率比 (虽然发明命名法的时候并无测定,而是后续心理声学补测), 主观确定了音调划分。 仍然采用该音级 4 的例子。有 12 个音调间,距离 C4 基础音调的音程(Interval)和 大致频率比(C4: 当前音调,精确小数点后一位)如下: C4 C♯D♭ D D♯E♭ E F F♯G♭ G G♯A♭ A A♯B♭ B C5 261.63(0) 277.18(+1) 293.66(+2) 311.13(+3) 329.63(+4) 349.23(+5) 369.99(+6) 392.00(+7) 415.30(+8) 440.00(+9) 466.16(+10) 493.88(+11) 523.25(+12) 0 0.5 1 1.5 2 2.5 3 3.5 4 4.5 5 5.5 6 1:1 16:15 9:8 6:5 5:4 4:3 45:32 3:2 8:5 5:3 16:9 15:8 2:1 那么,这一发现有什么作用呢? 
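上文的 A440 频率音序公式与谐波链关系,可以用下面的最小草图交叉验证(函数名与示例取值均为本文假设,非书中既有代码):

import numpy as np

def note_freq(n):
    # Note(n) = (2 ** (1/12)) ** (n - 9) * 440,n 为距 C4 的音序
    return 440.0 * 2.0 ** ((n - 9) / 12.0)

def freq_to_sequence(f):
    # n = 12 * log2(F / 440) + 9,再取最接近的整数音序
    return int(round(12.0 * np.log2(f / 440.0) + 9.0))

def harmonics(f1, count=5):
    # 谐波链:F(i) = i * F1
    return [i * f1 for i in range(1, count + 1)]

print(note_freq(0))               # C4 约 261.63 Hz
print(note_freq(7))               # G4 约 392.00 Hz
print(freq_to_sequence(523.25))   # C5 对应音序 12
print(harmonics(note_freq(-1)))   # B3(基波约 246.94 Hz)的前 5 个谐波,对应文中钢琴 B3 谐波链示例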
它的作用,体现在 创造新的音色。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_4_2.html":{"url":"Chapter_1/Language/cn/Docs_1_4_2.html","title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","keywords":"","body":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz) 我们对乐理音调到感观转换已有基本认知。但当我们遇到一些频率不在表中,且也不属于表中任意一个独立音调频率倍数(即其它音级),却又悦耳到想要记录的,非隶属单一琴键的声音时。或者想要合理的创造一种不存在于自然中的合成音时。音调频率比关系的平滑听感,就成为了指引。 它使我们可以通过已有音调频率的合理组合,以响度代替融合比例(可以说是古早的混音了,非常感性比较考验演奏者水平),来拟合新的声音。 在乐理中,称之为 和声(Harmony)。 和声(Harmony) & 协和(Consonance)& 不协(Dissonance) 和声(Harmony) 是指将音调以两两形式组和,而产生新的声音的过程。当然,为了给予艺术发挥空间,不会也不能固定响度入内。所以,和声是指参与声音其音色频率链的合成。此处为了通用说明,需要固定图形化时,采用相同大小的抽象响度表示。 不过请注意,在实际在数字合成过程中,还是需要 结合响度构建和声后的新谐波链的。 图 1-13 某钢琴同音级下纯一度(P1)与纯五度(P5)的泛音链和声图 上例为同音级的 P1 + P5 和声(如 C4 + G4)。这样的一组声音同时弹奏时,为人们带来了听感上的和谐。而同音级下的 P1 + M2 和声(如 C4 + D4),则没有这么融洽: 图 1-14 某钢琴同音级下纯一度(P1)与大二度(M2)的泛音链和声图 从两者和更多样本的 频率链重叠情况上,过往的研究者们发现,如果参与合成的声音,在频率链上有 较多的重合谐波 时,人耳会觉得声音 和谐(Harmony) 不突兀。 而同音名不同级的 乐理音调(Musical Note),其 谐波链几乎完全重合,各频率总是相差 2 的整数倍大小。如 C4 + C5 或 F4 + F6 等,几乎可以认为就是一个声音。依此称为 完美协和(Perfect Consonance)。 但相仿 P1 + P5 和声情况的音调组合,其 谐波链存在较多重合,却依然可以被分辨。我们称其为 不完美协和(Inperfect Consonance)。 而相仿 P1 + M2 和声情况的音调组合,其 谐波链几乎很少重合,参与基音相对可辨。我们称之为 不协(Dissonance)。 至于不协情况中,其 谐波链在一定范围内完全无重合的情况,参与基音完全可辨。我们称之为 完全不协(Perfect Dissonance)。 同理,可以扩展至更复杂的和声组合。 显然,协与不协的问题,同人耳对频率的敏感度高度相关。 仍然采用该音级 4 的例子。以 12 个音调间和声情况统计。距离 C4 基础音调的音程(Interval)和 大致频率比(C4: 当前音调,精确小数点后一位)如下: C4 C♯D♭ D D♯E♭ E F F♯G♭ G G♯A♭ A A♯B♭ B C5 P1 m2 M2 m3 M3 P4 A4/d5 P5 m6 M6 m7 M7 P8 0 0.5 1 1.5 2 2.5 3 3.5 4 4.5 5 5.5 6 1:1 16:15 9:8 6:5 5:4 4:3 45:32 3:2 8:5 5:3 16:9 15:8 2:1 表中,橙色 表示与 C4 完美协和,黄色 表示与 C4 不完美协和,蓝色 表示与 C4 不协,靛色 表示与 C4 完全不协。将统计扩展到整个当前音级中两两音调时,就有(音程 P1 省略比例): 可见, 当两音调间音程 为 [P1, P4, P5, P8][P1,\\ P4,\\ P5,\\ P8][P1, P4, P5, P8] 时,两音调和声 完美协和 ; 当两音调间音程 为 [m3, M3, m6, M6][m3,\\ M3,\\ m6,\\ M6][m3, M3, m6, M6] 时,两音调和声 不完美协和 ; 当两音调间音程 为 [M2, A4/d5, m7][M2,\\ A4/d5,\\ m7][M2, A4/d5, m7] 时,两音调和声 不协 ; 当两音调间音程 为 [m2, M7][m2,\\ M7][m2, M7] 时,两音调和声 完全不协 ; 至此,我们便可以利用此规律,使构成复杂和谐音的组成音,满足两两和声协和匹配。继而创造出新的声音。 这种特殊的和声过程,即是和弦(Chord)。 和弦(Chord)& 三和弦(Triad) 以协和(包括完美协和、不完美协和)音程规律,取一组由升调方向选择的三个或更多音调组成的和声,在乐理中被称为 和弦(Chord)。 和弦以成组的两两相邻音间音程差异,分 三度和弦 和 非三度和弦 两个类别。 三度弦,即以三度音程(包括 m3、M3)构成的一组和弦。 根据组成的个数又可以细化为:三和弦(三音) 、 七和弦(四音) 、 九和弦(五音) 、 十一和弦(六音) 、 十三和弦(七音)。 非三度弦,即音间音程非三度。 情况则较为复杂,包括 转位/离调和弦 所代表的一系列和弦。 在工程上,相对较常用的是三度弦。而三度弦分类下,各中和弦概念存在基本规律,可以直接从三和弦向上衍生。因此,为了便于记忆,本书采用三和弦(三音)讲解。至于非三度弦的其它类型,借助对三和弦的理解,需要时再行查阅乐理专业资料即可。 三和弦(Triad) 的组成音有三个,根据 升调 顺序被分别称为 一音(First) 、 三音(Third) 、 五音(Fifth)。有时也称为 根音(R [Root]) 、 中音(M [Mediant]) 、 冠音(T [Top])。 根音(R),即一音(First),指组成音中位于低音位置的音调; 中音(M),即三音(Third),指组成音中与根音音程三度的音调; 冠音(T),即五音(Fifth),指组成音中与根音音程五度的音调; 一般的,我们会结合两种称谓,用 根音(Root) 、 三音(Third) 、 五音(Fifth) 指代三和弦组成。 因为三度、五度包含了共有 m3、M3、d5、P5、A5 的 5 种音程在内的类型。在根音选定时,可以产生 4 种不同的组合方式,有: 大三和弦,取 Root + M3 + P5,记为 RRR ; 小三和弦,取 Root + m3 + P5,记为 rrr ; 增三和弦,取 Root + M3 + A5,记为 R+R^+R+ ; 减三和弦,取 Root + m3 + d5,记为 r∘r^{\\circ}r∘ ; 根音的选择是不受限的,比如取 C4 即 C大调的主音为根音,则有 C4 下的 大/小/增/减三和弦分别为: C=[C, E, G]c=[C, E♭, G]C+=[C, E, G♯]c∘=[C, E♭, G♭] {\\displaystyle \\begin{aligned} C &= [C,\\ E,\\ G] &c &= [C,\\ E\\flat ,\\ G] \\\\ C^+ &= [C,\\ E,\\ G\\sharp] &c^{\\circ} &= [C,\\ E\\flat,\\ G\\flat] \\\\ \\end{aligned} } CC+=[C, E, G]=[C, E, G♯]cc∘=[C, E♭, G]=[C, E♭, G♭] 而取 C大调 的 F4 为根音,则有 F4 下的 大/小/增/减三和弦分别为: F=[F, A, C5]f=[F, A♭, C5]F+=[F, A, C5♯]f∘=[F, A♭, B] {\\displaystyle \\begin{aligned} F &= [F,\\ A,\\ C5] &f &= [F,\\ A\\flat ,\\ C5] \\\\ F^+ &= [F,\\ A,\\ C5\\sharp] &f^{\\circ} &= 
[F,\\ A\\flat,\\ B] \\\\ \\end{aligned} } FF+=[F, A, C5]=[F, A, C5♯]ff∘=[F, A♭, C5]=[F, A♭, B] 这种组合类型是固定的,可以类推至任意音调。理论适用于所有三度音程和弦,即三度弦。 不过,因为音程之于 自然音阶 间的转换原因。通过组合公式,直接计算的方式依旧会显得比较繁琐。能不能参考五度圈对大/小调的查表方式,构建一个相类似的查表法,来快速完成多音调的和弦组合呢? 答案是可以的,调性网络(Tonnetz) 就是答案。 调性网络(Tonnetz) 现代调性网络(Tonnetz)原型,来自于数学大家 莱昂哈德·欧拉(Leonhard Euler,1707~1783) 在早年尝试的,以数学建模构造良好合声的探索 [10] 。欧拉首次采用数学图论方法,解决和弦问题,提出了 欧拉调性网络(Euler's Tonnetz,Tonnetz 是德语,相当于英文的 Tone-net)。 图 1-15 欧拉论文原稿中的调性网络(Euler's Tonnetz)示意图 欧拉调性网络可视化的表示了,协和和弦间的音程关系。从上而下的标识了两种联系。位于 上方的音调,其 左分支 是距离它最近的五度(P5、d5、A5)音程对应音调,而 右分支 是距离它最近的大三度(M3)音程对应音调。例如,F->C(F4->C5) 有 C 是 F 的 P5,F->A(F4->A4) 有 A 是 F 的 M3。全图涵盖了同音级下的一套完整标准十二律。 不过因为范围和和弦上的局限性,欧拉调性网络没有得到太多的应用。 状态一直持续到 19 世纪时期末,被 胡戈·里曼(Hugo Riemann,1849~1919) 打破。 胡戈·里曼结合五度圈查表法有关升降调的螺旋延展性,在对 大小调间和弦二元性(Major/Minor Chord Dualism) 的研究时,发现了大三和弦和小三和弦间,在欧拉调性网络沿音级的五度展开上。可以通过简单的 平移变换(Schritt) 和 倒影变换(Wechsel),得到同源与相邻向上/向下和弦效果。由此,提出了 里曼理论(Riemannian Theory)。并在这之后,将原有两个主要变换中的倒影变换,扩展到了 关系变换 与 导音变换 双变换。结合原有大小和弦二元论的 平移变换,构成三主要变换体系 [11] ,称为 新里曼三元理论(Neo-Riemannian Triadic Theory),简称 新里曼理论(Neo-Riemannian Theory)。 图 1-16 新里曼理论(Neo-Riemannian Theory)的三主要变换 注意,上图中选择三和弦时,必须按照从根音到冠音的相同箭头方向选择。箭头表示了 位于下一位 的组成音。 依托调性网络的几何化,新里曼理论的 三种主要变换,分别是: P变换(P Transformation),即 平行变换(Parallel Transformation)。如上图蓝色箭头标识关键步骤。 P变换只能在完全相同主音的自然音阶,即同一大/小调,内进行。 在已知一则三和弦组成情况下,查询根音(Root)和五音(Fifth)相同的另一组三和弦的方式。再以根音五音连线作为平行四边形对角线,用两组三音(Third)构造平行四边形。、结果中与原三和弦中音相对的另一角,为所求和弦中音。如图中 C=[C→E→G] & c=[C→E♭→G]C = [C \\rightarrow E \\rightarrow G] \\ \\& \\ c = [C \\rightarrow E\\flat \\rightarrow G]C=[C→E→G] & c=[C→E♭→G] 。 P变换,让我们能够快速完成 同主音间 大/小三和弦 的转换。 R变换(R Transformation),即 关系变换(Relative Transformation)。如上图红色箭头标识关键步骤。 R变换只能在主音音程互为小三度(m3)关系的自然音阶,即关系调(Relative Key),间进行。 在已知一则三和弦组成情况下,通过将五音(Fifth)升/降一个五度(P5、d5、A5),再次以移动后的五音(Fifth)与原三和弦三音(Third)查询 大/小调的同位关系三和弦。构成结果平行四边形的另一角,就是升调方向时所求关系大调五音,或降调方向时所求关系小调根音。如图中 c=[C→E♭→G] & E♭=[E♭→G→B♭]c = [C \\rightarrow E\\flat \\rightarrow G] \\ \\& \\ E\\flat = [E\\flat \\rightarrow G \\rightarrow B\\flat]c=[C→E♭→G] & E♭=[E♭→G→B♭] 。 R变换,让我们能够快速完成 关系调间 大三和弦 与 小三和弦 的转换。 L变换(L Transformation),即 导音变换(Leading-Tone Transformation)。如上图靛色箭头标识关键步骤。 L变换只能在主音音程互为升调方向纯五度(P5)关系的自然音阶,即相邻调(Adjacent Key),间进行。 在已知一则三和弦组成情况下,通过将五音(Fifth)升/降一个大三度(M3),查询新位置下的五音所处的三和弦。构成结果平行四边形的另一角,就是升调方向时所求相邻调五音,或降调方向时所求相邻调根音。如图中 A♭=[A♭→C→E♭] & c=[C→E♭→G]A\\flat = [A\\flat \\rightarrow C \\rightarrow E\\flat] \\ \\& \\ c = [C \\rightarrow E\\flat \\rightarrow G]A♭=[A♭→C→E♭] & c=[C→E♭→G] 。 L变换,让我们能够快速完成 相邻调间 大/小三和弦 的转换。 新里曼理论,进一步完善了现代调性网络的音程变换图系统。使得以该理论为根据的几何建模,存在 无限延伸的音调覆盖 和 快速可查 的特点。从而变得足够泛化且实用。由此绘制而成的新里曼理论平面拓扑调性网络,成为了三和弦的快速查表法的基石: 图 1-17 以新里曼理论(Neo-Riemannian Theory)绘制的调性网络 再配合上五度圈的自然音阶快速确定,与音序频率表的音调频率映射关系,即可完成对和弦的音调频率转换。 以频率,打通乐理到心理声学、声乐工程的关系。 至此,我们已经掌握了基础的乐理观测方法,并能够较为客观的评判。而在前文中,我们提到人耳对频率的感知,是促成一切的关键。而心理声学的实验结果,是其客观数据化的前提。 那么,其具体是怎样的测量过程,而结果又是怎样体现的呢? 这就需要提到 等响曲线(Equal Loudness Level Contour) 了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_4_3.html":{"url":"Chapter_1/Language/cn/Docs_1_4_3.html","title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","keywords":"","body":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour]) 等响曲线(ELLC [Equal Loudness-Level Contour]) 是反映在面对指定常量响度的稳定纯音时,人耳对各纯音基波频率下的响度感知最小临界点情况 [12] 。作为心理测量值,最终的结果是基于多组样本测量结果所得的 平均值。 等响曲线的测量 有记录可查的最早心理声学测量结果,来自 哈维·弗莱彻(Harvey Fletcher,1884~1981) 和 怀尔登·芒森(Wilden A. 
Munson,1902~1982) 于 1933 年发表的 “响度、其定义、测量和计算” 一文 [13] 。正是该论文,奠定了等响曲线的基本测量方式。 弗莱彻和芒森对于每个频率和强度,以选取的 1000Hz 参考音 为基准。调整参考音的响度,直到听众认为它与测试输入的 “稳定纯音” 响度相同,作为一次有效记录标准。统计了大量样本。 测量中,样本指的是不同的受试者。所以样本的基数决定了标准的有效程度。而样本的输入所用的 “稳定纯音”,则指的是在 固定响度下 的一组,按照基音以一定频率步长 (一般是取三分之一个八度递增,或以 10Hz 递增,前者居多)从人听力下限 20Hz 开始递增至 20000Hz (ISO 226 系列因采用音调测量,频率递增是非等步长的,范围为 20~12500Hz [14] )的 所有纯音音调。 因此,假设选用了 [0, 20, 40, 60, 80, 100][0,\\ 20,\\ 40,\\ 60,\\ 80,\\ 100][0, 20, 40, 60, 80, 100] 共 6 组固定响度,选择 10Hz 频率步长。则每组有 1999 个纯音输入,共记 6×1999=119946 \\times 1999 = 119946×1999=11994 个输入。当然,一般统计采用的是三分之一八度递增,不会有如此多且密集的输入。 虽然样本存在差异,但是输入却可以在一定程度上客观的表示,以减小个体不同造成的误差。在本书前面的章节已经介绍了, 单一音调声音都是复合音(Complex Sounds)。而其 和声(Harmony) 可看作是由一系列不同基音频率下的一组纯音,将各自谐波链按响度叠加组合而成。 这一理论可以延生至自然界中其他种类声音的合成,即声音的合成就是泛音链的合成。当然,也可以作用于 非标准音程(Interval) 的特殊单一音调合成。 频律响度特征(FLF [Frequency Loudness Feature]) 我们可以通过以横坐标为频率而纵坐标为响度,将构成某一时间点上的一个单音的所有频率成分,拆分到各频率混合后的响度标识状态了。 而形成的频率响度曲线,为了 区别于乐理 中的和弦和声相对未量化响度的概念,被称为 该单音在该时刻下 的 频律响度特征(FLF [Frequency Loudness Feature])。通过 FLF,我们可以判断 音色 情况,这点在之前的声音三要素部分时,已有使用。 图 1-18 某低音(Bass)的频律响度特征(FLF)示意连峰图 在真实场景中,我们很少能拿到指定时刻的存粹数据。所以,一般会 统计一小段时间片 下的 频率响度信息,并通过计算这一段时间片内各频率自身的 响度加合求算术平均,来表示该时间片 中间点的时刻,所具有的 频率响度特征。 如下图所示(相关代码实现,参见第五章),响度 、 频率 、 谐波链 情况表露无遗: 图 1-19 多乐器演奏音调 A4 时在 5s 处取 100ms 所得频率响度特征 频率响度信息的来源,则是 来自对相应时间片内的原音频数据,进行离散傅立叶变换做 时频分离 获取。具体原理,会在本书第三章中进行详细阐述。 FLF 反映的是,指定时刻声音本身的构成。 等响曲线的输入,是以 FLF 为标准,按照选取频率处理所得的指定响度,有相对纯粹谐波链的音调。 等响曲线的最新修订 既然是统计所得,那么等响曲线就存在 迭代 和 标准修订。最新一次修订来自于 国际标准化组织(ISO [International Organization for Standardization]) 在 2023 年发布的 ISO 226:2023 标准。该标准联合了来自世界各地的多家研究机构的综合数据结果,统计了从 18 至 25 岁来自欧美和东南亚的大量受试者测量结果求均值所得,是 国际通用标准。 2023 年的再次测量,选择了 [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100][0,\\ 10,\\ 20,\\ 30,\\ 40,\\ 50,\\ 60,\\ 70,\\ 80,\\ 90,\\ 100][0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 共 11 组固定响度。以 10 方(Phon)为输入响度步长,统计仍然以从 20Hz 按照 三分之一个八度(One-Third Octave) 递增至 12500Hz 的,每个响度分组包含 29 个音调频率输入。共计 11×29=31911 \\times 29 = 31911×29=319 类 输入信号 [15] 。 图 1-20 ISO 226:2023 标准等响曲线 [15] 上图展示了相应结果,注意,横坐标并非是等频率步长的,而是以音程尺度描述(ISN [Interval Scale Name])标记的 三分之一个八度(One-Third Octave) 步长。 最下方的 0 方(Phon)线,表示 人的可听下限,称为 可闻阈(The Threshold of Hearing)。第二小的 10 方(Phon)线,表示人的 最小可辨认下限,称为 静音阈(The Threshold of Quiet)。介于可闻阈和静音阈间的声音,能够听见但不可辨认。最上方的 100 方(Phon)线,则表示 人的听觉痛觉线,称为最大安全听阈上限,或 痛觉阈(The Threshold of Pain)。超过痛觉阈的响度,会使人听觉不适并产生 疼痛感,且在持续一段时间后,对人的听力造成 永久性 的损害。 三者都 相对缺少样本,因此采用了 虚线 标记。由 可闻阈 和 痛觉阈 所围成的区域,被称为安全听阈(Safety Hearing Threshold)。 所有的工程技术,按理来说,皆因该在安全听阈范围内进行。避免对人耳造成损害。而我们该如何衡量这一点,并同时检测此类设备是否符合我们期望的标准呢?只需考察其 频率响应(Frequency Response)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_4_4.html":{"url":"Chapter_1/Language/cn/Docs_1_4_4.html","title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","keywords":"","body":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour]) 频率响应(Frequency Response) 用来指代某系统(既可以是设备,也可以是人,泛指针对频率的感知器)在 接收频率(Frequency)输入时的输出响度(Loudness)(或最佳接收响度) 的特点 [16] 。是描述设备频段内,频率响度尺度 的客观测定结果。 频率响应是对声音的响应。我们需要先了解,怎样描述一个单音在某时刻的特征,才能更好的理解什么是频率响应。虽然两者并不相同。 另外的,声音的频率响应 和 电路学(Circuitology)中的频率响应效应 也是 非等位 的概念(虽然两者有着一样的名称),注意区分差异。电路学频率响应效应不在本书范围。 需要注意的是,前文中介绍的 频律响度特征(FLF) 虽然和 频率响应曲线(FRC,泛指所有频率响应的绘图结果)采用了 相同的坐标系设置,并因横纵坐标一致,而能够同参考系内展示。但是 两者含义完全不同。切勿混淆! 
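为了把 FLF 的获取方式落到实处,这里给出一个基于 librosa 的最小示意:对 5s 前后约 100ms 的 STFT 幅度按帧求算术平均,得到该时刻的频率响度特征(完整实现见本书第五章;文件名 sample.wav 与窗口参数均为本文假设):

import numpy as np
import librosa

# 读取音频并做短时傅立叶变换(STFT)完成时频分离
y, sr = librosa.load("sample.wav", sr=None, mono=True)    # sample.wav 为假设的输入文件
stft = np.abs(librosa.stft(y, n_fft=4096, hop_length=512))
freqs = librosa.fft_frequencies(sr=sr, n_fft=4096)

# 取 5s 处前后各 50ms(约 100ms)的帧,按频率求算术平均,得到该时刻的 FLF
center, half = 5.0, 0.05
frame_lo = librosa.time_to_frames(center - half, sr=sr, hop_length=512)
frame_hi = librosa.time_to_frames(center + half, sr=sr, hop_length=512)
flf = stft[:, frame_lo:frame_hi + 1].mean(axis=1)

flf_db = librosa.amplitude_to_db(flf, ref=np.max)         # 响度归一化到 0 dB 以下
print(freqs[np.argmax(flf_db)], "Hz 附近响度最大")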
频率 响应(Response)与 响度(Loudness)的辨析 响应是某系统的尺度,而 响度是系统尺度下的值。响度对应的尺度,既可以来自于 考察范围内的某系统响应(例如,某设备),也可以来自于更大范围的相对客观系统(例如,自然界)。考察范围,就是该声音传播感知链中的设备部分。 对于 相对客观系统,因为一般作为 响度的基础标的,当其他系统属于其子系统时, 子母系统尺度转换的响度不需要放缩。而只有 发生于兄弟系统间的尺度变换,需要放缩交互时的响度。 因此,在采用 ANSI/ASA S1.1-2013 或 ISO 226 系列 规格中,以 声压级(SPL) 的相对客观 分贝(dB) 单位表示响度作为前提。如果在 考察范围内传递,不同兄弟系统 下的响度,就需要 将前一级 的响度值,从源尺度变换为新尺度,计算 放缩后的抽象值。 例如,从自然届的客观环境测量某单一频率纯音为 50dB,在当前采样设备上,有该频率下 0~100dB 的响应范围,采样到的该频率为 50dB。但如果采样后,接收该频率的下一级设备只有0~60dB 的响应范围,则经过该二级设备处理后,源频率就变成了约取值 30dB 大小。当然,实际上 并不是 可以通过 计算得到的,而需要进行 频谱测量。但 大致变化的形势,可以等效来看。 即,存在 非精确换算 : Ldev≈FRdevFRori⋅Lori {\\displaystyle \\begin{aligned} L_{dev} \\approx \\frac{FR_{dev}}{FR_{ori}} \\cdot L_{ori} \\\\ \\end{aligned} } Ldev≈FRoriFRdev⋅Lori 以参考值,表示该设备对前一级输入的影响。 而设备频响是如何测定的呢? 其测量的方式为,在保证输入声音的复合响度值不变的前提下,逐步增大指定频率的响度(保证复合响度不变则通时衰减其他频率响度),直到达到复合响度值。最终测得的该频率响度上限,就是该复合响度下的对应频率频率响应上限。 上限可以为负数,代表该复合响度下,无法感知指定频率。 同理,将该频率的响度衰减到 0 并保持复合响度不变时,得到的就是该复合响度下的该频率的频率响应下限。当然,下限值不出意外一般都是 0 ,并不会高于上限。也就是,当上限为 0 或负数时,下限也不存在意义了。 频率响应在不同系统下的作用 为了便于说明,本书将 “某系统” 分成三种:接收声音转换为信号的 收音设备(Hearer),接收信号并输出声音的 放音设备(Speaker),和 末端系统(Terminus)。 对于 收音设备(Hearer),频率响应反应的是,当收音设备 接收到声音信号后,由设备本身决定的当前放音音量下,能够感知到 声音包含频率中,某 单个指定频率 的实际 响度尺度。什么是收音设备呢?诸如,人耳、麦克风、MIDI 测试仪、助听器 等,用来接收声音或临耳(临近末端)播放的东西。 对于 放音设备(Speaker),频率响应反应的是,当放音设备 接收到频率信号后,由设备本身作用而产生 相应频率声音的最大响度,与 输入频率 之间的关系。什么是放音设备呢?简单来说,就是例如:人的声带、音响、麦克风、钢琴等乐器、蜂鸣器、耳机 之类,能够产生声音或提供声音至后级的东西。 对于 末端系统(Terminus),频率响应表示的则是 感知的终点。其自身拥有的 听阈(Hearing Threshold),决定了最终结果。而 听阈就是该系统对声音感知的等响曲线围成的范围。这里的末端系统,指的即为整个传播感知链的最尾端。不一定为人的神经系统,也可以是测量仪器,或者存粹的虚拟端。 频率响应对于三种设备的意义不同。但三者都会以人类较通用的听觉频率范围(20~20000Hz),作为 主要区段(Major Range) 进行考察。 除了末端系统(Terminus)的频率响应可由等响曲线衡量,收放音设备的频率响应情况,则还需要其他方式表示。 收音频响曲线(HFR [Hearer Frequency Response]) 我们将作用于收音设备的频响曲线,称为 收音频响曲线(HFR [Hearer Frequency Response])。HFR 被用来确认 收音设备的优良程度。代表 收音设备的收音频段的频率敏感范围,和响度尺度关系。 而根据情况差异,对收音设备的衡量,既可以按照其 HFR 可以有效地重现 20~20000Hz 的整个频段(如监听麦克),也可以限制在可听频谱内的较小频段(如通讯麦克)。 以 HiFi 收音为例: 图 1-21 AKG C414XLS 麦克风的 HFR 如上图所示,我们选择 AKG C414XLS 来做 全向收音。从 C414XLS 的官方 HFR 上,可以看到在 40 Hz 、80Hz 、160Hz 基音频率的声音采集时,C414XLS 频率响应从对应采集频率起始点位置(横坐标的 基音频率 作垂线至图中 标记的当前频率线焦点)至 20000Hz 频段,基本都控制在了 ±3dB 范围内,且有较少的波动。说明了 C414XLS 有一个近乎完美平坦的频率响应曲线,能够 更好的 保存采集声音的清晰度和立体感。 由此,我们引申出了衡量 HFR 好恶的 主要考察指标: 即,从采集频率位置 开始的 足够平直(Flat) 和 足够光滑(Smoothly)。 平直且光滑的 HFR 代表着,收音设备能够以 相同大小的响度尺度,来 采集 任何落于 频段范围 内的声音,而不会引入较大的设备误差。从而使得接收的声音,在经过本级处理后,不会 输出发生形变的原声采集信号。 由于采集基本是满尺度,响度尺度在 HFR 上基本等同于当前响度了。所以,为了规范和便于区分,通常会在 HFR 中采用相对于原声的响度差,来表示还原程度。即 HFR 的纵坐标,表示的是 采集结果尺度对比原响度差值,称为 相对声压级(Relative SPL)。 放音频响曲线(SFR [Speaker Frequency Response]) 我们将作用于放音设备的频响曲线,称为 放音频响曲线(SFR [Speaker Frequency Response])。SFR 被用来确认 放音设备的优良程度。代表 放音设备的防音频段的响度尺度,和频率稳定范围关系。 同 HFR 类似,衡量 SFR 好恶主要考察指标,几乎与 HFR 一致: 即,从附和误差范围 的起始点后,有 足够平直(Flat) 和 足够光滑(Smoothly) 曲线。 平直且光滑的 SFR 代表着,放音设备能够以 相同大小的响度尺度,来同尺度的 播放 任何落于 频段范围 内的,未超过尺度范围响度 的声音。这同样意味着,该放音设备不会引入较大的设备误差,从而导致接收的输入信号,或者前一级输出声音的频率响度特征,在本级输出发生形变。 因此,理论上的最佳 SFR 就应该是一条直线。 图 1-22 一条在 110~18000Hz 下(±3dB)平直的 SFR 样例 这样的理想状态,基本无法企及。所以,参考 HFR 测定的区间量,SFR 设置了 浮动标准,即 在一定的响度范围内的相对水平 即可。如上图,就是音响某设备的 SFR 测试结果。该设备在 110~18000Hz 有响应均在 ±3dB 的修正内,称 该设备 110~18000Hz(±3dB)平直。这既是它的 SFR 属性。对于人造设备来说 SFR 一般是固定的。 而对于 生物器官,例如动物或人的声带等,衡量 SFR 是没有意义的。生物声带 SFR 受客观个体差异的影响,是独特且不一而同的(相比工业制品)非平直曲线族构成的区域范围。 生物有通过改变声带的大小和形状,来产生不同的声音的能力。这使得其可以 经由训练,来调整自身在某些声音频段上的频率响应,来达到更高或更低的频率稳定的响度增减。这种动态的能力,让生物具有了 动态的频率响应范围。同时,也可以后天调整频率响应表现。 例如,经过训练的歌手,其好听的嗓音究其原因,就是在处于自身主要声音特色频段,且适合自身音量大小的发声时,有着快速变换音调(基音)但始终处于相对光滑平直的 SFR 的能力。 如果我们能够使人造放音设备也具有这样的能力,或许也能实现根据不同的需要,动态调整放音频率响应。不过这样的技术成本太高。且由于需要考虑共振等因素,不一定能够得到我们想要的平滑平直 SFR ,让市面上并没有这样的产品。 大多数音响类产品,在 对自身品质有自信 的情况下,都会给出 SPR 以提供客户参考。 图 1-23 B&C 的 5FG44 喇叭单元官方 
SFR 上图为意大利 B&C Speakers 公司给出的,在空间响度恒定在 92dB 的情况下,测得自家低频驱动器 5FG44 喇叭单元的官方 SFR 。 但 SPR 并不一定都可以这么轻松获取,大多时候我们只能得到 SPR 结果的范围参数。在这种情况下,不参考 SFR 比较两个设备的好坏可用:固定频段比较浮动范围,或者 固定浮动范围比较频段。两者都是快速判断的办法。频段越广,浮动范围越小,则设备越优秀。 现在,让我们进入一个完整的感知过程。 传播感知链 & 频响上下文(Frequency Response Context) 以录音棚采样这一事件举例。在监听的过程中,通常调音监理希望对链路的声源,产生的声音特征,进行完整的保存,直到进入终端(也就是人的神经系统)评估。在这种上下文语境的理想状态下,最终的末端接收的声音频率响度特征,需要 尽可能的和声源频率响度特征保持一致。 假设此时歌唱者发出了一个单音,整个传播感知链如下(实际情况中的曲线要复杂得多): 图 1-24 录音棚采样场景的理想传播感知链模拟 图中, 橙色线,表示 歌唱者该时刻单音 的 频率响度特征(FLF); 红色线,表示作为 收音/放音设备 时的 收音频响曲线(HFR)/放音频响曲线(SFR); 蓝色线,表示 传输过程 中,源单音在该级下的 频率响度特征(FLF); 上例就很好的展示了,三类设备间的关系。对于前一级来说的收音设备,对于其后一级而言,是它的放音设备。 但这种监听模式,即 狭义上的 HiFi(High Fidelity),是否和人直接听到相同的声音的感受一致呢?答案可能和大部分发烧友的直观认知不一致,那就是“不是”。或者说,这种 HiFi 上的听觉感受,更近似于通过骨头传导下,歌唱者自己听到的自己的原声。 同样情况下,假设聆听声音完全一致。不考虑位姿(听声方位不同,也会导致听到的声音不同,这部分不在本书讨论范围内),存在一个站在声源附近,直接通过空气传播,用耳朵来听的聆听者。此时,他的传播感知链如下(图例中略微夸张了效果): 图 1-25 录音棚内听众(假设)场景的理想传播感知链模拟 显然,相同声源的某个音,在不同体系的传播感知链下,有不同的末端系统(Terminus)的直观感受。而 决定不同体系的关键背景要求(Majot Background Requests),就被称为 频响上下文(Frequency Response Context)。 可见,背景条件(即上下文)的不同,对需求的频率响应的衡量方式,也是完全不同的。因此,在实际情况中,背景信息至关重要,决定了我们如何利用频率响应进行有效的分析和处理。界定上下文,往往是开始频响分析的第一步,也是最为关键的一步。 为了方便理解,以频率响应应用的 HiFi 监听领域,来作为下文讲解的 频响上下文。 监听 HiFi 耳机的 SFR 设计原理 这里或许已经有读者存在疑问。即然 平直光滑 是衡量 HFR & SFR 的统一标准。那为何在上文的 HiFi 传播感知链中,理想监听耳机的 SFR 却并不平直呢? 先考虑一个类似的场景,例如电影院的声场营造。 对于影院,理想的放音设备,应尽可能的在接收到指定范围频率的等响度输入时,能够响度恒定(理想)的输出该频段的任何声音。从而在音源上,物理客观的保证对输入的恒等还原。但有时,因为设备本身或者环境因素,我们需要突出或只产生某一个频段的声音时,此时的 SFR 就被用来作为调整的依据,突出一些某频段并减弱一些评断。直到设备的 SFR 被控制在突出该频段下,有最大响度的最佳反馈。 有来自于《声音的重建》其作者的研究工作,在平稳 SFR 输出音响下,各类影院在座位处的 HFR 综合统计 [17] : 图 1-26 不同大小影院的座位平均听感 HFR 统计数据 [17] 上面展示了影院环境下,座位上的 HFR 已经不是平稳的了。因此,为了保证座位处的听感一致于自然环境听感(类似于前文中举例的录音棚内听众直听),就需要让音响的 SFR 在 20~1000Hz 有一定程度的响应(即响度尺度上的)衰减。这么做的结果就是,影院音响的 SFR 会在低频和中频的频段,不够平直。 所以,为了抵消环境或个体等的主客观影响,会调整发音设备,使其 SFR 满足条件。如下: 图 1-27 影院环境根据座位平均听感 HFR 对音响调节结果 SFR 示意图 [17] 同理,当我们处于 HiFi 监听的频响上下文时,由于直贴人耳,以及 HiFi 期望对歌手原声完全还原的目标,也需要对耳机进行调整,使得耳机的 SFR 可以抵消人耳的听阈特征。 这意味着,需要监听耳机尽可能的在接收到指定范围的频率时,能够拟合人耳(或最终感知器)在对应响度下的频响曲线。从而保证,通过前级与本级的 HFR 增减向消,实现传递数据的线性稳定。最终在末端感知节点的源数据还原时,具有目标一致性。 想到这里,首先就是如何获取人耳在对应响度下的频响曲线。这点其实很容易,只需要对照 等响曲线(ELLC),按照相应的输入响度声压级(SPL)转方(Phon)单位后,查找所在曲线即可。查到的曲线(当响度并不在图中,而是落于两曲线间时,需要计算等效曲线),就是 当前响度下的人耳频响曲线。 以 50dB 为例,由于在满足 ISO 标准条件下有 1 dB=1 Phon1\\ dB = 1\\ Phon1 dB=1 Phon 可知,需要查找的等响曲线为 50 方(Phon)等响曲线。 有下图( 橙色 为查找结果): 图 1-28 ISO 226:2023 标准等响曲线截取 50 方(Phon)线 这即为所需 50dB 输入下的人耳平均频响曲线,即 50dB 的人耳 HFR。显然这只是个平均统计数据(非常高端的定制 HiFi 耳模入耳式耳机,除了采样耳道模型来制作耳机音道外,还会为每个人都进行个体 ELLC 测量,并基于测量结果,独立设计专属于个人的耳机 SFR,但这么做不适用于批量生产且极度昂贵)。 那么,想要 抵消掉这种频响情况,让听者能够完整感受到歌唱者的原始声音,该怎么做呢? 
只需要,让生产的耳机在选定的响度输入时,其 SFR 每个频率下的响度尺度,完全与此时人耳 HFR 对应频率下的响度尺度,以 50dB 水平线为轴对称,就能达到效果。如下: 图 1-29 基于 ISO 226:2023 的 50 方(Phon)线设计的入耳耳机 SFR 上图中,绿色线 即为想要在 50dB 响度下达到完全监听 HiFi 效果的入耳式耳机,其理想的 50dB SFR 曲线。入耳式耳机由于没有空胞(即耳罩式耳机,发生单元与耳朵间的空腔)问题需要考虑,等效下待处理的只有人耳 HFR 曲线,因此才会呈现出对称关系。 以该 SFR 调整设备,直到设备频率响应近似如此,就能达到最好效果。 不过,耳机这种人造设备,其频响特性基本是固定的。这代表着,如果我们选定一个响度作为 基准响度(Standard Volume),那么在设备面临或高或低的其他响度输入时,其 SFR 是会有一定程度形变的。也就是说,我们只能尽可能的保证在选定基础响度的一定误差范围内,贴合抵消该响度范围内的人耳 HFR 曲线。 所以,我们需要一定的标准,来方便生产活动的统一产品质量衡量。 哈曼曲线(Harman Target Curve) 就是这类标准之一。 为什么是之一呢?因为人耳的特殊性和厂商各自的特色,不同厂商或研究机构,基于不同的样本集,指定了多种适用于一定范围人群或自身产品特色的 SFR 标准。而 哈曼曲线则属于其中被接受程度最广的标准之一。 搞清楚哈曼曲线,对于其他类似的标准,即可举一反三触类旁通。 哈曼曲线(HTC [Harman Target Curve]) 哈曼曲线(HTC [Harman Target Curve]) 是用类似于前面本书提到 “ 50dB 响度时,入耳式耳机 SFR 抵消 人耳 ISO 226 标准 50Phons HFR” 的目标导向,获取的 人耳 85dB 情况下 的 HiFi 场景,用于设备参考的 主观测量 SFR 标准。 最早的 HTC 2013 标准,是由 肖恩·奥利佛(Sean Olive) 博士 和 哈曼音频实验室(Harman Audio Lab) 的其他研究人员,在 2013 年利用研究所条件,设计了 6 组 双盲试听对比实验(Double-Blind Listening Test) 测得的,经修正后听者 HFR 采样均值曲线。 图 1-30 哈曼曲线 2013 标准实验采用的测试听力设备 [18] 他们对如上表单的听音设备,进行了 每组 10 位不同听力情况听众 的,听众评分和设备频响曲线测试和统计,最终得到了如下结果: 图 1-31 哈曼曲线 2013 测试听力设备听众评分与设备 HFR 结果 [18] 奥利佛博士和其团队,将 评分最高的 HP1~HP4 组的 HFR 数据,进行了基于 感知均衡 情况(图中绿色线即为均衡器调整)的 平均化修正,再将四组结果进行了拟合,得到了光滑的设备 85dB 时的 HFR 人造曲线。 图 1-32 哈曼曲线 2013 和 2015 标准 以此,认为贴近于该曲线的耳机设备,有着满足大多数人最佳听感的主观度量曲线。 不过由于样本量过小,2013 年的测量结果并没有足够的说服力。为了解决说服力问题,在 2015 年、 2017 年,哈曼曲线又经过了两轮样本量级和受试设备的扩充,并重新测定了结果。 而最 新一次的测定就是 2017 年的 HTC 2017标准。相对更具有参考价值: 图 1-33 哈曼曲线 2013 和 2015 标准 但正如 肖恩·奥利佛 本人所言,“It is important for the reader not to draw generalizations from these results beyond the conditions we tested.” (“重要的是,读者不要从这些结果中得出超出我们测试条件的概括。”) [19] 哈曼曲线只能是参考,是存在大量主观作用和客观条件的。只能作为一种主观标准提供有限的意见。而这也和其他类似的耳机 SFR 标准,有着同样的问题。 至于具体是否能先觉的量化每个人的听觉体验呢?或许肖恩博士的另一句话,会有更大的参考价值,那就是 [19]: “It makes perfect sense, at least to me. 
Only then will listeners hear the truth -- music reproduced as the artist intended.”(“至少对我来说是这样的。(不过也正是)只有这样,听众才能听到真相——音乐按照艺术家的意图复制。”) 这就是感官感受和工程测量的不同了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_4_5.html":{"url":"Chapter_1/Language/cn/Docs_1_4_5.html","title":"1.4.5 工程:频谱图(Spectrum)","keywords":"","body":"1.4.5 工程:频谱图(Spectrum) 如果说之前我们介绍的几种对声音的解构方式,或多或少都显得有些唯心的话,那 频谱图(Spectrum)则是一种纯客观的声音度量与观测法了。 频谱图(Spectrum) 是对持续一段时间的某段声音,其频率情况在由 指定频率区间 与 时域 构成 复平面(Complex Plane) 上展开的可视化描述图。同 某时刻声音的频率响度特征(FLF)一样,需要利用傅立叶变换时频分离获取。而从从时间流逝的角度,即时间轴作为 z轴 来观察,两者也存在相互关系。 频率响度特征(FLF)与频谱图的关系 可以认为,如果时间是离散的,频谱图就相当于由该声音持续长度时间的一系列各个时刻 频率响度特征切片,构成的图表。 虽然频谱图中的时间被认为是连续的(实际只是步长较小而已,毕竟考虑计算及性能,会采用从属于傅立叶离散变换的 短时傅立叶变换(STFT [Short-TIme Fourier Transform]) 快速处理),但参考 FLF 的生成方式,只需要 小范围求平均 就能完成切片。 如下: 图 1-34 一组乐器演奏 A4 音调约 12s 的 3D 频谱图 上图就是一张完整的频谱图,如果我们在 5s左右截取前后 100ms 左右数据,并取平均。就能获得其在 5s 左右 ±100ms 范围的频率响度特征(FLF),而这结果在之前的章节中,已经见过了,即: 图 1-35 多乐器演奏音调 A4 时在 5s 处取 100ms 所得频率响度特征 而这张频谱图,就是该 FLF 的声源分析结果(具体的生成用代码,在本书第五章节提供)。 因此,通过某段声音的频谱图分析,我们通过切片手段,能够获取该声源,在任意时刻的频率响度特征。这也是为什么,频率响度特征,有时被称为频率响度切片的原因。 一般的,若无特指,声音的频谱图皆代指该声音的 三维频谱图(3D Spectrum)。 频率响度特征(FLF)与频谱图的关系 显然,频谱图有三个坐标轴,分别是 时间轴(Time Axis)、频率轴(Frequency Axis)、响度轴(Loudness Axis)。三个坐标轴两两构成平面,而这些平面在某种程度上,提供观察声音信息的不同视角: 由 时间轴(Time Axis) 和 频率轴(Frequency Axis) 构成了 时频切面; 由 时间轴(Time Axis) 和 响度轴(Loudness Axis) 构成了 波形切面; 由 频率轴(Frequency Axis) 和 响度轴(Loudness Axis) 构成了 频响切面; 可见,前文中有关声音在乐理和感受上的解构,多发生于 波形切面 和 频响切面(尤其是后者),及其关联平行平面(如 ELLC、FRC 等)的观察窗口。可以说是 对该切面信息主观度量的衍生产物。 语谱图(Spectrogram)与 时频切面(TFS [Time-Frequency Section]) 时频切面(TFS [Time-Frequency Section]) 能够用来获取,在指定响度大小情况下,严格满足该响度的频率随时间的分布关系。由于单独看某一个固定的响度值下的时频关系,并没有太大意义,因此常以某声音完整数据的所有响度时频切片,按照切片所处响度高低用不同颜色表示后叠加,来二维的表示该声音的频谱情况。 这样做的几何意义,即为获得了该声音的频谱图,在时频切面的投影。而通过不同颜色(通常为冷暖过渡色)对原频谱信息进行了完整的降为保存,使得这个投影结果,也可以用来代表原频谱图情况,被称为 声纹图(Voiceprint)。 声纹图(Voiceprint)因此也被称为 二维频谱图(2D Spectrum)。为了区别于 三维频谱图 以免产生混淆,被改称为 语谱图(Spectrogram)。 图 1-36 一组乐器演奏 A4 音调约 12s 的 3D 频谱图 & 投影所得 语谱图(上) 波形图(Waveform)与 波形切面(TLS [Time-Loudness Section]) 波形切面(TLS [Time-Loudness Section]) 是声音信息在经过时频分离后,得到的 从时域观察频域角度的频域维度切片(注意观察方向,垂直于 FLS 情况,时频分离原理在第三章详解)。能够用来获取,指定某频率下,该频率随时间的幅度(即响度)变化情况。 通常而言,如果需要分析指定频率的情况,可以采用如此切割手段。这种处理方式一般被用在降噪模型训练,或一段特定频段频率的综合分析。所以,会取用指定频段的相应所有切片,按照其频段内频率用不同颜色表示后叠加,来二维的表示该频段内的响度时间情况。这种表示方式所构成的声音二维图表,被称为 有限频段波形图(Limited Band Waveform)。 而当我们选择的频段涵盖了整个声音的全部频段(大多为人耳听力的频率范围,即 20~20000Hz)时,就能够得到整个声音的完整波形图了。而这也是最为人熟知的声音图表形式,同时也是该声音 完整的时域信息,即大部分情况下所指的 时域(Time Domain)。 图 1-37 一组乐器演奏 A4 音调约 12s 的 3D 频谱图 & 投影所得 波形图(右) 频响切面(FLS [Frequency-Loudness Section]) 频响切面(FLS [Frequency-Loudness Section]) 是声音信息在经过时频分离后,得到的 从频域观察时域角度的时域维度切片(注意观察方向,垂直于 TLS 情况,时频分离原理在第三章详解)。能够用来获取,指定某时刻下,该时刻的频率构成情况。即 频率响度特征(FLF)。 这个视角我们已经充分的辨析过了,此处亦不再赘言。 但有一点还需强调。 我们以类似获取声音 波形图 和 语谱图 的方式,获得频谱图在频响切面的投影,涵盖了该声音的 完整频域信息,即大部分情况下所指的 频域(Frequency Domain)。 注意 频率响应切片(FLF) 和 频域 的区别,与父子关系(频域切片 和 完整频域)。 图 1-38 一组乐器演奏 A4 音调约 12s 的 3D 频谱图 & 投影所得 完整频域(前) 至此,从乐理角度(艺术)、心理声学(感观)、声乐工程(声音三要素)。读者以具备基本的完整分析一段声音,并初步提取有效数据的认知能力! 
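为了把上述“从频谱图切片求 FLF”的思路落到可运行的层面,下面补充一个仅作演示的 C++ 最小骨架(并非本书第五章给出的正式实现):用朴素 DFT 配合汉宁窗逐帧求谱,得到频谱图矩阵,再在指定时刻附近对若干帧取平均,近似得到该时刻的频率响度特征切片。其中 frameSpectrumDb、spectrogram、flfSlice 等函数名,以及帧长、步长、dB 基准等参数均为演示假设;实际工程通常会改用 FFT 库(如 FFTW、KissFFT)以提升效率。

```cpp
#include <algorithm>
#include <cmath>
#include <complex>
#include <vector>

static const double kPi = 3.14159265358979323846;

// 对一帧样本加汉宁窗后做朴素 DFT,返回前半频点的幅度谱(相对 dB)
std::vector<double> frameSpectrumDb(const std::vector<double>& frame) {
    const size_t N = frame.size();
    std::vector<double> db(N / 2, 0.0);
    for (size_t k = 0; k < N / 2; ++k) {
        std::complex<double> acc(0.0, 0.0);
        for (size_t n = 0; n < N; ++n) {
            double w = 0.5 * (1.0 - std::cos(2.0 * kPi * n / (N - 1)));   // 汉宁窗加权
            double ang = -2.0 * kPi * double(k) * double(n) / double(N);  // DFT 基底相位
            acc += frame[n] * w * std::complex<double>(std::cos(ang), std::sin(ang));
        }
        db[k] = 20.0 * std::log10(std::abs(acc) / double(N) + 1e-12);     // 幅度转相对 dB
    }
    return db;
}

// 以 hop 为步长滑动取帧、逐帧求谱,得到“帧 × 频点”的频谱图矩阵
std::vector<std::vector<double>> spectrogram(const std::vector<double>& pcm,
                                             size_t frameLen, size_t hop) {
    std::vector<std::vector<double>> spec;
    for (size_t start = 0; start + frameLen <= pcm.size(); start += hop) {
        std::vector<double> frame(pcm.begin() + start, pcm.begin() + start + frameLen);
        spec.push_back(frameSpectrumDb(frame));
    }
    return spec;
}

// 在 centerSec 附近 ±halfWindowSec 内对各帧频谱取平均,得到近似的 FLF 切片
// 注意:此处直接对 dB 值取平均,仅作演示;严格做法应在线性幅度域平均后再转 dB
std::vector<double> flfSlice(const std::vector<std::vector<double>>& spec,
                             double sampleRate, size_t hop,
                             double centerSec, double halfWindowSec) {
    if (spec.empty()) return {};
    const double framesPerSec = sampleRate / double(hop);
    size_t lo = size_t(std::max(0.0, (centerSec - halfWindowSec) * framesPerSec));
    size_t hi = size_t((centerSec + halfWindowSec) * framesPerSec);
    std::vector<double> avg(spec.front().size(), 0.0);
    size_t count = 0;
    for (size_t i = lo; i < hi && i < spec.size(); ++i, ++count)
        for (size_t k = 0; k < avg.size(); ++k) avg[k] += spec[i][k];
    for (double& v : avg) v /= double(std::max<size_t>(count, 1));
    return avg;
}
```

以 44.1 kHz 的采样数据为例,可取 frameLen = 4096、hop = 1024,再调用 flfSlice(spec, 44100, 1024, 5.0, 0.1),即可得到正文中“在 5s 处取 ±100ms 求平均”的近似切片结果。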
下一节,让我们开始音频的采样与调制,掌握声音是如何从物理信号,转化为可传递数字信号的关键。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 "},"Chapter_1/Language/cn/Docs_1_5.html":{"url":"Chapter_1/Language/cn/Docs_1_5.html","title":"1.5 声音数字化","keywords":"","body":"1.5 声音数字化 在本章伊始,我们提到了当下 音频录制(Audio Recording) 技术所处的时代,为 数字处理时代(The Digital era)。在数字时代最为显著的特征,就是从传统的纯物理记录方式,演变成了调制解调配合格式压缩存储的处理过程。 而将声音从物理波转为数字保存,并在需要时提供还原能力的技术,就是 调制解调(Modulation & Demodulation) 技术。 这既是本节讨论的内容。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_5_1.html":{"url":"Chapter_1/Language/cn/Docs_1_5_1.html","title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","keywords":"","body":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source) 由于从数字处理时代开始,信号间的差异已逐步变大。为了 更好地区分 通过传感器采集的电流数据格式和数模模数转换后的格式,我们将其分为 模拟信号(Analog Signal) 和 数字信号(Digital Signal) 两种: 模拟信号(Analog Signal) 为采用电物理量表示的真实波源的象形连续信息,属于 连续时间信号(CTS [Continuous Time Signal]); 数字信号(Digital Signal) 为有限离散值表示的离散信息,是 量化(Quantification) 后的 离散时间信号(DTS [Discrete Time Signals]); 同时,为了区分两者和数据源的关系,将 信号现实源头 称为 真实波源(Original Source)。 所以,只有真实波源(Original Source)才代表物理世界中的实际波情况。 模拟信号与数字信号,一个是 通过电压电阻等电力学技术采集(Collecting)到的真实波源数据,一个是 通过电子信息技术处理的电压电流转数字码结果。 注意,采集(Collecting) 并不是 采样(Sampling)。两者没有直接联系,属于不同阶段的不同过程。但有时也会将从真实波源获取模拟信号的过程,称为 模拟信号采样(Analog Signal Sampling),需要通过具体上下文来区别。 小心容易混淆。 真实波源一般通过一些 电传感器 来转为模拟信号。这些传感器包括:由多感光电阻单元构成照相机感光器(Camera Sensor)、由驻极体(ECM)单元构成的麦克风传感器、以动圈切割磁感线产生电流的动圈麦克风,简单的压力传感器(Pressure Sensor)等。 简单来说, 模拟信号(Analog Signal) 是 电流信号; 数字信号(Digital Signal) 是 电位信号; 真实波源(Original Source) 是 现实世界里的波(光波、机械波、引力波等); 我们所听到的声音,在物理介质(如空气、水等)中直接传导的信息,在转为电流电压表形后,就可以被认为是模拟信号。数字信号则在自变量(如时间)和因变量(如幅度)上,都是离散且有限的。 但我们 并不能直接简单的将离散时间信号(DST),等同于 数字信号。因为,离散时间信号在不经过量化因变量的操作前,其只是自变量的离散。例如,时间上间隔的从一段声音的模拟信号上截取切片,构成的时序离散的信号,其因变量的波动情况仍然属于自然量描述。 所以,采样自模拟信号的未量化离散时间信号,即为对应数字信号的 中间形态。 由此引申出,模拟信号到数字信号的转换过程: 称为 模数转换(A/D [Analog-to-Digital])。作用于 模数转换(A/D)的设备为 模数转换器(ADC [Analog-to-Digital Converter])。 而从 数字信号到模拟信号的还原过程: 称为 数模转换(D/A [Digital-to-Analog])。作用于 数模转换(D/A)的设备为 数模转换器(DAC [Digital-to-Analog Converter]),即所谓 HiFi 的 解码 DAC。 数模转换和模数转换,并不只局限于音视频的信息转换。其他类型的现实世界信息,也存在同样复杂或简单和电信号互转的过程。由于原理相近,本书选择以音频作为主,不做其他信号的相关展开。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_5_2.html":{"url":"Chapter_1/Language/cn/Docs_1_5_2.html","title":"1.5.2 模数转换(A/D [Analog-to-Digital])","keywords":"","body":"1.5.2 模数转换(A/D [Analog-to-Digital]) 模数转换(A/D) 完成对采集到真实物理信息(如温度、气压、举例等)所得的 模拟信号,到 数字信号的映射。这么做的目的,是为了利用数字信号 可度量、可改动、可计算 的特性,来实现对信息的 操作、保存、传递。 大多数情况下, A/D 会把采样和量化 放到 ADC 单元里 一步完成。为了同时控制这两个变量到同一基准上,ADC 通过引入 固定参考输入(Reference Input),使其频率在合成累加器单元门电路数代表 比特分辨率(Bit Resolution) 作用下,转为十进制的等步长分割。从而可以拆解为由比特分辨率表示的,整数比特离散值,即门电路的电位。控制电位调整参考输入,逼近模拟信号。 最终得到的门电路开关状态以 数字码(Digital Codes) 记录,即为 输出数字信号。 可见,参考输入的电压就是 ADC 所能处理的最大电压。该值通常都来自于各国行业标准采样电压,或由特种设备的内部变压器/脉冲单元/时钟芯片决定。而 ADC 量化采样公式每被执行一次,就是一次量化采样完整过程。采样的频率,来自于设备内部时钟频率,通常是以电脉冲的形式触发。但模拟信号的输出是连续的,因此该时钟频率(即采样频率)的大小,会对采样结果有一定影响。 采样的准确度与采样率设定 根据 香农采样定律(Nyquist–Shannon Sampling Theorem),时钟频率需要为 采样数据源最大频率的至少两倍大小,才能保证采集最大频率时,不会因为非整数倍取样而导致变形。这种变形属于来自于采样过程的 源头干扰,会产生 难以消弭的 影响,例如:在一定距离拍照电子屏时出现的摩尔纹。具体原理相对简单,如下图所示,不再展开赘述(模拟代码见本章节事例)。 图 1-39 香农采样定律取 1.3 倍于被采样频率时的采样失真演示 依此,我们对采样频率的制定,亦有标准公式。强调这里的采样指的是 A/D 过程中的采样。 假设,当前已知一 ADC 设备,想要处理的 模拟信号 脉冲频率范围 为 FAno∈[Fmin, Fmax]F_{Ano} \\in [F_{min},\\ 
F_{max}]FAno∈[Fmin, Fmax] ,该 设备的采样频率 为 FADCF_{ADC}FADC 。则 理想中能够覆盖最大高频模拟信号的无失真频率 FADCF_{ADC}FADC 需满足: FADC≥2⋅Fmax {\\displaystyle \\begin{aligned} F_{ADC} \\ge 2 \\cdot F_{max} \\\\ \\end{aligned} } FADC≥2⋅Fmax 按照该不等式设置的 ADC 采样频率,即可符合要求。上式因此常在工程中被称为 安全采样不等式(Safety Sampling Inequality)。而根据安全采样不等式设定的 FADCF_{ADC}FADC ,称为该设备的 数字信号采样率(Digital Sampling Rate),即 采样率(Samplerate/Sample Rate)。 现在,采样频率的问题解决了。如何处理获取的离散数据,将其转换为数字码标识呢?这需要依赖 A/D 量化公式(A/D Quantization Formula),即量化采样公式的帮助。 量化采样公式(A/D Quantization Formula) 如果记 模拟信号(Analog Signal)的电压(Voltage) 为 VAnoV_{Ano}VAno ,参考输入(Reference Input)的电压(Voltage) 为 VRefV_{Ref}VRef 。合成累加器的门总数,即该 ADC 的 最大比特分辨率(Max Bit Resolution) 为 NNN 。假设模拟信号经过 ADC 处理后,某时刻输出的 数字信号(Digital Signal)十进制表示 为 DDD ,则这几个量间的关系就为: D=VAnoVRef⋅(2N−1) {\\displaystyle \\begin{aligned} D = \\frac{V_{Ano}}{V_{Ref}} \\cdot (2^N - 1) \\\\ \\end{aligned} } D=VRefVAno⋅(2N−1) 此式即为 ADC 量化采样公式,由于 采样不依赖于公式,也被称为 A/D 量化公式(A/D Quantization Formula)。 图 1-40 在 ADC 量化采样公式作用下的 A/D 映射结果 如上,当取用 VRef=6 VV_{Ref} = 6\\ VVRef=6 V 时,有输入模拟信号电压 VAno∈[0, 6]V_{Ano} \\in[0,\\ 6]VAno∈[0, 6] 的数字码映射情况。连续信号通过公式处理,变成了离散值。而 1VRef⋅(2N−1)\\tfrac{1}{V_{Ref}} \\cdot (2^N - 1)VRef1⋅(2N−1) 就是每个十进制下数字码(数字码都是二进制)所能覆盖的电压范围,称之为 1 单位 的 最小显著字节(LSB [Least Significant Bit]) 范围。 而用上例参数的 ADC 对一个时长为 4 个周期且 VAno∈[0, 6]V_{Ano} \\in[0,\\ 6]VAno∈[0, 6] 的正弦模拟信号,进行模数转换。其完整处理的效果如下: 图 1-41 模拟信号经 ADC 量化采样演示 对于一款 ADC 单元,在设计确定了 采样率(Samplerate)、最大比特分辨率(Max Bit Resolution) 和 参考输入(Reference Input) 后,对于该设备的这些相关属性,既成 常数固定。其中,最大比特分辨率(Max Bit Resolution)取值 NNN ,被标注为 ADC 设备的 采样位深(Sampling Bit Depth)。 取值 NNN 为多少,就代表着单个 ADC 上,有多少以 参考输入电压二的幂指倍缩小电压信号 所组成的门后电压单元。 由于参考电压一般要求稳定,所以至少需要以内部元件提供稳定三相电来作为基准。不过,对于精度要求极低的设备,为了电子组件复用和电路板的简化,会采用把采样时钟信号的电压作为参考输入的非常做法。但对于高精度设备(包括麦克风等),时钟信号为高频信号,是严格不能作为参考输入的。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_5_3.html":{"url":"Chapter_1/Language/cn/Docs_1_5_3.html","title":"1.5.3 数模转换(D/A [Digital-to-Analog])","keywords":"","body":"1.5.3 数模转换(D/A [Digital-to-Analog]) 数模转换(D/A) 是对模数转换(A/D)的逆向过程,完成 从数字信号还原至模拟信号 的工作。注意,还原的是模拟信号(Analog Signal),而并非真实波源(Original Source)。对音频来说,转换所得的模拟信号,再经过放音设备播放(如音响、扬声器单元等),成为真实波源。 数模转换公式(D/A Transfer Function) 如果记 数字信号(Digital Signal)十进制表示 为 DDD ,参考输入(Reference Input)的电压(Voltage) 为 VRefV_{Ref}VRef ,合成累加器的门总数,即该 DAC 的 最大比特分辨率(Max Bit Resolution) 为 NNN 。假设数字信号经过 DAC 处理后,某时刻输出的 模拟信号(Analog Signal)电压(Voltage) 为 VAnoV_{Ano}VAno ,则这几个量间的关系就为: VAno=D2N−1⋅VRef {\\displaystyle \\begin{aligned} V_{Ano} = \\frac{D}{2^N-1} \\cdot V_{Ref} \\\\ \\end{aligned} } VAno=2N−1D⋅VRef 此式即为 DAC 数模转换公式,由于 DAC 为参考输入构建波形的内部时钟信号脉冲周期不依赖于公式,也被称为 D/A 转换公式(D/A Transfer Function)。 同 ADC 一致,DAC 中参考输入电压二的幂指倍缩小电压信号,组成了位门后的各个门电路所对应电压输入。其所有输入的周期皆为时钟信号的周期,即周期完全一致。 图 1-42 在 DAC 数模转换公式作用下的 D/A 映射结果 如上(注意坐标轴),当取用 VRef=6 VV_{Ref} = 6\\ VVRef=6 V 有 DAC 最大比特分辨率 N=4N = 4N=4 时,输入数字信号十进制表示 D∈[0, 15]D \\in[0,\\ 15]D∈[0, 15] 的模拟信号还原的理想情况。在没有 DAC 设备误差的情况下,上一小节经过我们模数转换所得时长为 4 个周期的数字信号,就能还原为原 VAno∈[0, 6]V_{Ano} \\in[0,\\ 6]VAno∈[0, 6] 的正弦模拟信号: 图 1-43 数字信号经 DAC 数模转换演示 对于一款 DAC 单元,在设计确定了 时钟频率(Clock Frequency)、最大比特分辨率(Max Bit Resolution) 和 参考输入(Reference Input) 后,对于该设备的这些相关属性,既成 常数固定。其中,最大比特分辨率(Max Bit Resolution)取值 ,被标注为 DAC 设备的 解析位深(Analytical Bit Depth),即俗称的解析力。 同样的,想要达到 较好的还原 模拟信号效果,DAC 的 时钟频率(Clock Frequency),需要和 ADC 的工业标准保持一致。因此,有时也被用 ADC 的采样率(Samplerate)的称谓代指,即所谓 DAC 采样率。这种称谓其实是不准确的。 而在多级设备的放音场景,为了保证包括 DAC 在内的整条解码放音链路上设备的时钟频率一致,常需要我们提供外侧时钟信号(Clock Signal),来避免由于设备间的差异,而导致还原后的模拟信号,在传递和还原真实波源时,发生周期上的挤压/拉伸形变。 Copyright © Since 2021 李述博 
(Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_5_4.html":{"url":"Chapter_1/Language/cn/Docs_1_5_4.html","title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","keywords":"","body":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM) 实际上,为了避免理解中的混淆,我们在上文中介绍的 模数转换(A/D)和 数模转换(D/A)方式,都是基于 脉冲编码调制(PCM [Pulse-Code Modulation]) 进行的。能够完成数模模数转换的方法,除了 PCM 法外,还有 脉冲密度调制(PDM [Pulse-Density Modulation]),以及一系列不同出发点的调制模式,比如 脉冲带宽调制(PDM [Pulse-Width Modulation]) 等。由于在本领域内相对非主流,或是已经属于相对落后技术,亦不再讨论。 本节主要以相对具有代表性的 PCM 与 PDM 进行比对。 脉冲编码调制(PCM [Pulse-Code Modulation]) 是通过将模拟信号的 电压幅度,以 离散数字码 的形式等效表示,从而转换为数字信号。其在转换前后 时序(周期)上是一致的。转换后的数字信号,幅度变化拟合原有模拟信号幅度变化轮廓。 脉冲密度调制(PDM [Pulse-Density Modulation]) 则是将模拟信号的 电压幅度,以 一段时间内的高密度脉冲数量 来表示,从而转换为数字脉冲。其在 时序(周期)上是差异的,转换后的数字信号,幅度二元变化(只有 0/1 值)。 主要的不同,来自于对模拟信号 幅度抽象模式。 PCM & PDM 异同辨析 基于 PCM 的 A/D、D/A 过程,数字信号是二维信号,时序信息与幅度信息依旧保持为两个维度。而基于 PDM 的 A/D、D/A 过程,数字信号是一维信号,原模拟信号的时序信息和幅度信息,被叠加到同一维度上,以采样频率对应周期长度进行了转后数字信号单一维度上的分片。 相应的,对于 PCM 法所得结果幅度切割程度的重要指标,采样位深(Sampling Bit Depth),则 不存在于 PDM 法中。 PDM 采用 过采样系数(Oversampling Ratio) 配合 数字信号采样率(Digital Sampling Rate) 的方式,来表示 采样分辨率(Sampling Resolution)。即 PDM 和 PCM 的采样率,在意义上是不一样的: PCM 采样率,代表在一个时钟信号周期内,设备对模拟信号采样的次数; PDM 采样率,代表在一个时钟信号周期内,设备对模拟信号一次采样的幅度累计上限; 因此,PDM 设备在一个时钟信号周期内,仅仅数字化模拟信号 一个时刻。PCM 设备在一个时钟信号周期内,则数字化模拟信号 多个时刻。 需要注意的是,PDM 采样率(Samplerate) 决定了 PDM 设备的 可采样幅度范围,但这 并不 意味着可以等价于设备的时钟频率,这是两个概念。仍然记该 PDM 设备 参考输入(Reference Input) 大小为 VRefV_{Ref}VRef ,数字信号采样率(Digital Sampling Rate) 为 FADCF_{ADC}FADC ,过采样系数(Oversampling Ratio) 为 SrS_{r}Sr ,而 采样率(Digital Sampling Rate) 为 FFF 。则顺序 iii 的 二元数字信号(0-1 Digital Signal) 值 DiD_iDi 与几个量间的关系有: ∑i=0Sr⋅FDi={1, VAnoVRef>00, VAnoVRef=0 {\\displaystyle \\begin{aligned} \\sum_{i=0}^{S_r \\cdot F} D_i &= \\begin{cases} 1 &, \\ \\frac{V_{Ano}}{V_{Ref}} > 0 \\\\ 0 &, \\ \\frac{V_{Ano}}{V_{Ref}} = 0 \\end{cases} \\\\ \\end{aligned} } i=0∑Sr⋅FDi=⎩⎪⎪⎨⎪⎪⎧10, VRefVAno>0, VRefVAno=0 一个时钟信号周期内 Di=1D_i = 1Di=1 累积个数,就是 PDM 数字信号的 脉冲密度(Pulse Density)。我们记脉冲密度为 IpI_pIp ,原模拟信号被采样时间点为 ttt 则: Ip=(∑Di⋅(Sr⋅F))t {\\displaystyle \\begin{aligned} I_p = \\left( \\sum D_i \\cdot (S_r \\cdot F) \\right)_{t} \\\\ \\end{aligned} } Ip=(∑Di⋅(Sr⋅F))t 所以,对 PDM 设备来说,IpI_pIp 才代表了原模拟信号在 ttt 时的 等效振幅(即电压),有: VAno(t)=Ip(t)⇔∑i=C⋅tC⋅t + Sr⋅FDi {\\displaystyle \\begin{aligned} V_{Ano}(t) = I_p(t) \\Leftrightarrow \\sum_{i =C \\cdot t}^{C \\cdot t \\ +\\ S_r \\cdot F} D_i \\\\ \\end{aligned} } VAno(t)=Ip(t)⇔i=C⋅t∑C⋅t + Sr⋅FDi 其中,时钟频率(Clock Frequency) 记为 CCC 。 所以,PDM 是完全不同于 PCM 的方法论。 而不论是 PCM 还是 PDM,其理想情况下都可以保持转换还原前后,原模拟信号不发生改变。 对于 PDM 来说,最显著的特点就是在同等情况下,能够提供 比 PCM 更细腻的分辨率,但缺点也很明显,即 更窄的动态范围(时钟周期性和等效较低的对原模拟信号的采样频率)。 此外,PDM 受分时分区的采样频率,和通过电压控制的开关门电路累计计数关系,而易受外界和设备自身影响,导致容易引入内外噪音干扰。不过由于只需要按频率对应的一个周期内,累计幅度时发送单一信号(1⋅VRef1 \\cdot V_{Ref}1⋅VRef),无幅度累计发送单一信号(0⋅VRef0 \\cdot V_{Ref}0⋅VRef)的方式,转换数字信息。而使 PDM 的构造显然要简单于 PCM 方式的 ADC、DAC,这让用 PDM 方式构造的该类设备,具有较低能耗和低造价(制造简单)的优势。 由于这些原因,PDM 设备常被用在一些低电力和相对精度较低的需求场景,如电器控制单元、LED灯驱动器、一些微型麦克风设备等,相对更靠近使用端的设备。 相比之下,PCM 的处理方式,显然更容易相对完整的保存原有模拟信号信息。 音视频工程场景中,我们常处理的音频信号,基本为 PCM 方式获取的数字信号。 对于想要进行调整的 PDM 数字信号,通常需要转换为 PCM 数字信号后,再行以 PCM 更具优势的直接编辑方式,进行相关操作。而位于计算机体系内用来实现音频存储的数字信号基础类型,亦为 PCM 类型的数字信号。 由此可见 PCM 数字信号的重要性。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_6.html":{"url":"Chapter_1/Language/cn/Docs_1_6.html","title":"1.6 音频的存储","keywords":"","body":"1.6 音频的存储 经过上一小节,我们已经能够将大多数声音的模拟信号,转为 PCM 数字信号的音频数据。而接下来,在现代计算机系统内,这些数据具体该怎么进行 储存保存 呢?考虑到其本身 已经为数字码(Digital Codes) 
格式,一种直接使然的思路,就是什么都不再进行变动,采用直接写入到磁盘中的方式,保存原始数字信号。 这就是音频存储的基础格式,PCM 音频格式。 什么是音频格式? Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_6_1.html":{"url":"Chapter_1/Language/cn/Docs_1_6_1.html","title":"1.6.1 音频格式(Audio Format)","keywords":"","body":"1.6.1 音频格式(Audio Format) 音频格式(Audio Format),也被称为 音频文件格式(Audio File Format)。其被用来指代,对当前目标 音频数字信号(Digital Signal) 数据,进行保存至数字存储设备中的 处理方式和最终数据结果。 即,音频格式 包含了两部分重要的信息:压缩算法(Compress Formula) 和 存储格式(Data Format)。两者共同构成了 音频格式 的核心属性。 不过,由于存储格式大都是由压缩算法决定,且采用相同于原数字信号本身的数字码表示方式进行存储。可以说压缩算法的差异,才是决定不同音频格式间差异的关键。 而音频的存储格式,在这一点上,仅仅作为压缩算法的运算结果,并不起主导作用。 三者关系如下所示: 所以,根据格式本身所采用的压缩算法类型,音频格式可以分为 三大种类: 未压缩音频格式(Uncompressed [Uncompressed Audio Format]),不采用任何压缩算法直接存储,例如前文提到的 PCM 音频格式; 无损压缩音频格式(Lossless [Lossless Compression Audio Format]),采用无损压缩算法,对数字信号进行压缩后的存储格式,例如 FLAC 音频格式; 有损压缩音频格式(Lossy [Lossy Compression Audio Format]),采用有损压缩算法后,得到的存储格式,例如著名的 MP3 音频格式; 显然,想要理解这几类的划分,从 压缩算法 入手,是个较好的切入点。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_6_2.html":{"url":"Chapter_1/Language/cn/Docs_1_6_2.html","title":"1.6.2 无压缩编码格式(Uncompressed Encode)","keywords":"","body":" Hex Data Display .hex-container { text-align: center; } .hex-data { display: inline-block; text-align: left; font-weight: bold; font-family: monospace; white-space: pre; } 1.6.2 未压缩音频格式(Uncompressed Audio Format) 未压缩音频格式(Uncompressed [Uncompressed Audio Format]) 即 没有经过任何压缩算法处理,而直接将数字信号的数字码,作为存储格式保存的音频格式。因此,未压缩音频格式的存储格式(Data Format) 与 原数字信号 在音频数据内容部分,具有 完全一致的数字码值。 常见的未压缩音频格式,除了前文反复提到的 PCM 格式外,还有 PDM、WAV、AIFF、AU 等。PCM、PDM 格式自不必多提,而在系统中常用的 WAV 、SND/AU、AIFF等 则需额外引申。 首先的一个疑问就是,为什么会有这么多未压缩音频格式? 
未压缩音频格式种类的产生 如此结果的产生,主要来自于 两个重要因素:调制模式的差异,和 描述信息的不同。 调制模式的差异,造成了如 PCM、PDM 之类的区分,这也是上节末尾我们所谈到的(其具体原理,已经于前面的章节中讲解,如有疑问可以回顾),是来自于不同 AD/DA 方法论的区别。 描述信息的不同,则指向该格式,是否携带了 描述音频的头部信息(Header Information) 用来标记当前音频文件对应音频数据的全局附加信息。而这,则是来自于 系统文件规范(System File Specifications) 指定的人为因素。 WAV 音频格式 自微软在早期 IBM 时代提出了 资源交换档案标准(RIFF [Resource Interchange File Format]) 规范后,所有于 Windows 系统内的数据存储文件,都需要按照: 【分块描述信息块(Chunk Descriptor)】+【复合数据子块(sub-Chunks)】 的形式,完成数据封装。而音频数据所对应的,即为 波形格式数据(Wave File Format),也就是所谓 WAV(.wav) 格式。同时,随着 Windows 系统的极大普及,WAV 格式也成为了一种于多系统间的通用基础音频格式类型。 Windows 系统以后缀来区别具体归属 RIFF 类型。而不论是否采用 PCM 调制模式,想要存储且不采用压缩算法的音频数字信号,都需要按 RIFF 要求进行封装。当然,现行的 WAV 格式文件,基本都是对 PCM 数据的 RIFF 套壳。 RIFF 规定,WAV 数据格式包含了三部分数据,分别是: 分块描述信息块(Chunk Descriptor),存放基于 RIFF 体系的当前块标记; 音频格式子块(fmt sub-Chunk),存放当前存储音频相关数据的附加信息; 音频数据子块(data sub-Chunk),存放当前存储音频的存储数据; 不同区域,包含的信息(即各自参数)的 目标作用域 是不同的。分块描述信息块,主要是基于 RIFF 体系的 相对宏观存储信息记录,目的是为了方便于计算机系统据此进行数据归纳处理。而 音频格式子块 和 音频数据子块 才是对该 数字信号 代表的 音频实际信息的描述和保存。 因此,三部分各有参数标记各自重点。 分块描述信息块(Chunk Descriptor) 主要包含 3 个属性,分别是: Params Range(bytes) Details ChunkID 0x00~0x03 (4) 标记当前块 ID,固定存储 'RIFF' 四个大写字母的 ASCII 码,即 == 0x52494646 ChunkSize 0x04~0x07 (4) 记录当前块除 ChunkID 和 ChunkSize 属性外,完整文件的总体大小(bytes),== 4 + (8 + Subchunk1Size) + (8 + Subchunk2Size) Format 0x08~0x0b (4) 标记当前 RIFF 文件类型,WAV 固定存储 'WAVE' 四个大写字母的 ASCII 码,即 == 0x57415645 音频格式子块(FMT sub-Chunk) 主要包含 8 个属性和 2 个额外字段,分别是: Params Range(bytes) Details Subchunk1ID 0x0c~0x0f (4) 标记当前 子块-1 的 ID(即子块类型),固定存储 'fmt' 三个小写字母的 ASCII 码,即 == 0x666d7420 Subchunk1Size 0x10~0x13 (4) 记录当前子块除 ID 和 Size 属性外的大小(bytes),而对于存储 PCM 数字信号,该值恒定 == 16 bytes AudioFormat 0x14~0x15 (2) 音频格式类型,非常用参数,因为当本身为 != 1 的值时,代表着文件存储的音频数据,采用了对应标记值的压缩算法。此时一般会采用对应的格式后缀。对于 PCM 格式,该值恒定 == 1 NumChannels 0x16~0x17 (2) 存储音频数据的通道数,单通道(Mono == 1),双通道(Stereo == 2),N 通道(== N) SampleRate 0x18~0x1b (4) 数字信号采样率,注意不同调制类型需要考虑前文提到的差异,对 PCM 来说就是 ,有该值 == 8000 | 11025 | 24000 | 44100 等 ByteRate 0x1c~0x1f (4) 比特率,即当前全通道单采样周期所得数据的传输率 == SampleRate * NumChannels * BitsPerSample/8 BlockAlign 0x20~0x21 (2) 全通道数据单次采样的对齐大小,即一次全通道采样的有效数据大小,固定 == NumChannels * BitsPerSample/8 BitsPerSample 0x22~0x23 (2) 代表来自于 数模模数转换 的 采样位深(Sampling Bit Depth)/ 最大比特分辨率(Max Bit Resolution),该值单位为 bits,有 == 8 | 16 | 32 bits 等 ExtraParamSize 0x24~0x25 (2) 额外参数信息大小,如无则不占用字节大小,非常用参数,原因同 AudioFormat,对于 PCM 来说,该值始终 == 0,且字段不存在 ExtraParams 0x26~0x26+X (X) 额外参数内容,同上,对 PCM 始终 X == 0 需要注意的是,音频格式子块(FMT sub-Chunk)中的 ExtraParamSize 和 ExtraParams 并不是始终存在的。对于 以 PCM 数字信号数据为主要载荷信息的 WAV 格式,该两个字段在 fmt 子块中,是不存在。 即,ExtraParamSize 和 ExtraParams,在 WAV 中并不占用任何有效数据字段。 音频数据子块(DATA sub-Chunk) 主要包含 3 个属性,分别是: Params Range(bytes) Details Subchunk2ID 0x24~0x27 (4) 标记当前 子块-2 的 ID(即子块类型),固定存储 'data' 四个小写字母的 ASCII 码,即 == 0x64617461 Subchunk2Size 0x28~0x2b (4) 记录当前子块除 ID 和 Size 属性外的大小(bytes),而对于存储 PCM 数字信号,该值为数字信号数据大小 == PCM-data-size bytes Data 0x2c~0x2c+X (X) 当前 PCM 数字信号的数字码信息,共计 X bytes 所以,音频数据子块(DATA sub-Chunk) 其实就是 PCM 音频格式时,被存储到计算机系统中的 PCM 存储文件(.pcm)有效数据部分。 三个 WAV 的组成部分以固定顺序排布,如下所示: 图 1-44 WAV 音频格式的完整结构成分 共同构成了一则有效的 WAV 音频格式文件。 现在,让我们再来看一段 72 bytes 的 WAV 音频文件(十六进制格式单字节展开): 52 49 46 46 24 08 00 00 57 41 56 45 66 6d 74 20 10 00 00 00 01 00 02 00 22 56 00 00 88 58 01 00 04 00 10 00 64 61 74 61 00 08 00 00 00 00 00 00 24 17 1e f3 3c 13 3c 14 16 f9 18 f9 34 e7 23 a6 3c f2 24 f2 11 ce 1a 0d 按照上述划分,就能得到各自子块的信息了: 图 1-45 演示用 72 bytes 的 WAV 音频文件解析 从上可知,样例其实是从一段 2048 Bytes 的 PCM 音频对应 WAV 文件 中,从头 截取 72 Bytes 数据 组成的。所以,利用头部信息来交验数据完整性,或取得更早阶段(即调制阶段)的信息,在 WAV 这种 具有 分块描述信息块(Chunk Descriptor) 的音频格式(Audio Format)里成为了可能。 这也是为何类 WAV 结构音频格式(包括将要提到的 SND/AU 和 AIFF 等),会代替了直接以 PCM 在电脑中进行非工程化存储的原因。 
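为便于对照上面的字段表与 72 字节样例,这里给出一个假设性的 C++ 小程序,按“44 字节标准头部紧跟 data 子块”的最简布局,读取并打印 WAV 头部各字段(RIFF 体系的多字节整数为小端序)。注意这只是演示字段偏移的草图:真实文件在 fmt 与 data 之间可能还插有 LIST 等其他子块,工程中应按块逐个遍历,而不是依赖固定偏移。

```cpp
#include <cstdint>
#include <cstdio>

// 读取 bytes 个字节的小端无符号整数(RIFF 体系为小端序)
static uint32_t readLE(const uint8_t* p, int bytes) {
    uint32_t v = 0;
    for (int i = bytes - 1; i >= 0; --i) v = (v << 8) | p[i];
    return v;
}

int main(int argc, char** argv) {
    if (argc < 2) { std::fprintf(stderr, "usage: %s file.wav\n", argv[0]); return 1; }
    std::FILE* fp = std::fopen(argv[1], "rb");
    if (!fp) { std::perror("fopen"); return 1; }

    uint8_t h[44];                                    // 标准 PCM WAV 头部共 44 字节
    size_t got = std::fread(h, 1, sizeof(h), fp);
    std::fclose(fp);
    if (got != sizeof(h)) { std::fprintf(stderr, "file too short\n"); return 1; }

    // ChunkID / Format / Subchunk1ID 均为 ASCII 标记,逐字节比对即可
    bool ok = h[0]=='R' && h[1]=='I' && h[2]=='F' && h[3]=='F' &&
              h[8]=='W' && h[9]=='A' && h[10]=='V' && h[11]=='E' &&
              h[12]=='f' && h[13]=='m' && h[14]=='t' && h[15]==' ';
    if (!ok) { std::fprintf(stderr, "not a canonical RIFF/WAVE header\n"); return 1; }

    std::printf("ChunkSize     : %u bytes\n", (unsigned)readLE(h + 4, 4));
    std::printf("AudioFormat   : %u (1 == PCM)\n", (unsigned)readLE(h + 20, 2));
    std::printf("NumChannels   : %u\n", (unsigned)readLE(h + 22, 2));
    std::printf("SampleRate    : %u Hz\n", (unsigned)readLE(h + 24, 4));
    std::printf("ByteRate      : %u bytes/s\n", (unsigned)readLE(h + 28, 4));
    std::printf("BlockAlign    : %u bytes\n", (unsigned)readLE(h + 32, 2));
    std::printf("BitsPerSample : %u bits\n", (unsigned)readLE(h + 34, 2));
    std::printf("Subchunk2Size : %u bytes (PCM data)\n", (unsigned)readLE(h + 40, 4));
    return 0;
}
```

若把正文的 72 字节样例存成文件并交给该程序,即可按字节读出其头部记录的双声道、16-bit、22050 Hz、data 子块 2048 字节等字段值。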
不过,这也引申出了 两种截然相反的,有关未压缩音频格式的 制定思路:缩减头文件信息减少复杂度 ,和 增加头文件所能涵盖的辅助数据。典型代表,分别是 SND/AU 和 AIFF 音频格式。 SND/AU 音频格式 SND/AU 是一种极简的,携带有描述信息的未压缩音频格式。该格式由已于 2009 年被 甲骨文(Oracle)收购的 美国昇阳电子有限公司(Sun Microsystems, Inc) 提出,用于解决麦克风录制转换后的 PCM 数字信号,快速简易存储的问题。 SND/AU 音频格式(.snd/.au) 不以 块(Chunk)/子块(sub-Chunk) 形式对关键数据进行分离,而是 直接将存储分为三个部分区段,分别是: 头信息区段(Header),存储必要的最基本音频描述信息; 变长辅助信息区段(Variable-length Informational Field),存储需要的额外信息; 音频数据区段(Data),存放当前存储音频的存储数据; 初看之下可能感觉同 WAV 格式并无太大差异,然而事实并非如此。SUD/AU 的相关音频数据的参数,以及自身有关文件系统的标志,都被 集中于头信息字段的 6 个固定参数中。而涉及音频本身,诸如版权信息、作者名称等数据,则并未规范如何存储,只指定了必须放入 变长辅助信息区段 的要求。这使 系统并不需要管理这部分信息。而音频数据区段,则只能存放 PCM 数字信号的数字码数据。 所以,变长辅助信息区段(Variable-length Informational Field) 只规定 必须占用 4 bytes 大小,并在有 额外信息 时,以整数字节增加,如:5(4+1) bytes、6(4+2) bytes 等。而 音频数据区段(Data)则紧随其后,直接以 PCM 采样按通道(Channels)数,交替分 blocks 存储即可(同 WAV 的 data 部分)。 头信息区段(Header) 主要包含 6 个属性,分别是: Params Range(bytes) Details magic 0x00~0x03 (4) 标记当文件类型,固定存储 '.snd' 四个字符文件后缀的 ASCII 码,即 == 0x2e736e64 hdr_size 0x04~0x07 (4) 记录音频数据起始偏移(bytes),用于快速数据,有 == 24 + Informational_Field_Size(bytes) data_size 0x08~0x0b (4) 本用于记录数字信号数据大小,但由于可通过,文件大小 - hdr_size 算得,因此可取 == 0xffffffff 表示无记录/ == n 表示 n bytes 大小 encoding 0x0c~0x0f (4) 用于标记具体存储的 PCM 数据,所采用的标准见下方表格,只可取 == 1, 2, 3, 4, 5, 6, 7, 23, 24, 25, 26, 27 sample_rate 0x10~0x13 (4) 数字信号采样率,由于 SUD 只能存 PCM,对 PCM 来说就是 ,有该值 == 8000 | 11025 | 24000 | 44100 等 channels 0x14~0x17 (4) 存储音频数据的通道数,单通道(Mono == 1),双通道(Stereo == 2),N 通道(== N) 不难发现,我们认为的 比特率(bitrate),或者至少该有的 采样位深(Sampling Bit Depth) 信息,并没有直接体现在头信息字段的参数中。这 并不 意味着没有包含该信息,而是 SND/AU 音频格式,通过 固定格式可支持类型 的方式,将这部分信息 封入了头信息字段的 encoding 子段 里,间接表示 了。 而 SND/AU 所支持的 PCM 采样规格,总计有 12 种,如下: Type Name ID Details 8 bit ISDN u-law 1 采样位深 为 8-bit 电话信号 uLaw 有损传输压缩算法 8 bit linear PCM 2 采样位深 为 8-bit 的线性 PCM 调制 16 bit linear PCM 3 采样位深 为 16-bit 的线性 PCM 调制 24 bit linear PCM 4 采样位深 为 24-bit 的线性 PCM 调制 32 bit linear PCM 5 采样位深 为 32-bit 的线性 PCM 调制 32 bit IEEE floating point 6 采样位深 为 32-bit 的 IEEE 归一化浮点 PCM 数据 64 bit IEE floating point 7 采样位深 为 64-bit 的 IEEE 归一化浮点 PCM 数据 4 bit CCITT G721 ADPCM 23 采样位深 为 4-bit 的 ITU G721 自适应 PCM 规格 CCITT G722 ADPCM 24 采样位深 为 4-bit 的 ITU G722 自适应 PCM 规格 CCITT G723 ADPCM 25 采样位深 为 4-bit 的 ITU G723 自适应 PCM 规格 5 bit CCITT G723 ADPCM 26 采样位深 为 5-bit 的 ITU G723 自适应 PCM 规格 8 bit ISDN a-law 27 采样位深 为 8-bit 电话信号 aLaw 有损传输压缩算法 通过 标记 encoding 取指定 ID 的方式,锚定规定好并确认具体参数的规格档次,来简化了头内容。当然弊端也很明显。由于选定的规格,并指定了档次,使得 相关参数是固定的,无法使用同规格下的其他参数组,而 无法进行动态扩展。这一部分仅了解即可,如使用到相应详细参数,再行查阅。 三个 SND/AU 的组成部分以固定顺序排布,如下所示: 图 1-46 SND/AU 音频格式的完整结构成分 较之 WAV 格式,简化了大量块信息。 但也正是因为这些原因,使得工程上在处理 SND/AU 格式时,需要花费额外的工作,来处理被固定的信息成分。这相当于另一种通过规定来实现的压缩手段了,变相的增加了系统处理资源消耗。因此,除了在 NeXT 系统上得到了大范围应用外,现如今 SND/AU 格式已成为逐步被淘汰的一种类型。 而与之相对的,WAV 和 AIFF 则仍被大量使用在 Windows/Linux 和苹果 MacOS/iOS 系统中。让我们不得不考虑,过渡的简化信息,是否仍有必要。 AIFF 音频格式 音频交换文件格式(AIFF [Audio Interchange File Format]),即 AIFF 音频格式(.aif/.aiff),正如刚刚所提,是一种被使用在 MacOS/iOS 上的未压缩音频格式。是一种隶属于 交换文件格式(IFF [Interchange File Format]) 文件管理体系的 文件格式(File Format)。该格式的特点相比 WAV 的 RIFF 分块体系而言,有着 更为复杂的子块类别。极大提升了能够涵盖辅助信息的广度,并以此为 苹果/Linux 等系统的文件管理,提供了更为方便的归类参考项。 AIFF 音频格式,从整体角度包含量种成分: 文件格式块(FORM Chunk),用以描述服务于系统文件管理的文件本身信息; 附属信息子块(INFO sub-Chunks),一系列不同类型的持续存储子块; 附属信息子块 也被称为 本地信息块(Local Chunks),所有的 本地信息块 都以 参数值的形式,保存于 文件格式块的 chunks 数组参数属性中,作为数组值存储。 于是,由这两类共同构成了一个完整的 AIFF 文件结构,如下: 图 1-47 AIFF 音频格式的完整结构成分(文件结构)简图 从此处即可看出,IFF 体系与 RIFF 体系的差异了。IFF 体系下,子块是以树状从属关系,挂载在 IFF 文件格式块的。而 RIFF 则是 分块描述信息块 和 子块 同属一级。 IFF 文件格式块(FORM Chunk) 主要包含 4 个属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记文件起始占位符,固定存储 'FORM' 四个大写字母的 ASCII 码,即 == 0x464f524d ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 
属性外,完整文件的总体大小(bytes),== 4 + Sum(Local Chunk Size) fromType 0x08~0x0b (4) 标记当前 IFF 文件类型,AIFF 固定存储 'AIFF' 四个大写字母的 ASCII 码,即 == 0x41494646 chunks -- 用于存储 本地信息块,即子块,的完整数据见下方表格,只可取 == Sum(Local Chunk Size) 所有 IFF 文件都有以上属性,而不同 IFF 文件区别,主要存在于 本地信息块的差异上,即 chunks 数组内容的不同。 对于 AIFF 音频格式来说,它的子块情况是什么样的呢? AIFF 将子块(sub-Chunk)拓展到了共计 12 种。且所有子块,在 AIFF 文件中,只能存在一份,或完全不存在。有(按优先程度,非组装顺序,见后文): Type Details Common Chunk 通用信息(子)块,用于存放有关文件本身包含所有子块的通用参数记录 Sound Data Chunk 音频数据(子)块,用于存放音频数据,即当前 PCM 数字信号的数字码信息 Marker Chunk 标记信息(子)块,用于存放有关当前音频的标记(如发行公司等)信息 Instrument Chunk 乐器信息(子)块,用于存放直接作用于当前音频数据的声乐信息 Comment Chunk 评论信息(子)块,用于存放用户等人的交互评价信息 Name Chunk 命名文字(子)块,用于存放当前文件命名信息 Author Chunk 作者文字(子)块,用于存放当前文件作者信息(区别于标记) Copyright Chunk 版权文字(子)块,用于存放当前文件版权信息 Annotation Chunk 声明文字(子)块,用于存放当前文件声明信息 Audio Recording Chunk 录制信息(子)块,用于存放音频录制采用的设备相关信息 MIDI Data Chunk 迷笛(MIDI)数据(子)块,用于存放需迷笛系统处理的通用数据 Application Specific Chunk 应用信息(子)块,用于存放经软件调整音频后,想要持续存储的调整设定参数 如果说 SND/AU 是精简的一端,那 AIFF 无疑将尽可能多的信息装填到了音频文件中。这种复杂的数据归纳,使 AIFF 格式中,出现了 多级数据结构。 我们分别来看一下,各个分块中的参数。 通用信息块(Common Chunk) 主要包含 6 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'COMM' 四个大写字母的 ASCII 码,即 == 0x434f4d4d ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值固定 == 18 numChannels 0x08~0x09 (2) 存储音频数据的通道数,单通道(Mono == 1),双通道(Stereo == 2),N 通道(== N) numSampleFrames 0x0a~0x0d (4) 用于标记音频数据在数模转换时的有效采样个数,即总音频帧数 == 音频的全通道总采样次数 / 通道数 sampleSize 0x0e~0x0f (2) 即 采样位深(Sampling Bit Depth)/ 最大比特分辨率(Max Bit Resolution),该值单位为 bits,有 == 8 | 16 | 32 bits 等 sampleRate 0x10~0x13 (4) +6 (extendable) 数字信号采样率,由于 SUD 只能存 PCM,对 PCM 来说就是 ,有该值 == 8000 | 11025 | 24000 | 44100 等 音频数据块(Sound Data Chunk) 主要包含 5 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'SSND' 四个大写字母的 ASCII 码,即 == 0x53534e44 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 8 + X offset 0x08~0x0b (4) 全通道数据单次采样的偏移大小,无需偏移则为 0 即一次全通道采样的有效数据大小,存储起始偏移,== offset_per_sample blockSize 0x0c~0x0f (4) 全通道数据单次采样的对齐大小,无需对齐则为 0 即一次全通道采样的有效数据大小,固定 == numChannels * sampleSize/8 soundData 0x10~0x10+X (X) 当前 PCM 数字信号的数字码信息,共计 X bytes 标记信息块(Marker Chunk) 主要包含 4 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'MARK' 四个大写字母的 ASCII 码,即 == 0x4d41524b ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 2 + Sum(sub-Data Size) numMarkers 0x08~0x09 (2) 当前附加标记总数,即标记子数据体的个数,== sub-Data number Markers 0x0a~0x0a + (numMarkers * perMarkerSize) 当前附加标记构成的 数组(Array),子数据体 Marker 的持有者,标记作用于总采样的每个独立采样上,时序顺序标记 Maker (bytes) Sub-Detail id (4) 当前标记唯一ID position (4) 作用于哪个采样的数组序号 markerName (str) 当前标记命名(字符串) 乐器信息块(Instrument Chunk) 主要包含 11 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'INST' 四个大写字母的 ASCII 码,即 == 0x494e5354 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 20 baseNote 0x08 (1) 乐器片段的基准乐理音调(Note),需配合 detune 该值采用 迷笛(MIDI)数字音调,范围为 0~127,而迷笛数字音调中,C4 == 60 detune 0x09 (1) 指定乐器片段演奏音高(Pitch),该值并未采用美体系,而直接取 音分(Cent)计数,范围为 -50~+50 ,在 baseNote 基础上乐理偏移 lowNote 0x0a (1) 指定乐器片段的最低兼容乐理音调(Note),该值采用 迷笛(MIDI)数字音调,范围为 0~127,lowNote highNote 0x0b (1) 指定乐器片段的最高兼容乐理音调(Note),该值采用 迷笛(MIDI)数字音调,范围为 0~127,highNote >= baseNote + MIDI (detune) lowVelocity 0x0c (1) 指定乐器片段的最低兼容播放速度(下限),该值采用 迷笛(MIDI)播放速度,范围为 0~127,音乐只能以大于等于该值的速度播放 highVelocity 0x0d (1) 指定乐器片段的最高兼容播放速度(上限),该值采用 迷笛(MIDI)播放速度,范围为 0~127,音乐只能以小于等于该值的速度播放 gain 0x0e~0x0f (2) 指定乐器片段的音高(Loudness)增减益,该值采用 声压级(SPL)取 n = -32768~+32767 ,代表在当前播放音量基础上,增减 n dB sustainLoop 0x10~0x19 (6) 指定乐器片段 持续播放部分 的循环和帧数据位置设定 Looper 
(bytes) Sub-Detail playMode (2) 记录当前循环模式 beginLoop (2) 记录循环起点的采样序号 endLoop (2) 记录循环终点的采样序号 releaseLoop 0x1a~0x23 (6) 指定乐器片段 持续播放部分 的循环和帧数据位置设定 Looper (bytes) Sub-Detail playMode (2) 记录当前循环模式 beginLoop (2) 记录循环起点的采样序号 endLoop (2) 记录循环终点的采样序号 乐器信息块 是一种用来记录音乐背景节奏,或者特殊效果器的附属信息子块。其中的 “乐器” 并不是由该块本身所指定的,而是来自于 sustainLoop 和 releaseLoop 所标定的,来自于 音频数据块(Sound Data Chunk) 的 Looper 子参数 指定 序号 beginLoop~endLoop 范围 的音频帧构成的原声片段中,采样到全部相关乐器的集合。 片段分两部:持续播放片段 和 收尾播放片段。即该信息块最后两个参数所指定的信息。 持续播放片段(sustainLoop),被用于通过序号(间接)存放声音正常播放过程中,在进入结束阶段(Release Phase)前,需要循环播放的音频帧区段。 收尾播放片段(releaseLoop),被用于通过序号(间接)存放声音正常播放过程中,在进入结束阶段(Release Phase)后,需要循环播放的音频帧区段。 而何时进入所谓 结束阶段(Release Phase),是由 标记信息块(Marker Chunk)标记数组(Markers) 中的 “结束阶段”标记(Marker) 决定的。因此,标记信息块的重要程度,要高于乐器信息块。 那么,循环的播放模式,即 循环模式(playMode) 都有哪些呢? 主要有 3 种,在无自定义情况下,分别是: 无循环模式(NoLooping),标记 0 ,表示片段只需单次顺序播放即可; 前向循环模式(ForwardLooping),标记 1 ,播完后从头部重新开始顺序播放; 前后循环模式(ForwardBackwardLooping),标记 2 ,播完后反向时序播放,以此循环; 三种模式的直观效果如下: 图 1-48 AIFF 乐器信息块 循环模式示意图 所以,乐器信息块 的 “乐器” ,实则为该指代片段数据中,用于乐理节奏或乐理意义上背景节拍器(Metronome)的乐器组合的抽象代称。 至于,乐理基调(baseNote)、偏移音高(detune)、最低音调(lowNote)、最高音调(highNote)、播速下限(lowVelocity)、播速上线(highVelocity)、音高增减益(gain)参数,则都是对整个信息块中,全部循环片段的 补充修饰,以 方便达到最佳放音效果。 评论信息块(Comment Chunk) 主要包含 4 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'COMT' 四个大写字母的 ASCII 码,即 == 0x434f4d54 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 2 + Sum(sub-Data Size) numComments 0x08~0x09 (2) 当前附加评论总数,即评论子数据体的个数,== sub-Data number Comments 0x0a~0x0a + (numComments * CommentSize) 当前附加评论构成的 数组(Array),子数据体 Comment 的持有者,评论以时间戳而非顺序定位,可关联 Marker Comment (bytes) Sub-Detail timestamp (4) 评论指向时间戳,单位 ms marker (4) 评论关联标记 ID count (4) 评论文字总字数 text (str) 当前位置评论(字符串) 评论信息块 存放的 评论信息,其索引标记和其他块中 以音频帧序列号 的方式 有所不同。是 直接采用音频时间戳来标记的。注意区分差异。 同时,评论内容可以通过 marker 参数,挂靠到 标记信息块 的标记中。这让相关评论数据能够同音频产生 一定程度的直接交互,该点即为评论信息块使用中的优势。 在评论信息块之后的优先级顺序中,出现了 一连 4 个文字块(Text Chunk),分别是: 命名文字块(Name Chunk)、 作者文字块(Author Chunk)、 版权文字块(Copyright Chunk)、 声明文字块(Annotation Chunk)。这 4 个文字块,拥有着相同的参数体系,为 包含 3 种属性的单层结构。 我们放在一起说明: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定(注意空格) 命名文字块 == 'NAME' == 0x4e414d45作者文字块 == 'AUTH' == 0x41555448版权文字块 == '(c) ' == 0x28232920声明文字块 == 'ANNO' == 0x414e4e4f ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == text_size == X text 0x08~0x08+X (X) 当前块的文字信息(字符串),填写相关类型块的文字信息,如:'Author: Mr.M' etc. 
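顺着这四个文字块共用的结构,下面补充一个假设性的 C++ 片段,演示如何从内存缓冲区中读出一个这类 IFF 风格的文字块(readBE32、readTextChunk 均为演示用命名)。注意 IFF/AIFF 体系的多字节整数为大端序,与 WAV 所属 RIFF 体系的小端序正好相反。

```cpp
#include <cstdint>
#include <string>
#include <utility>

// 读取 4 字节大端无符号整数(IFF/AIFF 体系为大端序)
static uint32_t readBE32(const uint8_t* p) {
    return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
           (uint32_t(p[2]) << 8)  |  uint32_t(p[3]);
}

// 从 buf 指向的位置读出一个文字块,返回 {块ID, 文字内容},例如 {"AUTH", "Author: Mr.M"}
std::pair<std::string, std::string> readTextChunk(const uint8_t* buf) {
    std::string ckID(reinterpret_cast<const char*>(buf), 4);         // 'NAME' / 'AUTH' / '(c) ' / 'ANNO'
    uint32_t ckSize = readBE32(buf + 4);                              // 不含 ckID 与 ckSize 自身的内容长度
    std::string text(reinterpret_cast<const char*>(buf + 8), ckSize); // 文字信息本体
    return {ckID, text};
}
```

另外,IFF 规范要求块数据为奇数长度时在末尾补一个填充字节,因此顺序遍历各块时,应按 8 + ckSize 向偶数对齐来前进;此处仅演示单个文字块的读取。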
于 4 个文字块后的 3 类信息块,相对于 AIFF 持有的 PCM 数据来说,则并不是特别重要。当然,此处的重要性是相对于音频数据本身而言的,并不是指该 3 类信息块完全没有意义。 实际上,该 3 类信息块,即 录制信息块(Audio Recording Chunk)、 迷笛数据块(MIDI Data Chunk)、 应用信息块(Application Specific Chunk),对于分别所处的 音频工程协会(AES)规格领域、 迷笛编辑器领域、 指定的系统应用 来说,都有 至关体系设定之直接存储、操作、保存的重要性。 因此,当传递的 AIFF 文件 有涉及该三类领域时,这 3 个信息块的作用是无可替代的。不过,如非非录音师、调音师或专业乐理工程师的话,则仅需要做简单了解即可。 录制信息块(Audio Recording Chunk) 主要包含 3 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'AESD' 四个大写字母的 ASCII 码,即 == 0x41455344 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值固定为 == 24 AESChannelStatus 0x08~0x20 (24) 该值用于协助 AES 实时数字音频传输(转录时),来自 AES3-1-2009 (r2019) 规定,通常只需关注位于字节第 0、2、3、4 位的预强调(Pre-emphasis)辅助值 [20] 。该值自音源生成后,就是固定参数。这里不做展开,具体见参考文献 AES3-1-2009 (r2019) 规定 迷笛数据块(MIDI Data Chunk) 主要包含 3 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'MIDI' 四个大写字母的 ASCII 码,即 == 0x4d494449 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == MIDI_data_size == X MIDIData 0x08~0x08+X (X) 该值用于协助 MIDI 音频演奏/编辑系统,存储一系列位于 MIDI 体系下的关键编辑数据,部分可能重叠于 乐器信息块(Instrument Chunk)内的数据,但其他数据则更丰富且复杂,需要配合迷笛解析器或硬件设备使用 对于 迷笛数据块 中,可能与 乐器信息块 产生冲突的数据,在 无 迷笛(MIDI)设备(涵盖软硬件) 的情况下,乐器信息块 的优先级高于 迷笛数据块。而当 存在 迷笛解析器 的情况下,两套块设备内的同类信息,将以不同的 迷笛配置(MIDI Profile) 形式,展现在解析软件中。 应用信息块(Application Specific Chunk) 主要包含 3 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'APPL' 四个大写字母的 ASCII 码,即 == 0x4150504c ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 4 + App_data_size == 4+X signature 0x08~0x0b (4) 标记指向系统应用签名(Application Signature),应用签名是已发布应用的唯一标识,每个应用都不同,但该字段标记的是系统级别应用的简写。在 IFF 管理体系下,该值为 4 bytes,通过这个字段,我们能准确定位数据归属的目标系统应用所在,如 Apple II Applications 有 == 'pdos' == 0x70646F73 data 0x0c~0x0c+X (X) 系统应用的相关数据,具体内容由签名指定的系统应用处理 到此, 12 种 附属信息子块(INFO sub-Chunks),即 本地信息块(Local Chunks),的作用介绍完毕。而各个子块的 信息内容优先级,便有如下顺序: 图 1-49 AIFF 乐器信息块 信息优先级排序 信息优先级高的子块,在出现同类信息的情况下,会被优先参考。不过对于特殊情况,也需要注意体系内差异。 而如果我们按照 AIFF 格式,去同样 封装一段 PCM 数字信号数据 时,它的文件结构有: 图 1-50 完整 AIFF 音频格式文件的文件结构示意图 显然,AIFF 相较于 WAV、SND/AU 来说,更加的复杂。这也是为何 AIFF 格式的运用没有 WAV 更为宽泛的原因。但富余而详细的子数据块,也使 AIFF 在多体系任务系统下,会更加的游刃有余。 借此,常用的 三种未压缩编码格式(或者说两种,即 WAV 和 AIFF) 与 PCM 基础格式,共同构成了 音频格式的地基。 但如此直接或相对直接的对 PCM 数据的存放方式,还是会有 大量空间占用浪费。于是,为了进一步缩减音频数据,在计算机系统中的持续化存储问题,工程师们开始采用压缩算法来提高空间利用率。这带来了携带压缩算法的,无损压缩编码格式(Lossless [Lossless Compression Audio Format]) 和 有损压缩编码格式(Lossy [Lossy Compression Audio Format])。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_6_3.html":{"url":"Chapter_1/Language/cn/Docs_1_6_3.html","title":"1.6.3 无损压缩编码格式(Lossless Encode)","keywords":"","body":" Hex Data Display .hex-container { text-align: center; } .hex-data { display: inline-block; text-align: left; font-weight: bold; font-family: monospace; white-space: pre; } 1.6.3 无损压缩编码格式(Lossless Compression Audio Format) 无损压缩编码格式(Lossless [Lossless Compression Audio Format]) 是采用 无损压缩算法(Lossless Compression Method)对 PCM 数字信号数据,进行封装保存的音频格式(Audio Format)。 无损压缩算法(Lossless Compression Method) 无损压缩算法(Lossless Compression Method) 是对原始数字信号经过算法压缩后,仍可以通过算法本身的逆运算,完全一致的还原回原始数字信号的算法。属于在压缩和解压缩过程中,都 不会丢失任何原始数据的可逆压缩算法(Reversible Compression Method)。[4] 常用无损压缩算法主要为 四类,分别是: 熵编码算法(Entropy Coding),采用如 哈夫曼编码(Huffman Coding)[21] 、香农-范诺编码(Shannon–Fano Coding)[22] 、算数编码(Arithmetic Coding)[23] 等。此类算法通过调整信息熵,为高出现频次信息分配较短字节位,而低出现频次信息分配较长字节位的方式,缩减整体所占字节空间大小。 预测编码算法(Predictive Coding),如 线性预测编码(LPC [Linear Predictive Coding])[24]、自适应差分脉冲编码调制(ADPCM [Adaptive Differential Pulse Code Modulation)[25] 
等。这类算法通过预测下一个数据点的值,并仅存储预测误差,从而减少数据量。除了 ADPCM 外,一些诸如 差分脉冲编码调制(DPCM [Differential Pulse Code Modulation])等的主要被运用于 数模模数转换 的调制方法,也是可以被在此处的。这种调制类方法,一般通过存储相邻采样点之间的差值来减少数据量,当运用于压缩时,也可归类至预测编码算法分类。 变换编码算法(Transform Coding),如 离散傅里叶变换(DFT)、离散余弦变换(DCT) 等。该类算法通过将时域信号转换为频域信号,来更有效地表示和压缩音频数据。由于其关键程度,在本书第三章中,会重点讲解。 复合算法(Hybrid),是指一类 采用了多种类型常规算法,按一定处理流排布,共同进行压缩的算法类型。大部分无损压缩编码格式,都属于此类。比如,结合了熵编码和预测编码 FLAC、ALAC,以及多种算法混合处理的 APE。 另外需要区别一点。从 原始波源(Original Source) 到 数字信号(Digital Signal) 的过程是 有损的。但 这与此处的压缩算法毫无关联。 通过前面章节的讲解,我们可以认识到,模拟信号本身采样自原始波源的过程其实是有损的,而从模拟信号到数字信号的过程,依然也是有损的。最简单来看,单 A/D、D/A 中的 硬件比特分辨率(Bit Resolution),就可能因存在从 连续到离散值再回到模拟连续值 过程,而 引入损失。这一过程的损失被称为 采样损失(Sampling Loss)。 所以,无损压缩算法虽然没有损失,但算法接收并处理的信号本身,就已经有一定的数据丢失了。不过,相比有损算法而言,该损失可以通过部署更优质的硬件设备来降低损失量,且相对更适合在采集模拟信号过程考察。因此,与之算法因素,采样损失并不在格式中计入。 回到格式本身。无损压缩编码格式(Lossless) 最常见的主要有 三种,分别是 FLAC(.flac)、ALAC(.m4a) 和 APE(.ape)。但因为 APE 的处理流及算法闭源,与 ALAC 的平台兼容性问题,FLAC 成为当下主流,全平台兼容且具有三者中最高压缩率(30%~60%)的,无损压缩编码格式首选。 因此,本书以 FLAC 为主,介绍 无损压缩编码格式 类型的处理过程和结构特性。其他类型触类旁通,不再另行赘述。 FLAC 音频格式 开放无损编码格式(FLAC [Free Lossless Audio Codec]),即 FLAC 音频格式(.flac),是由 开放无损(音频)编码组织(Xiph.Org Foundation.) 提供的一种,针对音频数据进行压缩存储的无损音频格式。由于是复合算法,其处理流水线如下(红线编码,绿线解码,解码逆运算): 图 1-51 FLAC 音频格式编解码执行作业流水线 分块(Blocking) 是 将输入音频分解成多个连续的块(Block)的步骤。在 FLAC 中,这些块的大小是可变的。而块的最佳大小,通常受包括 采样率、随时间变化的频谱特性 等多种因素影响。虽然 FLAC 允许在一个流中使用不同的块大小,但我们仍需要参考编码器的建议,使用固定的块大小。另一方面,固定的块大小也能便于解码时的处理。 通道间去相关(Interchannel Decorrelation) 是 针对多通道(Stereo、Multi-Channel)情况 进行的,以选择的指定 去相关策略(Decorrelation Strategy) 计算新组值代原有通道数据,来 减小原始信息冗余的辅助压缩手段。 去相关策略(Decorrelation Strategy)一般有三种,即: 对称去相关(Symmetric Decorrelation)、主成分分析(PCA)、奇艺值分解(SVD)。三者都是可逆的,而 对称去相关 则是其中最快最简便的算法。 记分块后有 (C1,C2)(C_1, C_2)(C1,C2) 数据,对称去相关会根据分组的组内 平均值(Mean) 和 差值(Sub),生成该组的中间信号与侧信号结果 (M,S)(M, S)(M,S) 代替原 (C1,C2)(C_1, C_2)(C1,C2) 。有: M=C1+C22,S=C1−C22 {\\displaystyle \\begin{aligned} M = \\frac{C_1 + C_2}{2} \\quad , \\quad S = \\frac{C_1 - C_2}{2} \\\\ \\end{aligned} } M=2C1+C2,S=2C1−C2 即 简单的线性变换。理所当然,其去相关的去数据冗余和降维能力也 相对较弱。 三种策略该如何选择呢?我们可以依据下表进行决定: Strategy Features When to use? 
Example Symmetric FastestLow Complexity simple linear transformations简单线性变化场景 一般双通道音频 PCA SlowerHigh Complexity when needs dimensionality reduction and feature extraction需要降维和特征提取场景 需要所有通道的基本分类特征信息,用于模型 SVD SlowestHighest Complexity when needs precise matrix decomposition需要精确矩阵分解的场景 需要矩阵化全通道特征张量,用于模型 可见,除非后续步骤中涉及模型或想要更高压缩比的结果,否则选择 对称去相关 基本已能满足大多数需求。注意,解码时需要逆运算。 预测(Prediction) 则是将去相关性后的块,通过尝试找到信号的 相近数学解集,来 转换块的保留数据。一般而言,解集通常都比原始信号要 小得多。由于预测方法对编码器和解码器都是已知的,因此只需在压缩流中包含预测器的参数即可。FLAC 目前只支持四种不同类别的内置已定义好的预测器,但预留了额外的改进空间,以便添加其他方法。而从设计上讲,FLAC 允许预测器类别在块与块之间,甚至在块的各个通道之间变化。而解码时,亦需要采用相同预测方法做逆运算。 残差编码(Residual Coding) 是 必须的校准步骤,该步骤的目的是确认,预测器是否能准确的使用预测结果,描述输入的去相关块信号。此时,必须对原始信号和预测信号之间的差异(即误差,或残差信号)进行无损编码。 怎么判断预测器结果是否满足要求呢?粗略的方法,是 通过判断 残差信号(Residual Signal)所需的每个样本位数,是否少于原始信号。少于则预测有效,否则无效。而当差值过大时,通常意味着,编码器需要用 调整块大小、改变块数目、切换预测器、改变去相关方法 的流程内改动,来 重新生成预测结果。 所以,残差编码的作用,相当于整个编码过程的 自动化结果检验。同理于解码。 在经过这些步骤后,我们就得到了 用于 FLAC 格式持续化存储的数据,包含两部分: 【预测编码数据(Prediction Data)】+【残差信号(Residual Signal)】 这即是 FLAC 格式下,实际用于保存的 一个完整 音频数据块(Audio Data Block) 构成。存储的音频由一系列此种数据块,按时序排列组成。再配合 FLAC 格式文件结构的头部信息,共同组成了 FLAC 文件。 那么,一个完整的 FLAC 文件,其 文件结构 是什么样的呢?如图: 图 1-52 完整 FLAC 音频格式文件的文件结构示意图 [26] 从简图中可以看出,FLAC 文件结构仍然采用二分,以: 【元数据信息块(Metadata Blocks)】+【音频数据块(Audio Data Blocks)】 的方式,进行信息区域划分。 元数据信息块(Metadata Blocks) 是包含 流信息块 和 附属信息块 在内的,一系列 对音频数据本身特征进行描述 的 存储容器集合。和未压缩音频 AIFF 格式较为相同,FLAC 的元数据信息块对数据的组织方式,采用了分类封装。而原本用于标记文件格式的 ID 字段,被从块中独立拿出,以 恒定占用 FLAC 格式文件头部 4 字节(Bytes)的形式,锚定当前数据结构信息。 即,所有 FLAC 音频格式文件都有头部唯一字段(注意大小写): Params Range(bytes) Details FileID 0x00~0x03 (4) 标记当前文件 ID,固定存储 'fLaC' 四个大小写字母的 ASCII 码,即 == 0x664c6143 至于其他被记录的关键或非关键额外信息,按照相关成分,被分为 7 种不同种类的 基础内构元数据块 和 1 个无效标记块,分别是: Block Type Mark(bit) Details STREAMINFO 0 :0000 000 通用流信息块,必位于首位,用于记录音频流基本信息(比特率、采样率等) PADDING 1 :0000 001 对齐填充块,无内容(全部为 0)用于填充空间,用于在不重新编码音频数据的情况下添加或修改元数据 APPLICATION 2 :0000 010 应用信息(子)块,用于存放经软件调整音频后,想要持续存储的调整设定参数 SEEKTABLE 3 :0000 011 标记信息(子)块,用于存放快速定位音频流中特定位置的查找表 VORBIS_COMMENT 4 :0000 100 评论信息(子)块,包含用户定义的标签信息,用于存放用户等人的交互评价信息 CUESHEET 5 :0000 101 CUE 表(子)块,用于存放音轨的索引信息,即类比 CD 的 CUE 表 PICTURE 6 :0000 110 图像数据(子)块,用于存放当前音频的专辑封面图片等图像信息 [Reserved] 7~126 保留(子)块,预留的 7~126 号标签,为未来扩展或自定义扩展而用 [Invalid] 127 :1111 111 无效标记(子)块,是无效的元数据块类型用于唯一标识错误 在 FLAC 中,元数据块的基本组成高度一致,皆为: 【元数据头(Metadata Header)】+【元数据块数据(Metadata Block Data)】 的形式,不似于 AIFF 中的 ckID 来标记不同类型块,FLAC 采用元数据头中的固定标记位,以 类型序号 标识 元数据块的种类。即,并不以 ASCII 码标记的固定类型值作为头部信息。由此而来的好处是,FLAC 的元数据头,能够以相对统一的结构定义,并包含更多有效信息。 每个元数据块的 固定头部(Metadata Header),以下简称 头部(Header),始终为 4 字节(4 bytes),包含 3 个关键字段: Params Range(bytes) Details Last block flag 0x00 (1 bit)x--- ---- 标记当前块是否为最末位,占第一字节第七位 1 bit当块为最末位时该位为 1 ,否则为 0 Block Type 0x00 (7 bits)-xxx xxxx 块类型标记位,占第一字节的 剩余 7 bits即上表中的块类型 Mark Block Length 0x01~0x03 (3) 块大小,记录当前块的总字节长度(不含头部),24 位 而 所有的元数据块皆有如下结构: 图 1-53 FLAC 音频格式的元数据信息块统一结构示意图 现在,让我们顺序了解各类分块的关键参数(包含元数据头)。方便与系统起见,我们仍然将 元数据块(Metadata Block) 称为 块(Chunk)。 通用流信息块(STREAMINFO Chunk) 主要包含 10 种属性,分别是: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 STREAMINFO,首字节有 [flag bit] 000 0000 Min Block Size 0x04~0x05 (2) 最小块大小(以样本为单位),通常为 16 或 4096 Max Block Size 0x06~0x07 (2) 最大块大小(以样本为单位),通常为 16 或 4096 Min Frame Size 0x08~0x0a (3) 最小帧大小(以字节为单位),表示音频帧的最小字节数 Max Frame Size 0x0b~0x0d (3) 最大帧大小(以字节为单位),表示音频帧的最大字节数 Sample Rate 0x0e~0x10 (2.5 = 20 bits) 数字信号采样率,由于 SUD 只能存 PCM,对 PCM 来说就是 ,有该值== 8000 | 11025 | 24000 | 44100 等 Num of Channels 0x10 (3 bits) xxx- ---- 存储音频数据的通道数,单通道(Mono == 1),双通道(Stereo == 2),N 通道(== N) Bits per Sample 0x10~0x11 (5 bits) ---x xxxx 即 采样位深(Sampling Bit Depth)/ 最大比特分辨率(Max Bit Resolution),该值单位为 bits,有== 8 | 16 | 32 bits 等 
Total Samples 0x11~0x15 (4.5 = 36 bits) 用于标记音频数据在数模转换时的有效采样个数,即总音频帧数== 音频的全通道总采样次数 / 通道数 MD5 Signature 0x16~0x26 (16) 完整性 MD5 签名,用于验证音频数据完整性的 MD5 哈希值,128 位。通过验证 MD5 是否和预期一致,快速检测完整性 对齐填充块(PADDING Chunk) 主要包含 2 种属性,分别是: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 PADDING,首字节有 [flag bit] 000 0001 Padding Data 0x04~0x04+X (X) 填充数据,全部为零,用于在不重新编码音频数据的情况下添加或修改元数据 应用信息块(APPLICATION Chunk) 主要包含 2 种属性,分别是: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部, 对于 APPLICATION,首字节有 [flag bit] 000 0010 Application ID 0x04~0x07 (4) 标记指向系统应用签名(Application Signature), 应用签名是已发布应用的唯一标识, 即注册的应用程序 ID,用于标识特定的应用程序 Application Data 0x08~0x08+X (X) 系统应用的相关数据, 具体内容由签名指定的系统应用处理,长度 X 字节 标记信息块(SEEKTABLE Chunk),也可称为索引表块,主要包含 2 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 SEEKTABLE,首字节有 [flag bit] 000 0011 Seek Points 0x04~0x04 + (numSeekPoints * perSPSize) 由查找点构成的 数组(Array),子数据体 SeekPoint 持有者,类似 AIFF 的 Markers 标记作用于总采样的每个独立采样上,时序顺序标记 SeekPoint (bytes) Sub-Detail Sample Number (8) 查找点对应的采样数 Byte Offset (8) 查找点对应的字节偏移量 Sample Offset (2) 查找点对应的采样数偏移量 评论信息块(VORBIS_COMMENT Chunk),主要包含 5 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,VORBIS_COMMENT,首字节有 [flag bit] 000 0100 Vendor Length 0x04~0x07 (4) 标记厂商字符串长度,记为 len 厂商字符串的长度,表示厂商字符串的字节数 Vendor String 0x08~0x08+len 标记厂商字符串,字符串长度为 Vendor Length 值,用来记录当前音频的发行商等信息 User Comment List Length 0x08+len~0x08+len+4 (4) 记录当前评论个数,值为几,就有几条评论 User Comment List 0x08+len+4 ~ 0x08+len+4 + (numComments * perCommSize) 由评论构成的 数组(Array),子数据体 Comment 持有者,不同 AIFF 的 Comment FLAC 的该子数据体记录,包括的评论所有音频额外信息键值对字符串,如 \"TITLE=Example\" Comment (bytes) Sub-Detail Comment Length (4) 评论字符串长度( N 字节) Comment String (N) 评论键值对字符串 CUE 表块(CUESHEET Chunk),主要包含 7 种属性和 2 种 子数据体(sub-Data Info),本身具有 三层数据结构模型: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 CUESHEET,首字节有 [flag bit] 000 0101 Media Catalog Number 0x04~0x43 (64) 记录媒体目录号,表示光盘的媒体目录号 Lead-in Samples 0x44~0x4b (8) 引导样本数,表示光盘引导区的样本数 Is CD 0x4c (1) 是否为 CD,1 表示是 CD,0 表示不是 CD Reserved 0x4d~0x5f (19) 保留字段,全部为零 Number of Tracks 0x60 (1) 总轨数,表示光盘上的总轨数,即声轨,并非通道数 各声轨间独立,是可以在播放上重叠的 Track Information 0x61 ~ 0x61 + (numTrackInfo * perTInfoSize) 由声轨构成的 数组(Array),子数据体 TrackInfo 持有者,记录每个声轨的信息,包括轨号、轨偏移、ISRC、轨索引等 TrackInfo (bytes) Sub-Detail Track Offset (8) 轨偏移,轨道的字节偏移量 Track Number (1) 轨号,即轨道的编号 ISRC (12) 声轨的国际标准录音代码 Track Type (1) 轨类型,轨道的类型 Pre-emphasis (1) 标记是否使用预加重 Reserved (3) 保留开关字段,全部为零 Track Index (N)N = (num*TrackIndexSize) 由轨索引构成的 数组(Array),子数据体 TrackIndex 持有者,记录声轨索引信息 TrackIndex (bytes) Sub-Detail Index Offset (8) 索引偏移,索引字节偏移量 Index Number (1) 索引号,即索引的编号 Reserved (3) 保留字段,全部为零 图像数据(PICTURE Chunk),主要包含 12 种属性,为: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 PICTURE,首字节有 [flag bit] 000 0110 Picture Type 0x04~0x07 (4) 图片类型,表示图片的用途,例如封面、背面等 MIME Type Length 0x08~0x0b (4) MIME 类型字符串的长度,表示 MIME 类型字符串字节数,值为 X0 单位 bytes MIME Type 0x0c~0x0c+X0 (X0) MIME 类型字符串,表示图片的 MIME 类型,例如 \"image/jpeg\" 或 \"image/png\" ,字符串长度由上一条属性记录 Description Length 0x0c+X0~0x0c+X0+4 (4) 描述字符串的长度,表示描述字符串的字节数,值为 X1 单位 bytes Description last_at~last_at+X1 (X1) 描述字符串,表示图片的描述信息,例如 \"Album Cover\" Width last_at~last_at+4 (4) 图片宽度,单位为像素,例如值 512 ,即 512 像素(Pixels) Height last_at~last_at+4 (4) 图片高度,单位为像素,例如值 512 ,即 512 像素(Pixels) Color Depth last_at~last_at+4 (4) 色深,单位为位(bit),表示每个像素的位数例如值 24,表示单个像素颜色为 24 位,详见下一章 Colors Used last_at~last_at+4 (4) 每像素的颜色数,表示图片使用的颜色数,即颜色总数,例如值 16,像素值为索引,取色自 16 种色的调色板而取 0 则表示,颜色完全由像素自身决定 Picture Data Length 
last_at~last_at+4 (4) 图片数据的长度,表示图片数据的字节数,值为 N 单位 bytes值指向下一字段,当前块的图片数据所占总字节长度 Picture Data last_at~last_at+N (N) 图片数据(逐行扫描),当前块的实际图片二进制数据,每个像素值取 色深(ColorDepth)值代表的位数 剩下的 保留块(Reserved Chunk) 和 无效块(Invalid Chunk) 类型,因其数据结构定义为无 或 自定制。实际使用中,可根据当前工程情况,内部协议设定加以利用。 至此,对于 FLAC 音频格式,我们就能完整解析了。让我们来看一段 138 bytes 的 FLAC 音频文件数据(十六进制格式单字节展开)事例: 66 4c 61 43 00 00 00 22 00 10 00 10 00 04 00 00 10 00 ac 44 50 00 00 06 ba a8 d4 1d 8c d9 8f 00 b2 04 e9 80 09 98 ec f8 42 7e 86 01 f4 35 00 00 00 03 00 00 00 0a 69 6d 61 67 65 2f 6a 70 65 67 00 00 00 0b 41 6c 62 75 6d 20 43 6f 76 65 72 00 00 02 00 00 00 02 00 00 00 18 00 00 00 00 00 00 01 f4 00 按照上述划分,获取对应子块信息,有: 图 1-54 演示用 138 bytes 的 FLAC 音频文件数据解析 可见,样例只是一段 FLAC 数据的元数据部分,且包含了 STREAMINFO 和 PICTURE 这两个元数据块。同时,PICTURE 的图片数据 并不在上述数据 中。而从 图片数据的长度(Picture Data Length) 和 其他字段携带的信息可知,该图片数据为 512 x 512 的 128000 字节 24 位 JPEG 数据。而原音频,从 STREAMINFO 解读 可得,未在上例中包含的音频数据块中包含的音频,为 采样率 44100 Hz 的 16-bits 双声道立体声(Stereo)总计 44100 个采样值(即 1s 长度)的压缩后数据块数组。 作为无损压缩编码音频格式的代表,FLAC 具有重要的地位。它能够在不丢失任何原始音频信息的情况下,极大的减少文件大小。这使得它被广泛的应用在了高保真音频存储和传输过程中。其 无损特性确保了音频在解码后与原始音频完全一致,令其成为了 音频发烧友 和 专业音频制作 的首选格式。 同样的,该特点也是无损压缩编码音频格式,最为显著具的优势。 然而,尽管无损压缩如 FLAC 提供了最高的音质保真度,但其文件大小仍然相对较大。在许多应用场景中,如 流媒体 和 便携设备存储(尤其是在随身听时代,早期有限的存储空间情况),依然 不够便利。因此,具有更大压缩比的有损压缩编码音频格式,如 MP3 和 AAC 便成为了一种 可以接受的替代方案。这些格式 通过舍弃人耳不易察觉的音频信息,进一步减小文件大小,同时在音质和压缩率之间取得平衡。 虽然为人所带来的听觉感受,介于此,会相对有所衰减。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/Docs_1_6_4.html":{"url":"Chapter_1/Language/cn/Docs_1_6_4.html","title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","keywords":"","body":" Hex Data Display .hex-container { text-align: center; } .hex-data { display: inline-block; text-align: left; font-weight: bold; font-family: monospace; white-space: pre; } 1.6.4 有损压缩编码格式(Lossy Compression Audio Format) 有损压缩编码格式(Lossy [Lossy Compression Audio Format]) 是一种 通过舍弃部分音频数据来减少文件大小的音频压缩技术。其采用 有损压缩算法(Lossy Compression Method)对 PCM 数字信号数据,进行封装保存的音频格式(Audio Format)。 有损压缩算法(Lossy Compression Method) 有损压缩算法(Lossy Compression Method) 是对原始数字信号经过算法压缩后,仍可以通过算法本身的逆运算,相对近似的还原回原始数字信号的算法。因为算法会丢失部分数据导致音质下降,所以,有损压缩算法属于不可逆压缩算法(Irreversible Compression Method)。而按处理流分类来看,被使用在压缩/解压处理上的有损压缩算法,只有复合算法(Hybrid)类型。 对于不可逆压缩算法而言,其对 数字信号的编码(Encoding)关键步骤,大都为 采用数学逼近算法,去寻找复杂曲线的解集,用 记录解集本身 来替代原有数字信号的数字码信息 进行保存。这个求解过程就可能 引入一定的损失。其 解码(Decoding)处理,则是根据解集,结合采用算法重新合成至原有数据。 此时,还原出的数据,是 算法拟合的结果而非原始样本,即非原有数字信号的结果。 从数学角度来看,此类算法大都选择以 离散傅里叶变换(DFT)、小波变换(Wavelet Transform) 等技术的 降低算力消耗近似解方案,作为算法核心。以傅立叶族举例,在实际应用中,当选定解空间的 傅立叶基底函数族 并不是无穷时,离散傅立叶基所表示的原值,本就 会有一定损失(见本书第三章详解)。此外,来自数字信号的 有损采样(Lossy Sampling) 过程的 采样损失(Sampling Loss),依旧会存在。 所以,不可逆压缩算法是会有一定误差的。而误差的引入则来自于算法自身,一经处理后无法消除。 不过,算法带来的 压缩优势极为巨大。大部分采用有损压缩算法的音频格式,都 至少能达到 10 : 1 的压缩比(CR [Compression Ratio])即减少约 90% ,甚至更小。而极致压缩比下,算法对音频音质带来的影响,对大多数情况和使用者来说,却可忽略不计。 因此,采用有损压缩算法的有损压缩编码格式,在进入音频数字化时代后,被大量且广泛的应用于商业音频产品中。其中,普及程度最广并具有最高硬件兼容性的 MP3 格式,就是最具代表的该类类型。 MP3 音频格式 MP3(MPEG-1 Audio Layer III),即 MP3 音频格式(.mp3)。是于 1987 年,由德国 弗劳恩霍夫应用研究促进协会(Fraunhofer [Fraunhofer Society for the Advancement of Applied Research]) 研究所主导完成的 音频有损压缩格式。并于 1993 年第一代 MPEG-1 标准制定中,获得委员会认同,确立为 通用 MPEG-1 和 MPEG-2 标准的音频规格部分。 图 1-55 弗劳恩霍夫应用研究促进协会的 logo PS:弗劳恩霍夫协会是业内标杆的多机构联合体,为音视频技术的发展做出了巨大贡献。 此外,随着 2017 年 MP3 专利到期(在此之间 MP3 由多个复杂的专利体系构筑专利网进行了版权保护),该格式彻底成为了开源开放且兼具标准和广泛适用性的音频格式。 MP3 的处理流水线如下(红线编码,绿线解码,解码逆运算): 图 1-56 MP3 音频格式编解码执行作业流水线(简) 接下来,我们依据编码的流程顺序,对几个环节(包括解码的环节)进行梳理。 分组(Blocking) 和 重组(Reassemble) 是互逆的两个过程,分组是将输入的 PCM 音频信号分解成多个连续的片段的步骤。每片段通常包含 1152 个采样点,目的是将音频信号 分割成适合后续处理的小块,继而 
提高压缩效率和音质。它的逆向过程即为重组。重组是将多个片段的时域信号拼接的步骤。解码器逐片段提取和处理压缩数据,逐步重建原始音频信号,直至恢复完整的音频信号(注意,被还原得到的 PCM 已 不完全相同于 原输入 PCM)。 为什么是 1152 个采样点呢?这是因为在 MPEG-1 针对音频 DCT 处理的实验上,发现 1152 个采样点能够在编码效率和质量上,达到最佳平衡点。且能够在保证音质的前提下减少计算复杂度。直到 MPEG-4 将音频更改为 AAC 音频格式,并取用了更为合理的 1024 个采样点设定。 分窗(Windowing) 和 去窗口化(De-windowing/Inverse Windowing) 互逆。 分窗是对每个分块片段,应用加权处理的步骤。常用的 窗口函数 包括 汉宁窗(Hanning Window)、 汉明窗(Hamming Window) 和 黑曼窗(Blackman Window)。几个算法都是对分组后样本片段的缩放处理,目的是 减少频谱泄漏,提高频谱分析的精度。记分组总样本数为 NNN ,而当前分块的某个采样点值为 nnn ,则对 nnn 分窗处理的结果 w(n)w(n)w(n) 有: {Hanning: w(n)=0.5⋅(1−cos(2πnN−1))Hamming: w(n)=0.54−0.46⋅cos(2πnN−1)Blackman: w(n)=0.42−0.5⋅cos(2πnN−1)+0.08⋅cos(4πnN−1) {\\displaystyle \\begin{aligned} \\begin{cases} Hanning &: \\ w(n) = 0.5 \\cdot \\left( 1 - cos \\left( \\frac{2\\pi n}{N - 1}\\right) \\right) \\\\ Hamming &: \\ w(n) = 0.54 - 0.46 \\cdot cos \\left( \\frac{2\\pi n}{N - 1}\\right) \\\\ Blackman &: \\ w(n) = 0.42 - 0.5 \\cdot cos \\left( \\frac{2\\pi n}{N - 1}\\right) + 0.08 \\cdot cos \\left( \\frac{4\\pi n}{N - 1}\\right) \\end{cases} \\\\ \\end{aligned} } ⎩⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎧HanningHammingBlackman: w(n)=0.5⋅(1−cos(N−12πn)): w(n)=0.54−0.46⋅cos(N−12πn): w(n)=0.42−0.5⋅cos(N−12πn)+0.08⋅cos(N−14πn) 通过 对帧的两端进行平滑过渡,减少边界效应。当处于解码流程时,去窗口化直接对编码过程的分窗 窗口函数(Windowing Method)做逆运算即可。当然,窗口函数并不只上面的三种,上述列出的仅为较常用的算法。 在下一步中的 离散余弦变换(DCT)和其逆变换,是整套 MP3 体系的核心之一。DCT 的作用,是 将时域信号转换为频域信号并提取频率成分,以便于 随后的子代分析 与 心理声学模型处理后的数据量化使用。原理不在本章节展开(见第三章)。 经过 DCT 所得的频域数据,被用作 子带滤波(Subband Filtering) 的输入。子带滤波是一种信号处理技术,用于 将输入信号分解成多个频率子带(Subband),每个子带包含特定频率范围内的信号成分。而结合我们在模数转换(A/D)提到的 香农采样定律(Nyquist–Shannon Sampling Theorem) 可知,有来自于 A/D 采样率(Samplerate/Sample Rate)不变的条件下, DCT 过程中的 基底函数族函数,函数可取用的 最大频率值为该采样率值的一半。所以,子带划分依据采样率的半值,以固定步长来切割出各个子带范围。 记子带计划分组数为 MMM ,每个子带的频率范围长度为固定值 SSS ,则: S=SampleRate2M {\\displaystyle \\begin{aligned} S = \\frac{SampleRate}{2M} \\\\ \\end{aligned} } S=2MSampleRate 我们以最常用的 44.1 kHZ 采样率 为例,其子带滤波后的子带划分,在 MP3 格式里,该采样率下需要分为 M=32M = 32M=32 组 [27] ,即: S=SampleRate2M=44.1 kHz2×32≈0.689 kHz {\\displaystyle \\begin{aligned} S = \\frac{SampleRate}{2M} = \\frac{44.1\\ kHz}{2 \\times 32} \\approx 0.689\\ kHz \\\\ \\end{aligned} } S=2MSampleRate=2×3244.1 kHz≈0.689 kHz 所以,通过子带滤波后的子带划分情况为 均匀的 32 组: 图 1-57 MP3 音频格式经过子带滤波后的子带分组情况 但正如本书在 等响曲线(ELLC) 和 收音频响曲线(HFR) 章节所提,人耳对声音的感知本身就是动态的。声音的 瞬时特征、由突发强信号对弱信号的心理性掩盖(即掩蔽效应)、超阈值噪音(即痛觉阈)所产生的噪音遮蔽 等,都会使人对上述 不同子带分段代表频率的感知能力发生变化。以等长划分的子带,显然 不能充分的 表示该变化所带来的影响,进而 无法做到,在筛选掉一些不必要的频段的同时,增强有效频段。 心理声学模型的作用,在此得到了体现。 通过在编码阶段引入 心理声学模型(Psychoacoustic Model) 来 动态的调整子带滤波阶段的划分结果。我们就能进一步的压缩有效数据。在 MPEG 的设定中,层(Layer)的概念是对一套不同编码复杂度和压缩效率级别的方案的代称。MP3 来自于 MPEG-1 的音频层级3(Audio Layer III)。以分窗后的数据为输入,在 Layer III 的规格下,音频需要经过一个 相对独立的复杂流水线处理过程: 图 1-58 MP3 音频格式在 MPEG-1 中的子带及心理声学模型工作流(立体声)[27] 那么,假设经过该流水线后,新的子带集:取低频部分,去除 子带1、子带2 和 子带3。中频部分,略微调整子 带10 到 子带20 的频率范围。高频部分,去除子 带30、子带31 和 子带32,调整 子带29 的频率范围。就有: 图 1-59 MP3 音频格式经过心理声学模型动态处理后的子带分组情况(模拟) 至于 每个子带内所含的对应频率数据,再辅助以 量化处理 与 哈夫曼编码(Huffman Coding)的熵编码(Entropy Coding)处理,完成流程末尾的数据压缩后。按照如下的数据结构封装后,即可得到最终的 MP3 音频数据帧(Audio Frame) 了: 图 1-60 MP3 音频格式的数据组帧后的单帧示例 量化和哈夫曼编码,相信读到此处的朋友都较为熟悉,无需赘言。其结果数据,被用于装填当前帧的帧数据(Frame Data)部分。相关的 完整帧长度(包含头部的完整帧长度),则可以通过如下公式计算: if use [Layer I]:Frame Length=(12×BitrateSampling Rate+Padding Byte)×4.if use [Layer II] or [Layer III]:Frame Length=(144×BitrateSampling Rate)+Padding Byte {\\displaystyle \\begin{aligned} &if\\ use \\ [Layer\\ I] :\\\\ &\\quad \\quad Frame\\ Length = \\left( \\frac{12 \\times Bitrate}{Sampling\\ Rate} + Padding\\ Byte \\right) \\times 4 \\\\ &\\quad .\\\\ &if\\ use \\ [Layer\\ II]\\ or\\ [Layer\\ III] :\\\\ &\\quad \\quad Frame\\ Length = \\left( 
\\frac{144 \\times Bitrate}{Sampling\\ Rate} \\right) + Padding\\ Byte \\end{aligned} } if use [Layer I]:Frame Length=(Sampling Rate12×Bitrate+Padding Byte)×4.if use [Layer II] or [Layer III]:Frame Length=(Sampling Rate144×Bitrate)+Padding Byte 而 组帧(Framing),简而言之,就是 按格式对数据封装的过程,注意 只占 4 字节的 帧头(Frame Header)高密度音频辅助信息。其中包含如下字段(索引对应单位为 bit ): Params Range(bits) Details Sync Word 0~11 (12) 固定头部【同步字】标签,表示当前音频帧的开始,固定为 0xFFF Version 12~13 (2) MPEG 版本,00 为 MPEG-2.5,10 为 MPEG-2,11 为 MPEG-1,01 保留 Layer 14~15 (2) MPEG 层级,01 为 Layer III,10 为 Layer II,11 为 Layer I,00保留 Protection Bit 16 (1) CRC 交验状态标志,0 指启用 CRC 校验,1 指关闭 CRC 校验,CRC 校验用于检测帧数据的传输错误 Bitrate Index 17~20 (4) 表示音频数据 比特率 的相关查表索引,比特率索引的值范围从 0001 到 1110,对应的比特率从32 kbps到320 kbps。0000表示免费模式,1111保留 Sample Rate Index 21~22 (2) 表示音频数据 采样率 的相关查表索引,00 为 44.1 kHz,01 为 48 kHz,02 为 32 kHz,11保留 Padding Bit 23 (1) 填充字节启用标志,0 表示不使用填充,1 表示使用填充,填充位用于确保帧的长度一致 Private Bit 24 (1) 预留私有标志位,私有位由应用程序自行定义和使用,不影响音频数据的解码 Channel Mode 25~26 (2) 指示音频数据的声道模式,00 为立体声(Stereo),01 为联合立体声(IS),10 为双声道(Dual),11 为单声道(Mono) Mode Extension 27~28 (2) 辅助表示联合立体声的类型,00 为禁用,01 为强制立体声,10 和 11 保留 Copyright 29 (1) 版权状态标志位,取 0 则音频数据不受版权保护,取 1 则音频数据受版权保护 Original 30 (1) 原始媒体标志位,取 0 则音频数据是复制品,取 1 则音频数据是原始录音或原始媒体 Emphasis 31~32 (2) 预强调(Pre-emphasis)处理类型 的相关查表索引,00 为 无强调,01 为 50/15 微秒(50/15 µs)滤波,10 为 保留字段,11 则采用 ITU-CCITT J.17 标准 这些信息多 以查表法 代替了在未压缩音频格式和无损压缩音频格式中,对音频基础信息的数值存储方式。使每组 MP3 帧数据,都能携带这一部分信息,方便了音频以流的形式传输。 那么 MP3 音频格式,是否单纯的只有 MP3 数据帧构成呢?显然不是。 MP3 的文件结构,依旧为 两部分组成(简单示意): 【MP3 ID3 标签(MP3 ID3 Tags)】+【MP3 帧数据(MP3 Audio Frames)】 其中,MP3 ID3 标签,被用于做包括 歌曲标题、艺术家、专辑、年份、流派、评论 等信息的记录。根据使用位置和复杂度分类,可以分类两种: ID3v1 用于 MP3 文件末尾,固定 128 字节(Bytes),最多只能包含一个; ID3v2 用于 MP3 文件开头,长度可变不固定,记录复杂数据并存在多个不同版本; 可见,ID3 标签对 MP3 的意义,几乎等同于 FLAC 的元数据块 或 未压缩音频格式中的信息块,对本身音频格式的作用和地位。只是并不保有音频基础信息而已。 ID3v1 是第一版的 ID3 标签规范,也是通用性最好且最简的 ID3 标签,固定有 7 个字段: Params Range(bytes) Details Identifier 0x00~0x02 (3) 标记当前标签 ID,固定存储 'TAG' 四个大写字母的 ASCII 码,即 == 0x544147 Title 0x03~0x1e (30) 音频标题,记录该音频标题描述,固定 30 字节长度 Artist 0x1f~0x3e (30) 音频艺术家名称,记录创作该音频的艺术家名称,固定 30 字节长度 Album 0x3f~0x5e (30) 音频专辑名,记录该音频所在专辑名称,固定 30 字节长度 Year 0x5f~0x62 (4) 音频发行年份,记录该音频发行时间点 Comment 0x63~0x7e (30) 音频短评或附属文字信息,记录该音频的一些简短的额外文字信息 Genre 0x7f (1) 音频流派 的本地相关查表索引,即音频流派归类,采用本地流派列表,记录索引值 ID3v2 则是对 ID3v1 标签的扩展,现已迭代了 ID3v2.2、 ID3v2.3 和 ID3v2.4 三个主要版本。从 ID3v2.2 开始(即首个 ID3v2 标准标签),ID3v2 类标签就采用了类似于 MP3 音频帧 的封装结构,将自身分为 两个部分组成,以便统一于音频的数据封装习惯: 【ID3 标签头(ID3 Tag Header)】+【ID3 标签帧(ID3 Tag Frame)】 三个版本 ID3v2 标签头(Tag Header)的参数基本一致,可用取值上略有差异: Params Range(bytes) Details Identifier 0x00~0x02 (3) 标记当前标签 ID,固定存储 'ID3' 四个大写字母的 ASCII 码,即 == 0x494433 Version 0x03 (1) 标签主版本号,v2.2 固定取 2,即 0000 0010 v2.3 固定取 3,即 0000 0011 v2.4 固定取 4,即 0000 0100 Revision 0x04 (1) 标签副版本号,固定取 0,即 0000 0000 Flag 0x05 (1) 标签标志位,记录采用的标签特性状态,v2.2 v2.3 v2.4:去同步,解决播放器解读 0xFF 值问题,1000 0000有压缩(仅 v2.2),标签是否压缩,0100 0000v2.3 v2.4:扩展头,标签是否包含扩展头信息,0100 0000实验位,标签是否为实验性质标签,0010 0000v2.4:尾部位,标签是否包含标签尾信息,0001 0000标签尾(Footer)为 Identifier 取 \"3DI\" ,而其余同标签头的相同数据结构信息,便于标志 ID3 结尾其他位为后续拓展保留 Size 0x06~0x09 (4) 当前 ID3 标签的数据内容长度,即不包含标签头(Header)和 标签尾(Footer)的其余部分数据长度字节数,例如:扩展头 (20) + 帧1 (30)+ 帧2 (40) = 90 Bytes 在标签头的标志位中,对于 v2.3 和 v2.4 有一个 专用于扩展的数据结构,即 扩展头(Extended Header) 数据。这一结构体常被用来存放一些额外的自定义信息(一般为一些状态标志,做功能启停和记录),放置于 ID3 帧数据队列的首位。 从参数构成上看,ID3v2.3 的可定制控件较为约束: Params Range(bytes) Details Size 0x00~0x03 (4) 扩展头占用字节数,不包含参数自身的 4 Bytes Extended Flags 0x04~0x05 (2) 扩展头标志位,表示当前扩展头特性,此处不展开 Padding Size 0x06~0x07 (2) 对齐标志位,用于填充 0 来对齐数据位数 CRC Data 0x08~+X (X) CRC 交验信息,一般为 2 Bytes 的 CRC 交验值 相比之下,ID3v2.4 的灵活度就要更高一些: Params Range(bytes) Details Size 
0x00~0x03 (4) 扩展头占用字节数,不包含参数自身的 4 Bytes Num of Flag Bytes 0x04 (1) 扩展头标志位总字节数,记为 X ,辅助扩展头标志位 Extended Flags (X) 扩展头标志位,表示当前扩展头特性,此处不展开 同样的,ID3v2 标签帧(Tag Frame) 的数据结构,在几个版本间也有一定差异。 对于 ID3v2.2 有 (注意版本): Params Range(bytes) Details Tag Frame Identifier 0x00~0x02 (3) 标记当前标签帧 ID,固定存储 对应类型的 ASCII 码,具体类型见后续表 Tag Frame Size 0x03~0x04 (2) 当前 ID3 标签帧 的数据内容长度,记为 X Bytes 不包括帧头部信息的字节数,即 头部 6 Bytes 例如:帧1 (30) Size = 30-6 = 24 Bytes = X Tag Frame Flags 0x05 (1) 标签帧标志位(位标记),有:有压缩,标记当前标签帧数据是否压缩,1000 0000 有加密,标记当前标签帧数据是否加密,0100 0000 有分组,标记当前标签帧属于一组分组,0010 0000 Tag Frame Grouping ID 0x06 (1) 标签帧分组标记,动态(可有可无)根据 Flags [有分组] 情况,如有分组,则记录分组 ID 分组 ID 相同的 标签帧,属于一组数据 Tag Frame Contents 0x05~+X (X) or 0x06~+X (X) 当前标签帧的实际数据,例如:\"A Lovely Song\" 对于 ID3v2.3 和 ID3v2.4 有 (注意版本): Params Range(bytes) Details Frame Identifier 0x00~0x03 (4) 标记当前标签帧 ID,固定存储 对应类型的 ASCII 码,具体类型见后续表 Frame Size 0x04~0x07 (4) 当前 ID3 标签帧 的数据内容长度,记为 X Bytes 不包括帧头部信息的字节数,即 头部 10 Bytes 例如:帧1 (30) Size = 30-10 = 20 Bytes = X Tag Frame Status Flags 0x08 (1) 标签帧状态标志位(位标记),有: 标签保留,如修改标签时是否保留此帧,1000 0000 文件保留,如修改文件时是否保留此帧,0100 0000 只读帧,标记当前标签帧是否只能读取,0010 0000 Tag Frame Format Flags 0x09 (1) 标签帧格式标志位(位标记),这里有区分, v2.3: 有压缩,标记当前标签帧数据是否压缩,1000 0000 有加密,标记当前标签帧数据是否加密,0100 0000 有分组,标记当前标签帧属于一组分组,0010 0000 v2.4: 有分组,标记当前标签帧属于一组分组,1000 0000 有压缩,标记当前标签帧数据是否压缩,0100 0000 有加密,标记当前标签帧数据是否加密,0010 0000 去同步,解决播放器解读 0xFF 值问题,0001 0000 原长度,标记该帧是否含有原数据长度,0000 1000 Tag Frame Grouping ID 0x0a (1) 标签帧分组标记,动态(可有可无) 根据 Flags [有分组] 情况,如有分组,则记录分组 ID 分组 ID 相同的 标签帧,属于一组数据 Tag Frame Data Length Indicator 0x0b~0x0d (4) or 0x0b~0x0e (4) 标签帧原数据长度,动态(可有可无) 根据 Flags [原长度] 情况,状态开启则记录数据原长 这一属性一般配合压缩使用,不包含自身 4 Bytes 例如:压缩前 20 Bytes,则该值记录 20 Bytes Tag Frame Contents 0x0a~+X (X) or 0x0b~+X (X) or 0x0e~+X (X) 当前标签帧的实际数据,例如:\"A Lovely Song\" 那么标签帧有哪些类型呢,或者说 ID3v2 的标签帧(Tag Frame),其数据首位 帧 ID 标签 都有哪些? 我们有下表: Tag Frame Type Details Title 标题标签,此类的内容记录标题字符串,固定存储 \"TIT2\" 对应的 ASCII 码 0x54495432 Artist 艺术家标签,此类的内容记录艺术家字符串,固定存储 \"TPE1\" 对应的 ASCII 码 0x54504531 Album 专辑标签,此类的内容记录专辑字符串,固定存储 \"TALB\" 对应的 ASCII 码 0x54414c42 Year 年份标签,此类的内容记录发行年份字符串,固定存储 \"YEAR\" 对应的 ASCII 码 0x59454152 Comment 评论标签,此类的内容记录评论或额外信息键值对字符串,固定存储 \"COMM\" 对应的 ASCII 码 0x434f4d4d Genre 流派标签,此类的内容记录当前音频流派字符串,固定存储 \"TCON\" 对应的 ASCII 码 0x54434f4e Track Number 音轨标签,此类的内容记录当前音轨数(即大多为曲目数),固定存储 \"TRCK\" 对应的 ASCII 码 0x5452434b Attached Picture 关联图片标签,此类的内容记录封面图数据,固定存储 \"APIC\" 对应的 ASCII 码 0x41504943 依具上述固定值,检测不同标签帧种类的内容信息,即可参考 FLAC 的类似元数据块类型举一反三,读取标签帧内容信息(Tag Frame Contents)。对 MP3 格式来说,标签帧内容多为编解码自设定,或着 较为存粹的数据内容(即非多级结构的纯数据值)。不再单独举例。 于是,一个完整的 MP3 文件构成,有如下图数据: 图 1-61 完整 MP3 音频格式文件的文件结构举例 显然,MP3 文件结构并不像本书之前介绍的种类中,包含一个文件头部 4 Bytes 的固定文件类型标记。这是因为,MP3 标签的标签头(Tag Header)和 MP3 数据帧的帧头(Frame Header)同步字(Sync Word),都足以表明当前文件为 MP3 格式。 所以,MP3 文件结构,从数据抽象的角度来看更为精炼。 不过,随着 先进音频编码(AAC [Advanced Audio Coding]) 格式的出现,现在主流的音频流媒体传输,如对音频压缩有需要的工程中,多数选择以 AAC 进行相关音频的硬件抽象层(HAL)封装。较少在流传输中采用 MP3,虽然 AAC 只是半开源。 至此,在详细介绍了MP3格式之后,我们基本了解了音频保存与还原过程。 历经音频的基础知识到声波和声音的三要素,再到声音的解构和数字化处理,以及音频的存储格式。我们终于对音频的各个方面都有了相对深入的认识。而音频相关的基本概念和格式属性,到这里,已经在本书中完成了系统性梳理。 为了帮助开发者们在后续实践中更好地应用这些知识,章节末尾,作者列出了 常用的音频相关开源库 作为本章的句号,供大家参考和使用: FLAC C/C++ Library. by Xiph.Org Foundation. https://xiph.org/flac/api/index.html LAME (LAME Ain't an MP3 Encoder). C/C++. http://lame.sourceforge.net/ MAD (MPEG Audio Decoder). C/C++. https://www.underbit.com/products/mad/ BASS (Basic Audio Stream System). C/C++. 
http://www.un4seen.com/ Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_1/Language/cn/References_1.html":{"url":"Chapter_1/Language/cn/References_1.html","title":"【参考文献】","keywords":"","body":"一、【参考文献】 [1] Fitz M P. Fundamentalsof Communications Systems[M]. 2007. [2] Obaidat M S, Boudriga N A. Fundamentals of performance evaluation of computer and telecommunication systems[M]. John Wiley & Sons, 2010. [3] Feynman R P, Leighton R B, Sands M. The feynman lectures on physics; vol. i[J]. American Journal of Physics, 1965, 33(9): 750-752. [4] Stevens S S. A scale for the measurement of a psychological magnitude: loudness[J]. Psychological Review, 1936, 43(5): 405. [5] O'Shaughnessy, Douglas. Speech Communications: Human and Machine[M]. 1999. [6] Slaney M. Auditory toolbox: A matlab toolbox for auditory modeling work[J]. Interval Res. Corp. Tech. Rep, 1998, 10: 1998. [7] Terminology A. American national standard[J]. ANSI S1, 2006: 1-1994. [8] Azamian, Mohammadali & Kabir, Ehsanollah. (2019). Synthesizing the note-specific atoms based on their fundamental frequency, used for single-channel musical source separation. Multimedia Tools and Applications. 78. 10.1007/s11042-018-7060-8. [9] Grove G, Sadie S, Tyrrell J, et al. The new Grove dictionary of music and musicians[J]. (No Title), 1980. [10] Capecchi D. Leonhard Euler between mathematics and natural philosophy: An introduction to natural science Anleitung zur Naturlehre[J]. Handbook of the History and Philosophy of Mathematical Practice, 2020: 1-53. [11] Cohn R. Introduction to neo-riemannian theory: a survey and a historical perspective[J]. Journal of Music Theory, 1998: 167-180. [12] Suzuki Y, Takeshima H. Equal-loudness-level contours for pure tones[J]. The Journal of the Acoustical Society of America, 2004, 116(2): 918-933. [13] Fletcher H, Munson W A. Loudness, its definition, measurement and calculation[J]. Bell System Technical Journal, 1933, 12(4): 377-430. [14] International Organization for Standardization. Acoustics: Normal Equal-loudness-level Contours[M]. ISO, 2023. [15] Suzuki Y, Takeshima H, Kurakata K. Revision of ISO 226\" Normal Equal-Loudness-Level Contours\" from 2003 to 2023 edition: The background and results[J]. Acoustical Science and Technology, 2024, 45(1): 1-8. [16] Smith, Steven W. (1997). The Scientist and Engineer's Guide to Digital Signal Processing. California Technical Pub. pp. 177–180. ISBN 978-0966017632. [17] Toole F. The measurement and calibration of sound reproducing systems[J]. Journal of the Audio Engineering Society, 2015, 63(7/8): 512-541. [18] Olive S, Welti T. The relationship between perception and measurement of headphone sound quality[C]//Audio Engineering Society Convention 133. Audio Engineering Society, 2012. [19] Olive S, Welti T. The relationship between perception and measurement of headphone sound quality, from his blog, https://seanolive.blogspot.com/2013/04/the-relationship-between-perception-and.html, Monday, April 22, 2013. [20] AES11-2009 (r2019): AES recommended practice for digital audio engineering - Synchronization of digital audio equipment in studio operations, Audio Engineering Society, https://www.aes.org/tmpFiles/aessc/20240506/aes03-set-2009-r2019-i.pdf, 2009 [21] HUFFMAN, D. A. 1952. A method for the construction of minimum-redundancy codes. In Proceedings of the Institute of Electrical and Radio Engineers 40, 9 (Sept.), pp. 1098-1101. [22] Connell J B. 
A huffman-shannon-fano code[J]. Proceedings of the IEEE, 1973, 61(7): 1046-1047. [23] Rissanen J, Langdon G G. Arithmetic coding[J]. IBM Journal of research and development, 1979, 23(2): 149-162. [24] O'Shaughnessy D. Linear predictive coding[J]. IEEE potentials, 1988, 7(1): 29-32. [25] Ramamoorthy V, Jayant N S. Enhancement of ADPCM speech by adaptive postfiltering[J]. AT&T Bell Laboratories technical journal, 1984, 63(8): 1465-1475. [26] Roberts Family. FLAC Metadata Structure [EB/OL]. [2023-10-23]. https://www.the-roberts-family.net/metadata/flac.html. [27] Theile, Günther; Stolle, Gerhard; 1992; MUSICAM-Surround: A Universal Multichannel Coding System Compatible with ISO 11172-3 PDF; Institut fur Rundfunktechnik, Munich, Germany; Paper 3403; Available from: https://aes2.org/publications/elibrary-page/?id=6731 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Apex_2_Introduce.html":{"url":"Chapter_2/Language/cn/Apex_2_Introduce.html","title":"二、色彩的运用与存储","keywords":"","body":"二、色彩的运用与存储 引言 自人类对世界有认知开始,从寄思于物的艺术创作,日常生活的打扮穿着,再到科学研究对物理规律的探索,色彩始终伴随左右。什么是色彩?色彩是如何被应用到视觉工程的? 本章节主要整理说明了,部分关键光学与色彩学概念的应用和推导。通过对当代计算机图像有关颜色处理发展史的梳理,以期为工程上应用于单一图像处理、色彩权衡对比等工作,和理论上深入理解图像规格标准迭代及原理,提供必要知识图谱。 图像本身是颜色的载体,因此对图像的讨论,也就是对色彩(颜色)的讨论。 关键字:色彩基础、色彩空间、色彩格式、配色函数、色度、色温 目录 2.1 色彩基础 2.2 颜色三要素(Three Elements of Color) 2.2.1 色调(Hue) 2.2.2 饱和度(Saturation) 2.2.3 光亮度(Luminance) 2.3 色彩的衡量 2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化 2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space) 2.3.3 经典三原色函数(Trichromatic Primaries Functions) 2.3.4 经典三刺激函数(Tristimulus Values Functions) 2.3.5 现代色彩体系(Modern Color System) 2.4 色彩的对比 2.4.1 色域(Color Gamut ) 2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram) 2.4.3 色差(Chromatic Aberration) 2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature) 2.4.5 标准光源(Standard Illuminants)& 白点(White Point) 2.4.6 显色指数(Color Rendering Index) 2.5 经典色彩空间(Classical Color Space) 2.5.1 光学三原色色彩空间(RGB) 2.5.2 颜料三原色色彩空间(CMY / CMYK ) 2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space) 2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space) 2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space) 2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* Color Space) 2.5.7 颜色三要素色彩空间(HSV / HSI / HSL) 2.6 色彩的存储 2.6.1 色彩格式(Color Format)与色彩存储 2.6.2 RGB 体系色彩格式 2.6.3 YUV 体系色彩格式 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_1.html":{"url":"Chapter_2/Language/cn/Docs_2_1.html","title":"2.1 色彩基础","keywords":"","body":"2.1 色彩基础 1666年,艾萨克·牛顿(Isaac Newton,1642 - 1726) 通过光的色散实验,发现了太阳光可以分解成依次为红、橙、黄、绿、蓝、靛、紫的单色光,并可以由单色光复合而成白光 ,由此提出了 牛顿颜色原理(Newton's theory of colour)。并于 1705 年结合他在光学领域的其他发现与猜想,编著为《光学》[1]。在此之前,亚里士多德提出的白光为一种纯粹光源才是学界共识。色散试验的伟大,在于为人们揭示了人类视觉感知色彩形式的光学物理特性。人们首次接触到了光谱(Spectrum)概念。此后,人们对光谱进行了大量基于颜色观测的研究,并逐步完成了奠基色彩学(Color Science)的理论归纳总结。 人们发现,如果我们将由红到紫的 可见光谱(380nm - 780nm) 首尾相连,那么就能够得到一个 360 度的连续可分色表。这个表被称之为色轮(Color Wheel) [2]。色轮中, 0 度表示红色,360 度表示紫色。环的圆心,即正中央则为纯白。 在此基础上,色彩学就颜色的合成,产生了三大理论:加法混合论、减法混合论、中性混合论。从物理意义上讲,加法混合论代表着自然界中自发光物体的光源色彩混合,减法混合论代表着反光物体反射光色彩混合情况,中性混合论依赖人类生理特征进行的色彩还原形式。加法混合论和减法混合论分别在光学领域和艺术领域,得到了广泛的应用。所以,加法混合论所采用的红(Red)、绿(Green)、蓝(Blue)三基色被称为光学三原色(RGB),减法混合论所采用的深红(Cyan)、青(Magenta)、黄(Yellow)三基色被称为颜料三原色(CMY)。 图 2-1 色轮(Color Wheel)与颜色(Vienna,1772)[2] 1802年,托马斯·杨(Thomas Young,1773 - 1829) 在对可见光谱范围内光线波长测量时,发现人眼对红绿蓝三色光波极为敏感。杨确定了人眼中存在 3 种能够感知不同波长的光感神经纤维,佐证了光学三原色的生理基础,并粗略的测定了人的三色感知范围 [3]。 1850年,赫尔曼·冯·亥姆霍兹(Hermann 
von Helmholtz,1821 - 1894) 在杨的研究基础上,经实验确定了杨理论(Young's theory)中所提及三色感知的光感神经纤维,就是后续被我们所熟知的视锥细胞(cone cells),并对三类视锥细胞敏感的红、绿、蓝三色所对应光波波长进行了重测定。由此,进一步推动了三色理论(trichromatic theory)雏形的形成 [4] [5]。人们为了纪念两位的贡献,也将三色理论称为 杨-亥姆霍兹理论(Young–Helmholtz theory)。 图 2-2 赫尔曼·冯·亥姆霍兹的三色理论,关于视锥细胞感知范围的手稿 受限于当时的科研器材水平,亥姆霍兹很遗憾的没有确切的办法,测量到三类视锥细胞可感知的确切波长范围。不过现代医学领域的研究,已相对准确的得到了答案。我们的眼睛基于此三种颜色的波形叠加组合,形成了能够覆盖从紫到红(360nm - 780nm)的 312nm - 1050nm 可观测波长范围 [6]。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_2.html":{"url":"Chapter_2/Language/cn/Docs_2_2.html","title":"2.2 颜色三要素(Three Elements of Color)","keywords":"","body":"2.2 颜色三要素(Three Elements of Color) 1853 年,赫尔曼·格拉斯曼(Hermann Günter Grassmann,1809 - 1877) 基于三色理论,取一组红绿蓝三色光源,尝试还原其他类型视觉单色(monochromat)的实验。这就是著名的光谱的 色度特性实验 (The Colorimetric Properties of the Spectrum) [7]。实验过程经过对红绿蓝三色灯源的水平位置调整,来间接的调整了三色最终组合情况。对比则选用了契合目标结果的参考光源。根据实验结果,格拉斯曼发现,确实可以用一个变权三元一次等式来对所有可见单色光源进行基于光学三原色(RGB)的合成。但是这样的合成是有条件的,对于部分特殊的颜色,例如 橄榄绿(Vibrantgreen),就需要将红色光源摆放到隔板左边靠近对比光源的位置,才能使目标色在目标采样区域合成出来。他将这种现象称为 负色匹配('negative' colors matching)。而在此次试验中,格拉斯曼得到大量需要使用 1 个或 2 个 负色才能匹配的单色。这种现象的出现,在于当时的物理实验设备并不能很好的找到,合适作为人眼感知波峰基准值的光学三原色(RGB)波长,导致需要通过较多负拟合的方式,来人为的处理三相波叠加的还原它色问题。不过这并不影响实验有奠基理论产出。 图 2-3 赫尔曼·格拉斯曼(Hermann Günter Grassmann,1809 - 1877) 1854年,格拉斯曼结合 光谱色度特性试验 的结果,在牛顿颜色混合理论的基础上,总结归纳出了 格拉斯曼颜色定律(Grassmann's law),奠定了光学理论下现代色度学基础 [8] 。定律包含五条,分别为: 1)补色律,指任何一种颜色都有另一种同它混合产生白和灰的颜色; 2)间色律,指混合任何两种非补色便可产生一种新的混合色或介于两者之间的中间颜色; 3)代替律,指任何不同颜色混合产生的颜色可相互替代; 4)相加律,指混合色的总光亮度为组成混合色的各颜色光亮度的总和; 5)混合律,人的视觉只能分辨颜色的色调、光亮度、饱和度三种变化。 这些规律仅适用于色光的加法混合理论。即在基色体系中,只适用于光学三原色(RGB)。格拉斯曼在规律中,首次提出了 色调(Hue)、饱和度(Saturation)、光亮度(Luminance) 的重要性,这三个属性继而被称为 颜色的三要素(Three Elements of Color) [9] 。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_2_1.html":{"url":"Chapter_2/Language/cn/Docs_2_2_1.html","title":"2.2.1 色调(Hue)","keywords":"","body":"2.2.1 色调(Hue) 色调(Hue) 也被称为色相,指颜色实际种类。换一种角度来说。色调是对人眼可观察颜色的基础分类。通过色调,结合其他两个颜色的三要素,我们能够准确的描述自然界中能够形成的任意混合色。 格拉斯曼在混合律 中,以色轮作为环形颜色索引表,对色轮上颜色进行了基于几何弧度的划分,使颜色的色调能够用其与相对基准色的逆时针夹角表示。色调的作用在于,可以将任意两个环上选定颜色的权重看作物理重量,利用两点连线后线段质量中心与圆环圆心连线的延长线,来推算最终结果。 图 2-4 格拉斯曼的混合律颜色推算演示 图中,O 代表理想白点(White Point),D 代表混合后对应单色; 假设我们以选定颜色与0度的夹角,对应的弧度表示该颜色本身。现有两个颜色,分别为 (RA,GA,BA)(R_{A},G_{A},B_{A})(RA,GA,BA) 和 (RB,GB,BB)(R_{B},G_{B},B_{B})(RB,GB,BB) ,那么取权重 (WAC,WCB)(W_{AC},W_{CB})(WAC,WCB) , W=WAC+DCB=1W = W_{AC} + D_{CB} = 1W=WAC+DCB=1 。对于 D 点的颜色 (RD,GD,BD)(R_{D},G_{D},B_{D})(RD,GD,BD) 就有: RD=WACRA+WCBRBGD=WACGA+WCBGBBD=WACBA+WCBBB {\\displaystyle \\begin{aligned} R_{D} = W_{AC} R_{A} + W_{CB} R_{B} \\\\ G_{D} = W_{AC} G_{A} + W_{CB} G_{B} \\\\ B_{D} = W_{AC} B_{A} + W_{CB} B_{B} \\end{aligned} } RD=WACRA+WCBRBGD=WACGA+WCBGBBD=WACBA+WCBBB 混合律是对加法混合论的一次成功拓展,此时已经隐约可以看到最初色度图的理论雏形了。不过这时对颜色的索引还停留在比较初级的阶段。现代学界和工业界已普遍采用 色度(Chromaticity),配合 颜色空间(Color Space),来代替描述颜色种类。色调更多的被用于艺术和设计领域。 另一方面,随着 现代色彩体系(Modern Color System) 的在细分领域的逐步分化,部分颜色空间的规格出发点,也对色调(Hue)和饱和度(Saturation)代表的概念本身进行了充分的抽象,形成了诸如 LAB、LUV 和 颜色三要素(HSL)等经典的色彩空间方案。为当代计算机工业体系中,艺术设计、数据传输和工程计算方面的贯通,提供了较大的帮助(可参见后文 2.5 经典色彩空间 )。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_2_2.html":{"url":"Chapter_2/Language/cn/Docs_2_2_2.html","title":"2.2.2 饱和度(Saturation)","keywords":"","body":"2.2.2 饱和度(Saturation) *饱和度 是指颜色的浓淡程度。以其对比标准的不同,被区分为 光学饱和度(Colorfulness) 和 
感官饱和度(Saturation) [10]。光学饱和度多用于工程,感官饱和度则多用于艺术设计中。 光学饱和度指标,被定义为标准白点与实际颜色的强度分量与白点到其纯色分量的长度比; 感官饱和度指标,被定义为一个区域的颜色与其当前亮度的充盈配比; 从定义的角度来看,显然感官饱和度的主观成分较大。虽然色彩的光学饱和度和感官饱和度在概念上面存在较大差异。但是实际工程实践中,这两个通常被混为一谈(虽然这么做并不严谨)。工程师们经常以光学饱和度(Colorfulness)为主,将两个概念统称为饱和度(Saturation)。因此,我们这里使用的饱和度,即代指光学饱和度(Colorfulness)[11]。 在描述的格拉斯曼颜色推算过程中,我们提到过。将其单独抽出来看: 图 2-5 格拉斯曼的饱和度定义说明 其中,D点就是推算颜色 (RD,GD,BD)(R_{D},G_{D},B_{D})(RD,GD,BD) 的最大饱和度,O点则是纯白光 OpureO_{pure}Opure 。 OC 代表白色分量强度,记为 DaD_{a}Da ; CD 代表纯色分量强度,记为 DbD_{b}Db ; 则, D=Da+Db=1D = D_{a} + D_{b} = 1D=Da+Db=1 ,记为总强度。 假设 C点的颜色为 (RC,GC,BC)(R_{C},G_{C},B_{C})(RC,GC,BC),我们就有: RC=DaOpure+DbRD=(1−Db)Opure+DbRDGC=DaOpure+DbGD=(1−Db)Opure+DbGDBC=DaOpure+DbBD=(1−Db)Opure+DbBD {\\displaystyle \\begin{aligned} R_{C} = D_{a} O_{pure} + D_{b} R_{D} = (1-D_{b}) O_{pure} + D_{b} R_{D} \\\\ G_{C} = D_{a} O_{pure} + D_{b} G_{D} = (1-D_{b}) O_{pure} + D_{b} G_{D} \\\\ B_{C} = D_{a} O_{pure} + D_{b} B_{D} = (1-D_{b}) O_{pure} + D_{b} B_{D} \\end{aligned} } RC=DaOpure+DbRD=(1−Db)Opure+DbRDGC=DaOpure+DbGD=(1−Db)Opure+DbGDBC=DaOpure+DbBD=(1−Db)Opure+DbBD 而 DbD_{b}Db 就是饱和度 SSS 。整个格拉斯曼颜色混合律就可以用一个公式表示了: C=(1−S)Opure+S(WACRA+WCBRB)=Opure+S(WACRA+WCBRB−Opure) C = (1-S) O_{pure} + S (W_{AC}R_{A}+W_{CB}R_{B}) = O_{pure} + S (W_{AC}R_{A}+W_{CB}R_{B} - O_{pure}) C=(1−S)Opure+S(WACRA+WCBRB)=Opure+S(WACRA+WCBRB−Opure) 如果记白点 O 为无穷小(0)。那么整个式子就可以简化为: C=S(WACRA+WCBRB)=S⋅D C = S (W_{AC}R_{A}+W_{CB}R_{B}) = S \\cdot D C=S(WACRA+WCBRB)=S⋅D 在已知白点(White Point)和选定色的情况下。依据格拉斯曼饱和度取值,人们可以计算得期望的渐变色泽,从而快速调色。 同 色调(Hue) 一样, 饱和度(Saturation) 也处于简单系统中,不方便体系下的量化。因此,饱和度的概念在现代学界和工业界中,同样也普遍被色度(Chromaticity)配合颜色空间(Color Space)代替表示,以便于工程量化计算。 现代色彩体系(Modern Color System) 中的部分方案,对饱和度概念进行了有效的利用转换(可参见后文 2.5.7 颜色三要素色彩空间 )。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_2_3.html":{"url":"Chapter_2/Language/cn/Docs_2_2_3.html","title":"2.2.3 光亮度(Luminance)","keywords":"","body":"2.2.3 光亮度(Luminance) 光亮度(Luminance) 也被称为辉度,是指固定光所照射单位平面面积光照区域的物理发光强度,单位是尼特( NitNitNit ),代表烛光每立方米( cd/m2cd/m^2cd/m2 ,candela per square metre)。光亮度属于光度学(Luminosity)概念。区别于亮度(Brightness)这种用来形容人生理光强直接感受的主观描述,光亮度是从可见光谱范围计量的物理量。 光亮度的计算依赖于发光强度度量。而 发光强度(Luminous Intensity) 则是用于表示光源给定方向上单位立体角内光通量的物理量,又被称为光强或光度,单位是烛光( cdcdcd , candelacandelacandela )。 如果记光亮度为 LvL_{\\mathrm {v}}Lv ,发光强度为 IvI_{\\mathrm {v}}Iv ,那么两者单位间的关系为 1 Nit=1 cd/m2 1 \\ Nit = 1 \\ cd/m^2 1 Nit=1 cd/m2 光亮度的测量方法在格拉斯曼时期,并没有太好的量化标准,因此更多的是作为一个参数来配合其他要素进行颜色描述的。现如今,对于光亮度的国际统一测量标准如下图所示: 图 2-6 光亮度测量实验与关键变量示意图 其中, 记 Σ\\SigmaΣ 代表光源,SSS 代表接受光线的物体照射表面, 记 dΣ{d\\Sigma}dΣ , 代表发光源上包含到达照射表面指定定向光线出发点的无穷小面积, 记 dSdSdS 代表照射表面上包含指定出发点的光源定向光线照射目标点的无穷小面积, 记 dΩΣd\\Omega_\\SigmadΩΣ , 代表光线出发点与 dSdSdS 所构锥体立体角(Solid Angle)的球面度(sr: Steradian), 记 dΩSd\\Omega_SdΩS , 代表光线接受点与 dΣd\\SigmadΣ 所构锥体立体角(Solid Angle)的球面度(sr: Steradian), 记 nΣn_\\SigmanΣ 代表 dΣd\\SigmadΣ 的法向量, θΣ\\theta_\\SigmaθΣ 代表 nΣn_\\SigmanΣ 与指定定向光线的夹角, 记 nSn_SnS 代表 dSdSdS 的法向量, θS\\theta_SθS 代表 nSn_SnS 与指定定向光线的夹角, 如果取国际通用单位制,且光线在传播过程中经过的介质为无损介质的话,那么就存在如下的光亮度计算公式: LvΣ=d2ΦvdΣdΩΣcosθΣ=d2ΦvdSdΩScosθS=LvS {\\displaystyle L_{\\mathrm {v}_{\\Sigma } }={\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {v} }}{\\mathrm {d} \\Sigma \\,\\mathrm {d} \\Omega _{\\Sigma }\\cos \\theta _{\\Sigma }}}={\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {v} }}{\\mathrm {d} S\\,\\mathrm {d} \\Omega _{S}\\cos \\theta _{S}}}=L_{\\mathrm {v}_{\\mathrm {S}}}} LvΣ=dΣdΩΣcosθΣd2Φv=dSdΩScosθSd2Φv=LvS 取出入面积及立体角相等,记同等出入面积为 AAA ,立体角为 Ω\\OmegaΩ ,照射角为 θ\\thetaθ ,则有: dΩ=dΩΣ=dΩSdθ =dθΣ =dθSdA=dΣ =dS 
{\\displaystyle \\begin{aligned} &{d} \\Omega = {d} \\Omega _{\\Sigma } = {d} \\Omega _{S} \\\\ &{d} \\theta \\ = {d} \\theta _{\\Sigma }\\ = {d} \\theta _{S} \\\\ &{d} A = {d} {\\Sigma }\\ \\ = {d} {S} \\\\ \\end{aligned} } dΩ=dΩΣ=dΩSdθ =dθΣ =dθSdA=dΣ =dS Lv=d2ΦvdAdΩcosθ L_{\\mathrm {v}} = {\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {v} }}{\\mathrm {d} A\\,\\mathrm {d} \\Omega \\cos \\theta }} Lv=dAdΩcosθd2Φv 公式中, 以 Φv\\Phi _{\\mathrm {v} }Φv 代表 光通量(Luminous Flux) , 单位是流明( lmlmlm ,lumenlumenlumen ),是指标度可见光对人眼的视觉刺激程度,是光度学下的人眼视觉特性导出量(规格量)。1 cd1\\ cd1 cd 点光源在单位立体角( 1 sr1\\ sr1 sr )下的光通量为 1 lm1\\ lm1 lm , 即 1 lm=1 cd⋅sr1 \\ lm = 1 \\ cd \\cdot sr1 lm=1 cd⋅sr 。光通量计算公式是: Iv=dΦvdΩ→Φv=∫ΣIv⋅dΩ {I _{\\mathrm {v}}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {d} \\Omega}} \\rightarrow {\\Phi _{\\mathrm {v}}} = \\int _{\\Sigma } I_v \\cdot {d} \\Omega Iv=dΩdΦv→Φv=∫ΣIv⋅dΩ 如果记 EvΣE_{\\mathrm {v}_{\\Sigma }}EvΣ 为单位光源面积发出的光通量即 光出射度(Luminous Exitance),记 EvSE_{\\mathrm {v}_{S }}EvS 为单位受照面积接受的光通量即 光照度(Illumination)。那么在无损截止情况下 EvΣ=EvSE_{\\mathrm {v}_{\\Sigma }} = E_{\\mathrm {v}_{S }}EvΣ=EvS ,我们记为 EvE_{\\mathrm {v}}Ev 。被称为光照射度,单位是勒克斯( luxluxlux , lxlxlx )。 1 lx=1 lm/m21 \\ lx = 1 \\ lm/m^21 lx=1 lm/m2 有: Ev=dΦvdA E_{\\mathrm {v}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {d} A}} Ev=dAdΦv 则 d2Φv{\\mathrm {d} ^{2}\\Phi _{\\mathrm {v} }}d2Φv 代表由 dΣd\\SigmadΣ 发出的光线,在 dΩΣd\\Omega_\\SigmadΩΣ 为球面度的立体角下的全方向光通量,即: d2Φv=dEv⋅dA d^{2}\\Phi _{\\mathrm {v} } = dE_{\\mathrm {v}} \\cdot dA d2Φv=dEv⋅dA 那么整个公式就可以化简为: Lv=dEvdΩ⋅cosθ {\\displaystyle L_{\\mathrm {v} }={\\frac {\\mathrm {d} E _{\\mathrm {v} }}{d \\Omega \\cdot \\cos \\theta }}} Lv=dΩ⋅cosθdEv 这个公式就是我们在光度学角度,用来计算物体 理想亮度的标准公式。 如果需要计算介质造成的损耗,那么公式需要引入 光展量(Etendue),即在材质折射率下的光束所通过的面积和光束所占有的立体角的积分。我们计 GGG 代表光展量, nnn 代表折射率,则光展量公式: G=∫Σ∫SdG→dG=n2⋅dAdΩcosθ {\\displaystyle G=\\int _{\\Sigma }\\!\\int _{S}\\mathrm {d} G} \\rightarrow {\\mathrm {d}G }=n^{2} \\cdot {\\mathrm {d} A\\,\\mathrm {d} \\Omega \\cos \\theta } G=∫Σ∫SdG→dG=n2⋅dAdΩcosθ 对于无损介质,折射率 n=1n=1n=1 。因此,整个亮度公式在知道光展量的情况下,就可以简化为: Lv=n2dΦvdG=dΦvdG∣n=1 {\\displaystyle L_{\\mathrm {v} }=n^{2}{\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {d} G}}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {d} G}}|_{n=1} Lv=n2dGdΦv=dGdΦv∣n=1 光亮度不会影响物体的色彩信息,而仅代表物体本身发光的强度。决定物体本身颜色信息的,则是物体所具有的色调和饱和度属性。 光度单位体系是一套反映视觉亮暗特性的光辐射计量单位,被选作基本量的不是光通量而是发光强度,因此这套公式只适用于可见光范围。对光的更为客观的描述则依赖于辐射度学的相关概念。辐射度学从黑体辐射与能量密度的学角度出发更换了物理学参照物,将光度学系统提出的度量理念适用范围,扩展到了包含长短波的完整电磁波段。进而间接的促成了色温概念在色彩学的应用。这个会在后文中有关颜色度量的章节额外说明。 由于光亮度的这种自成体系的特性。在颜色的三要素的应用中,它通常被分离单独处理。所以,现代工程体系中不会直接的应用光度学里的光亮度公式,而是采用 辐射亮度(Radiance) 的科学物理量,结合 色温(Color Temperature),或 色彩空间(Color Space)如 HSL 的色彩强度(Intensity) 的自设定参数,等概念平替。 至此,色彩学的经典理论:格拉斯曼颜色定律,所引申出的重要配参已准备完毕。问题随之而来: 我们能否将描述自然现象的参考标准,应用在有局限的实际生产活动中。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_3.html":{"url":"Chapter_2/Language/cn/Docs_2_3.html","title":"2.3 色彩的衡量","keywords":"","body":"2.3 色彩的衡量 光学三要素的出现,让人们在对颜色的客观描述上,能够凭借传统色彩学体系内的参数,进行有限程度的量化。但这并不足以适用于除科学计算和测定外的批量工程作业。毕竟在算力限定的条件下,我们不可能对每一寸光的每一个物理量都进行独立的计算。同时,大量繁琐且模糊的设定也 无法便于简化,而我们也需要获得 能够将感官上的体验和客观上的物理值联系起来的方法论。 如果能够将光波本身和颜色建立起直接的可量化的转换关系,就能够解决表示上的问题了。这就是 配色函数 的由来。 于是,首先需要做的是 获得科学证明,以 提供函数构建理论上的支持。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_3_1.html":{"url":"Chapter_2/Language/cn/Docs_2_3_1.html","title":"2.3.1 
辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","keywords":"","body":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化 辐射亮度(Radiance) 也被称为辐亮度,是用于描述指定辐射源,单位方向上辐射强弱的客观物理量。 辐射度学(Radiometry) 和 光度学(Luminosity),都是对电磁辐射能量进行计量的学科。不同之处在于,辐射度学是物理电磁波能量角度的客观计量,光度学是人眼视觉的主观因素后的相应计量。因此,相比于之前在颜色三要素里提及的 光亮度(Luminance),辐射度学的 辐射亮度(Radiance) 其实才更贴近光亮度的物理本质。 而人们是如何通过辐射度学对能量的定义,将光的波长和颜色对应起来的呢?这就需要提到色温的概念了。 色温(Color Temperature) 是由物体本身的黑体辐射决定的一个物理量,计量单位为 K(开尔文温度)。它被定义为,绝对黑体从绝对零度(-273.15℃)开始加温后所呈现出的颜色。由于颜色本身就是一个主观量,而颜色又是由光的波长决定的,不同的色温本质上对应的是不同波长的光。所以,如果我们将色温这个纯粹的辐射度学概念延伸应用到了色彩领域,就能利用色温代表意义本身,建立起两个体系之间的联系了。 辐射度学与光度学的单位转换 同光亮度,辐射亮度的计算也需要依赖于辐射强度度量。 辐射强度(Radiant Intensity) 是用于表示光源给定方向上单位立体角内辐射通量的物理量,单位是瓦特每球面度( W/srW/srW/sr )。辐射通量(Radiant Flux)是指单位时间内通过某一截面的辐射能,位是瓦特( WWW )。 记辐射亮度为 LeL_{\\mathrm {e}}Le ,辐射强度为 IeI_{\\mathrm {e}}Ie ,辐射通量为 Φe\\Phi _{\\mathrm {e}}Φe ,辐射照射度 EeE _{\\mathrm {e}}Ee 。那么四者间的关系为: Ie=dΦedΩ→Φe=∫ΣIe⋅dΩEe=dΦedA→d2Φe=dEe⋅dALe=d2ΦedAdΩcosθ=dEedΩ⋅cosθ {\\displaystyle \\begin{aligned} &{I _{\\mathrm {e}}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {e} }}{\\mathrm {d} \\Omega} \\rightarrow \\Phi _{\\mathrm {e}}} = \\int _{\\Sigma } I_e \\cdot {d} \\Omega \\\\ &E_{\\mathrm {e}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {e} }}{\\mathrm {d} A}} \\rightarrow \\mathrm {d} ^{2}\\Phi _{\\mathrm {e} } = \\mathrm {d} E_{\\mathrm {e}} \\cdot \\mathrm {d} A \\\\ & L_{\\mathrm {e}} =\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {e} }}{\\mathrm {d} A\\,\\mathrm {d} \\Omega \\cos \\theta } =\\frac {\\mathrm {d} E _{\\mathrm {e} }}{d \\Omega \\cdot \\cos \\theta } \\\\ \\end{aligned} } Ie=dΩdΦe→Φe=∫ΣIe⋅dΩEe=dAdΦe→d2Φe=dEe⋅dALe=dAdΩcosθd2Φe=dΩ⋅cosθdEe 公式中,辐射源面积为 AAA ,立体角为 Ω\\OmegaΩ ,照射角为 θ\\thetaθ ,概念基本等同光亮度公式同位参数。 显然,光亮度和辐射亮度的差异只在于参考系上。从有效范围上看,光亮度属于辐射亮度仅考虑可见光谱区域的特殊情况。为了使两个体系能够转换,1979年第十六届国际计量大会 上,人们对发光强度单位坎德拉进行了指定。现在我们说说的一单位坎德拉,即指代发光频率为 HzHzHz 的单色光,在垂直光源表面的定向单位幅角下,测量的辐射强度。即: 1 cd=1/683 W/sr=1 lm/sr → 1 W=683 lm 1 \\ cd = 1/683 \\ W/sr = 1 \\ lm / sr \\ \\ \\rightarrow \\ \\ 1 \\ W = 683 \\ lm 1 cd=1/683 W/sr=1 lm/sr → 1 W=683 lm 因此,记光辐转化率为 KKK ,单位为 lm/Wlm/Wlm/W ,则 KKK 、 Φe\\Phi _{\\mathrm {e}}Φe 与 Φv\\Phi _{\\mathrm {v}}Φv 存在两者之间的转换关系: Φv=K⋅ΦeK=683 lm/W {\\displaystyle \\Phi_v = K \\cdot \\Phi_e \\quad \\quad K = 683 \\ lm/W} Φv=K⋅ΦeK=683 lm/W 带入光亮度 LvL_{\\mathrm {v}}Lv 与辐射亮度 LeL_{\\mathrm {e}}Le 的公式,可得: Lv=K⋅Le {\\displaystyle L_{\\mathrm {v}} = K \\cdot L_{\\mathrm {e}} } Lv=K⋅Le 如此就可以通过 KKK 来完成,辐射度学和光度学间计量的转换了。 我们知道光度学中的不同颜色,本质是波长的不同。而不同的波长在辐射度学中,则代表为不同的能量密度。只要求得对应颜色光的能量密度,就能反向推算对应颜色光的波长了,进而可以将感知到的颜色用实际物理量标定。 借此,以主观感受的客观测量值,人为映射量化建立联系。 至于能量密度的测定,则可以经由物理学体系的黑体辐射定律揭示而出。 从色温到颜色 - 颜色的波长标定 色温(Color Temperature) 是由物体本身的黑体辐射决定的一个物理量,计量单位为 K(开尔文温度)。它被定义为,绝对黑体从绝对零度(-273.15℃)开始加温后,在辐射到达指定复合波情况时所具有的温度。 1900年在德国物理学会上,著名的德国物理学大师 马克思·普朗克(Max Karl Ernst Ludwig Planck,1858 - 1947),公布了自己在电磁波能量问题上的假设,这就是在物理学界影响深远的《论正常光谱中的能量分布》报告。报告的细部由同年普朗克发表的两篇论文组成,分别是《关于维恩频谱方程的改进论》(On an Improvement of Wien's Equation for the Spectrum) [23] 和《关于正常光谱中的能量分布的理论》(On the Theory of the Energy Distribution Law of the Normal Spectrum)[24] 。这两篇理论统一了之前由“紫外灾变”问题分割的,高频有效的维恩位移定律和低频有效的瑞利-金斯公式,并直接促成了量子理论的奠基和近代物理学革命。 记 λ\\lambdaλ 代表电磁波长,vvv 代表 λ\\lambdaλ 的频率, TTT 代表色温, ccc 为光速,普朗克黑体辐射定律(Planck's law|Blackbody radiation law) 的能量密度公式提出: uλ (λ,T)=8πhcλ5⋅1ehcλkT−1=4πc⋅Ie(v)=8πhv3c5⋅1ehvkT−1=uv (v,T) {\\displaystyle \\begin{aligned} u_{\\lambda }\\ (\\lambda,T) ={\\frac {8\\pi hc}{\\lambda^{5}}} \\cdot {\\frac {1}{e^{\\tfrac{hc} {\\lambda kT}}-1}} ={\\frac {4\\pi}{c}} \\cdot I_e (v) ={\\frac {8\\pi hv^3}{c^{5}}} \\cdot {\\frac {1}{e^{\\tfrac{hv} {kT}}-1}} ={u_{v }\\ (v,T)} 
\\\\ \\end{aligned} } uλ (λ,T)=λ58πhc⋅eλkThc−11=c4π⋅Ie(v)=c58πhv3⋅ekThv−11=uv (v,T) 公式中, ccc 为光速, 有 hhh 为 普朗克常数 取 (6.62607015⋅10−34) J⋅s(6.62607015 \\cdot 10^{-34})\\ J\\cdot s(6.62607015⋅10−34) J⋅s ,国际计量大会通过决议值, 有 kkk 为 玻尔兹曼常数 取 (1.380649⋅10−23) J/K(1.380649 \\cdot 10^{-23})\\ J/K(1.380649⋅10−23) J/K ,国际计量大会通过决议值, 当已知黑体辐射源,其单位立方体所含能量与光波长关系如下图所示: 图 2-7 黑体辐射强度与波长分布曲线示意图 图上能明显看到,当物体处于不同色温时,其黑体辐射的总能量被分配到了不同波长光波携带。最终辐射波的情况,则是由不同区段的波长叠加而成,其叠加的强度则和对应波长携带的能量强度正相关。我们取 360nm - 780nm 可见光谱(Visible Spectrum) 范围,那么上图就有如下的展示了: 图 2-8 可见光谱范围内黑体辐射与波长分布曲线示意图 显然,色温高于 5000k 的物体在短波段出现了极大的富集程度,色温低于 5000k 的物体则是长波较为密集。所以自然界中的高温物体在人眼观察中往往偏向蓝白色,相关色温低温的物体则多呈现橙红色。 记色温为 T0T_{0}T0 , T0T_{0}T0 对应的颜色为 C0C_{0}C0 光亮度 L0L_{0}L0 , C0C_{0}C0 对应可见光范围总辐射强度为 IeI_{e}Ie ,光强度 IvI_{v}Iv 。单位面积辐射能为 QQQ ,存在映射函数 Mapping(C0, L0)=QMapping(C_0,\\ L_0) = QMapping(C0, L0)=Q 。 据电磁波辐射能公式有: Q=Le⋅dA=1K⋅Iv⋅dΦvdA2cosθ⋅dA=∫360nm780nmuλ (λ,T0)⋅dλ≈∑360nm780nmuλ (T0)⋅λ {\\displaystyle \\begin{aligned} &Q = {L_e} \\cdot dA = {\\frac {1}{K}} \\cdot {I_v} \\cdot {\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {dA^2} \\cos{\\theta }}} \\cdot dA = \\int _{360nm} ^{780nm} u_{\\lambda }\\ (\\lambda,T_0) \\cdot {d} {\\lambda} \\approx \\sum _{360nm} ^{780nm} u_{\\lambda }\\ (T_0) \\cdot {\\lambda} \\end{aligned} } Q=Le⋅dA=K1⋅Iv⋅dA2cosθdΦv⋅dA=∫360nm780nmuλ (λ,T0)⋅dλ≈360nm∑780nmuλ (T0)⋅λ 取 1 sr1\\ sr1 sr 单位发光 1 lm1\\ lm1 lm 单位光通量,即 Iv=1 cdI_{v} = 1\\ cdIv=1 cd 。 假设所有区段的电磁波在传播方向上相同,且法线方向。则上式可化为: Q=1K⋅Lv⋅dA=1K⋅IvdA=∑360nm780nmuλ (T0)⋅λ → Q=Lv⋅∑360nm780nmuλIvλ⋅K=Lv⋅∑360nm780nmuλIeλ {\\displaystyle \\begin{aligned} &Q = {\\frac {1}{K}} \\cdot {L_v} \\cdot {dA} = {\\frac {1}{K}} \\cdot {\\frac {I_v}{dA}} = \\sum _{360nm} ^{780nm} u_{\\lambda }\\ (T_0) \\cdot {\\lambda} \\ \\ \\rightarrow \\ \\ Q = {L_v} \\cdot \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_v}} \\lambda \\cdot {\\mathrm K } = {L_v} \\cdot \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda \\\\ \\end{aligned} } Q=K1⋅Lv⋅dA=K1⋅dAIv=360nm∑780nmuλ (T0)⋅λ → Q=Lv⋅360nm∑780nmIvuλλ⋅K=Lv⋅360nm∑780nmIeuλλ 那么带入映射函数,我们就有: Mapping(C0,L0)=L0⋅∑360nm780nmuλIeλ=F(C0,L0) {\\displaystyle \\begin{aligned} &Mapping(C_0, L_0) = {L_0} \\cdot \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda = F(C_0, L_0) \\\\ \\end{aligned} } Mapping(C0,L0)=L0⋅360nm∑780nmIeuλλ=F(C0,L0) C0=Convert(∑360nm780nmuλIeλ)=F(∑360nm780nmuλIeλ) {\\displaystyle \\begin{aligned} &C_0 = Convert( \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda ) = F( \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda ) \\\\ \\end{aligned} } C0=Convert(360nm∑780nmIeuλλ)=F(360nm∑780nmIeuλλ) 可见,只要选取合适的转换函数 F(C)F(C)F(C) ,我们就可以将色温为 T0T_{0}T0 时对应的颜色,以 F(C0, L0)F(C_0,\\ L_0)F(C0, L0) 的形式表述到函数所在参考系中。因此,这个用于颜色匹配的转换函数 F(C)F(C)F(C) ,就被称为 配色函数(Color-Matching Functions)。 只要能找到适合的 F(C)F(C)F(C) 使颜色能够被统一的衡量,就能制定工业标准,正式开始现代化的工程实践了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_3_2.html":{"url":"Chapter_2/Language/cn/Docs_2_3_2.html","title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","keywords":"","body":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space) 配色函数(Color Matching Functions),又被称为 色匹配函数,狭义上是用来完成从物理计量到色彩学计量的转换函数的代称。广义上,我们将用来描述一个人为设定的色彩系统里,用于量化颜色的函数称为配色函数。 我们知道,脱离参考系的函数是没有意义的,于是色彩空间概念伴随而生。通过色彩空间,颜色能够被人为设置条件下的单一系统表达。 那么,什么是色彩空间呢? 
色彩空间(Color Space) 又被称为 色彩模型(Color Model),是对使用一组抽象参数结合配色函数(Color-Matching Functions),来表示颜色的数学系统的统称。色彩空间更多的是对学科理论的实践,我们可以将其看为对色彩学最为直观的规格应用。从设计出发点来看,色彩空间大体分为两类:设备相关(Device-Dependent) 色彩空间,和 设备无关(Device-Independent) 色彩空间。 设备相关(Device-Dependent)色彩空间,是指颜色的表达依赖于物理设备本身情况和指定主观参数的色彩空间。诸如:IBM RGB、CMY/CMYK,配色函数可表示颜色范围依赖设备本身性能。 设备无关(Device-Independent)色彩空间,是指一类不依赖于物理设备的客观描述色彩空间。诸如:CIE RGB、CIE XYZ、CIE 1960 LAB、CIE 1960 UCS、HSL、CIE 1964 UVW,设备的选取并不影响色彩空间范围内的颜色表示。 所以,色彩空间虽然是用来理解颜色概念的有力工具,但它本身可能并不客观。需要选定一个,能够统一无论主客的不同数学系统对颜色描述的,基础色彩空间。以此来系统化整个色彩模型体系。直接以光学三原色为基础的设备无关色彩空间,相对的能更好满足这一点,并在简化表达上具有无可替代的优势。基于此,经典三原色函数诞生了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_3_3.html":{"url":"Chapter_2/Language/cn/Docs_2_3_3.html","title":"2.3.3 经典三原色函数(Trichromatic Primaries Functions)","keywords":"","body":"2.3.3 经典三原色函数(Trichromatic Primaries Functions) 1921 年左右,威廉·大卫·赖特(W. David Wright,1906 - 1997) [26] 与 约翰·吉尔德(John Guild,1889 - 1976) [27] 分别对光学三原色的基本度量系数进行了更为科学的测定,并分别于1928年 、1932年以论文形式发表了自己的结果。这两个实验,为 CIE 经典三原色函数(Trichromatic Primaries Functions)标准 的制定提供了极为关键的帮助。 我们将代表不同可见光波长对人眼视锥细胞的刺激程度的函数,称为色感函数,也就是选取人眼为传感器的 光谱响应函数(SPF [Spectral Response Function])。由色感函数在可见光波段所构成的曲线,称为色感曲线。由实验所拟合的三原色的色感曲线,在 435.8nm(蓝)、 546.1nm(绿)、 700nm(红)处达到最大峰值,如下图: 图 2-9 CIE 1931 RGB 采用的三原色色感函数 CIE 在两者实验的基础上,确定了以 光谱功率分布(SPD [Spectral Power Distribution]) 为基准的混合波三色分离函数: R=∫0∞S(λ)r‾(λ)dλG=∫0∞S(λ)g‾(λ)dλB=∫0∞S(λ)b‾(λ)dλ {\\displaystyle \\begin{aligned} &{\\displaystyle R =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {r}}(\\lambda )\\,d\\lambda } \\\\ &{\\displaystyle G =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {g}}(\\lambda )\\,d\\lambda } \\\\ &{\\displaystyle B =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {b}}(\\lambda )\\,d\\lambda } \\\\ \\end{aligned} } R=∫0∞S(λ)r(λ)dλG=∫0∞S(λ)g(λ)dλB=∫0∞S(λ)b(λ)dλ 其中, 以 r‾(λ){\\overline {r}}(\\lambda )r(λ) 、 g‾(λ){\\overline {g}}(\\lambda )g(λ) 、 b‾(λ){\\overline {b}}(\\lambda )b(λ) 即为基准三原色实验测得的拟合结果的色感函数,存在关系: ∫0∞r‾(λ)dλ=∫0∞g‾(λ)dλ=∫0∞b‾(λ)dλ {\\displaystyle \\int _{0}^{\\infty }{\\overline {r}}(\\lambda )\\,d\\lambda =\\int _{0}^{\\infty }{\\overline {g}}(\\lambda )\\,d\\lambda =\\int _{0}^{\\infty }{\\overline {b}}(\\lambda )\\,d\\lambda } ∫0∞r(λ)dλ=∫0∞g(λ)dλ=∫0∞b(λ)dλ 有 S(λ)S(\\lambda )S(λ) 为目标波长 λ\\lambdaλ 的光谱功率分布函数: S(λ)=Le(λ)θ=2∘≈d2Φe(λ)dAdΩ=dEe(λ)dΩ {\\displaystyle S(\\lambda) = L_{\\mathrm {e}}(\\lambda)_{\\theta=2^{\\circ}} \\approx {\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {e} }(\\lambda)}{\\mathrm {d} A\\,\\mathrm {d} \\Omega }} ={\\frac {\\mathrm {d} E _{\\mathrm {e} }(\\lambda)}{d \\Omega }} } S(λ)=Le(λ)θ=2∘≈dAdΩd2Φe(λ)=dΩdEe(λ) SPD 公式式中,LeL_{\\mathrm {e}}Le 为辐射亮度, Φe\\Phi _{\\mathrm {e}}Φe 为辐射通量为, EeE _{\\mathrm {e}}Ee 为辐射照射度。 通过这几个属于 辐射度学(Radiometry) 中的可被测量物理量,指定波长 的光线,就能被相对化表示为: Ray(λ)=C(R,G,B) Ray(\\lambda)= C(R,G,B) Ray(λ)=C(R,G,B) 由于 CIE RGB 所采用的改进后的配色实验,仍然存在亥姆霍兹配色实验里就存在的红光波段的负色匹配。 因此还需要进一步改进才能用于工业应用。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_3_4.html":{"url":"Chapter_2/Language/cn/Docs_2_3_4.html","title":"2.3.4 经典三刺激函数(Tristimulus Values Functions)","keywords":"","body":"2.3.4 经典三刺激函数(Tristimulus Values Functions) CIE 在 1931 年同年提出 CIE XYZ 色彩空间,尝试通过人为设计的色感函数 [12] [13],来避 RGB 的 负色匹配 问题。为了区别于 CIE RGB 中,通过实验测定拟合而得的三原色色感函数。我们将新的函数称为 CIE 三刺激函数(Tristimulus Values Functions),用来代替原有 r‾(λ){\\overline {r}}(\\lambda )r(λ) 、 g‾(λ){\\overline 
{g}}(\\lambda )g(λ) 、 b‾(λ){\\overline {b}}(\\lambda )b(λ) ,记为 x‾(λ){\\overline {x}}(\\lambda )x(λ) 、 y‾(λ){\\overline {y}}(\\lambda )y(λ) 、 z‾(λ){\\overline {z}}(\\lambda )z(λ) 。三个刺激函数对应的刺激曲线如下图: 图 2-10 CIE 1931 XYZ 采用的三原色色感函数 CIE 在三个刺激函数为基准下,确定了的不同波长光的三刺激值分离函数: X=∫0∞S(λ)x‾(λ)dλY=∫0∞S(λ)y‾(λ)dλZ=∫0∞S(λ)z‾(λ)dλ {\\displaystyle \\begin{aligned} &{\\displaystyle X =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {x}}(\\lambda )\\,d\\lambda } \\\\ &{\\displaystyle Y =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {y}}(\\lambda )\\,d\\lambda } \\\\ &{\\displaystyle Z =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {z}}(\\lambda )\\,d\\lambda } \\\\ \\end{aligned} } X=∫0∞S(λ)x(λ)dλY=∫0∞S(λ)y(λ)dλZ=∫0∞S(λ)z(λ)dλ 其中, 有 r‾(λ){\\overline {r}}(\\lambda )r(λ) 、 g‾(λ){\\overline {g}}(\\lambda )g(λ) 、 b‾(λ){\\overline {b}}(\\lambda )b(λ) 是将理想刺激值峰值 (μ,σ1,σ2)(\\mu ,\\sigma _{1},\\sigma _{2})(μ,σ1,σ2) ,带入高斯公式所得,这和 RGB 色感函数的拟合有一定的不同。峰值 (μ,σ1,σ2)(\\mu ,\\sigma _{1},\\sigma _{2})(μ,σ1,σ2) 中, μ\\muμ 代表峰值波长, σ1\\sigma _{1}σ1 代表 μ\\muμ 值左侧生效范围偏移量, σ2\\sigma _{2}σ2 代表 μ\\muμ 值右侧生效范围偏移量。XYZ 在度量峰值上取用了理想状态值,有: g(λ; μ,σ1,σ2)={exp(−12(λ−μ)2/σ12),λμ,exp(−12(λ−μ)2/σ22),λ≥μ. {\\displaystyle g(\\lambda;\\ \\mu ,\\sigma _{1},\\sigma _{2}) = {\\begin{cases} \\exp {\\bigl (}{-{\\tfrac {1}{2}}(\\lambda-\\mu )^{2}/{\\sigma _{1}}^{2}}{\\bigr )}, &\\lambdag(λ; μ,σ1,σ2)={exp(−21(λ−μ)2/σ12),exp(−21(λ−μ)2/σ22),λμ,λ≥μ. 推导而出: x‾(λ)=1.056g(λ; 599.8, 37.9, 31.0)+0.362g(λ; 442.0, 16.0, 26.7)−0.065g(λ; 501.1, 20.4, 26.2)y‾(λ)=0.821g(λ; 568.8, 46.9, 40.5)+0.286g(λ; 530.9, 16.3, 31.1)z‾(λ)=1.217g(λ; 437.0, 11.8, 36.0)+0.681g(λ; 459.0, 26.0, 13.8) {\\displaystyle \\begin{aligned} &{\\displaystyle {\\overline {x}}(\\lambda ) = 1.056g(\\lambda ;\\ 599.8,\\ 37.9,\\ 31.0)+0.362g(\\lambda ;\\ 442.0,\\ 16.0,\\ 26.7)-0.065g(\\lambda ;\\ 501.1,\\ 20.4,\\ 26.2) } \\\\ &{\\displaystyle {\\overline {y}}(\\lambda ) = 0.821g(\\lambda ;\\ 568.8,\\ 46.9,\\ 40.5)+0.286g(\\lambda ;\\ 530.9,\\ 16.3,\\ 31.1) } \\\\ &{\\displaystyle {\\overline {z}}(\\lambda ) = 1.217g(\\lambda ;\\ 437.0,\\ 11.8,\\ 36.0)+0.681g(\\lambda ;\\ 459.0,\\ 26.0,\\ 13.8) } \\\\ \\end{aligned} } x(λ)=1.056g(λ; 599.8, 37.9, 31.0)+0.362g(λ; 442.0, 16.0, 26.7)−0.065g(λ; 501.1, 20.4, 26.2)y(λ)=0.821g(λ; 568.8, 46.9, 40.5)+0.286g(λ; 530.9, 16.3, 31.1)z(λ)=1.217g(λ; 437.0, 11.8, 36.0)+0.681g(λ; 459.0, 26.0, 13.8) 而 S(λ)S(\\lambda )S(λ) 仍然为为目标波长 λ\\lambdaλ 的光谱功率分布函数: S(λ)=Le(λ)θ=2∘≈d2Φe(λ)dAdΩ=dEe(λ)dΩ {\\displaystyle S(\\lambda) = L_{\\mathrm {e}}(\\lambda)_{\\theta=2^{\\circ}} \\approx {\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {e} }(\\lambda)}{\\mathrm {d} A\\,\\mathrm {d} \\Omega }} ={\\frac {\\mathrm {d} E _{\\mathrm {e} }(\\lambda)}{d \\Omega }} } S(λ)=Le(λ)θ=2∘≈dAdΩd2Φe(λ)=dΩdEe(λ) 同样的,指定波长 λ\\lambdaλ 的光线,就能被相对化表示为: Ray(λ)=C(X,Y,Z) Ray(\\lambda)= C(X,Y,Z) Ray(λ)=C(X,Y,Z) 通过以 (X,Y,Z)(X,Y,Z)(X,Y,Z) 代替 (R,G,B)(R,G,B)(R,G,B) 的度量方式,CIE XYZ 解决了负色匹配问题。为了区别于 (R,G,B)(R,G,B)(R,G,B) 光学三原色的称谓,我们将 (X,Y,Z)(X,Y,Z)(X,Y,Z) 称为 三刺激值(Tristimulus Values)。 不过,CIE 1931 RGB & CIE 1931 XYZ 中对于光学三原色标准波长的测量/设置,在现代光学体系中被认为有所偏颇的。在选取基准波长时,1931 RGB 蓝绿取用气态水银(Hg)放电获谱线产生的峰值波长 435.8nm(蓝)和 546.1nm(绿),而红色却因为人眼在对 700nm 波长附近的颜色感知几乎无变化的情况下,人为介入设定为还原配色实验理想值 700nm。这一历史局限性导致的情况,也被基于 RGB 测定而考量的 XYZ 继承了。以致于为两者的 “均色问题” 埋下了伏笔。 即便如此,经典三原色函数和三刺激函数,仍然为现代色彩体系奠定了基础公式。使我们能够 以数理形式转换对应目标波长的目标光波,到相应的度量衡系统。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 
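To make the measurement pipeline above concrete, the sketch below chains the two pieces introduced so far: Planck's spectral distribution from 2.3.1 as S(λ), and the piecewise-Gaussian fits of x̄(λ), ȳ(λ), z̄(λ) listed in this section, integrated over 360–780 nm to obtain the tristimulus values of a blackbody at a chosen color temperature. This is only an illustrative sketch: the 1 nm Riemann sum, the dropped constant factors in Planck's law, and the final Y = 100 normalization are conveniences assumed here, not part of the CIE definitions.

```cpp
// Minimal sketch: approximate CIE 1931 XYZ of a blackbody radiator at a given
// color temperature, using the piecewise-Gaussian fits of xbar/ybar/zbar from
// this section and Planck's spectral distribution from 2.3.1. The 1 nm Riemann
// sum and the Y = 100 normalization are illustrative choices.
#include <cmath>
#include <cstdio>

// Piecewise Gaussian g(lambda; mu, s1, s2): s1 applies below the peak mu, s2 above it.
static double g(double lambda, double mu, double s1, double s2) {
    double s = (lambda < mu) ? s1 : s2;
    double t = (lambda - mu) / s;
    return std::exp(-0.5 * t * t);
}

// CIE 1931 2-degree standard-observer CMFs, multi-Gaussian approximation (lambda in nm).
static double xbar(double l) { return 1.056*g(l,599.8,37.9,31.0) + 0.362*g(l,442.0,16.0,26.7) - 0.065*g(l,501.1,20.4,26.2); }
static double ybar(double l) { return 0.821*g(l,568.8,46.9,40.5) + 0.286*g(l,530.9,16.3,31.1); }
static double zbar(double l) { return 1.217*g(l,437.0,11.8,36.0) + 0.681*g(l,459.0,26.0,13.8); }

// Relative spectral power of a blackbody at temperature T (kelvin), Planck's law.
// Constant factors are dropped because the result is normalized afterwards.
static double planck(double lambda_nm, double T) {
    const double h = 6.62607015e-34, c = 2.99792458e8, k = 1.380649e-23;
    double lambda = lambda_nm * 1e-9;                       // nm -> m
    return 1.0 / (std::pow(lambda, 5.0) * (std::exp(h*c/(lambda*k*T)) - 1.0));
}

int main() {
    double T = 6500.0;                                      // example color temperature
    double X = 0, Y = 0, Z = 0;
    for (double l = 360.0; l <= 780.0; l += 1.0) {          // 1 nm Riemann sum over the visible band
        double S = planck(l, T);
        X += S * xbar(l);  Y += S * ybar(l);  Z += S * zbar(l);
    }
    double scale = 100.0 / Y;                               // normalize so that Y = 100
    std::printf("T=%.0fK  X=%.3f Y=%.3f Z=%.3f  x=%.4f y=%.4f\n",
                T, X*scale, Y*scale, Z*scale, X/(X+Y+Z), Y/(X+Y+Z));
    return 0;
}
```

The appeal of the Gaussian fit is that it avoids shipping a lookup table for the standard observer; where exact values matter, the tabulated CIE 1931 data should be used instead.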
"},"Chapter_2/Language/cn/Docs_2_3_5.html":{"url":"Chapter_2/Language/cn/Docs_2_3_5.html","title":"2.3.5 现代色彩体系(Modern Color System)","keywords":"","body":"2.3.5 现代色彩体系(Modern Color System) 现代色彩体系(Modern Color System) 的基石,即为 1931 年由前身为国际光度委员会(1900, IPC [International Photometric Commission])的国际照明委员会(CIE [International Commission on Illumination]) 提出的 CIE RGB & CIE YUV 色彩空间。 图 2-11 现代色彩体系(Modern Color System)关系图谱[20] 上图很好的展示了 CIE RGB & CIE XYZ 色彩空间与经典物理学概念和其余色彩空间之间的转换关系。当前被广泛用于流媒体传输和图像颜色信息压缩的 YUV 系列颜色格式(Color Format),便是 CIE RGB 体系下的产物。 既然 CIE RGB 配合 CIE XYZ 色彩空间已经能够达成贯通存粹理论与工程应用的边界,那为什么还要引入或设计其余的色彩空间呢? 其中最典型的问题就在于上文提到的,CIE RGB & CIE XYZ 的“均色问题”。CIE RGB & CIE XYZ 并不能很好的代表人对色彩的直观感受。通俗来讲,就是人对颜色变化的感知是均匀的,而 CIE XYZ 无法将这种主观的均匀感,再不经过参考系转换的情况下,完全等价的表示出来。 所以,CIE 在 1960 年针对性的提出了 “均匀色彩空间”(UCS [Uniform Color Space])色彩空间 [21] [22],来尝试进一步整合相关概念并更换规范化体系。UCS 自诞生后便经过了多次迭代,如 1960 UCS、1976 UCS 等。1976 UCS 对于均色度量非常关键,它还有另外一个更为知名的名称,那就是 CIE LUV 色彩空间。 另一方面,因为受限于设备和技术,很多商业化产品(包括软硬件)根本无法表表示出来全部可见光谱区域。这种情况下,虽然 CIE RGB & CIE XYZ 色彩空间能够起到度量颜色的作用,却不适合用于指定设备来有条件的表示有限的颜色。这也让很多设备供应商,不得不根据自身的软硬件情况,来定制一些色彩模型供给设备使用。诸如 sRGB 就属于此类。 即便如此,在整个现代色彩体系之下,CIE RGB & CIE XYZ 色彩空间仍然是最为通用的度量衡体系。这或许是因为,它们较高的推广程度和便于计算的特性决定的。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_4.html":{"url":"Chapter_2/Language/cn/Docs_2_4.html","title":"2.4 色彩的对比","keywords":"","body":"2.4 色彩的对比 自 1931 年 CIE RGB & CIE XYZ 色彩空间 [12] 被提出后,色彩在工程中的对比标准就被统一在了 CIE 逐步采纳、整理和定义的 一系列规格之下。而 CIE XYZ 色彩空间具有直观客观和正向全可见光色域的特点,使得它更适合被用来作为工业应用的基准体系。所以,我们往往都会将需要处理的颜色数据, 转换到 CIE XYZ 之下进行权衡。 当然,整个 CIE 色彩空间体系,其提出迭代的过程和当下的统治地位也并不是一蹴而就。这里先对工程上由 CIE 规范的关键概念进行介绍。以便于为我们更好的理解后续章节中,不同色彩空间的提出背景和针对性解决的问题困难,提供帮助。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_4_1.html":{"url":"Chapter_2/Language/cn/Docs_2_4_1.html","title":"2.4.1 色域(Color Gamut )","keywords":"","body":"2.4.1 色域(Color Gamut) 色域(Color Gamut) 是一个泛指的广义概念,代表对应色彩空间可被表达的所有颜色构成的区域。不同色彩空间的色域可能是不一样的,所以必须有一个统一的度量衡系统来进行比对。被选中作为度量衡的系统必须能客观的表示颜色的物理信息,并且不受其他主观因素影响。因此,只有设备无关色彩空间可以满足要求。当前最常用的度量衡系统,就是 CIE XYZ 色彩空间。CIE XYZ 色彩空间的色域,涵盖了人眼能够观察到的整个可见光谱范围,被 CIE 称为 CIE 标准观察者色域(CIE Standard Observer Gamut)。简称 标准色域。 通常,我们使用 CIE 色度图 来表示 CIE 标准观察者色域。 图 2-12 CIE 标准观察者色域在 CIE 色度图上的表示 由于 CIE RGB & XYZ 最基本的定义是基于 2° 角 的 视网膜小窝(Fovea Centralis)间隔 来获取的人眼视觉感受效果。因此,通常我们所称的色域以及其相关概念(如色度等),在未明确说明视网膜小窝间隔夹脚的情况下,都是假定指定基于 2° 角的测量结果( 除 2° 角外,相对常用的还有 10° 角 )。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_4_2.html":{"url":"Chapter_2/Language/cn/Docs_2_4_2.html","title":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram)","keywords":"","body":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram) 色度(Chroma|Chromaticity) 是一个泛指的广义概念,是对除 光亮度(Luminance) 之外,由色调和饱和度或其衍生参数组成的颜色信息的统称。现代工程上的色度概念,最早是在 CIE XYZ 色彩空间对标准色度图的推导过程中引入的。 CIE XYZ 将色度定义为:XYZ 色彩空间内代表颜色的三维向量,由指定平面切割和归一化后,沿 Z 轴垂直方向在 XY 轴平面上二维投影向量。这个用于切割降维和压缩参数范围的平面,被称为 色度平面(Chroma Plane|Chromaticity Plane)。整个色彩空间色域在 XY 轴平面的二维投影,被称为 CIE xyY 色度图(CIE xyY Chromaticity Diagram),简称 色度图(Chroma Diagram)。 为什么是 xyY 色度图?因为决定颜色的除了 xy 代表色度外,还需要光亮度(Luminance)关键量。CIE XYZ 直接取用颜色在 XYZ 色彩空间里的 Y 方向分量,代替指代光亮度。 图 2-13 CIE 色度平面切割标准色域并投影色度图的示意图 可见,使用色度的色彩空间,色度的量化和其内部参数的选取息息相关。不同的色彩空间在色度的定义上,存在着表述上的不同。在大多数情况下,CIE XYZ 之后的色彩空间,都会取用 CIE 测定的 700nm 波长标准红点(Red Point) 为 
基准轴正轴方向,来构建自身的色度参数。究其原因是,相同的基准可以便于将自身色域转换到 CIE XYZ 统一度量衡上对比。所以,色度也常常被直接用 CIE XYZ 色彩空间的定义来表示。 CIE XYZ 色彩空间取用 [X=1, Y=1, Z=1] 构成的三棱锥底面所在平面为色度平面。该平面上的 XYZ 坐标系内点存在关系: Plane:{X+Y+Z=1} {\\displaystyle \\begin{aligned} &{\\displaystyle Plane :\\{ {X+Y+Z} = 1 \\}} \\\\ \\end{aligned} } Plane:{X+Y+Z=1} 记 XYZ 色彩空间中存在颜色 (X,Y,Z)(X, Y, Z)(X,Y,Z) 在 XY 平面的投影为 (x,y)(x, y)(x,y) ,则有: Set: (x+y+z)=1 Then:Chromaticity:{(x,y)=(XX+Y+Z,YX+Y+Z)}Luminance:{Y} {\\displaystyle \\begin{aligned} &Set:\\ (x+y+z) = 1 \\ \\ \\ {Then:} \\\\ &{\\displaystyle Chromaticity:\\{ (x,y) = ({\\frac {X}{X+Y+Z}}, {\\frac {Y}{X+Y+Z}}) \\} } \\\\ &{\\displaystyle Luminance:\\{ Y \\} } \\\\ \\end{aligned} } Set: (x+y+z)=1 Then:Chromaticity:{(x,y)=(X+Y+ZX,X+Y+ZY)}Luminance:{Y} 在已知 (x,y,Y)(x, y, Y)(x,y,Y) 的情况下,也可以反向获得 (X,Y,Z)(X, Y, Z)(X,Y,Z) : (X,Y,Z)=(Yy⋅x, Y, Yy⋅(1−x−y) ) {\\displaystyle \\begin{aligned} (X, Y, Z) &= ({\\frac {Y}{y}} \\cdot x, \\ \\ Y, \\ \\ {\\frac {Y}{y}} \\cdot (1-x-y) \\ ) \\end{aligned} } (X,Y,Z)=(yY⋅x, Y, yY⋅(1−x−y) ) 所以,只要根据 (x,y,Y)(x, y, Y)(x,y,Y) 值,就能将色度图上的颜色还原到 XYZ 实际坐标。而其中的 (x,y)(x, y)(x,y) 值,就是 CIE 中颜色色度的表示形式。 那么在颜色能够被统一描述的前提下,颜色间的差异怎么来说明呢? Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_4_3.html":{"url":"Chapter_2/Language/cn/Docs_2_4_3.html","title":"2.4.3 色差(Chromatic Aberration)","keywords":"","body":"2.4.3 色差(Chromatic Aberration) 色差(Chromatic Aberration) 是一个相对概念,指的是两个色度不同的颜色之间的差异。 广义色差(gCA [General Chromatic Aberration]) 不限定用于对比的两个颜色对应的色调,此时的色差计算的是颜色色度的差异。 狭义色差(sCA [Special Chromatic Aberration]) 则要求对比的两个颜色具有相同的色调,此时的色差计算的仅仅是颜色饱和度的变化。因此,狭义色差可以被认为是广义色差的一种特殊情况。 色差的计算为了简洁,通常都选择使用欧式距离表示。记对比的两颜色分别为 C1C_1C1 、 C2C_2C2 ,色差为 CCC ,广义色差为 ΔC\\Delta CΔC ,有: C={gCA:{ΔC=ΔH2+ΔS2≈distance(C1, C2)}sCA:{ΔC∣(ΔH=0)=ΔS2=ΔS≈range(C1, C2)} {\\displaystyle \\begin{aligned} &C={ \\begin{cases} &{\\displaystyle gCA: \\{\\Delta C ={\\sqrt {\\Delta H ^{2} + \\Delta S ^{2} }} \\approx {distance} (C_1,\\ C_2)} \\} \\\\ &{\\displaystyle sCA: \\{ {\\Delta C}|_{({\\Delta H = 0})} = {\\sqrt {\\Delta S ^{2}}} = \\Delta S \\approx {range} (C_1,\\ C_2) \\} } \\end{cases}} \\\\ \\end{aligned} } C={gCA:{ΔC=√ΔH2+ΔS2≈distance(C1, C2)}sCA:{ΔC∣(ΔH=0)=√ΔS2=ΔS≈range(C1, C2)} 带入 CIE XYZ 规则,色差的表示就可以直接以色度 (x,y)(x, y)(x,y) 计算了: C=Δx2+Δy2 {\\displaystyle \\begin{aligned} &C = {\\sqrt {\\Delta x ^{2} + \\Delta y ^{2} }} \\\\ \\end{aligned} } C=√Δx2+Δy2 替换了色调饱和度参数,使广义狭义在公式层面得到了统一。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_4_4.html":{"url":"Chapter_2/Language/cn/Docs_2_4_4.html","title":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature)","keywords":"","body":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature) 我们在之前讲述配色函数的理论基础时,已经阐述过色温的概念了。这里做一下回顾。 色温(Color Temperature) 是由物体本身的黑体辐射决定的一个物理量,计量单位为 K(开尔文温度)。它被定义为,绝对黑体从绝对零度(-273.15℃)开始加温后所呈现出的颜色。 CIE 于 1960 UCS 色彩空间 中引入了色温的表示,并根据工业光源特性引入了 相关色温(CCT [Correlated Color Temperature]) 来表示 一系列物理(辐射度)色温的近似值。 色温与 1960 UCS 快速计算 记色温 TTT 有对应复合波长 λT\\lambda_TλT ,色温 TTT 在 CIE XYZ 色彩空间上的颜色为 CT(XT,YT,ZT)C_T(X_T,Y_T,Z_T)CT(XT,YT,ZT) ,则根据前文中对于配色函数理论基础的推导,将波长 λT\\lambda_TλT 带入经典三刺激函数,我们有: CT=FXYZ (λT) , Q=∫0∞S(λ)dλ≈Lv⋅∑360nm780nmuλIeλ=Lv⋅u(λT)XT=∫0∞S(λ)x‾(λ)dλ≈Lv⋅∑360nm780nm(uλIeλ⋅x‾(λ))YT=∫0∞S(λ)y‾(λ)dλ≈Lv⋅∑360nm780nm(uλIeλ⋅y‾(λ))ZT=∫0∞S(λ)z‾(λ)dλ≈Lv⋅∑360nm780nm(uλIeλ⋅z‾(λ)) {\\displaystyle \\begin{aligned} &C_T = F_{XYZ\\ }(\\lambda_T) \\ , \\ \\ \\ Q = \\int 
_{0}^{\\infty }S(\\lambda )\\,d\\lambda \\approx {L_v} \\cdot \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda = {L_v} \\cdot {u (\\lambda_T)} \\\\ &X_T =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {x}}(\\lambda )\\,d\\lambda \\approx {L_v} \\cdot \\sum _{360nm} ^{780nm} ({\\frac {u_{\\lambda}}{I_e}} \\lambda \\cdot {\\overline {x}}(\\lambda )) \\\\ &Y_T =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {y}}(\\lambda )\\,d\\lambda \\approx {L_v} \\cdot \\sum _{360nm} ^{780nm} ({\\frac {u_{\\lambda}}{I_e}} \\lambda \\cdot {\\overline {y}}(\\lambda )) \\\\ &Z_T =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {z}}(\\lambda )\\,d\\lambda \\approx {L_v} \\cdot \\sum _{360nm} ^{780nm} ({\\frac {u_{\\lambda}}{I_e}} \\lambda \\cdot {\\overline {z}}(\\lambda )) \\\\ \\end{aligned} } CT=FXYZ (λT) , Q=∫0∞S(λ)dλ≈Lv⋅360nm∑780nmIeuλλ=Lv⋅u(λT)XT=∫0∞S(λ)x(λ)dλ≈Lv⋅360nm∑780nm(Ieuλλ⋅x(λ))YT=∫0∞S(λ)y(λ)dλ≈Lv⋅360nm∑780nm(Ieuλλ⋅y(λ))ZT=∫0∞S(λ)z(λ)dλ≈Lv⋅360nm∑780nm(Ieuλλ⋅z(λ)) 上式就是色温 TTT 在 CIE XYZ 上的表达,这样的表示不够简练。 如果能够找到一个由 XYZ 衍生的色彩空间,能够直接由色温 TTT 值本身计算相应的颜色就好了。借用此空间,我们就能够依据该色彩空间与 XYZ 之间的联系,快速转换色温在该色彩空间上的表示到 XYZ 内,从而间接起到在 XYZ 上精简色温计算的目的。 1960年,CIE 采用了来自柯达实验室的 大卫·麦克亚当(David Lewis MacAdam,1910 - 1998) 在 迪恩·布鲁斯特·朱迪(Deane Brewster Judd,1900 - 1972) 的研究上提出的简化色度、色温与相关色温表示方案,并将方案应用在了 CIE 1931 XYZ 色彩空间上作为对 CIE 1931 XYZ 体系的补充。1960 UCS 对 XYZ 色彩空间的观察角度进行了透视变换,从不同的方向获取了 XYZ 色度平面的投影,以此构建了自身的色度特征。 记 CIE 1960 UCS 中颜色为 (U,V,W)(U, V, W)(U,V,W) ,有: (X, Y, Z)=(32U, Y, 32U−3V+2W)(U,V,W)=(23X, Y, −X+3Y+2Z2) {\\displaystyle \\begin{aligned} &(X,\\ Y,\\ Z) = ({\\tfrac {3}{2}} U, \\ \\ \\ Y, \\ \\ \\ {\\tfrac {3}{2}} U -3V + 2W ) \\\\ &(U, V, W) = ({\\tfrac {2}{3}} X, \\ \\ \\ Y, \\ \\ \\ {\\frac {-X +3Y + 2Z}{2}} ) \\end{aligned} } (X, Y, Z)=(23U, Y, 23U−3V+2W)(U,V,W)=(32X, Y, 2−X+3Y+2Z) 取 (U,V,W)(U, V, W)(U,V,W) 对应色度为 (u,v)(u, v)(u,v) 。存在: (u,v)=(4x12y−2x+3, 6y12y−2x+3)(x,y)=(3u2u−8v+4 , 2v2u−8v+4 ) {\\displaystyle \\begin{aligned} &(u, v) = ({\\frac {4x}{12y-2x+3}}, \\ {\\frac {6y}{12y-2x+3}}) \\\\ &(x, y) = ({\\frac {3u}{2u-8v+4}}\\ \\ , \\ {\\frac {2v}{2u-8v+4}} \\ ) \\end{aligned} } (u,v)=(12y−2x+34x, 12y−2x+36y)(x,y)=(2u−8v+43u , 2u−8v+42v ) 显然,颜色表示在 1960 UCS 和 1931 XYZ 间的坐标转换仅为 线性变化,计算非常便捷。 1960 UCS 有什么优势能让 CIE 大动干戈呢?关键在于 UCS 能够以 切比雪夫近似值 [27] ,逼近范围在 (1000K, 15000K)(1000K, \\ 15000K)(1000K, 15000K) 的色温对应的色度 (u¯(T), v¯(T))(\\bar u(T),\\ \\bar v(T))(u¯(T), v¯(T)) 取值,且控制在 ∣u−u¯∣8⋅10−5\\left|u-{\\bar {u}}\\right|∣u−u¯∣8⋅10−5 和 ∣v−v¯∣9⋅10−5\\left|v-{\\bar {v}}\\right|∣v−v¯∣9⋅10−5 的误差范围。如此误差可称得上相当精确了。有: u¯(T)≈0.860117757+1.54118254⋅10−4⋅T+1.28641212⋅10−7⋅T21+8.42420235⋅10−4⋅T+7.08145163⋅10−7⋅T2v¯(T)≈0.317398726+4.22806245⋅10−5⋅T+4.20481691⋅10−8⋅T21−2.89741816⋅10−5⋅T+1.61456053⋅10−7⋅T2 {\\displaystyle \\begin{aligned} &{\\bar {u}}(T)\\approx {\\frac {0.860117757+1.54118254\\cdot 10^-4\\cdot T+1.28641212\\cdot 10^-7\\cdot T^{2}} {1+8.42420235\\cdot 10^-4\\cdot T+7.08145163\\cdot 10^-7\\cdot T^{2}}} \\\\ &{\\bar {v}}(T)\\approx {\\frac {0.317398726+4.22806245\\cdot 10^-5\\cdot T+4.20481691\\cdot 10^-8\\cdot T^{2}} {1-2.89741816\\cdot 10^-5\\cdot T+1.61456053\\cdot 10^-7\\cdot T^{2}}} \\end{aligned} } u¯(T)≈1+8.42420235⋅10−4⋅T+7.08145163⋅10−7⋅T20.860117757+1.54118254⋅10−4⋅T+1.28641212⋅10−7⋅T2v¯(T)≈1−2.89741816⋅10−5⋅T+1.61456053⋅10−7⋅T20.317398726+4.22806245⋅10−5⋅T+4.20481691⋅10−8⋅T2 上式也被称为 UCS 色温函数。以色温函数计算色度 (u,v)(u, v)(u,v) ,再通过固定三刺激值的 Y=Y0Y = Y_0Y=Y0 来快速的返向计算色温 TTT 的 (X,Y,Z)(X, Y, Z)(X,Y,Z) 表示。有: (X, Y, Z)=(Y0⋅3u2v, Y0, Y0⋅−u−10v+42v) {\\displaystyle \\begin{aligned} &(X,\\ Y,\\ Z) = (Y_0 \\cdot {\\tfrac {3u}{2v}} , \\ 
\\ Y_0, \\ \\ Y_0 \\cdot {\\tfrac {-u-10v+4}{2v}}) \\end{aligned} } (X, Y, Z)=(Y0⋅2v3u, Y0, Y0⋅2v−u−10v+4) 基于这一点,到现在为止 1960 UCS 仍然是色温及相关色温的最佳计算工具。 CIE 利用 UCS 特性,将一系列物理色温的色彩学近似概念引入了 CIE 系统。 普朗克轨迹(Planckian Locus) 如果取 TTT 范围为 (0, ∞)(0, \\ \\infty)(0, ∞) 开尔文,那么由 TTT 在指定色彩空间上的所有对应颜色所构成的轨迹曲线,就被称为 普朗克轨迹(Planckian Locus),也被称为 黑体轨迹(Blackbody Locus)。换而言之,通过将色温 带入色温函数所求的的色度,都在普朗克轨迹上。所以,我们只需要将选定范围色温带入 UCS 色温函数,就能将普朗克轨迹表示到 UCS 色度图上。 图 2-14 普朗克轨迹在 UCS 色度图上的表示 把 UCS 色温函数转到 XYZ 的色度表示,就有: (x¯(T),y¯(T))=(3u¯(T)2u¯(T)−8v¯(T)+4 , 2v¯(T)2u¯(T)−8v¯(T)+4 ) {\\displaystyle \\begin{aligned} &({\\bar {x}}(T), {\\bar {y}}(T)) = ({\\frac {3{\\bar {u}}(T)}{2{\\bar {u}}(T)-8{\\bar {v}}(T)+4}}\\ \\ , \\ {\\frac {2{\\bar {v}}(T)}{2{\\bar {u}}(T)-8{\\bar {v}}(T)+4}} \\ ) \\end{aligned} } (x¯(T),y¯(T))=(2u¯(T)−8v¯(T)+43u¯(T) , 2u¯(T)−8v¯(T)+42v¯(T) ) 此表达式,即色温在 CIE 色度图上的普朗克轨迹函数(Planckian Locus Functions),也被称为 CIE 色温函数。效果如下: 图 2-15 普朗克轨迹在 CIE 色度图上的表示 含有 普朗克轨迹 的 CIE 色度图,让我们能够 直观的表现自然辐射源 在 CIE 标准下的色彩特点。但是人眼对色温的感受并不会如此精准,很多在感知上近似物理色温的颜色,实际色度却在普朗克轨迹外。 如何在色温基础上引入人眼感受特征呢?相信已经有读者注意到图中,与 普朗克轨迹 垂直或交叉的直线 了。这就是解决人眼感受问题而用到 相关色温 和 等色温线 工具。作为 CIE 体系内的标准度量衡工具,相关色温和等色温线必须具有体系内完全可求的特点,即:从指定色度推算相关色温,和从相关色温推算对应色度的能力。 那么,什么是相关色温? 相关色温(CCT)的等色温线(CCT Isotherms)与麦克亚当法 相关色温(CCT [Correlated Color Temperature]) 是指在同等光亮度情况下,于感知上趋同于选定色温的范围内颜色的集合 [28] [29]。通常我们会直接以选定的色温参考系的温度,来代替表示相关色温的温度。从主观角度理解,色温与相关色温在颜色上并无差异,或差异无法被明显察觉。 而由选定色温与其相关色温共同构成色彩空间内的连线,就被称为 等色温线(CCT Isotherms),有时也会被简称为 等温线 [29]。 相关色温在 CIE 中依赖于等温线表示,而等温线依赖于对普朗克轨迹。CIE 采用麦克亚当建议的测量方式,以两个视觉恰克区分临界点间的跨度为单位麦勒德( mrdmrdmrd [Maillard]),记为 mrdmrdmrd 。有麦勒德和色温单位开尔文间换算关系: mrd=106Tc mrd = {\\tfrac {10^6}T_c } mrd=T106c CIE 以麦勒德为度量,来等分普朗克轨迹。例如,取 1 mrd=106K1 \\ mrd = 10^6 K1 mrd=106K 就指以 106K10^6 K106K 为分割步长,取 500 mrd=2000 K500 \\ mrd = 2000\\ K500 mrd=2000 K 就指以 2000 K2000\\ K2000 K 为分割步长。具体麦勒德的选取,依赖于实际应用场景下,对相关色温配色精确程度的要求。麦勒德取值越小,精确程度越低;反之,麦勒德取值越大,精确程度越高;即精确度与麦勒德成正比关系。可知当麦勒德取值趋近于 ∞ mrd\\infty \\ mrd∞ mrd 时,整个普朗克轨迹不再分割而是完全连续。 那么在分割后,CIE 怎么计算 CCT 呢?麦克亚当采用的是垂直取值法,即从当前想要知道相关色温的颜色在 UCS 色度图上表示位置处,向 UCS 色度图中的普朗克曲线做垂线。做得垂线与普朗克轨迹交点处对应的色温,就是当前相关色温对应的物理色温。 记目标相关色温 TcctT_{cct}Tcct 的色度为 (u¯(Tcct), v¯(Tcct))(\\bar u(T_{cct}),\\ \\bar v(T_{cct}))(u¯(Tcct), v¯(Tcct)) , TcctT_{cct}Tcct 对应的物理色温 TcT_cTc 的色度为 (u¯(Tc), v¯(Tc))(\\bar u(T_c),\\ \\bar v(T_c))(u¯(Tc), v¯(Tc)) ,那么在 麦克亚当法(MacAdam's CCT method) 之下,我们需要计算: cct⃗⋅c⃗′=vector (u¯(Tcct)−u¯(Tc), v¯(Tcct−v¯(Tc))⋅vector (u¯′(Tcct), v¯′(Tcct)=0 \\vec{cct} \\cdot \\vec{c}' = vector\\ (\\bar u(T_{cct}) - \\bar u(T_c),\\ \\bar v(T_{cct} - \\bar v(T_c)) \\cdot vector\\ (\\bar u'(T_{cct}) ,\\ \\bar v'(T_{cct}) = 0 cct⃗⋅c⃗′=vector (u¯(Tcct)−u¯(Tc), v¯(Tcct−v¯(Tc))⋅vector (u¯′(Tcct), v¯′(Tcct)=0 式中 cct⃗\\vec{cct}cct⃗ 为从交点指向色温色度的向量, c⃗′\\vec{c}'c⃗′ 为普朗克轨迹对应交点色温处的导数(即切线方向),代入色温函数和欲求相关色温色度,所得 TcT_cTc 即为所求。 由色度推算相关色温(CCT)的罗伯逊算法 使用麦克亚当法计算 CCT 不太好定位交点求值,因此 艾伦·罗伯逊(Alan R. 
Robertson) 在 1968年提出了另一种快速算法:选取两个普朗克轨迹上的色温,以线性插值方法近似计算目标相关色温 [30]。这一方法也被称为 罗伯逊相关色温算法(Robertson's CCT method)。 图 2-16 罗伯逊相关色温算法(Robertson's CCT method)示意图 如图 2-16 所示, (uT,vT)(u_T, v_T)(uT,vT) 代表目标相关色温 TcT_cTc 色度, TiT_iTi 、 Ti+1T_{i+1}Ti+1 代表普朗克轨迹上以指定麦勒德分割的量个最近相邻色温, did_idi 、 di+1d_{i+1}di+1 为 (uT,vT)(u_T, v_T)(uT,vT) 与 TiT_iTi 、 Ti+1T_{i+1}Ti+1 所在等温线的垂直距离, θ1\\theta _1θ1 、 θ2\\theta _2θ2 为延 (uT,vT)(u_T, v_T)(uT,vT) 所做等温线与 TiT_iTi 、 Ti+1T_{i+1}Ti+1 所在等温线的交点处夹脚。有罗伯逊相关色温公式就可以如下表示: 1Tc=1Ti+θ1θ1+θ2(1Ti+1−1Ti) \\frac{1}T_c=\\frac{1}{T_i}+\\frac{\\theta_1}{\\theta_1+\\theta_2} \\left( \\frac{1}{T_{i+1}} - \\frac{1}{T_i} \\right) T1c=Ti1+θ1+θ2θ1(Ti+11−Ti1) 可以等价转换为 did_idi 、 di+1d_{i+1}di+1 表示 θ1\\theta _1θ1 、 θ2\\theta _2θ2 ,即: 1Tc=1Ti+didi−di+1(1Ti+1−1Ti) \\frac{1}T_c=\\frac{1}{T_i}+\\frac{d_i}{d_i-d_{i+1}} \\left( \\frac{1}{T_{i+1}} - \\frac{1}{T_i} \\right) T1c=Ti1+di−di+1di(Ti+11−Ti1) 而 did_idi 、 di+1d_{i+1}di+1 在分割用麦勒德 mrdmrdmrd 固定的情况下,可以表示为: di=(vT−vi)−mrd⋅(uT−ui)1+mrd2 d_i=\\frac{ (v_T-v_i)-mrd \\cdot (u_T-u_i) }{\\sqrt {1+mrd^2}} di=√1+mrd2(vT−vi)−mrd⋅(uT−ui) 带入上式可知: Tc=(di−di+1)⋅Ti⋅Ti+1di⋅Ti−di+1⋅Ti+1 T_c = \\frac{ (d_i-d_{i+1}) \\cdot T_i \\cdot T_{i+1}}{d_i \\cdot T_i -d_{i+1} \\cdot T_{i+1}} Tc=di⋅Ti−di+1⋅Ti+1(di−di+1)⋅Ti⋅Ti+1 显然 罗伯逊相关色温算法虽然化解了麦克亚当法的交点坐标问题,但也不够简便。在不追求过度精度的情况下,是否存在一种足够快捷的算法来达成相关色温的近似取值呢?这便有了 相关色温的快速逼近法。 由色度推算相关色温(CCT)的麦卡米快速逼近算法 1992年,卡尔文·麦卡米(Calvin S. McCamy) 以选定参照点后使用 三次厄尔密样条(Cubic Hermite Spline) 的方法,得到了一组能够在 CIE XYZ 上直接使用的快速逼近公式,进一步简化了相关色温的取值过程 [31] 。为了纪念麦卡米的贡献,CIE 将此快速算法称为 麦卡米算法(McCamy's CCT method)。 由于三次厄尔密样条的准确性依赖于参考点选取的特点。麦卡米优化了 肯尼斯·凯利(Kenneth L. Kelly) 的采样实验,取用了 XYZ 色彩空间上,能够使求得逼近函数更贴近于范围内实际值的关键色度 (0.3320, 0.1858)(0.3320,\\ 0.1858)(0.3320, 0.1858) 作为参考点 [32] 。他将这个关键参考点称为 “震中(Epicenter)”。 如果记震中为 (xe,ye)(x_e, y_e)(xe,ye) 则 (xe=0.3320, ye=0.1858)(x_e = 0.3320,\\ y_e = 0.1858) (xe=0.3320, ye=0.1858) ,记 (x,y)(x, y)(x,y) 为指定希望求得相关色温 TcT_cTc 的颜色色度。取 nnn 使得: n=x−xey−ye {n = \\frac {x-x_e}{y-y_e} } n=y−yex−xe 有麦卡米算法公式: Tc=McCamy(n)=−449⋅n3+3525⋅n2−6823.3⋅n+5520.33 {\\displaystyle T_c = McCamy(n) = -449 \\cdot n^{3} + 3525 \\cdot n^{2}-6823.3 \\cdot n + 5520.33} Tc=McCamy(n)=−449⋅n3+3525⋅n2−6823.3⋅n+5520.33 因为只采用了单点的方式逼近 ,算法在保证 精确度的条件下,仅能用于计算物理色温接近于 间的相关色温。 1999年,哈维尔·埃尔南德斯·安德烈斯(Javier Hernández-Andrés) 等人提出的,“在麦卡米算法基础上,采用指数函数的形式以提升公式适用范围” 的改进建议 [33] 。哈维尔等人在论文中给出了两段测量结果,将估值范围扩展到了 [3000 K, 8⋅105 K][3000 \\ K, \\ 8 \\cdot 10^5\\ K][3000 K, 8⋅105 K] 。其改进的指数估值函数为: Tc=A0+A1⋅e−nT1+A2⋅e−nT2+A3⋅e−nT3 {\\displaystyle T_c = A_{0}+A_{1} \\cdot e^{\\frac{-n}{T_{1}}} + A_{2} \\cdot e^{\\frac{-n}{T_{2}}} + A_{3} \\cdot e^{\\frac{-n}{T_{3}}} } Tc=A0+A1⋅eT1−n+A2⋅eT2−n+A3⋅eT3−n 对应生效范围被分为两段,nnn 值计算同麦卡米,其余固定参照点参数取如下《改进指数估值法适用范围表》[33] 的标定值: 指数改进版虽然提升了估值范围,但同时也提升了算法的复杂度。大部分工程相关色温都在传统麦卡米算法适用范围内,这使得改进方法有些鸡肋。相较于使用范围广但复杂度高的算法,传统麦卡米算法就能胜任,这也是 CIE 暂时没有采纳此建议的原因。不过,CIE 将其列入为对传统麦卡米更广域范围的补充方法中,以被特殊情况使用。 在从已知 CIE 色度获取相关色温的手段已经基本够用的情况下,剩下相对急迫的问题,就是找到从已知相关色温反向求其在 CIE 色度的快速算法了。 由相关色温(CCT)推算色度的反向逼近算法 由相关色温反向计算色度的算法,在 2002年和 2006年前并没有太多突破。一方面是因为,如果已知相关色温,那么我们完全可以将其等效为物理色温带入 CIE 色温函数中,直接以求得的物理色温的色度代替;另一方面,也的确没有找到除了直接使用反向求解外的,在满足精度条件的同时还能降低计算复杂度的近似算法来解决这一问题。 2002年,由 康奉顺(Bongsoon Kang)等人 利用相关色温等温线的特点,用双步逐级进行的三次厄尔密样条差值法,构建了一组误差可接受的求解方程 [34] 。此方法在 2006年,经过金敬熙(Kyounghae Kim)等人的进一步测量和推导后,形成了现有的由 TcT_cTc 求色度 (x,y)(x, y)(x,y) 的快速近似值算法 [34]: x={−0.2661239⋅109Tc3−0.2343589⋅106Tc2+0.8776956⋅103Tc+0.179910 1667K≤Tc≤4000K−3.0258469⋅109Tc3+2.1070379⋅106Tc2+0.2226347⋅103Tc+0.240390 4000K≤Tc≤25000Ky={−1.1063814⋅x3−1.34811020⋅x2+2.18555832⋅x−0.20219683 1667K≤Tc≤2222K−0.9549476⋅x3−1.37418593⋅x2+2.09137015⋅x−0.16748867 
2222K≤Tc≤4000K+3.0817580⋅x3−5.87338670⋅x2+3.75112997⋅x−0.37001483 4000K≤Tc≤25000K {\\displaystyle \\begin{aligned} x &= { \\begin{cases} -0.2661239 \\cdot {\\frac {10^9}{T_c^3}} -0.2343589 \\cdot {\\frac {10^6}{T_c^2}} +0.8776956 \\cdot {\\frac {10^3}T_c} +0.179910 \\ \\ \\ &1667{\\text{K}}\\leq T_c\\leq 4000{\\text{K}}\\\\ -3.0258469 \\cdot {\\frac {10^9}{T_c^3}} +2.1070379 \\cdot {\\frac {10^6}{T_c^2}} +0.2226347 \\cdot {\\frac {10^3}T_c} +0.240390 \\ \\ \\ &4000{\\text{K}}\\leq T_c\\leq 25000{\\text{K}} \\end{cases} } \\\\ y &= { \\begin{cases} -1.1063814 \\cdot x^{3} -1.34811020 \\cdot x^{2} +2.18555832 \\cdot x -0.20219683 \\ \\ \\ &1667{\\text{K}}\\leq T_c\\leq 2222{\\text{K}}\\\\ -0.9549476 \\cdot x^{3} -1.37418593 \\cdot x^{2} +2.09137015 \\cdot x -0.16748867 \\ \\ \\ &2222{\\text{K}}\\leq T_c\\leq 4000{\\text{K}}\\\\ +3.0817580 \\cdot x^{3} -5.87338670 \\cdot x^{2} +3.75112997 \\cdot x -0.37001483 \\ \\ \\ &4000{\\text{K}}\\leq T_c\\leq 25000{\\text{K}} \\end{cases} } \\\\ \\end{aligned} } xy=⎩⎪⎪⎨⎪⎪⎧−0.2661239⋅Tc3109−0.2343589⋅Tc2106+0.8776956⋅T103c+0.179910 −3.0258469⋅Tc3109+2.1070379⋅Tc2106+0.2226347⋅T103c+0.240390 1667K≤Tc≤4000K4000K≤Tc≤25000K=⎩⎪⎨⎪⎧−1.1063814⋅x3−1.34811020⋅x2+2.18555832⋅x−0.20219683 −0.9549476⋅x3−1.37418593⋅x2+2.09137015⋅x−0.16748867 +3.0817580⋅x3−5.87338670⋅x2+3.75112997⋅x−0.37001483 1667K≤Tc≤2222K2222K≤Tc≤4000K4000K≤Tc≤25000K 但是这一套算法,仍然无法代替非精确场景下,直接通过对应物理色温计算普朗克轨迹上色度的方法实用。因此,CIE 也和麦卡米指数逼近的情况一样,仅是将其列入了相关色温在需求精确值情况下的补充。这里有所了解即可。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_4_5.html":{"url":"Chapter_2/Language/cn/Docs_2_4_5.html","title":"2.4.5 标准光源(Standard Illuminants)& 白点(White Point)","keywords":"","body":"2.4.5 标准光源(Standard Illuminants)& 白点(White Point) 相比其他几个概念,色温 无疑要“物理”的多。所以在工业体系中,由色温衍生出的指标有着更多的应用。CIE 以色温参考,从 1960 年后定义了一系列指定色温的可见光光源点(族)来规范工业用光标准,被称为 标准光源(Standard Illuminants)*。 CIE 标准光源主要分为由 A 到 F,外加 LED 的 7 个类别,分别是: A类光源点,代表相关色温 Tc≈2856 KT_{c} \\approx 2856 \\ KTc≈2856 K 的白炽发光体; B类光源点,代表相关色温 Tc≈4874 KT_{c} \\approx 4874 \\ KTc≈4874 K 的正午日光; C类光源点,代表相关色温 Tc≈6774 KT_{c} \\approx 6774 \\ KTc≈6774 K 的平均日光; D类光源族,代表标准日光源,其存在多个指标,常用有( D50、D55、D60、D65、D75 ); E类光源点,代表相关色温 Tc≈5455 KT_{c} \\approx 5455 \\ KTc≈5455 K 的均匀发光体; F类光源族,代表荧光发光体,常用为 F1~F12 和 FL3.1~FL3.15 的共计 27 个阶梯指标; LED光源族,代表相关色温范围在 [2700 K, 6600 K][2700 \\ K, \\ 6600\\ K][2700 K, 6600 K] 的 LED 光源,于 2018 年最新提出; 我们将光源的指定色温带入上一节中讲解过的 CIE 色温函数,就可以求得对应的色度了。下表中列出了常用的 CIE 标准光源的对应结果 [36] [37] [38] [39] : 除了为工业用光服务外,标准光源对设备相关色彩空间也有着至关重要的作用。设备指定的作为白点的标准光源,将会直接影响设备的色彩表示,从而产生不同的设备色域范围。想要理解这一点,首先就需要了解什么是 白点(White Point)。 白点(White Point)与白点选择对设备相关色彩空间的影响 白点(White Point) 是指一个被用于表示色彩空间标准纯白色的色度点。白点的选取直接影响到色彩空间对颜色的偏向。因此,我们将通过调整白点指定色度,来影响色彩空间实际颜色表示的操作,称为 色温白平衡(Color Temperature White Balance)。有关于包含 白平衡(White Balance) 在内的 颜色平衡(Color Balance) 部分,本书将会在后续的特效处理一章中详细展开。 白点除了选用标准光源外,也可以使用任意色度点。不过,因为太阳是一个标准的黑体辐射源,而人对光线颜色的感知多依赖于阳光。因此,白点最常见的还是取 D65 日光光源,或者其他位于普朗克轨迹上的色温所对应的色度。由于 CIE 规定标准光源三刺激值的 YYY 值被指定为 Y=100Y = 100Y=100 ,实际的标准光源在 XYZ 下的颜色表示值 C(X,Y,Z)C(X,Y,Z)C(X,Y,Z) 与依据色温 TTT 直接计算的 CT(XT,YT,ZT)C_T(X_T,Y_T,Z_T)CT(XT,YT,ZT) 间,存在放缩关系: C=100YT⋅CT C = {\\frac{100}{Y_T}} \\cdot C_T C=YT100⋅CT 下表列出了一些常被取用作为 D 系标准光源替代 的,其他普朗克轨迹关键点在 XYZ 上的色度表示: 对应从低到高的颜色变化如图: 图 2-17 从 1000K 到 12000K 色温颜色表示 由于 设备相关色彩空间,在颜色表示上依赖于设备本身,而这种依赖关系的直观体现就是:颜色的存储,往往采用色彩空间内选定颜色与白点的色度向量差值,或类似变体,经过归一化来定义的。这么做也是一种无奈的妥协:由于设备存储介质空间有限,颜色在保存上需要离散化和均匀化,而最广泛使用的 RGB & XYZ 设备无关色彩空间都不能满足这两个要求。于是,存储问题结合色温的特征,使得不同白点的选取将会直接导致,颜色从数据还原至当前设备色彩空间后的色度与实际期望色度的偏差。 
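As a concrete illustration of the two conventions just described — selecting a white point by its correlated color temperature, and fixing the illuminant's Y at 100 — the sketch below turns a CCT into an XYZ white point using the Kang/Kim piecewise-cubic approximation quoted in 2.4.4. The function names are hypothetical, and the Planckian point it returns for 6504 K is near, but not identical to, the D65 illuminant, since D65 itself lies slightly off the Planckian locus.

```cpp
// Minimal sketch: derive an XYZ white point from a chosen correlated color
// temperature, using the Kang/Kim piecewise-cubic approximation of the
// Planckian locus quoted in 2.4.4 and the Y = 100 illuminant convention from
// this section. Function names are illustrative, not a standard API.
#include <cmath>
#include <cstdio>

// CCT (kelvin, valid roughly 1667K..25000K) -> CIE 1931 chromaticity (x, y).
static void cct_to_xy(double T, double &x, double &y) {
    double t = 1e3 / T, t2 = t * t, t3 = t2 * t;             // powers of 10^3 / T
    x = (T <= 4000.0)
        ? -0.2661239*t3 - 0.2343589*t2 + 0.8776956*t + 0.179910
        : -3.0258469*t3 + 2.1070379*t2 + 0.2226347*t + 0.240390;
    double x2 = x * x, x3 = x2 * x;
    if (T <= 2222.0)      y = -1.1063814*x3 - 1.34811020*x2 + 2.18555832*x - 0.20219683;
    else if (T <= 4000.0) y = -0.9549476*x3 - 1.37418593*x2 + 2.09137015*x - 0.16748867;
    else                  y = +3.0817580*x3 - 5.87338670*x2 + 3.75112997*x - 0.37001483;
}

// (x, y) plus the illuminant convention Y = 100 -> XYZ white point.
static void xy_to_white_XYZ(double x, double y, double &X, double &Y, double &Z) {
    Y = 100.0;
    X = (Y / y) * x;
    Z = (Y / y) * (1.0 - x - y);
}

int main() {
    double x, y, X, Y, Z;
    cct_to_xy(6504.0, x, y);           // Planckian point near the D65 CCT (not D65 itself)
    xy_to_white_XYZ(x, y, X, Y, Z);
    std::printf("x=%.4f y=%.4f  ->  X=%.3f Y=%.3f Z=%.3f\n", x, y, X, Y, Z);
    return 0;
}
```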
对于设备厂商(或软件供应商),一种可选的白点决策方案是在基于自身产品特性制定相关色彩空间时,首先通过 CIE 色度图将当前设备可表示的颜色边界确定下来,再依据由可表达颜色边界围成的闭包图形中心色度点附近的标准光源来确定白点。从而在一定程度上避免白点导致的定制色彩空间的均色问题。但均色问题从来不是一个能通过白点来解决的单一问题,在这种场景下,最大的影响其实来源自产品本身。所以大多数厂商还是以 CIE 建议,直接指定 D65 来避免这一吃力不讨好的过程。而有关产品本身色域对标准色域表达程度的衡量指标,则被用 显色指数 来更为直观的说明了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_4_6.html":{"url":"Chapter_2/Language/cn/Docs_2_4_6.html","title":"2.4.6 显色指数(Color Rendering Index)","keywords":"","body":"2.4.6 显色指数(Color Rendering Index) 显色指数(CRI [Color Rendering Index]) 是用来指代设备期望表达颜色与实际展示颜色差异程度的指标,单位为 RaRaRa ,取值范围 [0 Ra, 100 Ra][0 \\ Ra, \\ 100 \\ Ra][0 Ra, 100 Ra] 。通俗来讲, 0 Ra0 \\ Ra0 Ra 意味着完全偏差,比如黑洞; 100 Ra100 \\ Ra100 Ra 则代表着 100% 的颜色还原,比如太阳。 CIE 为了量化显色指数的测量标准,在 1995 年给出了一组被称为 CIE 基准颜色(CIE SC [CIE 1995 Standard Color]) 的 测试用例(TCS [Test Color Samples]) 如下: 这组测试用例,在随后的大量验证中被发现是 不太准确的。 2005 年,专注于日光颜色还原的,现如今已被 爱丽色(X-Rite) 收购的 原科尔莫根(Kollmorgen)子公司格雷塔格(Gretag AG)和麦克白(Macbeth),基于自身及爱丽色(X-Rite)生产的分光光度仪和比色计工程报告的统计信息,对比了 CIE 基准颜色标准的弊端 [40] [41] ,并推出了一套新的颜色测试标准用例。这套用例就是随后被广泛使用的 24 色标准色卡(Color Checker),也被称为 麦克白标准色卡(MCC [Macbeth Color Checker])。 图 2-18 MCC 2005 标准色卡 目前工程上大都采用 MCC 作为设备显色指数的测试标准。除了标准 24 色外,还有更为丰富的 160 色。颜色的丰富程度有助于提升测量的准确性,因此,在更为严苛标准下得到的显色指数结果,将更具有代表性。 最新一次的基于 CIE XYZ 的校准结果《显色指数(CRI)MCC 颜色标准测试用例》[42] [43] 如下,可作为工程参考: 那么怎么计算 CRI 呢?最为简单的方法就是计算设备实际显示颜色和目标颜色的色差,并归一化。1964 年,CIE 提出了 UVW 色彩空间(CIE 1964 U* V* W* Color Space),作为对于 1960 UCS 在归一化能力上的补充。UVW 通过引入白点,使 UCS 上表示的颜色能够被以相对白点坐标的形式转换到一个等大的数值范围内,从而解决了显色指数的计算问题。此后,CIE 将 UVW 作为 UCS 的 特定补充方案,计入到了 XYZ 的体系内并 沿用至今。 假设,当前我们测得的颜色 在 CIE 1960 UCS 中的色度 为 (u, v)(u,\\ v)(u, v) ,取 白点 为 (u0, v0)(u_0,\\ v_0)(u0, v0) 。记 CIE 1960 UCS 中 颜色 为 (U,V,W)(U, V, W)(U,V,W) ,对应 CIE 1964 UVW 中坐标为 (U∗,V∗,W∗)(U^*, V^*, W^*)(U∗,V∗,W∗) ,有: (u,v)=(4x−2x+12y+3, 6y−2x+12y+3)(U∗,V∗,W∗)=(13W∗⋅(u−u0), 13W∗⋅(v−v0), 25Y13−17) {\\displaystyle \\begin{aligned} (u, v) &= ({\\frac {4x}{-2x+12y+3}}, \\ {\\frac {6y}{-2x+12y+3}}) \\\\ (U^*, V^*, W^*) &= (13W^{*} \\cdot (u-u_{0}), \\ \\ \\ 13W^{*} \\cdot (v-v_{0}), \\ \\ \\ {25Y^{\\frac {1}{3}}-17}) \\end{aligned} } (u,v)(U∗,V∗,W∗)=(−2x+12y+34x, −2x+12y+36y)=(13W∗⋅(u−u0), 13W∗⋅(v−v0), 25Y31−17) 带入 CIE XYZ 色差计算规则,就有 色差 ΔC\\Delta CΔC 取欧氏距离: ΔC=ΔE(U∗,V∗,W∗)=(ΔU∗)2+(ΔV∗)2+(ΔW∗)2 {\\displaystyle \\begin{aligned} \\Delta C = \\Delta E(U^*, V^*, W^*)={\\sqrt {\\left(\\Delta U^{*}\\right)^{2}+\\left(\\Delta V^{*}\\right)^{2}+\\left(\\Delta W^{*}\\right)^{2}}} \\end{aligned} } ΔC=ΔE(U∗,V∗,W∗)=√(ΔU∗)2+(ΔV∗)2+(ΔW∗)2 基于 CIE 颜色标准规定,我们要求的 显色指数 为 RaRaRa 在 UVW 中有: Ra=100−4.6⋅ΔEUVW=100−4.6⋅ΔC {\\displaystyle \\begin{aligned} Ra = 100 - 4.6 \\cdot \\Delta E_{UVW} = 100 - 4.6 \\cdot \\Delta C \\end{aligned} } Ra=100−4.6⋅ΔEUVW=100−4.6⋅ΔC 虽然 CIE 对 UVW 的定义是基于 CIE SC,但 MCC 仍然可以使用此快速算法。我们将上述整个计算过程统称为 CIE 色度自适应转换(CAT [Chromatic Adaptation Transformation])的 CRI 公式,简称 CIE CAT-CRI。 到此,色彩的度量的关键指标基本介绍完毕。不难发现,每一次色彩关键标准的制定都与设备无关色彩空间的迭代密切相关。每一个设备无关色彩空间的设计,都针对性的解决某一种顺承而来的色彩度量问题。可以说,正是这些设备无关色彩空间,共同构成了色彩衡量发展的里程碑。 现在,我们已经从各个度量指标的演化角度,对概念进行了整理。是时候从发展史出发,来纵观整个过程中这些里程碑式的经典色彩空间了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_5.html":{"url":"Chapter_2/Language/cn/Docs_2_5.html","title":"2.5 经典色彩空间(Classical Color Space)","keywords":"","body":"2.5 经典色彩空间(Classical Color Space) 统一的标准制定和实践的演化推进总是需要循序渐进。而各个 经典色彩空间(Classical Color Space) 就是此领域内的关键节点。 
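Before the survey of the classical color spaces below, here is a small recap sketch of the CAT-CRI style calculation from 2.4.6: a reference color and the color a device actually renders are both mapped into CIE 1964 U\*V\*W\* relative to a shared white point, and the resulting color difference is converted into a rendering index. The sample numbers, the D65-like white point, and the function names are illustrative assumptions; the full CRI procedure, which averages over the TCS/MCC samples and includes chromatic adaptation, is not reproduced here.

```cpp
// Minimal sketch of the CAT-CRI style calculation recapped from 2.4.6:
// take a reference color and the color a device actually renders (both as
// CIE xyY), move them into CIE 1964 U*V*W* relative to a shared white point,
// and turn the color difference into a special rendering index
// Ra_i = 100 - 4.6 * dE. Sample values and function names are illustrative.
#include <cmath>
#include <cstdio>

struct xyY { double x, y, Y; };
struct UVW { double U, V, W; };

// CIE 1931 (x, y) -> CIE 1960 UCS (u, v).
static void xy_to_uv(double x, double y, double &u, double &v) {
    double d = -2.0 * x + 12.0 * y + 3.0;
    u = 4.0 * x / d;
    v = 6.0 * y / d;
}

// xyY color -> CIE 1964 U*V*W*, relative to the white point chromaticity (u0, v0).
static UVW to_UVW(const xyY &c, double u0, double v0) {
    double u, v;
    xy_to_uv(c.x, c.y, u, v);
    UVW r;
    r.W = 25.0 * std::cbrt(c.Y) - 17.0;
    r.U = 13.0 * r.W * (u - u0);
    r.V = 13.0 * r.W * (v - v0);
    return r;
}

int main() {
    // Illustrative numbers only: a target color and the device's rendering of it,
    // both referred to an assumed D65-like white point.
    xyY target   = {0.400, 0.350, 30.0};
    xyY rendered = {0.395, 0.356, 29.0};
    double u0, v0;
    xy_to_uv(0.3127, 0.3290, u0, v0);

    UVW a = to_UVW(target, u0, v0), b = to_UVW(rendered, u0, v0);
    double dE = std::sqrt((a.U-b.U)*(a.U-b.U) + (a.V-b.V)*(a.V-b.V) + (a.W-b.W)*(a.W-b.W));
    double Ra = 100.0 - 4.6 * dE;      // special rendering index for this single sample
    std::printf("dE(UVW)=%.3f  Ra=%.2f\n", dE, Ra);
    return 0;
}
```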
在色彩的衡量中,我们了解了色彩空间偏重描述的特性。不同色彩空间中的相同颜色,必须得经过适当的映射变化和基准变化,才能相互等价。各类颜色描述,需要依托其描述本身对应的色彩空间来看,才会具有意义。 因此,为了简明扼要的阐述转换关系,此处假设用于例举的经典色彩空间,其 RGB 三色基准波长一致,即都为 CIE 1931 RGB 测定标准值。白点统一取用 D65 。而后文中介绍色彩空间所用的配色函数,如无特殊指定,则都为广义配色函数。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_5_1.html":{"url":"Chapter_2/Language/cn/Docs_2_5_1.html","title":"2.5.1 光学三原色色彩空间(RGB)","keywords":"","body":"2.5.1 光学三原色色彩空间(RGB Color Space) 光学三原色色彩空间(RGB Color Space) 又被称为 光三原色空间 或 RGB 色彩空间。光学三原色色彩空间,是对颜色的加法混合论的有效应用。以光学三原色(RGB)的叠波权重作为三维坐标单位轴,来表示大部分可见光颜色的一种色彩模型。从亥姆霍兹的三色理论之后,光学三原色被广泛的用来表示颜色特性,但并没有形成工程化的系统。 图 2-19 光学三原色色彩空间(RGB Color Space)坐标系 而由格拉斯曼颜色定律可知,人对颜色的感知其实是比较线性的。所以,光学三原色色彩空间的颜色表示非常简洁。如果记目标颜色为 CRGBC_{RGB}CRGB ,那么 配色函数 为: CRGB=R⋅Red+G⋅Green+B⋅Blue=Vector[R,G,B] C_{RGB} = R \\cdot Red + G \\cdot Green + B \\cdot Blue = Vector[R, G, B] CRGB=R⋅Red+G⋅Green+B⋅Blue=Vector[R,G,B] 所有可见光都可以利用此公式表示出来。 光学三原色色彩空间的基准取自 RGB 的锚定,因此 RGB 三色的代表波长选取,将会影响整个光学三原色色彩空间的颜色表示水平。 由于足够简单且便于量化,基于光学三原色色彩空间配色函数的有局限改版模型,如 IBM RGB、Adobe RGB等,被广泛使用于计算机科学。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_5_2.html":{"url":"Chapter_2/Language/cn/Docs_2_5_2.html","title":"2.5.2 颜料三原色色彩空间(CMY / CMYK )","keywords":"","body":"2.5.2 颜料三原色色彩空间(CMY / CMYK Color Space) 颜料三原色色彩空间,根据其是否包含对黑色(Black)的描述,被分为 印刷三分色模型(CMY Color Space) 即 CMY 色彩空间,和 印刷四分色模型(CMYK Color Space) 即 CMYK 色彩空间。其中,CMY 即指代颜料三原色,K 则为 Black 取尾字母,以和纯蓝色(Blue)作为区分。 颜料三原色色彩空间,是对颜色的减法混合论的直接应用。 图 2-20 颜料三原色色彩空间(CMY/CMYK Color Space)坐标系 对于 CMY 色彩空间,如果记目标颜色为 CCMYC_{CMY}CCMY ,那么 配色函数 为: CCMY=C⋅Cyan+M⋅Magenta+Y⋅Yellow=Vector[C,M,Y] C_{CMY} = C \\cdot Cyan + M \\cdot Magenta + Y \\cdot Yellow = Vector[C, M, Y] CCMY=C⋅Cyan+M⋅Magenta+Y⋅Yellow=Vector[C,M,Y] 可以发现 CMY 色彩空间与 RGB 色彩空间,恰好以立方体质心堆成。因此存在转换: CCMY=1−CRGB C_{CMY} = 1 - C_{RGB} CCMY=1−CRGB 印刷三分色模型最早被应用于人们于绘画中。通过对颜料三原色的调色板混合,可以形成不同的颜色。由于 CMY 色彩空间在人类历史长河中,已被应用于绘画创作许久,因此这个颜色空间较难追溯最初的提出者了。不过真正对颜料三原色进行色彩空间的标准化工作,还是在打印机被发明后。 无论是喷墨打印机、照相打印机,还是激光打印机。打印出的结果都是依靠反射光被人们观察到的。这决定了此类型工程和绘画基本一致。早期打印机采用 CMY 色彩空间,并用红、青、黄三色混合,来实现黑色的显示。但是,这样混合出的黑色在显示上偏红黑。为了应对这种现象,人们在工程上引入了独立的黑色墨盒,以求解决黑色的打印问题。因此,为了描述被独立引入的黑色在颜色还原上的转换,提出了 CMYK 色彩空间。 CMYK 色彩空间,对黑色进行了重设。如果记目标颜色为 CCMYKC_{CMYK}CCMYK ,那么配色函数为: CCMYK=C⋅Cyan+M⋅Magenta+Y⋅Yellow+K⋅Black=Vector[C,M,Y,K] C_{CMYK} = C \\cdot Cyan + M \\cdot Magenta + Y \\cdot Yellow + K \\cdot Black = Vector[C, M, Y, K] CCMYK=C⋅Cyan+M⋅Magenta+Y⋅Yellow+K⋅Black=Vector[C,M,Y,K] 由于 CMYK 比 CMY 多一维度K,从 CMY 到 CMYK 的映射就需要进行升维。 记 K=1K = 1K=1 时, CCMYK=Vector[0, 0, 0, 1]C_{CMYK} = Vector[0,\\ 0,\\ 0,\\ 1]CCMYK=Vector[0, 0, 0, 1] ,那么 K≠1K \\neq 1K≠1 时就有: [CMYK]=[(C′−K)/(1−K)(M′−K)/(1−K)(Y′−K)/(1−K)K] ∣ [K=min(C′,M′,Y′), K≠1] {\\begin{bmatrix} C \\\\ M \\\\ Y \\\\K \\end{bmatrix}} = {\\begin{bmatrix} (C^{\\prime} - K) / (1-K) \\\\ (M^{\\prime} -K ) / (1-K) \\\\ (Y^{\\prime} - K) / (1-K) \\\\K \\end{bmatrix}} \\ \\ | \\ \\ [K = min(C^{\\prime}, M^{\\prime}, Y^{\\prime}),\\ \\ K \\neq 1] ⎣⎢⎢⎡CMYK⎦⎥⎥⎤=⎣⎢⎢⎡(C′−K)/(1−K)(M′−K)/(1−K)(Y′−K)/(1−K)K⎦⎥⎥⎤ ∣ [K=min(C′,M′,Y′), K≠1] 而从 CMYK 到 CMY 的映射,就简单了: [C′M′Y′]=[(1−K)⋅C+K(1−K)⋅M+K(1−K)⋅Y+K] {\\begin{bmatrix} C^{\\prime} \\\\ M^{\\prime} \\\\ Y^{\\prime} \\end{bmatrix}} = {\\begin{bmatrix} (1-K) \\cdot C + K \\\\ (1-K) \\cdot M + K \\\\ (1-K) \\cdot Y + K \\end{bmatrix}} ⎣⎡C′M′Y′⎦⎤=⎣⎡(1−K)⋅C+K(1−K)⋅M+K(1−K)⋅Y+K⎦⎤ 而对于 CYMK 色彩空间和 RGB 色彩空间互转,就有需要以 CMY 色彩空间作为桥梁。先根据转换方向,通过 CMY 色彩空间进行 CRGB→CCMYC_{RGB} 
\\rightarrow C_{CMY}CRGB→CCMY 或者 CCMYK→CCMY C_{CMYK} \\rightarrow C_{CMY}CCMYK→CCMY ,再通过 CMY 与 RGB 与 CMYK 的关系,进行间接转换。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_5_3.html":{"url":"Chapter_2/Language/cn/Docs_2_5_3.html","title":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space)","keywords":"","body":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space) 在经过大量对理论的实践探索后,人们发现三维坐标系统无疑是从空间和原理上,最为合适构建色彩模型的描述载体。但传统的 RGB 色彩空间由于没有系统,且存在 基准波长校准问题,并不适用于现代工业。 1931年,为了解决工业体系内颜色描述的模型化, 国际照明委员会(CIE [International Commission on Illumination]) 进行了对光学三原色色彩空间抽象汇总的工作。 现在我们所称的 RGB 色彩空间,多指 CIE RGB 色彩空间。CIE RGB 色彩空间最为重要的贡献,是在格拉斯曼颜色实验的基础上确定了光谱三刺激值,以 Red 取 700nm、Green 取 546.1nm、Blue 取 435.8nm 作为光学三原色波长的基准标定,将人眼可见光谱范围内的所有颜色,依据前文中提到的 三原色函数(Trichromatic Primaries Functions) 统一到了模型。 图 2-21 CIE RGB 色彩空间(CIE RGB Color Space)顶点色示意图 CIE RGB 色彩空间的 配色函数 直接采用 了传统三原色色彩空间的配色函数,唯一不同的只在于三原色的选取 : CRGB=R⋅Red700+G⋅Green546.1+B⋅Blue435.8=Vector[R,G,B] C_{RGB} = R \\cdot Red_{700} + G \\cdot Green_{546.1} + B \\cdot Blue_{435.8} = Vector[R, G, B] CRGB=R⋅Red700+G⋅Green546.1+B⋅Blue435.8=Vector[R,G,B] 因此,CIE RGB 也不可避免的继承了光学三原色色彩空间的 负色匹配 问题。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_5_4.html":{"url":"Chapter_2/Language/cn/Docs_2_5_4.html","title":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space)","keywords":"","body":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space) 1931年,国际照明委员会(CIE [International Commission on Illumination]) 提出,以经过设计的 XYZ 基准坐标系来锚定 RGB 边界的方案可以解决问题。这一映射方案所对应的颜色描述模型,被称为 XYZ 色彩空间(XYZ Color Space) [12] [13] 。 CIE 以线性等式关系构建了 XYZ 系统与 RGB 系统的转换,以三刺激函数(Tristimulus Values Functions)使可见光基于 XYZ 坐标的混合向量全部局限于正象限 [X≥0, Y≥0, Z≥0]。 如果记目标颜色为 CXYZC_{XYZ}CXYZ ,一单位 RGB 到一单位 XYZ 有: 从 R→XR \\rightarrow XR→X 的转换因子为 CrxC_{rx}Crx ,从 G→YG \\rightarrow YG→Y 的转换因子为 CgyC_{gy}Cgy ,从 B→ZB \\rightarrow ZB→Z 的转换因子为 CbzC_{bz}Cbz 那么 XYZ 色彩空间的 配色函数 为: CXYZ=X⋅CrxR+Y⋅CgyG+Z⋅CbzB=Vector[X,Y,Z] C_{XYZ} = X \\cdot C_{rx}R + Y \\cdot C_{gy}G + Z \\cdot C_{bz}B = Vector[X, Y, Z] CXYZ=X⋅CrxR+Y⋅CgyG+Z⋅CbzB=Vector[X,Y,Z] 而从 RGB 到 XYZ 是天然可转的,记转换矩阵为 MRGB2XYZM_{RGB2XYZ}MRGB2XYZ ,那么有映射: CXYZ=MRGB2XYZ⋅CRGB C_{XYZ} = M_{RGB2XYZ} \\cdot C_{RGB} CXYZ=MRGB2XYZ⋅CRGB 即: [XYZ]=[+0.49000+0.31000+0.20000+0.17697+0.81240+0.01063+0.00000+0.01000+0.99000]⋅[RGB] {\\displaystyle {\\begin{bmatrix} X \\\\ Y \\\\ Z \\end{bmatrix}}= {\\begin{bmatrix} +0.490\\,00 & +0.310\\,00 & +0.200\\,00\\\\ +0.176\\,97 & +0.812\\,40 & +0.010\\,63\\\\ +0.000\\,00 & +0.010\\,00 & +0.990\\,00 \\end{bmatrix}} \\cdot {\\begin{bmatrix} R \\\\ G \\\\ B \\end{bmatrix}} } ⎣⎡XYZ⎦⎤=⎣⎡+0.49000+0.17697+0.00000+0.31000+0.81240+0.01000+0.20000+0.01063+0.99000⎦⎤⋅⎣⎡RGB⎦⎤ 而从 XYZ 到 RGB,就相当于反向求逆,因此如下: CXYZ=MRGB2XYZ−1⋅CRGB C_{XYZ} = {M_{RGB2XYZ}}^{-1} \\cdot C_{RGB} CXYZ=MRGB2XYZ−1⋅CRGB 即: [RGB]≈[+2.36461385−0.89654057−0.46807328−0.51516621+1.42640810+0.08875810+0.00520370−0.01440816+1.00920446][XYZ] {\\displaystyle {\\begin{bmatrix}R\\\\G\\\\B\\end{bmatrix}} \\approx {\\begin{bmatrix} +2.364\\,61385 & -0.896\\,54057 & -0.468\\,07328\\\\ -0.515\\,16621 & +1.426\\,40810 & +0.088\\,75810\\\\ +0.005\\,20370 & -0.014\\,40816 & +1.009\\,20446 \\end{bmatrix}}{\\begin{bmatrix}X\\\\Y\\\\Z\\end{bmatrix}}} ⎣⎡RGB⎦⎤≈⎣⎡+2.36461385−0.51516621+0.00520370−0.89654057+1.42640810−0.01440816−0.46807328+0.08875810+1.00920446⎦⎤⎣⎡XYZ⎦⎤ 其中, MRGB2XYZM_{RGB2XYZ}MRGB2XYZ 为测量所得 [12](见前文)推导而出的坐标映射矩阵。 基于此映射关系,所有实际可见波长的 
视觉单色(Monochromat)和混合色 在经过坐标转换后,都可以被描述到由 XYZ 色彩空间。这为统一视觉颜色对比标准和迭代推进色彩空间色设计,创造了有力基础工具。工程中为了表示设备颜色特性,常将设备颜色范围以 XYZ 色彩空间的色度图切面,即 CIE 标准色度图(CIE Standard Observer Chromaticity Diagram) 表示。因此,CIE XYZ 颜色空间的配色函数也被称为 “CIE 标准观测者(CIE Standard Observer )”函数。 但 XYZ 的也继承了 RGB 的 “均匀色差” (即 平均色差 问题) 挑战(见前文)。人眼各类视锥细胞的数目是存在差异的。纯物理描述转换为感知上的情况,在 RGB 与 XYZ 所选基准波长条件下,就会因为人对光学三原色光线的敏感程度不同,产生冷色调区域相近颜色富集,而暖色调相近颜色离散的问题。如果取用广义色差 ,即两个颜色的欧式距离,为色差 ΔC\\Delta CΔC 的话。那么 XYZ 色彩空间中,单位 ΔC\\Delta CΔC 的颜色变化情况就显得不那么均匀。这个就是 平均色差 问题。 如何处理平均色差问题?CIE 和美标给出了不同的思路。CIE 将色差问题,拆分为色度图均匀化和白点取值影响归一化两个问题,区分考虑。提出了着重于细微色差变化的 CIE LAB 色彩空间标准,和偏重标准光源线性归一化的 CIE LUV 色彩空间标准。而美标则以商业出发点,追求色彩还原更接近人眼生理感受,同时还要兼顾工业体系中对色彩信息的精细度要求,进而推进了颜色三要素色彩空间的制定。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_5_5.html":{"url":"Chapter_2/Language/cn/Docs_2_5_5.html","title":"2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space)","keywords":"","body":"2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* color space) 1952 年,色彩科学家 里查德·塞瓦尔·亨特(Richard Sewall Hunter,1909–1991) 创建了至今任然是业界最高端颜色解决方案供应商的 亨特联合实验室(HunterLab [Hunter Associates Laboratory]),并在之后提出了著名的 Hunter L,a,b 色彩空间。 Hunter L,a,b 色彩空间结合 CIE XYZ 色彩空间,共同组成了 CIE 1976 LAB 色彩空间的前身。所以,CIE LAB 与 RGB 间需要通过 XYZ 来缔结转换关系。 1976 年,在经过一系列建议的采纳和对 1931 色彩标准体系的完善后,CIE 尝试用一种全新的角度来处理均色问题。CIE 在 Hunter L,a,b 色彩空间的基础上,沿用了 Hunter L,a,b 的色度处理方式与 CIE XYZ 体系结合,将 CIE 标准观察者应用在了 CIE 1976 LAB 色彩空间上。由于 Hunter L,a,b 设定之初的目的,就是将不同颜色间的差异更为显著的客观表示出来,因此 CIE LAB 也继承了这一特点,成为了 设备无关 适合于色差比对的色彩空间。 CIE 1976 LAB 将 XYZ 色度图(非色度平面)在其所在平面,以选定白点为中心拓扑变换为圆形,分别代表:红(Red)、绿(Green)、蓝(Blue)、黄(Yellow) 的 4 个等大象限(扇区),并以平面中心构建了二维坐标系 (a, b)(a,\\ b)(a, b) 。以平面内向量 (a, b)(a,\\ b)(a, b) 来索引实际色度。 我们知道,单纯的靠色度是没办法完全描述颜色特征的。除了色度外,我们还需要引入光亮度因素。CIE LAB 中的依旧沿用了 1960 UCS 和 1931 XYZ 中对光亮度的处理方式,单取由白到黑的 灰度线(Grey Line) 作为了光亮度的刻度。但是对与不同光亮度的切分,LAB 也对 XYZ 原有的亮度表示进行了调整。以在一定程度上保证,每个亮度下切割得到的色度平面都有相对均匀表示。 如果记目标颜色为 CLABC_{LAB}CLAB ,那么 LAB 色彩空间的 配色函数 为: CLAB=L⋆⋅Luminance+Plane(a⋆, b⋆)=Vector[L⋆,a⋆,b⋆] C_{LAB} = L^{\\star } \\cdot Luminance + Plane(a^{\\star },\\ b^{\\star }) = Vector[L^{\\star }, a^{\\star }, b^{\\star }] CLAB=L⋆⋅Luminance+Plane(a⋆, b⋆)=Vector[L⋆,a⋆,b⋆] 记 D65 白点在 XYZ 色彩空间内颜色为 CD65C_{D65}CD65 ,有色温 1960 UCS 快速计算得: CD65 (XD65, YD65, ZD65)≈(95.049, 100, 108.884) {\\displaystyle \\begin{aligned} &C_{D65}\\ (X_{D65},\\ Y_{D65},\\ Z_{D65}) \\approx (95.049,\\ 100,\\ 108.884) \\\\ \\end{aligned} } CD65 (XD65, YD65, ZD65)≈(95.049, 100, 108.884) 如果记目标颜色为 CLABC_{LAB}CLAB ,一单位 XYZ 到一单位 LAB 有: [L⋆a⋆b⋆]=[0+116016+500−500000+200−2000]⋅[F(XXwhite)F(YYwhite)F(ZZwhite)1] {\\displaystyle {\\begin{bmatrix} L^{\\star } \\\\ a^{\\star } \\\\ b^{\\star } \\end{bmatrix}}= {\\begin{bmatrix} 0 & +116 & 0 & 16 \\\\ +500 & -500 & 0 & 0 \\\\ 0 & +200 & -200 & 0 \\\\ \\end{bmatrix}} \\cdot {\\begin{bmatrix} F(\\tfrac{X}{X_{white}}) \\\\ F(\\tfrac{Y}{Y_{white}}) \\\\ F(\\tfrac{Z}{Z_{white}}) \\\\ 1 \\end{bmatrix}} } ⎣⎡L⋆a⋆b⋆⎦⎤=⎣⎢⎢⎡0+5000+116−500+20000−2001600⎦⎥⎥⎤⋅⎣⎢⎢⎡F(XwhiteX)F(YwhiteY)F(ZwhiteZ)1⎦⎥⎥⎤ 即,从 XYZ 到 LAB 有: L⋆=116⋅ F(YYD65)−16a⋆=500⋅(F(XXD65)−F(YYD65))b⋆=200⋅(F(YYD65)−F(ZZD65)) {\\displaystyle \\begin{aligned} L^{\\star }&=116 \\cdot \\ F\\!\\left({\\frac {Y}{Y_{D65}}}\\right)-16 \\\\ a^{\\star }&=500 \\cdot \\left(F\\!\\left({\\frac {X}{X_{D65}}}\\right)-F\\!\\left({\\frac {Y}{Y_{D65}}}\\right)\\right) \\\\ b^{\\star }&=200 \\cdot \\left(F\\!\\left({\\frac {Y}{Y_{D65}}}\\right)-F\\!\\left({\\frac {Z}{Z_{D65}}}\\right)\\right) \\\\ \\end{aligned} } L⋆a⋆b⋆=116⋅ 
F(YD65Y)−16=500⋅(F(XD65X)−F(YD65Y))=200⋅(F(YD65Y)−F(ZD65Z)) 其中: F(n)={n3 n>δ3n3δ2+429 n≤δ3 , δ=629 {\\displaystyle \\begin{aligned} F(n)&={ \\begin{cases} {\\sqrt [{3}]{n}} & \\ \\ \\ n > \\delta ^{3} \\\\ {\\dfrac {n}{3\\delta ^{2}}}+{\\frac {4}{29}} & \\ \\ \\ n \\le \\delta ^{3} \\end{cases} }\\ \\ \\ , \\ \\ \\delta ={\\tfrac {6}{29}} \\end{aligned} } F(n)=⎩⎨⎧3√n3δ2n+294 n>δ3 n≤δ3 , δ=296 而从 LAB 到 XYZ,就相当于反向求逆,因此如下: X=XD65⋅F−1(L⋆+16116+a⋆500)Y=YD65⋅F−1(L⋆+16116)Z=ZD65⋅F−1(L⋆+16116−b⋆200) {\\displaystyle \\begin{aligned} X &= X_{D65} \\cdot F^{-1}\\left({\\frac {L^{\\star }+16}{116}} + {\\frac {a^{\\star }}{500}}\\right) \\\\ Y &= Y_{D65} \\cdot F^{-1}\\left({\\frac {L^{\\star }+16}{116}} \\right) \\\\ Z &= Z_{D65} \\cdot F^{-1}\\left({\\frac {L^{\\star }+16}{116}} - {\\frac {b^{\\star }}{200}}\\right) \\end{aligned} } XYZ=XD65⋅F−1(116L⋆+16+500a⋆)=YD65⋅F−1(116L⋆+16)=ZD65⋅F−1(116L⋆+16−200b⋆) 其中: F−1(n)={n3 n>δ3δ2(n−429) n≤δ , δ=629 {\\displaystyle \\begin{aligned} F^{-1}(n)&={ \\begin{cases} {n^3} & \\ \\ \\ n > \\delta \\\\ {3\\delta ^2}(n-{\\frac {4}{29})} & \\ \\ \\ n \\le \\delta \\end{cases} }\\ \\ \\ , \\ \\ \\delta ={\\tfrac {6}{29}} \\end{aligned} } F−1(n)=⎩⎨⎧n33δ2(n−294) n>δ n≤δ , δ=296 可见,XYZ 与 LAB 间的转换关系,并不是线性的。由于 CIE LAB 中的白点直接参与了转换运算,白点调参对 LAB 的影响程度会更大一些。带入色差公式 ΔC=(Δa⋆)2+(Δb⋆)2{\\displaystyle \\begin{aligned} {\\displaystyle \\Delta C = {\\sqrt {\\left(\\Delta a^{\\star}\\right)^{2}+\\left(\\Delta b^{\\star}\\right)^{2}}}} \\end{aligned} }ΔC=√(Δa⋆)2+(Δb⋆)2 会发现,通过这种方式切割得到的整个人眼可见光色域范围,色差均匀程度依赖于白点的同时,也并非完全均匀。越靠近色度图白点,色差变化越小;越靠近色度图边缘,色差变化越大,不过相较于 XYZ 已有很大改善。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_5_6.html":{"url":"Chapter_2/Language/cn/Docs_2_5_6.html","title":"2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* Color Space)","keywords":"","body":"2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* color space) 1976 年,在 CIE 采纳 CIE LAB 色彩空间的同年,CIE 以 CIE 1960 UCS 和 CIE 1964 UVW(这两个在前文色彩度量中介绍过,做为补充型色彩空间,用于量化色温和 CRI 到 CIE 标准体系内)为基础 进一步拓展,提出了 CIE LUV 色彩空间。 显然,CIE LUV 提出的目的,是为了将 CIE 1960 UCS 和 CIE 1964 UVW 两个色彩空间的 特性统一 到单一色彩空间。通过整合两者在度量衡相关量方面的计算,来 化解得到目标尺度值后的色彩空间互转问题。我们知道 CIE 1960 UCS 是由 XYZ 拓扑变换而得,CIE 1964 UVW 是由 CIE 1960 UCS 引入白点而得,两者的关键点皆在于平面色度值,而两者区别只在于 UVW 引入了白点。因此,整个问题就变为,找到一个合适的映射函数(狭义配色函数),使得在任何白点取值条件下, CIE LUV 中颜色的色度转 XYZ 皆为线性。 基于此,LUV 对光亮度参数 进行了依托于白点的非线性变化。以此来保证,在不同白点选取下的色度,都能维持同 UCS 和 UVW 一致的线性转换方式。这一操作使 LUV 色彩空间不论白点如何选取,都能有从 LUV 到 XYZ 的色度的线性变换和逆变换。 如果记目标颜色为 CLUVC_{LUV}CLUV ,那么 LUV 色彩空间的 配色函数 为: CLUV=L⋆⋅Luminance+Plane(u⋆, v⋆)=Vector[L⋆,u⋆,v⋆] C_{LUV} = L^{\\star } \\cdot Luminance + Plane(u^{\\star },\\ v^{\\star }) = Vector[L^{\\star }, u^{\\star }, v^{\\star }] CLUV=L⋆⋅Luminance+Plane(u⋆, v⋆)=Vector[L⋆,u⋆,v⋆] 记 D65 白点在 XYZ 色彩空间内颜色为 CD65C_{D65}CD65 ,有色温 1960 UCS 快速计算得: CD65 (xD65, yD65, YD65)≈(0.31271, 0.32902, 100) {\\displaystyle \\begin{aligned} &C_{D65}\\ (x_{D65},\\ y_{D65},\\ Y_{D65}) \\approx (0.31271,\\ 0.32902,\\ 100) \\\\ \\end{aligned} } CD65 (xD65, yD65, YD65)≈(0.31271, 0.32902, 100) 如果记目标颜色为 CLUVC_{LUV}CLUV ,从 XYZ 到 LUV 有: (x,y)=( XX+Y+Z , YX+Y+Z )(u,v)=(4x−2x+12y+3, 9y−2x+12y+3)(u⋆,v⋆,L⋆)=F(Y)⋅(13⋅(u−uD65), 13⋅(v−vD65), 1 ) {\\displaystyle \\begin{aligned} &(x, y) = (\\ \\ \\ {\\frac {X}{X+Y+Z}} \\ \\ \\ \\ , \\ \\ \\ {\\frac {Y}{X+Y+Z}} \\ \\ \\ \\ ) \\\\ &(u, v) = ({\\frac {4x}{-2x+12y+3}}, \\ {\\frac {9y}{-2x+12y+3}}) \\\\ &(u^{\\star }, v^{\\star }, L^{\\star }) = F\\!\\left({Y}\\right) \\cdot ( 13 \\cdot \\left(u-u_{D65}\\right), \\ \\ \\ 13 \\cdot 
\\left(v-v_{D65}\\right), \\ \\ \\ 1\\ ) \\\\ \\end{aligned} } (x,y)=( X+Y+ZX , X+Y+ZY )(u,v)=(−2x+12y+34x, −2x+12y+39y)(u⋆,v⋆,L⋆)=F(Y)⋅(13⋅(u−uD65), 13⋅(v−vD65), 1 ) 其中: L⋆=F(Y)={(293)3⋅YYD65 YYD65≤δ3116⋅YYD653 −16 YYD65>δ3 , δ=629 {\\displaystyle \\begin{aligned} L^{\\star } = F(Y)&={ \\begin{cases} {\\left( {\\frac {29}{3}} \\right)^3 \\cdot {\\frac {Y}{Y_{D65}}}} & \\ \\ \\ {\\frac {Y}{Y_{D65}}} \\le \\delta ^{3} \\\\ {116 \\cdot {\\sqrt [3]{\\frac {Y}{Y_{D65}}}} \\ - 16} & \\ \\ \\ {\\frac {Y}{Y_{D65}}} > \\delta ^{3} \\end{cases} }\\ \\ \\ , \\ \\ \\delta ={\\tfrac {6}{29}} \\end{aligned} } L⋆=F(Y)=⎩⎪⎪⎨⎪⎪⎧(329)3⋅YD65Y116⋅3√YD65Y −16 YD65Y≤δ3 YD65Y>δ3 , δ=296 而从 LUV 到 XYZ,就相当于反向求逆,因此如下: (u,v)=(u⋆13⋅L⋆+uD65 , v⋆13⋅L⋆+vD65 )(x,y)=(9u6u−16v+12 , 4v6u−16v+12 )(X,Y,Z)=F−1(L⋆)⋅(9u4v, 1, 12−3u−20v4v ) {\\displaystyle \\begin{aligned} &(u, v) = ( {\\frac {u^{\\star }}{13 \\cdot L^{\\star }}} + u_{D65}\\ \\ , \\ \\ {\\frac {v^{\\star }}{13 \\cdot L^{\\star }}} + v_{D65} \\ ) \\\\ &(x, y) = ({\\frac {9u}{6u-16v+12}}\\ \\ , \\ {\\frac {4v}{6u-16v+12}} \\ ) \\\\ &(X, Y, Z) = F^{-1}(L^{\\star }) \\cdot ( {\\frac {9 u}{4 v}}, \\ \\ \\ 1, \\ \\ \\ {\\frac {12 - 3 u - 20 v}{4 v}} \\ ) \\\\ \\end{aligned} } (u,v)=(13⋅L⋆u⋆+uD65 , 13⋅L⋆v⋆+vD65 )(x,y)=(6u−16v+129u , 6u−16v+124v )(X,Y,Z)=F−1(L⋆)⋅(4v9u, 1, 4v12−3u−20v ) 其中: Y=F−1(L⋆)={YD65⋅(329)3⋅L⋆ L⋆≤8YD65⋅(L⋆+16116)3 L⋆>8 {\\displaystyle \\begin{aligned} Y = F^{-1}(L^{\\star })&={ \\begin{cases} {Y_{D65} \\cdot \\left( {\\frac {3}{29}} \\right)^3 \\cdot {L^{\\star }}} & \\ \\ \\ L^{\\star } \\le 8 \\\\ {Y_{D65} \\cdot \\left( {\\frac {L^{\\star }+16}{116}} \\right)^3 } & \\ \\ \\ L^{\\star } > 8 \\end{cases} } \\end{aligned} } Y=F−1(L⋆)=⎩⎪⎪⎨⎪⎪⎧YD65⋅(293)3⋅L⋆YD65⋅(116L⋆+16)3 L⋆≤8 L⋆>8 同 LAB,CIE LUV 的优势也在于白点确定后的快速计算。 由于 CIE LUV 并没有针对自身 LUV 色度图所在平面,即 所在平面, 做类似于 LAB 的均匀化拓扑变形。因此,LUV 在色差均匀问题上的表现,要逊于 LAB。 但是,基于 LUV 在选定白点后的线性色彩空间转换特性,LUV 在数据传输和色彩压缩方面却起到了意料之外的表现。其设计思想最终为 YUV 色彩格式的制定打下了理论基础。 既然将色差问题拆分为均匀化和归一化的间接处理方法不太行,那么以颜色三要素角度出发将色差均匀直接做为目标,是否就能得到完美答案呢?之前我们提到,于 LAB 和 LUV 同时期下的挑战者是美标 HSL。HSL 正是探索这一问题答案的先行者,虽然最终得到的结果 可能不尽如人意。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_5_7.html":{"url":"Chapter_2/Language/cn/Docs_2_5_7.html","title":"2.5.7 颜色三要素色彩空间(HSV / HSI / HSL)","keywords":"","body":"2.5.7 颜色三要素色彩空间(HSL [HSV / HSI / HSL(Lightness)]) HSL(Hue,Saturation,Luminance) 色彩空间 又被称为 颜色三要素色彩空间,是对 HSV(Hue,Saturation,Value)色彩空间、 HSI(Hue,Saturation,Intensity)色彩空间、HSL(Hue,Saturation,Lightness)色彩空间的统称,简称 三要素色彩空间。这里的 V(Value)、I(Intensity)、L(Lightness)其实代指的,基本等同于前文中提及的光亮度(Luminance)的简化概念。HSI 色彩空间,在设计理念上趋同于 HSV 色彩空间。HSL(Lightness) 在 HSV 、 HSI上进行了改进整合。因此,通常所称的 HSL 色彩空间,即为 HSL(Lightness)色彩空间。 后文中为了说明,保持 HSL 统一代称,在需要区分说明时,使用 HSV、HSI、HSL(Lightness)独立称谓指定区别。 1938年,为了解决彩色电视信号的转换和传输问题,一位名叫 乔治·瓦伦西(Georges Valensi,1889 - 1980) 的法国电信工程师,提交了以色调与光亮度来进行彩色图片编码传输的解决方案并注册了专利 [14] [15] 。方案中首次引入了使用色调和光亮度,来构建色彩空间的概念。瓦伦西在持有专利的有效时间内,经过反复延长申请,使他的专利权从 1939年 一直持续到了 1971年。尴尬的是,彩色电视机在 1946年才被 约翰·洛吉·贝尔德(J.L.Baird,1888 - 1946) 发明出来。而彩电和彩色信号,真正得到大规模商业化应用和普及的时间节点,几乎到了20世纪70年代。因此,在美电 1953年出台美国安全彩电标准和 1954年推出 RCA彩色电视机之前,瓦伦西几乎没有靠此专利得到任何收益。 图 2-22 乔治·瓦伦西(Georges Valensi)于 1945 年在美专利局注册手稿 [15] 1978年,HSV 色彩空间的概念由 阿尔维·雷·史密斯(A.R Smith,Alvy Ray Smith III,1943 - 今) 提出。HSV 的目的是为了解决计算机显示对 RGB 的主色还原问题。这要求我们提供一种更直观,并更接近经典理论的,可连续变化且动态可分的色彩模型 [16] 。 而于1978年同年,在 乔布洛夫(George H. 
Joblove) 和 格林伯格(Donald Greenberg) 发表的的论文 《为计算机图形设计的色彩空间》 中 [17],也通过引入 HSI 色彩空间,来尝试解决这个问题。论文同时还拿 HSI 与 HSV 做了比对。 为什么认为 HSV 和 HSI 是可以约等的?仅仅只是因为两者近乎先后出现于同年?并不是。最关键的判断,还是来自于 HSV 和 HSI 对颜色空间的定义。可以认为 HSV 和 HSI 的差异,是一种观察角度导致的偏差,是同种概念的参考位选取的不同而导致的。这种差异主要体现在光亮度与饱和度在模型中的处理。两者的解决方案,在这两个色彩要素的计算与设定上,各有优劣。HSI 的饱和度选取方式,让模型更接近人眼对颜色的感知,使颜色从 RGB 转换 HSI 更为便捷。但同时也导致还原相对麻烦。HSV 正好相反。那么是否存在一种模型,可以取弊存优呢? 1979年,在 美国计算机协会(ACM) 旗下的 计算机图形图像特别兴趣小组(SIGGRAPH) 组织的年度会报会议上。 泰克科技有限公司(Tektronix, Inc. US) 的工程师们提出了 HSL(Lightness)色彩空间 [18],尝试综合 HSV 和 HSI 解决色彩感知还原与颜色空间转换问题。 HSL(Lightness)从数学角度上, 以中值对 HSV 和 HSI 的光亮度概念进行了整合,使饱和度的表示得到简化,并保留了 HSI 的视觉感官还原特点。这也使 HSL(Lightness)模型,于 1979 年年末的计算机图形标准委员会(Computer Graphics Standards Committee,CGSC)报告上,被选定作为 三要素色彩空间基础标准 的原因 [19] 。 为了更好的理解这一点,需要分析 HSV、HSI、HSL(Lightness)的异同。 相同的色调拓扑计算 HSV 和 HSI 色彩空间为了计算机色彩还原服务,本身模型基于 RGB 色彩空间的拓扑变化。如果我们将 RGB 色彩空间中的 白点(White Point) 和 黑点(Black Point) 连线,那么我们就能得到一条由白到灰到黑的渐变对角线,这条线被我们称之为 灰度线(Grey Line)。 HSV 和 HSI 以灰度线作为法线,取过黑点的平面为投影平面,将 RGB 色彩空间的单位立方体投影到了此平面上。为了区别于 标准 CIE 色度平面(CIE Chromaticity Plane),这个平面被称为 HSL 色度平面(HSL Chromaticity Plane)。 图 2-23 RGB 色彩空间投影建立 HSL 色度平面(HSL Chromaticity Plane)示意图 HSL 色彩空间,以该平面做为 基准平面。取从 青色(Cyan)指向红色(Red)的连线作为基准轴,取红色为 0°,青色为 180°。 假设 RGB 色彩空间内存在颜色 CRGBC_{RGB}CRGB ,在 HSL 色度平面上的投影为 CRGB′{C_{RGB}}^{\\prime}CRGB′ 。 CRGB′{C_{RGB}}^{\\prime}CRGB′ 与黑点连线和基准轴的逆时针夹角,记为 HHH 。为了更好的表示 CRGBC_{RGB}CRGB 与其 HSL 色度平面投影的关系,瓦伦西曾在自己的专利 [14] [15] 中将, 与黑点连线的长度称为 色相(Chrominance)。在 HSL 中,继承了这一点,记为 CCC 。 图 2-24 HSL 色度平面(HSL Chromaticity Plane)示意图 需要注意的是,引入色相是为了用一个中间变量,把 CRGB′{C_{RGB}}^{\\prime}CRGB′ 的投影平面特性转化为颜色三要素的物理表述 [14] [18] 。色相本身并不是一个标准概念,在此处的意义为白点颜色与选定颜色之间的欧式距离,而 并非指 色度(Chromaticity)。它是 HSL 引入的对同色调下颜色饱和度的代称,即 狭义色差(sCA)。 介于此,为了便于说明,我们 将 HSL 的中间量 CCC 按照更贴近的含义,称为色差。 而 实际上 HHH 就是 色调(Hue),有 HHH 、 CCC 的关系为: M=max(R,G,B)C=max(R,G,B)−min(R,G,B)H=60∘×{undefined,if C=0G−BC+0,if M=RB−RC+2,if M=GR−GC+4,if M=B {\\displaystyle \\begin{aligned} &M=\\max(R,G,B) \\\\ &C =\\text {max} (R,G,B) - \\text {min} (R,G,B) \\\\ &H = 60^ \\circ \\times {\\begin{cases} \\mathrm {undefined} ,& {\\text{if }} C=0 \\\\ {\\frac {G-B} {C}} + 0 ,& {\\text{if }} M=R \\\\ {\\frac {B-R} {C}} + 2 ,& {\\text{if }} M=G \\\\ {\\frac {R-G} {C}} + 4 ,& {\\text{if }} M=B \\end{cases}} \\end{aligned} } M=max(R,G,B)C=max(R,G,B)−min(R,G,B)H=60∘×⎩⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎧undefined,CG−B+0,CB−R+2,CR−G+4,if C=0if M=Rif M=Gif M=B 这样的表示方法有些不尽如人意。因为 RGB 色彩空间在 HSL 色度平面的投影,是一个正六边形。导致了 H′H^{\\prime}H′ 在转换为角度表示上,存在分段的情况。那么如何使其简化为非条件函数表示呢?HSL 采用了 对正六边形投影做了二维拓扑变为单位圆,来处理此问题。 图 2-25 HSL 色度平面(HSL Chromaticity Plane)连续性处理拓扑示意图 取基准轴从黑点指向红色为 X轴正方向,做直角坐标系。 记为 X轴单位长度为 α\\alphaα ,Y轴单位长度为 β\\betaβ ,有: α=R−G⋅cos(60∘)−B⋅cos(60∘)=12(2R−G−B)β=G⋅sin(60∘)−B⋅sin(60∘)=32(G−B) {\\displaystyle \\begin{aligned} &\\alpha =R-G\\cdot \\cos(60^ \\circ)-B\\cdot \\cos(60^ \\circ)={\\tfrac {1}{2}}(2R-G-B) \\\\ &\\beta =G\\cdot \\sin(60^ \\circ)-B\\cdot \\sin(60^ \\circ)={\\tfrac {\\sqrt {3}}{2}}(G-B) \\\\ \\end{aligned} } α=R−G⋅cos(60∘)−B⋅cos(60∘)=21(2R−G−B)β=G⋅sin(60∘)−B⋅sin(60∘)=2√3(G−B) 那么,中间量 CCC 可以表示为: C=α2+β2 {\\displaystyle C ={\\sqrt {\\alpha ^{2}+\\beta ^{2}}}} C=√α2+β2 同时,色调(Hue) HHH 与 CCC 的关系,就可以转化为 HHH 与 α\\alphaα 、 β\\betaβ 的关系了: H=atan2(β,α) {\\displaystyle \\begin{aligned} &H =\\text{atan2} (\\beta ,\\alpha ) \\end{aligned} } H=atan2(β,α) 这样在描述上,就比较简洁了。便于计算机处理。 不同的光亮度与饱和度处理 计算色调之后,HSV、HSI、HSL(Lightness)在 光亮度(Luminance),和 饱和度(Saturation) 的处理上就存在不同了。因为以灰度线为法线的缘故,光亮度较好抽象。记各模型光亮度(Luminance)表示分别为 Lvalue=VL_{value} = VLvalue=V 、 Lintensity=IL_{intensity} = ILintensity=I 、 Llightness=LL_{lightness} = LLlightness=L 。 有 VVV 、 III 、 LLL 与原 RGB 
颜色空间内颜色 CRGBC_{RGB}CRGB 的关系如下: V=max(R,G,B)=MI =avg(R,G,B)=(R+G+B)3L=mid(R,G,B)=(max(R,G,B)+min(R,G,B))2 {\\displaystyle \\begin{aligned} &V = \\max(R,G,B) = M \\\\ &I \\ = \\text{avg} (R,G,B)={\\tfrac {(R+G+B)}{3}} \\\\ &L = \\text {mid} (R,G,B)={\\tfrac {(\\max(R,G,B)+\\min(R,G,B))}{2}} \\end{aligned} } V=max(R,G,B)=MI =avg(R,G,B)=3(R+G+B)L=mid(R,G,B)=2(max(R,G,B)+min(R,G,B)) 如果我们取色调(Hue) HHH 为 50∘50^ \\circ50∘ (偏黄) & 230∘230^ \\circ230∘ (偏蓝)。以色差 CCC 和光亮度构成坐标系,取色差 CCC 为横轴,各模型光亮度参数为纵轴。那么条件下 在 HSV、HSI、HSL(Lightness)的 色差切面(Chrominance Slice),就如下图所示: 图 2-26 HSV、HSI、HSL(Lightness)色差切面(Chrominance Slice)示意图 图中灰色区域为无效值。指定色差 CCC 与光亮度配参构成的切面,需要在坐标范围内避开无效取值。这就意味着 以色差 CCC 作为关键参数的模型,必须以区域限定的方法处理灰区问题。而 HSV、HSI、HSL(Lightness)被设计的目的,是为计算机色彩还原服务的。以条件限定的方式来处理,将会为计算机运算带来大量逻辑判断,极大的影响图片处理效率。因此,色差 CCC 并不能 被直接用作 HSL 的基础参数。这也是为何不以饱和度(Saturation)的称谓,来直接指代色差 CCC 的原因。HSL 中的饱和度概念,与实际颜色三要素的饱和度定义(狭义)存在差异。这里的饱和度,是对实际物理饱和度概念进行衍射拓展后的结果,即广义饱和度。 如何减少这些不必要的运算,得到广义饱和度参数呢?直接的做法是对 色差切面(Chrominance Slice) 进行 一定程度的形变,使得色差切面能够填充整个坐标平面。由于各模型在设定之初,已经通过取用灰度线为投影法线的方式,在几何定义上抽象出纵轴参数 VVV 、 III 、 LLL 。参数 VVV 、 III 、 LLL 直观体现了颜色三要素的光亮度(Luminance)对物理发光强度的描述。因此,只需要做水平方向的拉伸(压缩),用拓扑后的横坐标单位,来替换色差 作为模型的饱和度参数即可。记 各模型饱和度(Saturation)分别为 SHSV=SVS_{HSV} = S_{V}SHSV=SV 、 SHSI=SIS_{HSI} = S_{I}SHSI=SI 、 SHSL=SLS_{HSL} = S_{L}SHSL=SL 。 有 SVS_{V}SV 、 SIS_{I}SI 、 SLS_{L}SL 与 CRGBC_{RGB}CRGB 、色差 CCC 、各自亮度值的关系如下: SV={0,if V=0CV, otherwiseSI={0,if I=01−min(R,G,B)I, if I≠0SL={0,if L=1 or L=0C1−∣2L−1∣, otherwise {\\displaystyle \\begin{aligned} &S_{V}={ \\begin{cases} {0}, &{\\text{if }} V = 0 \\\\ {\\frac {C}{V}}, \\ \\ &{\\text{otherwise}} \\end{cases}} \\\\ &S_{I}={ \\begin{cases} {0}, &{\\text{if }} I = 0 \\\\ {1-{\\frac {\\min(R,G,B)}{I}}}, \\ \\ &{\\text{if }} {I \\neq 0} \\end{cases}} \\\\ &S_{L}={ \\begin{cases} {0}, &{\\text{if }} L = 1 {\\text{ or }} L = 0 \\\\ {\\frac {C}{1-|2L-1|}}, \\ \\ \\ \\ \\ \\ &{\\text{otherwise}} \\end{cases}} \\end{aligned} } SV=⎩⎨⎧0,VC, if V=0otherwiseSI=⎩⎨⎧0,1−Imin(R,G,B), if I=0if I≠0SL=⎩⎨⎧0,1−∣2L−1∣C, if L=1 or L=0otherwise 转换后,的 色差切面(Chrominance Slice) 就 比较连续 了: 图 2-27 HSV、HSI、HSL(Lightness)切面拓扑示意图 很容易看出 HSL(Lightness)在保证自身任意选定色调 HHH 时的色差切面不包含无效区域的同时,还具有 HSI 本身对人眼观察颜色还原的特点。而其计算过程中依赖的条件判断,则可以使用绝对值运算代替。可以说,HSL(Lightness)结合了 HSV、HSI 的优点,且一定程度上避开了两者的缺陷。 三要素色彩空间的配色函数 现在,所有要素准备齐全。如果记目标颜色为 CHSLC_{HSL}CHSL ,则 HSL 配色函数 如下: CHSL=H⋅Hue+S⋅Saturation+L⋅Luminance=Vector[H,S,L] {\\displaystyle C_{HSL} = H \\cdot Hue + S \\cdot Saturation + L \\cdot Luminance = Vector[H, S, L]} CHSL=H⋅Hue+S⋅Saturation+L⋅Luminance=Vector[H,S,L] 如果记 CHSLC_{HSL}CHSL 在 RGB 色彩空间对应颜色为 CRGB=(R,G,B)C_{RGB} = (R, G, B)CRGB=(R,G,B) ,记有 CRGB→CHSLC_{RGB} \\rightarrow C_{HSL}CRGB→CHSL 的转换函数为 FFF ,则 CHSL→CRGBC_{HSL} \\rightarrow C_{RGB}CHSL→CRGB 的反向过程就为 F−1F^{-1}F−1 。有之前使用的通用中间量: α=12(2R−G−B)β=32(G−B) C=α2+β2≈(max(R,G,B)−min(R,G,B))=range(R,G,B) {\\displaystyle \\begin{aligned} &\\quad \\quad \\quad \\quad \\quad \\alpha = {\\tfrac {1}{2}}(2R-G-B) \\quad \\quad \\quad \\beta = {\\tfrac {\\sqrt {3}}{2}}(G-B) \\ \\ \\ \\ \\ \\ \\\\ & C = {\\sqrt {\\alpha ^{2}+\\beta ^{2}}} \\approx (\\max(R,G,B) - min(R,G,B)) = \\text {range} (R,G,B) \\\\ \\end{aligned} } α=21(2R−G−B)β=2√3(G−B) C=√α2+β2≈(max(R,G,B)−min(R,G,B))=range(R,G,B) 存粹使用 α\\alphaα 、 β\\betaβ 会使计算过于复杂,因此中间量 CCC 在处理时大多数都是用原有定义代替。 α\\alphaα 、 β\\betaβ 仅用于角度计算。从之前讲解可知,这样做并不会导致偏色,而只会影响 HSL 色度平面的几何样式。结合之前的完整推导过程,带入 α\\alphaα 、 β\\betaβ 、 CCC ,能得到从 RGB 到 HSL 的映射 FFF 为: FHSV={H=atan2(β,α)S=range(R,G,B)⋅max(R,G,B)−1V=max(R,G,B)FHSI={H=atan2(β,α)S={0,if I=01−min(R,G,B)⋅avg(R,G,B)−1, if I≠0I 
=avg(R,G,B)=(R+G+B)3FHSL={H=atan2(β,α)S=12⋅range(R,G,B)⋅min(L, 1−L)−1L=mid(R,G,B)=(max(R,G,B)+min(R,G,B))2 {\\displaystyle \\begin{aligned} &F_{HSV} ={ \\begin{cases} & H = \\text {atan2} (\\beta ,\\alpha ) \\\\ & S = \\text {range} (R,G,B) \\cdot \\max(R,G,B)^{-1} \\\\ & V = \\max(R,G,B) \\end{cases} } \\\\ &F_{HSI} ={ \\begin{cases} & H = \\text {atan2} (\\beta ,\\alpha ) \\\\ & S = { \\begin{cases} {0}, &{\\text{if }} I = 0 \\\\ 1- {\\min(R,G,B)} \\cdot {\\text {avg} (R,G,B)^{-1}}, \\ \\ &{\\text{if }} {I \\neq 0} \\end{cases}} \\\\ & I \\ = \\text {avg} (R,G,B)={\\tfrac {(R+G+B)}{3}} \\end{cases} } \\\\ &F_{HSL} ={ \\begin{cases} & H = \\text {atan2} (\\beta ,\\alpha ) \\\\ & S = \\tfrac {1}{2} \\cdot \\text {range} (R,G,B) \\cdot {\\min(L,\\ 1 - L)}^{-1} \\\\ & L = \\text {mid} (R,G,B)={\\tfrac {(\\max(R,G,B)+\\min(R,G,B))}{2}} \\end{cases} } \\end{aligned} } FHSV=⎩⎪⎨⎪⎧H=atan2(β,α)S=range(R,G,B)⋅max(R,G,B)−1V=max(R,G,B)FHSI=⎩⎪⎪⎪⎨⎪⎪⎪⎧H=atan2(β,α)S={0,1−min(R,G,B)⋅avg(R,G,B)−1, if I=0if I≠0I =avg(R,G,B)=3(R+G+B)FHSL=⎩⎪⎨⎪⎧H=atan2(β,α)S=21⋅range(R,G,B)⋅min(L, 1−L)−1L=mid(R,G,B)=2(max(R,G,B)+min(R,G,B)) 而从 HSL 到 RGB ,由于色度被作为了传入参数,在转换为 RGB 时就需要处理扇区划分问题。记 ∠RG:H∈[0∘,120∘)\\angle_{RG}: H \\in [0^{\\circ}, 120^{\\circ})∠RG:H∈[0∘,120∘) , ∠GB:H∈[120∘,240∘)\\angle_{GB}: H \\in [120^{\\circ}, 240^{\\circ})∠GB:H∈[120∘,240∘) , ∠BR:H∈[240∘,360∘)\\angle_{BR}: H \\in [240^{\\circ}, 360^{\\circ})∠BR:H∈[240∘,360∘) ,其中 H=0∘H = 0^{\\circ}H=0∘ 或 H=360∘H = 360^{\\circ}H=360∘ 时,有 (R,G,B)=(1, 0, 0)(R,G,B) = (1,\\ 0,\\ 0)(R,G,B)=(1, 0, 0) 。则映射 F−1F^{-1}F−1 为: H, ∠const∈[0∘, 360∘)FHSV−1={k={(∠const+H60∘) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,V)∠const=f∠consthsv=V−VS⋅sector⇒(R,G,B)=FHSV−1(H,S,V)=(f300∘hsv, f180∘hsv, f60∘hsv)FHSI−1={k={(∠const+H60∘) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,I)∠const=f∠consthsi=I−IS⋅sector⇒(R,G,B)=FHSI−1(H,S,I)=(f300∘hsi, f180∘hsi, f60∘hsi)FHSL−1={k={(∠const+H60∘) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,L)∠const=f∠consthsl=L−LS⋅sector⇒(R,G,B)=FHSL−1(H,S,L)=(f300∘hsl, f180∘hsl, f60∘hsl) {\\displaystyle \\begin{aligned} H ,\\ {\\angle_{const}} &\\in [0^ \\circ,\\ 360^ \\circ) \\\\ {F_{HSV}}^{-1} &= { \\begin{cases} & k = \\{(\\tfrac {\\angle_{const} + H}{60^ \\circ})\\ \\bmod\\ 6\\} \\\\ & sector = \\max(0,\\ \\min(k,\\ 4-k,\\ 1)) \\\\ & f(H,S,V)_{\\angle_{const}} = f_{\\angle_{const}} ^{hsv} = V-VS \\cdot sector \\end{cases} } \\\\ &\\Rightarrow (R,G,B) = {F_{HSV}}^{-1}(H,S,V) = (f_{300^ \\circ}^{hsv},\\ f_{180^ \\circ}^{hsv},\\ f_{60^ \\circ}^{hsv}) \\\\ \\\\ {F_{HSI}}^{-1} &= { \\begin{cases} & k = \\{(\\tfrac {\\angle_{const} + H}{60^ \\circ})\\ \\bmod\\ 6\\} \\\\ & sector = \\max(0,\\ \\min(k,\\ 4-k,\\ 1)) \\\\ & f(H,S,I)_{\\angle_{const}} = f_{\\angle_{const}} ^{hsi} = I-IS \\cdot sector \\end{cases} } \\\\ &\\Rightarrow (R,G,B) = {F_{HSI}}^{-1}(H,S,I) = (f_{300^ \\circ}^{hsi},\\ f_{180^ \\circ}^{hsi},\\ f_{60^ \\circ}^{hsi}) \\\\ \\\\ {F_{HSL}}^{-1} &= { \\begin{cases} & k = \\{(\\tfrac {\\angle_{const} + H}{60^ \\circ})\\ \\bmod\\ 6\\} \\\\ & sector = \\max(0,\\ \\min(k,\\ 4-k,\\ 1)) \\\\ & f(H,S,L)_{\\angle_{const}} = f_{\\angle_{const}} ^{hsl} = {L - LS \\cdot sector} \\end{cases} } \\\\ &\\Rightarrow (R,G,B) = {F_{HSL}}^{-1}(H,S,L) = (f_{300^ \\circ}^{hsl},\\ f_{180^ \\circ}^{hsl},\\ f_{60^ \\circ}^{hsl}) \\\\ \\end{aligned} } H, ∠constFHSV−1FHSI−1FHSL−1∈[0∘, 360∘)=⎩⎪⎨⎪⎧k={(60∘∠const+H) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,V)∠const=f∠consthsv=V−VS⋅sector⇒(R,G,B)=FHSV−1(H,S,V)=(f300∘hsv, f180∘hsv, f60∘hsv)=⎩⎪⎨⎪⎧k={(60∘∠const+H) mod 6}sector=max(0, min(k, 4−k, 
1))f(H,S,I)∠const=f∠consthsi=I−IS⋅sector⇒(R,G,B)=FHSI−1(H,S,I)=(f300∘hsi, f180∘hsi, f60∘hsi)=⎩⎪⎨⎪⎧k={(60∘∠const+H) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,L)∠const=f∠consthsl=L−LS⋅sector⇒(R,G,B)=FHSL−1(H,S,L)=(f300∘hsl, f180∘hsl, f60∘hsl) 即: (R,G,B)=F−1(H,S,L)=(f300∘, f180∘, f60∘) (R,G,B) = {F}^{-1}(H,S,L) = (f_{300^ \\circ},\\ f_{180^ \\circ},\\ f_{60^ \\circ}) (R,G,B)=F−1(H,S,L)=(f300∘, f180∘, f60∘) 可以看出,排除 CRGB→CHSLC_{RGB} \\rightarrow C_{HSL}CRGB→CHSL 后 HSL 代表值的不同外, F−1F^{-1}F−1 并不存在显著差异。这正是因为 HSV、HSI、HSL(Lightness)三者的色彩空间设定,在本质上是一样的而产生的结果。 差异只存在于几何切面的抽象上。 显然 HSL 模型直观地体现了颜色三要素的两个重要事实,即: 亮度与图像的色彩信息无关,色彩信息体现自其色调和饱和度。这使得 HSL 色彩空间更适合在,对需要偏重于颜色三要素基础,进行色彩基础分析和检测的场景。 所以 HSL 的缺陷也很明显。对比 CIE LAB 和 CIE LUV,虽然 HSL 具有较好的对色彩生理学感知还原的特点,但 HSL 在 RGB 转换上却没法像 LAB 与 LUV 一样快速。后者在指定白点后,就能一线性关系将色彩转换到 XYZ 色彩空间,而 XYZ 到 RGB 只需要一个固定矩阵即可。这就意味着 HSL 在 RGB 换算上更为复杂。 另外,HSL 和 LAB 两者,都没有很好的处理到颜色压缩存储和数据传输方面的设计。除了精准调节和对比场景会使用 HSL 外(这种场景 CIE LAB 也能胜任且更精确),HSL 相较于 CIE LAB 和 CIE LUV 色彩空间(尤其是与 LUV 相比)并没有太大的优势。因此,各个组织(包括 CIE、ITU等)至今仍在尝试用更为先进的色彩统一化方案解决“均色问题”。 虽然存在各种弊端,但 HSL 对数据传输的探索和创造性的色彩空间设定,依旧 为后来 ITU 制定 YUV 色彩格式提供了不少思路上的帮助。使现代色彩存储体系,在结合 CIE 1976 UCS(即 LAB 与 LUV)的归一化和 HSL 的坐标设定的基础上,得以得到拓展。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_6.html":{"url":"Chapter_2/Language/cn/Docs_2_6.html","title":"2.6 色彩的存储","keywords":"","body":"2.6 色彩的存储 1960年,来自 贝尔实验室(Bell Laboratories) 的 穆罕默德·阿塔拉(Mohamed M. Atalla,1924 - 2009) 和 姜大元(Dawon Kahng,1931 - 1992),发现 金属氧化物半导体(MOS [Metal Oxide Semiconductor]) 可以借由场效应进行信息存储的现象,成功开发了 金属氧化物半导体场效应晶体管(MOSFET [Metal Oxide Semiconductor Field Effect Transistor])。随后,贝尔实验室(Bell Laboratories)联合喷气推进实验室(Jet Propulsion Laboratory)与其他研究机构,就 “提高图像在计算机处理过程中的效果增强”,提出了一系列用于数字图像处理(Digital Image Processing)的方法。其中 “关于如何利用有限的物理存储空间来保存图片像素点” 的部分,为图片灰度单色存储的提供了可行方案 [44]。 这一系列理论于 1964年 应用在了徘徊者7号月面探测器(Space Detector Ranger 7)的计算机软硬件设计上,并以此得到了 4300 张高分辨率月面摄影。 月面壮举极大的鼓舞了计算机图形学的发展,同时也让图片压缩存储需求开始变得至关重要。在此背景下,1979年,首个 单片数字信号处理器(DSP [Digital Signal Processor]) 诞生了。数字信号处理器通过 离散余弦变换(DCT) 技术对图片进行了 数模转换。该技术使图像像素能够以 0-1 单字节码(1-bit)的形式,存储在计算机晶体管中,形成了最初的 1-bit 灰度单色格式 [45]。 让离散化存储颜色成为了计算机图像像素存储的物理共识。 随着 19世纪 80年代个人电脑的快速发展。灰度图格式也从 单字节码(1-bit),经过 IBM 单色显示屏适配器(MDA [Monochrome Display Adapter]) 2-bit 格式,Commodore 128 所搭载 8563 显示控制器(VDC [Video Display Controller]) 提供的 4-bit 格式,演变到了Apple II 与 IBM 5150 的 8-bit 单色格式。 1981 年,IBM 结合 CIE 1976 UCS 在 RGB 色彩空间上的补充,开发并发布了携带彩色数据编解码 IBM 彩色图形适配器(CGA [Color Graphics Adapter]) 的 IBM 5153。 标志着计算机正式进入了彩色时代。自此开启了计算机 现代色彩格式(Modern Color Format) 的大门。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/Docs_2_6_1.html":{"url":"Chapter_2/Language/cn/Docs_2_6_1.html","title":"2.6.1 色彩格式(Color Format)与色彩存储","keywords":"","body":"2.6.1 色彩格式(Color Format) 色彩格式(Color Format) 包含了计算机对颜色的 存储格式(Data Format) 和 色彩格式空间(Color Format Space) 两部分。 同其他工业设备一样,计算机也受自身软硬件的限制,而需要特定的色彩模式。考虑到其本身是一种仅应用于计算机工业体系内(虽然现在计算机无处不在)的 设备相关色彩空间(Device Dependent Color Space),业内将之称为 色彩格式空间(Color Format Space),简称为 格式空间(Format Space)。 正如前文所提,色彩格式根据参考设备无关色彩空间的不同,被分为 RGB 色彩格式和 YUV 色彩格式。两者理论均衍生自 CIE 1976 UCS 的补充色彩空间方案,并在之后被分别设备相关化。 RGB 色彩格式,即 原色格式(Primaries Format),属于 CIE RGB 色彩空间体系; YUV 色彩格式,即 传输格式(Transport Format),根据 CIE LUV 特性被分属为 CIE XYZ 色彩空间体系。 RGB 与 YUV 共同组成了现代计算机色彩格式的两大分类。 为了更好的进行对比说明,我们用经典的彩色鹦鹉图片,来辅助说明不同色彩格式对图片携带颜色信息的影响。 图 2-28 模板彩色鹦鹉原色图片 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right 
reserved, powered by Gitbook. Last Updated: 2024-09-11 14:09:50

2.6.2 RGB 体系色彩格式

原色格式(Primaries Format),或称 RGB 体系色彩格式,最大的特点在于:其对颜色表示的富集程度和存储空间大小密切相关。可以说 RGB 色彩格式中,每个通道能够占用的存储空间越大,能够表示的颜色就越多,非常简单粗暴。统一来看,RGB 色彩格式的格式空间,即为归一化的 CIE RGB 色彩空间。

3-bit RGB
3-bit RGB 色彩格式采用了红绿蓝各 1-bit 的存储格式。因此,3-bit RGB 最多只能表示 2^3 = 8 种颜色:
图 2-29 3-bit RGB 可表示的所有颜色
以鹦鹉图为例,在 3-bit RGB 格式下的展示效果如下:
图 2-30 3-bit RGB 表示的鹦鹉图
此格式被广泛运用于 Oric 和 NEC 的 PC-8801 与 PC-9801 机型上。

4-bit RGBI
1981年,IBM 在其 CGA 中,以 4-bit RGBI 格式对彩色图片进行了存储。在此格式下,颜色被分为 R、G、B、I 4 个通道,每个通道各用 1-bit 表示。因此,RGBI 最多只能表示 2^3 × 2 = 16 种颜色:
图 2-31 4-bit RGBI 可表示的所有颜色
以鹦鹉图为例,在 4-bit RGBI 格式下的展示效果如下:
图 2-32 4-bit RGBI 表示的鹦鹉图
此格式只有 IBM 5153 在使用。

6-bit RGB
6-bit RGB 色彩格式采用了红绿蓝各 2-bit 的存储格式。因此,6-bit RGB 最多只能表示 (2^2)^3 = 64 种颜色:
图 2-33 6-bit RGB 可表示的所有颜色
以鹦鹉图为例,在 6-bit RGB 格式下的展示效果如下:
图 2-34 6-bit RGB 表示的鹦鹉图
此格式在 IBM 的增强图形适配器(EGA [Enhanced Graphics Adapter])上被首次运用,并在之后伴随了多个 IBM 主机版本。

9-bit RGB
9-bit RGB 色彩格式采用了红绿蓝各 3-bit 的存储格式。因此,9-bit RGB 最多只能表示 (2^3)^3 = 512 种颜色:
图 2-35 9-bit RGB 可表示的所有颜色
以鹦鹉图为例,在 9-bit RGB 格式下的展示效果如下:
图 2-36 9-bit RGB 表示的鹦鹉图
此格式最早在 1985年 的雅达利 520ST(Atari 520ST)机型上被使用。

12-bit RGB
12-bit RGB 色彩格式采用了红绿蓝各 4-bit 的存储格式。因此,12-bit RGB 最多能表示 (2^4)^3 = 4096 种颜色:
图 2-37 12-bit RGB 可表示的所有颜色
以鹦鹉图为例,在 12-bit RGB 格式下的展示效果如下:
图 2-38 12-bit RGB 表示的鹦鹉图
此格式被运用在 Apple IIGS、雅达利 STE 系列和世嘉(Sega)Game Gear 游戏机上。

15-bit RGB
15-bit RGB 色彩格式采用了红绿蓝各 5-bit 的存储格式。因此,15-bit RGB 最多能表示 (2^5)^3 = 32,768 种颜色:
图 2-39 15-bit RGB 可表示的所有颜色
以鹦鹉图为例,在 15-bit RGB 格式下的展示效果如下:
图 2-40 15-bit RGB 表示的鹦鹉图
此格式被运用在索尼的 PS1 游戏机上。

16-bit RGB(RGB565)
16-bit RGB 色彩格式采用了红蓝各 5-bit、绿色 6-bit 的存储格式。因此,16-bit RGB 最多只能表示 (2^5)^2 × 2^6 = 65,536 种颜色:
图 2-41 16-bit RGB 可表示的所有颜色
以鹦鹉图为例,在 16-bit RGB 格式下的展示效果如下:
图 2-42 16-bit RGB 表示的鹦鹉图
此格式被运用在携带有扩展图形阵列(XGA [Extended Graphics Array])的 IBM 机型上。

18-bit RGB
18-bit RGB 色彩格式采用了红绿蓝各 6-bit 的存储格式。因此,18-bit RGB 最多能表示 (2^6)^3 = 262,144 种颜色:
图 2-43 18-bit RGB 可表示的所有颜色
以鹦鹉图为例,在 18-bit RGB 格式下的展示效果如下:
图 2-44 18-bit RGB 表示的鹦鹉图
此格式被运用在 IBM 8514,以及 IBM 携带视频图像阵列(VGA [Video Graphics Array])或多色图像阵列(MCGA [Multi-Color Graphics Array])显卡的设备上。

24-bit RGB & 32-bit RGBA8888
24-bit RGB 色彩格式采用了红绿蓝各 8-bit 的存储格式。因此,24-bit RGB 最多能表示多达 (2^8)^3 = 256^3 = 16,777,216 种颜色:
图 2-45 24-bit RGB 可表示的所有颜色
以鹦鹉图为例,在 24-bit RGB 格式下的展示效果如下:
图 2-46 24-bit RGB 表示的鹦鹉图
这一格式最早于 1998年,被应用于 IBM 携带超级视频图像阵列(SVGA [Super Video Graphics Array])显卡的设备上。由于 24-bit 对应 RGB 三通道各 8-bit 的特性和硬件 RAM 非常契合,此格式至今仍为最常用的 RGB 色彩格式。配合额外的 Alpha 透明度通道,24-bit RGB 色彩格式可以被扩充为 32-bit RGBA8888 色彩格式,在颜色信息之外进一步携带透明度信息。

显然,RGB 色彩格式和物理存储空间的扩展紧密相关,其每一次可表示色阶的扩充,都意味着一次存储介质和空间的显著提升。此特点决定了,在市面上绝大多数显卡的存储及处理能力没有提升的情况下,更细腻的 RGB 色彩格式也不太可能得到推广。同理,广泛应用于图像传输的 YUV 色彩格式则是规格驱动的,更多依赖于传输协议的演变和数据带宽的更新迭代。

2.6.3 YUV 体系色彩格式

传输格式(Transport Format),即当下我们常用的 YUV 色彩格式,也被称为 YCbCr、YPbPr、Y′UV 色彩格式。其中,Y/Y′ 指代光亮度分量,U/Cb/Pb 指代蓝色色度分量,V/Cr/Pr 指代红色色度分量。YUV 色彩格式受启发自 CIE LUV 中用与 xyY 色度图有线性转换关系的 uv 分量表示平面色彩信息的思想,最初被 国际电信联盟无线电通信部门(ITU-R [International
Telecommunication Union Radiocommunication Sector]) 做为 彩色图像编解码流水线(Color Image Pipeline) 的一部分提出,用来 对图片进行压缩传输。 在之前的讨论中我们知道,CIE LUV 在指定白点情况下,可以直接将其所含色彩经由线性变换转换到 CIE XYZ 色彩空间,再从 CIE XYZ 依托固定转换矩阵,变换到 CIE RGB 色彩空间。将两个过程进行合并可知,存在从 LUV 到 RGB 的固定转换矩阵。因此,做为 CIE LUV 思路衍生下的实践产物,YUV 同样也具有这一特点。不同于 LUV 设备无关,YUV 是设备相关的。其受限于外部因素,对整体色度平面的处理上有一定程度的调整,使 YUV 根据采用规格的不同,有着不同的设备相关化调参。不过设备相关化处理带来的弊端,就是 YUV 相较于 LUV 来说色差变换更为不均匀。 当前 YUV 的常用规格 有三种:BT.601、BT.709、BT.2020。其中,BT.601 最早于 1982 年提出,最新一次修订于 2011年,适用于 标准画质电视(SDTV [Standard Definition Television]) [46] 。BT.709 最早于 1990 年提出,最新一次修订于 2015年,适用于 高清画质电视(HDTV [High Definition Television]) [47] 。BT.2020 最早于 2012 年提出,最新一次修订于 2015年,适用于 超高清画质电视(UHDTV [Ultra-High Definition Television]) [48] 。 YUV 是目前唯一做到了工程意义上打通图像数据压缩、传输和存储的色彩格式。为了便于说明,我们这里假设 Y、U、V 通道皆以当下主流的 8-bit 形式存放数据。 YUV 的数字电信号特征 YUV 被设计的目的主要就是为了进行数据传输,而数据传输就意味着数模信号转换。所以,根据可用电信号区间,YUV 存在两种有效范围:狭隘区间(Narrow Range)、完整区间(Full Range)。 狭隘区间 中,Y通道取值范围为 [16, 235][16,\\ 235][16, 235] ,U、V通道取值范围为 [16, 240][16,\\ 240][16, 240] ; 完整区间 中,Y、U、V通道取值范围均为 [0, 255][0,\\ 255][0, 255] ; 大多数应用场景下,YUV 都以狭隘范围表示,究其原因是由电讯号传输特性决定的。在广播电视系统中,为了防止过高和过低的信号造成 临界电平过载(Critical Level Overload) 现象,会人为的在信号可用模拟电平区段上,预留出一定的 “保护带”,保护最高位和最低位的电平不被使用。为了便于指代,电气学上把“保护带”称为 保护范围(Protection Range),被保护的最高位和最低位电平称为 保护电平(Protection Level),用于指代零信号量的电平被称为 消隐电平(Blanking Level),可用电平区段的上边界称为 白电平(White Level),下边界称为 黑电平(Black Level),黑白电平之间就是 信号电平(Signal Level) 了。 对于 8-bit 传输信号来说,保护电平为 0 mV0 \\ mV0 mV 和 255 mV255 \\ mV255 mV 。Y通道取 16 mV16 \\ mV16 mV 为消隐电平,可用电平区间上下分别预留了 [236, 254][236,\\ 254][236, 254] 和 [1, 15][1,\\ 15][1, 15] 的保护范围;U、V 通道则以 128 mV128 \\ mV128 mV 为消隐电平,可用电平区间上下分别预留了 [241, 254][241,\\ 254][241, 254] 和 [1, 15][1,\\ 15][1, 15] 的保护电平。所有可用的信号电平,分别组成了 Y、U、V 三通道取值范围的狭隘区间。 图 2-47 YUV Y通道信号电平分配图 [49] 图 2-48 YUV U通道信号电平分配图 [49] 图 2-49 YUV V通道信号电平分配图 [49] 对于不需要进行数据传输的场景,就不再需要保护电平了。 此时 8-bit 信号电平可以取用到 [0, 255][0,\\ 255][0, 255] 的完整范围表示。 解释清楚信号范围划分,接下来就该说明 ITU 对于 YUV 色彩格式下的 RGB YUV 颜色互转的定义了。在 YUV 和 RGB 的转换上,狭隘范围(Narrow)和完整范围(Full)并不影响推算,仅影响最终的转换矩阵结果。 YUV 与 RGB 间的相互转换 从工程角度出发,YUV 需要尽可能简单的处理过程。所以,YUV 在采用 LUV 转换思路的基础上结合了 HSL 的坐标处理思想,以 XYZ 坐标系下 xyY 色度图所在平面,截取色域三角形有效范围构建质心坐标的形式,进行了 YUV 色彩格式的格式空间关键要素定义。 不同于 LUV 和 HSL,YUV 并没有对完整的可见光色域进行拓扑变换,而是需要 手动设定 RGB 三原色的代表点和质心,来确定围成的色域范围和坐标系。因此,YUV 的色彩空间天然就是有缺陷的。不过,放弃完整色域换来了 YUV 足够通用的方法论。后续规格上的更新,只用按照工程测定结果来进行色域范围的调整,就能延续同一套计算过程满足新的需求。 这种可根据情况修整的延展性,也是 YUV 被广泛运用的原因之一。 那么,在 YUV 中 RGB 三原色的选取是否就是完全随意的呢? 
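在回答这个问题之前,这里先用一小段示意代码小结上文狭隘区间(Narrow Range)与完整区间(Full Range)之间的换算关系(以 8-bit 量化为例,Y 的有效跨度为 235 − 16 = 219,U/V 为 240 − 16 = 224;函数命名与取整方式均为本示例的假设,并非规格原文):

```python
def full_to_narrow(y, u, v):
    """8-bit Full Range → Narrow Range(示意):
    Y: [0,255] → [16,235],U/V: [0,255] → [16,240]。"""
    yn = 16 + round(y * 219 / 255)
    un = 16 + round(u * 224 / 255)
    vn = 16 + round(v * 224 / 255)
    return yn, un, vn

def narrow_to_full(yn, un, vn):
    """Narrow Range → Full Range,对落入保护范围的值做钳位。"""
    y = min(max((yn - 16) * 255 / 219, 0), 255)
    u = min(max((un - 16) * 255 / 224, 0), 255)
    v = min(max((vn - 16) * 255 / 224, 0), 255)
    return round(y), round(u), round(v)
```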
答案是否定的。 RGB 三原色代表点的选取,完全依赖于设备本身对三原色的设定。即,设备的 RGB 色彩格式的格式空间决定了设备的三原色。由于不同的设备间差异可能非常大,想要使 YUV 格式通用,就必须在 YUV 体系的色彩格式规格制定时,固定做为标准的 RGB 三色坐标,通过自身格式空间的线性特征,来抹平不同设备间的转换误差。 我们假设 YUV 格式空间中,用于参照的 R点取自 xyY 色度图中坐标 R(xR, yR)R(x_R,\\ y_R)R(xR, yR) ,G点取自 xyY 色度图中坐标 G(xG, yG)G(x_G,\\ y_G)G(xG, yG) ,B点取自 xyY 色度图中坐标 B(xB, yB)B(x_B,\\ y_B)B(xB, yB) 。有下图: 图 2-50 YUV 格式空间在 xyY 色度图上的色域裁剪说明 根据图示可知,落于 RGB 围成三角形范围内的任意点 CCC ,与三角形顶点存在关系: C=B+(gB+rB)=R+(bR+gR)=G+(bG+rG)⇒C−G=b⋅(B−G)+r⋅(R−G) {\\displaystyle \\begin{aligned} &C = B + (gB + rB) = R + (bR + gR) = G + (bG + rG) &\\Rightarrow \\\\ &C - G = b \\cdot (B - G) + r \\cdot (R - G) \\end{aligned} } C=B+(gB+rB)=R+(bR+gR)=G+(bG+rG)C−G=b⋅(B−G)+r⋅(R−G)⇒ 取质心 WWW 为 轴心。指定 YUV 色彩空间下 Y+U+V=1Y + U + V = 1Y+U+V=1 ,选择 U=Cb⋅(B−W)U = C_b \\cdot (B - W)U=Cb⋅(B−W) 、 V=Cr⋅(R−W)V = C_r \\cdot (R - W)V=Cr⋅(R−W) 为坐标轴, CbC_bCb 、 CrC_rCr 为归一化因子。有 YYY 有效区间为 [0, 1][0,\\ 1][0, 1] , UUU 有效区间为 [−Umax, Umax][-U_{max},\\ U_{max}][−Umax, Umax] , VVV 有效区间为 [−Vmax, Vmax][-V_{max},\\ V_{max}][−Vmax, Vmax] 。 这里以 YUV 对应规格选定的 RGB 三色电信号,经过 电位差伽马预矫正(Gamma pre-corrected) 得到的归一化电平测量值 (WR ,WG ,WB)(W_R \\ , W_G \\ , W_B )(WR ,WG ,WB) 为依据 [46] [47] [48] ,取 YUV 光亮度参数有线性公式 Y=WR⋅R+WG⋅G+WB⋅BY = W_R \\cdot R + W_G \\cdot G + W_B \\cdot BY=WR⋅R+WG⋅G+WB⋅B 。则由点 CCC 与质心 WWW 的向量差 C−W=(C−G)−(W−G)C - W = (C -G)-(W-G)C−W=(C−G)−(W−G) 推得: Y=WR⋅R+WG⋅G+WB⋅BU=Umax1−WB⋅(B−Y)V=Vmax1−WR⋅(R−Y) {\\displaystyle \\begin{aligned} Y &= W_R \\cdot R + W_G \\cdot G + W_B \\cdot B \\\\ U &= {\\tfrac {U_{max}} {1 - W_B}} \\cdot (B - Y) \\\\ V &= {\\tfrac {V_{max}} {1 - W_R}} \\cdot (R - Y) \\\\ \\end{aligned} } YUV=WR⋅R+WG⋅G+WB⋅B=1−WBUmax⋅(B−Y)=1−WRVmax⋅(R−Y) 上式即为 YUV 格式空间的狭义配色函数。需要注意的是,测量值 (WR ,WG ,WB)(W_R \\ , W_G \\ , W_B )(WR ,WG ,WB) 是规格强相关的。其取值仅取决于规格中指定的 RGB 三色对应电信号电配比。 根据 RGB 与 YUV 归一化后 Y+U+V=R+G+B=1Y + U + V = R + G + B = 1Y+U+V=R+G+B=1 的数理特征,很容易就能证明 YUV 和 RGB 的线性等价关系: Y+U+V=R+G+B=1=1WG(WG⋅R+WG⋅G+WG⋅B)=1WG(Y+(WG−WR)⋅G+(WG−WB)⋅B)=3Y+WG−WRWG⋅(R−Y)+WG−WBWG⋅(B−Y)=3Y+WG−WRWG⋅1−WRVmax⋅V+WG−WBWG⋅1−WBUmax⋅UR+G+B=(Y+1−WRVmax⋅V)+(Y+WRWG⋅1−WRVmax⋅V+WBWG⋅1−WBUmax⋅U)+(Y+1−WBUmax⋅U) {\\displaystyle \\begin{aligned} Y + U + V &= R + G + B = 1 \\\\ &= {\\tfrac {1}{W_G}}\\left( W_G \\cdot R + W_G \\cdot G + W_G \\cdot B \\right) \\\\ &= {\\tfrac {1}{W_G}}\\left( Y + (W_G - W_R) \\cdot G + (W_G - W_B) \\cdot B \\right) \\\\ &=3Y + {\\tfrac {W_G - W_R}{W_G}} \\cdot (R - Y) + {\\tfrac {W_G - W_B}{W_G}} \\cdot (B - Y) \\\\ &=3Y + {\\tfrac {W_G - W_R}{W_G}} \\cdot {\\tfrac {1 - W_R}{V_{max}} \\cdot V} + {\\tfrac {W_G - W_B}{W_G}} \\cdot {\\tfrac {1 - W_B}{U_{max}} \\cdot U} \\\\ R + G + B &=\\left( Y + {\\tfrac {1 - W_R}{V_{max}} \\cdot V} \\right) + \\left( Y + {\\tfrac {W_R}{W_G}} \\cdot {\\tfrac {1 - W_R}{V_{max}} \\cdot V} + {\\tfrac {W_B}{W_G}} \\cdot {\\tfrac {1 - W_B}{U_{max}} \\cdot U} \\right) + \\left( Y + {\\tfrac {1 - W_B}{U_{max}} \\cdot U} \\right) \\end{aligned} } Y+U+VR+G+B=R+G+B=1=WG1(WG⋅R+WG⋅G+WG⋅B)=WG1(Y+(WG−WR)⋅G+(WG−WB)⋅B)=3Y+WGWG−WR⋅(R−Y)+WGWG−WB⋅(B−Y)=3Y+WGWG−WR⋅Vmax1−WR⋅V+WGWG−WB⋅Umax1−WB⋅U=(Y+Vmax1−WR⋅V)+(Y+WGWR⋅Vmax1−WR⋅V+WGWB⋅Umax1−WB⋅U)+(Y+Umax1−WB⋅U) 线性的变化关系对 YUV 相当重要,这意味着上式可直接以转换矩阵 MRGB2YUVM_{RGB2YUV}MRGB2YUV 表示,有: CRGB=MRGB2YUV−1⋅CYUV=MRGB2YUV−1⋅MRGB2YUV⋅CRGB=E⋅CRGB C_{RGB} = {M_{RGB2YUV}}^{-1} \\cdot C_{YUV} = {M_{RGB2YUV}}^{-1} \\cdot M_{RGB2YUV} \\cdot C_{RGB} = E \\cdot C_{RGB} CRGB=MRGB2YUV−1⋅CYUV=MRGB2YUV−1⋅MRGB2YUV⋅CRGB=E⋅CRGB 这一点保证了不论何种设备,设备间经过 YUV 色彩格式传递的 RGB 数据,在转换前后都有一致的值,维护了数据的准确性。 现在,理论基础得到了佐证。在此条件下,如果已经测得关键参数值,怎样计算转换矩阵 MRGB2YUVM_{RGB2YUV}MRGB2YUV 呢? 
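在给出规格化的手工演算之前,可以先用一段 Python 示意代码概括这一过程:由规格测定值 (W_R, W_G, W_B) 与目标取值区间直接构造 8-bit 的 M_RGB2YUV。其中 U、V 在量化前的幅度上限 Umax = Vmax 取 0.5,偏移量加在 YUV 一侧,这是工程上常见的设定,属于本示例的假设;函数与变量命名亦为示意。

```python
def rgb2yuv_matrix(WR, WB, narrow=True, bits=8):
    """由测定值 (WR, WG, WB) 构造转换矩阵与偏移量(示意)。
    约定:Y'U'V' = M · [R, G, B]^T + offset,R/G/B 取 8-bit 数值。"""
    WG = 1.0 - WR - WB
    y_row  = [WR, WG, WB]
    cb_row = [0.5 / (1.0 - WB) * (b - y) for b, y in zip([0, 0, 1], y_row)]  # U ∝ (B - Y)
    cr_row = [0.5 / (1.0 - WR) * (r - y) for r, y in zip([1, 0, 0], y_row)]  # V ∝ (R - Y)

    peak = (1 << bits) - 1                      # 255
    ky = (235 - 16) / peak if narrow else 1.0   # Y 的信号电平跨度 219
    kc = (240 - 16) / peak if narrow else 1.0   # U/V 的信号电平跨度 224
    scale  = [ky, kc, kc]
    offset = [16, 128, 128] if narrow else [0, 128, 128]

    matrix = [[round(s * c, 3) for c in row]
              for s, row in zip(scale, [y_row, cb_row, cr_row])]
    return matrix, offset

# 以 BT.601 的测定值 (0.299, 0.587, 0.114) 代入,可得到与下文手工演算一致的矩阵:
# [[0.257, 0.504, 0.098], [-0.148, -0.291, 0.439], [0.439, -0.368, -0.071]]
print(rgb2yuv_matrix(0.299, 0.114))
```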
以 BT.601 的狭隘区间(Narrow Range) 为例。规格中取 D65 作为白点和质心 WWW ,测得 (WR ,WG ,WB)(W_R \\ , W_G \\ , W_B )(WR ,WG ,WB) 为 (0.299, 0.587, 0.114)(0.299, \\ 0.587, \\ 0.114)(0.299, 0.587, 0.114) 。经过值域范围平移后,带入狭义配色函数计算,有: (Y−16)⋅255=(+0.299⋅R+0.587⋅G+0.114⋅B)⋅(235−16)(U−128)⋅255=(−0.299⋅R−0.587⋅G+0.886⋅B)⋅(235−16)(V−128)⋅255=(+0.701⋅R−0.587⋅G−0.114⋅B)⋅(235−16) {\\displaystyle \\begin{aligned} (Y-16) \\cdot 255 &= (+0.299 \\cdot R + 0.587 \\cdot G + 0.114 \\cdot B) \\cdot (235 - 16) \\\\ (U-128) \\cdot 255 &= (-0.299 \\cdot R - 0.587 \\cdot G + 0.886 \\cdot B) \\cdot (235 - 16) \\\\ (V-128) \\cdot 255 &= (+0.701 \\cdot R - 0.587 \\cdot G - 0.114 \\cdot B) \\cdot (235 - 16) \\end{aligned} } (Y−16)⋅255(U−128)⋅255(V−128)⋅255=(+0.299⋅R+0.587⋅G+0.114⋅B)⋅(235−16)=(−0.299⋅R−0.587⋅G+0.886⋅B)⋅(235−16)=(+0.701⋅R−0.587⋅G−0.114⋅B)⋅(235−16) 换算一下就能得到 MRGB2YUVM_{RGB2YUV}MRGB2YUV 的表达式: [YUV]BT.601Narrow=[0.2570.5040.098−0.148−0.2910.4390.439−0.368−0.071]⋅([RGB]−[16128128]) {\\begin{bmatrix} Y \\\\ U \\\\ V \\end{bmatrix}}_{BT.601}^{Narrow}= {\\begin{bmatrix} 0.257 & 0.504 & 0.098 \\\\ -0.148 & -0.291 & 0.439 \\\\ 0.439 & -0.368 & -0.071 \\end{bmatrix}} \\cdot \\left( {\\begin{bmatrix} R \\\\ G \\\\ B \\end{bmatrix}} - {\\begin{bmatrix} 16 \\\\ 128 \\\\ 128 \\end{bmatrix}} \\right) ⎣⎡YUV⎦⎤BT.601Narrow=⎣⎡0.257−0.1480.4390.504−0.291−0.3680.0980.439−0.071⎦⎤⋅⎝⎛⎣⎡RGB⎦⎤−⎣⎡16128128⎦⎤⎠⎞ 可见,转换矩阵 MRGB2YUVM_{RGB2YUV}MRGB2YUV 的计算结果,只依赖于规格条件所指定的 (WR ,WG ,WB)(W_R \\ , W_G \\ , W_B )(WR ,WG ,WB) 测定值和 YUV 的取值范围。 其他规格下的计算方式也是一样,并无差异。这里列出 常用的主流规格带入公式后的结果,方便工程参考: 关于 YUV 色彩格式的格式空间部分,到这里就说明完毕。接下来我们来看组成 YUV 色彩格式的数据存储部分。 YUV 的数据存储 目前主流的 YUV 色彩格式的 存储格式族(Data Format Family) 主要有三种,分别是 YUV420、YUV422、YUV444。 YUV420 族 下的存储格式,以 4个Y通道分量共用一组UV分量构成(YYYY U V); YUV422 族 下的存储格式,以 2个Y通道分量共用一组UV分量构成(YY U V); YUV444 族 下的存储格式,三通道分量一一对应(Y U V); 而每一种 YUV 存储格式族,根据 Y通道、U通道、V通道的数据排布,又可以分为:平面(Planar)、半平面(Semi-Planar)、夹层(Interleaved)、打包(Packed) 四种存储的 数据分组类型。 平面(Planar) 类型,Y、U、V 数据独立存放; 半平面(Semi-Planar) 类型,Y通道数据独立存放,UV通道数据交替打包存放; 夹层(Interleaved) 类型,三通道数据以两个Y与一组UV为数据组,封包排列存放; 打包(Packed) 类型,三通道数据以一组YUV为数据组,封包排列存放; 因此,整个 YUV 的存储格式从属关系如下图所示: 图 2-51 YUV 存储格式(Data Format)谱系图 这些 YUV 存储格式最大的特点在于数据组成上。我们用相同颜色表示位于同一组的 YUV 数据。 假设存在一张包含 36 个像素点的 6x6 的图片(为了避免颜色重复,YUV444 用 12个像素点的 6x2 图片)。 以 Y、U、V 分别代表对应通道的有效数据,所有存储格式数据排布 《YUV 存储格式(Data Format)对比说明表》 如下: 显然,从数据的压缩程度上来说,YUV420 族明显具有较高的压缩比。但由于YUV 格式并不是完全无损的,与之相对的问题就是高压缩比导致的图片细节损失。不过图片的细部信息大都存在于灰度图上,而这部分信息主要由 Y 通道保存,因此人眼难以察觉丢失的颜色细节。相比较高压缩比带来的优势,这部分损失可以忽略不计。所以,在音视频数据传输及图像存储中,工程上常常采用 YUV420 族下的色彩格式进行保存。 至此,有关音视频工程中的图片色彩处理部分,基本讲解完毕。下一章我们将利用目前已掌握的音视频知识,来做针对一段音频和一张图片基本分析的工程实践。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_2/Language/cn/References_2.html":{"url":"Chapter_2/Language/cn/References_2.html","title":"【参考文献】","keywords":"","body":"二、【参考文献】 [1] Isaac Newton, Hypothesis explaining the properties of light, Letter from Newton to Henry Oldenburg, dated 14 December 1675. [2] Moses Harris, The Natural System of Colours and Ignaz Schiffermüller, Versuch eines Farbensystems (Vienna, 1772), plate I - project Gutenberg Ignaz Schiffermüller, Versuch eines Farbensystems (Vienna, 1772), plate I. [3] Young, T. (1802). \"Bakerian Lecture: On the Theory of Light and Colours\". Phil. Trans. R. Soc. Lond. 92: 12–48. doi:10.1098/rstl.1803.0004. [4] Glynn, Ian (2010). Elegance in Science. Oxford: Oxford University Press. pp. 147–150. ISBN 978-0-19-957862-7. [5] Stanley Finger (2001). Origins of Neuroscience: A History of Explorations into Brain Function. p. 
100. ISBN 9780195146943. [6] Svaetichin,G. (1956). Spectral response curves from single cones, Actaphysiol. scand. 39, Suppl. 134, 17–46. [7] Schubring, Gert, ed. (1996). Hermann Günther Graßmann (1809–1877): Visionary Mathematician, Scientist and Neohumanist Scholar. Boston Studies in the Philosophy of Science. Vol. 187. Springer. doi:10.1007/978-94-015-8753-2. ISBN 978-94-015-8753-2. ISSN 0068-0346. [8] K-H Schlote, Hermann Günther Grassmann and the theory of hypercomplex number systems, in Hermann Günther Grassmann (1809-1877) : visionary mathematician, scientist and neohumanist scholar (Dordrecht, 1996), 165-173. [9] G Schubring (ed.), Hermann Günther Grassmann (1809-1877) : visionary mathematician, scientist and neohumanist scholar (Dordrecht, 1996). [10] Kirschmann, A., 1896. Color-Saturation and its Quantitative Relations. American Journal of Psychology, 7, 386-404. [11] Schlatter, T., Levinson, D: Visual Usability. Principles and Practices for Designing Digital Applications, 171-211, Morgan Kaufmann, Boston 2013 [12] Smith, Thomas; Guild, John (1931–32). \"The C.I.E. colorimetric standards and their use\". Transactions of the Optical Society. 33 (3): 73–134. [13] CIE (1932). Commission internationale de l'Eclairage proceedings, 1931. Cambridge: Cambridge University Press. [14] FR patent 841335, Valensi, Georges, \"Procédé de télévision en couleurs\", published 1939-05-17, issued 1939-02-06 [15] US patent 2375966, Valensi, Georges, \"System of television in colors\", published 1945-05-15 [16] Smith, Alvy Ray (August 1978). \"Color gamut transform pairs\". Computer Graphics. 12 (3): 12–19. doi:10.1145/965139.807361. [17] Joblove, George H.; Greenberg, Donald (August 1978). \"Color spaces for computer graphics\". Computer Graphics. 12 (3): 20–25. doi:10.1145/965139.807362. [18] Ware Myers (July 1979). \"Interactive Computer Graphics: Flying High-Part I\". Computer. 12 (7): 8–17. doi:10.1109/MC.1979.1658808. S2CID 15344162. [19] Computer Graphics Staff (August 1979). \"Status Report of the Graphics Standards Planning Committee\". ACM SIGGRAPH Computer Graphics. 13 (3): 1–10. doi:10.1145/988497.988498. S2CID 43687764. [20] OpenSource Project, Color-Science, Github, https://github.com/colour-science/colour#32012colour-temperature [21] David L. Fridge, \"Aberration Synthesizer*,\" J. Opt. Soc. Am. 50, 87-87 (1960) [22] Alan J. Werner, \"Luminous Transmittance, and Chromaticity of Colored Filter Glasses in CIE 1964 Uniform Color Space,\" Appl. Opt. 7, 849-855 (1968) [23] Planck, M. (1900a). \"On an Improvement of Wien's Equation for the Spectrum\", Verh. Dtsch. Phys. Ges. Berlin 2, 202 (1900) [24] Planck, M. (1900b). \"On the Theory of the Energy Distribution Law of the Normal Spectrum\", Verh. Dtsch. Phys. Ges. Berlin 2, 237 (1900) [25] Wright, William David (1928). \"A re-determination of the trichromatic coefficients of the spectral colors\". Transactions of the Optical Society. 30 (4): 141–164. doi:10.1088/1475-4878/30/4/301. [26] Guild, J. (1932). \"The colorimetric properties of the spectrum\". Philosophical Transactions of the Royal Society of London. Series A, Containing Papers of a Mathematical or Physical Character. 230 (681–693): 149–187. Bibcode:1932RSPTA.230..149G. doi:10.1098/rsta.1933.0005. JSTOR 91229. [27] Krystek, Michael P. (January 1985). \"An algorithm to calculate correlated colour temperature\". Color Research & Application. 10 (1): 38–40. doi:10.1002/col.5080100109. [28] Borbély, Ákos; Sámson,Árpád; Schanda, János (December 2001). 
\"The concept of correlated colour temperature revisited\". Color Research & Application. 26 (6): 450–457. doi:10.1002/col.1065. Archived from the original on 2009-02-05. [29] Simons, Ronald Harvey; Bean, Arthur Robert (2001). Lighting Engineering: Applied Calculations. Architectural Press. ISBN 0-7506-5051-6. [30] Robertson, Alan R. (November 1968). \"Computation of Correlated Color Temperature and Distribution Temperature\". JOSA. 58 (11): 1528–1535. Bibcode:1968JOSA...58.1528R. doi:10.1364/JOSA.58.001528. [31] McCamy, Calvin S. (April 1992). \"Correlated Color Temperature as an explicit function of chromaticity coordinates\". Color Research & Application. 17 (2): 142–144. doi:10.1002/col.5080170211. plus erratum doi:10.1002/col.5080180223. [32] Kelly, Kenneth L. (August 1963). \"Lines of Constant Correlated Color Temperature Based on MacAdam's (u,v) Uniform Chromaticity Transformation of the CIE Diagram\". JOSA. 53 (8): 999–1003. Bibcode:1963JOSA...53..999K. doi:10.1364/JOSA.53.000999. [33] Hernández-Andrés, Javier; Lee, RL; Romero, J (September 20, 1999). \"Calculating Correlated Color Temperatures Across the Entire Gamut of Daylight and Skylight Chromaticities\" (PDF). Applied Optics. 38 (27): 5703–5709. Bibcode:1999ApOpt..38.5703H. doi:10.1364/AO.38.005703. PMID 18324081. [34] Bongsoon Kang; Ohak Moon; Changhee Hong; Honam Lee; Bonghwan Cho; Youngsun Kim (December 2002). \"Design of Advanced Color Temperature Control System for HDTV Applications\" (PDF). Journal of the Korean Physical Society. 41 (6): 865–871. Archived from the original (PDF) on 2019-03-03. [35] Kim et al., \"Color Temperature Conversion System and Method Using the Same\", issued 2006-04-04 [36] CIE Publication 15.3, CIE 15:2004, ISBN 3-901-906-33-9 [37] Equivalent White Light Sources, and CIE Illuminants (PDF), archived from the original on 2005-05-23, retrieved 2017-12-11 [38] CIE F-series Spectral Data, CIE 15.2:1986, archived from the original on 2011-07-25, retrieved 2017-12-11 [39] Colorimetry, 4th Edition, vol. CIE 015:2018, doi:10.25039/TR.015.2018, ISBN 978-3-902842-13-8 [40] Sándor, Norbert; Schanda, János (September 1, 2006), \"Visual colour rendering based on colour difference evaluations\", Lighting Research and Technology, 38 (3): 225–239, doi:10.1191/1365782806lrt168oa, S2CID 109858508. [41] Masahura NAKAYAMA and Koichi IKEDA: Comparison of Perceived Colour Differences with Colorimetric Colour Differences in Uniform Colour Spaces and Colour Appearance Model, J. Light & Vis. Env. Vol.28, No.2, 2004. [42] ColorChecker Colorimetric Data (PDF), archived (PDF) from the original on 9 October 2012, retrieved 17 April 2013. [43] Charles Poynton (2008). \"ColorChecker (‘Macbeth’) Chart\". poynton.com [44] Azriel Rosenfeld, Picture Processing by Computer, New York: Academic Press, 1969 [45] Dyer, Stephen A.; Harms, Brian K. (13 August 1993). \"Digital Signal Processing\". In Yovits, Marshall C. (ed.). Advances in Computers. Vol. 37. Academic Press. pp. 59–118. doi:10.1016/S0065-2458(08)60403-9. ISBN 978-0120121373. ISSN 0065-2458. LCCN 59015761. OCLC 858439915. OL 10070096M. [46] ITU-R, Rec. ITU-R BT.601-7, \"BT.601 : Studio encoding parameters of digital television for standard 4:3 and wide screen 16:9 aspect ratios\", Article Number E 70000, archived from the original on 2011-03-08 [47] ITU-R, Rec. ITU-R BT.709-6, \"BT.709 : Parameter values for the HDTV standards for production and international programme exchange\", Article Number E 70000, archived from the original on 2015-06-17 [48] ITU-R, Rec. 
ITU-R BT.2020-2, \"BT.2020 : Parameter values for ultra-high definition television systems for production and international programme exchange\", Article Number E 70000, archived from the original on 2015-10-14 [49] 雷霄骅, \"Color format conversion: The simplest example of libswscale based on FFmpeg (YUV to RGB)\", archived (Web: https://blog.csdn.net/leixiaohua1020/article/details/42134965) from the original on 2014-12-28 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Apex_3_Introduce.html":{"url":"Chapter_3/Language/cn/Apex_3_Introduce.html","title":"三、音视频常用基础算法","keywords":"","body":"三、音视频常用基础算法 引言 音视频中最为重要的组成部分,即是音频处理和视频处理。 音频处理应用到的基础理论,来源自:数字信号处理(Digital Signal Process)、数字合成音效(Digital Audio Effects)、语音识别(Voice Recognition)等领域。视频处理应用到的基础理论,来源自:数字信号处理(Digital Signal Process)、计算机图形学(Computer Graphics)、计算机视觉(Computer Vision)等领域。 这些学科在工程中或多或少的交叉使用,甚至本身大都为交叉学科,但最为核心的始终只有两个,即数字信号处理(DSP)和计算机图形学(CG)。所以,在正式开始学习音视频工程技术之前,首先需要回顾部分基础算法的工程特征。 本章节主要对此简单梳理,并结合伪码和 C/C++/Python/GLSL等 工程汇总说明。可以做为最小集合的背景算法知识字典,供开发过程中查阅回顾使用。 关键字:傅立叶变换、滤波算法、区域检测、光流补正、冗余控制 目录 3.1 信号分析的核心算法 - 傅立叶变换 3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT) 3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT) 3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT) 3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach) 3.2 频率信息提取 - 常用滤波算法 3.2.1 高斯滤波(Gauss Filter) 3.2.2 双边滤波(Bilateral Filter) 3.2.3 拉普拉斯滤波(Laplacian Filter) 3.2.4 马尔滤波(Marr Filter) 3.2.5 索贝尔滤波(Sobel Filter) 3.2.6 各向异性扩散(Anisotropic Diffusion) 3.3 时间冗余控制 - 常用特征提取与朴素阈值处理 3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 3.3.2 朴素目标检测结果度量 - IoU & GIoU 3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window) 3.4 空域冗余控制 - 基础光流算法与色度压缩 3.4.1 传统光流法(Classic Optical Flow Methods) 3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow]) 3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow]) 3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling]) 3.5 频域冗余控制 - 基础变换编码 3.5.1 整数离散正余弦变换(DST/DCT) 3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform]) 3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform]) 【在线展示】 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_1.html":{"url":"Chapter_3/Language/cn/Docs_3_1.html","title":"3.1 信号分析的核心算法 - 傅立叶变换","keywords":"","body":"3.1 信号分析的核心算法 - 傅立叶变换 傅立叶变换(FT [Fourier Transform]) [1] 可理解为:任意函数都存在由给定复指数函数空间(Complex Exponential Functions Space)的一组正交基(Orthogonal Bases),使得原函数可以被分解为该复指数函数空间下最大完备解的权重向量形式表示 [2] 。利用原函数与分量函数内积为该方向解分量且正交基内任意两个方向的方向函数内积为 0 的特点,来用解的人为限定有限维度子集逼近函数本身的数学方法 [3] 。这里,描述构成原函数的分量函数集与其所占权重分量(即求得的正交基),共同构成了该函数的傅里叶基(Fourier Basis)[4] [5]。 如果记原函数为 FFF,复指数函数空间为 Fω=[Fω1,Fω2, ... ,Fωn]{\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\omega_1},{\\mathcal {F}}_{\\omega_2},\\ ...\\ ,{\\mathcal {F}}_{\\omega_{n}}]Fω=[Fω1,Fω2, ... ,Fωn],傅里叶基为 F=[f^1,f^2, ... ,f^n]{\\mathcal {F}} = [\\hat{f}_1,\\hat{f}_2,\\ ...\\ ,\\hat{f}_n]F=[f^1,f^2, ... ,f^n],且 nmax=Nn_{max} = Nnmax=N,则这一关系从空间投影变换角度来看 [6],可以表示为: N⋅F=FωT⋅F=[Fω1Fω2⋮Fωn]⋅[f^1,f^2, ... ,f^n] {\\displaystyle \\begin{aligned} N \\cdot F = {\\mathcal{F}_{\\omega}}^T \\cdot {\\mathcal {F}} = { \\begin{bmatrix} \\mathcal{F}_{\\omega_1} \\\\ \\mathcal{F}_{\\omega_2} \\\\ \\vdots \\\\ \\mathcal{F}_{\\omega_n} \\end{bmatrix} } \\cdot [\\hat{f}_1,\\hat{f}_2,\\ ...\\ ,\\hat{f}_n] \\end{aligned} } N⋅F=FωT⋅F=⎣⎢⎢⎡Fω1Fω2⋮Fωn⎦⎥⎥⎤⋅[f^1,f^2, ... 
,f^n] 傅里叶变换被作为基础原理之一运用在数字信号(广义)的处理过程并处于核心地位。而在数字信号处理(DSP)中,我们把所有类型信号都抽象成,由一系列离散化数据构成的函数模型表示。这些函数并不一定都是周期性、单一维度的。这时我们需要一种手段,使得能够用统一的方式描述所有不同表征的函数,从而一致性的交付系统(不一定是电脑)处理。傅里叶变换正是这种化繁为简的理论工具(Theoretical Tools),通过它我们能够将任意信号函数转换为傅里叶级数展开,进而转化为复数平面上一系列构成谐波关系的周期性基础三角函数和表示。傅里叶变化作为对信号进行分解描述的方法论,并不局限于单维声音信号,针对二维图片信号或更高维的数据也能够拓展延伸(即可拓展性)。而这也是我们进行感知数据数字化的理论依据。 因此,理解上式如何被运用是进行学习的关键。那么这在工程上主要体现在哪儿呢?我们需要从最简单的傅里叶变换,即一维傅里叶变换开始了解。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_1_1.html":{"url":"Chapter_3/Language/cn/Docs_3_1_1.html","title":"3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT)","keywords":"","body":"3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT) 信号学中,将沿平面分布的信号称为一维信号(1D Signal),例如音频信号。 一维傅里叶变换,能够将一组满足狄利克雷条件(Dirichlet Theorem)的一维信号分解到周期性复指数波(Complex Exponential Wave)构成的二维向量空间。 从傅里叶级数(FS)到傅里叶变换(FT) 狄利克雷条件 最初被用作傅里叶级数(FS [Fourier Series])在三角函数域上进行分解的充分不必要条件 [2] [7]。在狄利克雷条件描述中,如果选定分析的周期信号 同时满足: 【单周期内,连续或存在有限个第一类间断点】; 【单周期内,存在有限数目的极大值与极小值】; 【单周期内,绝对可积】; 则,此周期信号就一定存在傅里叶三角级数的分解表示。 如果记周期信号函数 s(t)s(t)s(t) 的波长(周期)为 TTT ,角频率(角速度)为 2πT\\tfrac{2\\pi}{T}T2π 。则以信号函数波长 TTT 做可变 n∈[0, N]n \\in [0, \\ N]n∈[0, N] 等分(即步长 Step=1NStep = \\tfrac{1}{N}Step=N1 )选取分离函数。有分离函数(周期)为 Tn\\tfrac{T}{n}nT ,角频率(角速度)为 ωn=2πnT{\\omega_n} = \\tfrac{2\\pi n}{T}ωn=T2πn 。原周期信号函数 s(t)s(t)s(t) 就可以被分解为: s(t)=1N∑n=0Nan⋅cos(2πnTt) + 1N∑n=0Nbn⋅sin(2πnTt)an=∫−T2+T2s(t)⋅cos(ωnt) dt bn=∫−T2+T2s(t)⋅sin(ωnt) dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N} \\sum_{n =0}^{N} a_n \\cdot cos(\\tfrac{2\\pi n}{T}t)\\ \\ \\ \\ \\ \\ +\\ \\ \\ \\ \\ \\frac{1}{N} \\sum_{n =0}^{N} b_n \\cdot sin(\\tfrac{2\\pi n}{T}t) \\\\ a_n &= \\int_{-\\tfrac{T}{2}}^{+\\tfrac{T}{2}} s(t) \\cdot cos(\\omega_n t) \\ dt \\ \\ \\ \\ \\ b_n = \\int_{-\\tfrac{T}{2}}^{+\\tfrac{T}{2}} s(t) \\cdot sin(\\omega_n t) \\ dt \\\\ \\end{aligned} } s(t)an=N1n=0∑Nan⋅cos(T2πnt) + N1n=0∑Nbn⋅sin(T2πnt)=∫−2T+2Ts(t)⋅cos(ωnt) dt bn=∫−2T+2Ts(t)⋅sin(ωnt) dt 如果我们对函数周期进行平移,将区间从 (−T2, +T2)(-\\tfrac{T}{2},\\ +\\tfrac{T}{2})(−2T, +2T) 偏移 +T2+\\tfrac{T}{2}+2T ,即变换到 (0, T)(0,\\ T)(0, T) ,使原周期信号函数 s(t)s(t)s(t) 偏移为奇函数(即 s(−t)=−s(t)s(-t) = - s(t)s(−t)=−s(t) ),而奇函数式可证明是不需要余弦函数项的。此时,就可以进一步化简 s(t)s(t)s(t) 为存粹正弦函数表示: s(t)=1N∑n=0Nbn⋅sin(2πnλt)=1N∑n=0Nbn⋅sin(ωnt)bn=∫0Ts(t)⋅sin(ωnt) dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N} \\sum_{n =0}^{N} b_n \\cdot sin(\\tfrac{2\\pi n}{\\lambda}t) = \\frac{1}{N} \\sum_{n =0}^{N} b_n \\cdot sin(\\omega_n t) \\\\ b_n &= \\int_{0}^{T} s(t) \\cdot sin(\\omega_n t) \\ dt \\\\ \\end{aligned} } s(t)bn=N1n=0∑Nbn⋅sin(λ2πnt)=N1n=0∑Nbn⋅sin(ωnt)=∫0Ts(t)⋅sin(ωnt) dt 简化表示 ωn{\\omega_n}ωn 为 ω{\\omega}ω ,当我们将傅里叶级数从三角函数域,扩展到复变函数域时,基底函数由正余弦函数变为了以 λ=2πω=Tn{\\displaystyle \\begin{aligned} \\lambda = \\tfrac{2 \\pi}{\\omega} = \\tfrac{T}{n}\\\\ \\end{aligned} }λ=ω2π=nT 为周期(波长)的复指数函数 Sω(t)=eiωt{\\displaystyle \\begin{aligned} {\\mathcal {S}}_{\\omega}(t) = e^{i\\omega t}\\\\ \\end{aligned} }Sω(t)=eiωt 。信号函数 s(t)s(t)s(t) 的分解函数就可以表示为: s(t)=1N∑n=0Ns^(2πnT)⋅ei2πnTt=1N∑ω=0ωNs^(ω)⋅eiωt=1N∑n=0Ns^(ω)⋅Sω(t)s^(ω)=∫0Ts(t)⋅e−iωt dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N} \\sum_{n = 0}^{N} \\hat{s}(\\tfrac{2\\pi n}{T}) \\cdot e^{i \\tfrac{2\\pi n}{T}t} = \\frac{1}{N} \\sum_{\\omega = 0}^{\\omega_N} \\hat{s}(\\omega) \\cdot e^{i \\omega t} \\\\ &= \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{s}(\\omega) \\cdot {\\mathcal {S}}_{\\omega}(t) \\\\ \\hat{s}(\\omega) &= \\int_{0}^{T} s(t) \\cdot e^{-i \\omega t} \\ dt \\\\ \\end{aligned} } 
s(t)s^(ω)=N1n=0∑Ns^(T2πn)⋅eiT2πnt=N1ω=0∑ωNs^(ω)⋅eiωt=N1n=0∑Ns^(ω)⋅Sω(t)=∫0Ts(t)⋅e−iωt dt 根据 欧拉公式(Euler's Formula) 可知 eix=cos(x)+i⋅sin(x){\\displaystyle \\begin{aligned} e^{ix} = cos(x) + i \\cdot sin(x) \\end{aligned} }eix=cos(x)+i⋅sin(x) , 带入上式有: s(t)=1N∑n=0Na^ω⋅cos(ωt)+i⋅b^ω⋅sin(ωt)a^ω=s^(−ω)+s^(ω)b^ω=1i⋅(s^(−ω)−s^(ω)) {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{a}_{\\omega} \\cdot cos(\\omega t) + i \\cdot \\hat{b}_{\\omega} \\cdot sin(\\omega t)\\\\ \\hat{a}_{\\omega} &= \\hat{s}(-\\omega) + \\hat{s}(\\omega) \\quad \\quad \\hat{b}_{\\omega} = \\tfrac{1}{i} \\cdot (\\hat{s}(-\\omega)-\\hat{s}(\\omega)) \\end{aligned} } s(t)a^ω=N1n=0∑Na^ω⋅cos(ωt)+i⋅b^ω⋅sin(ωt)=s^(−ω)+s^(ω)b^ω=i1⋅(s^(−ω)−s^(ω)) 转换到欧氏空间下的三角函数表示 Sω(t){\\mathcal {S}}_{\\omega}(t)Sω(t) ,记构成原信号函数 s(t)s(t)s(t) 的复指数函数 Sω(t){\\mathcal {S}}_{\\omega}(t)Sω(t) 的初相为 ∠ϕω\\angle\\phi_{\\omega}∠ϕω ,振幅为 AωA_{\\omega}Aω ,则: Sω(t):∠ϕω=arctan(a^ωb^ω)Aω=(a^ω)2+(b^ω)2 {\\displaystyle \\begin{aligned} {\\mathcal {S}}_{\\omega}(t) : \\quad \\angle\\phi_{\\omega} = \\arctan(\\tfrac{\\hat{a}_{\\omega}}{\\hat{b}_{\\omega}}) \\quad A_{\\omega} = \\sqrt{ (\\hat{a}_{\\omega}) ^2 + (\\hat{b}_{\\omega}) ^2 } \\\\ \\end{aligned} } Sω(t):∠ϕω=arctan(b^ωa^ω)Aω=√(a^ω)2+(b^ω)2 同三角函数域的情况,复变函数域下的傅里叶级数仍然可以进一步精简。我们仍然需要对原函数 s(t)s(t)s(t) 平移 +λ2+\\tfrac{\\lambda}{2}+2λ 并将周期变换到 (0, λ)(0,\\ \\lambda)(0, λ) ,使 s(t)s(t)s(t) 表现为奇函数。由于原信号函数 s(t)s(t)s(t) 必为实函数的特性,会使得 aωa_{\\omega}aω 与 bωb_{\\omega}bω 互为共轭复数。因此在奇函数条件下, aωa_{\\omega}aω 与 bωb_{\\omega}bω 表现为符号相反的纯虚数,此时: a^ω=1⋅[s^(−ω)+s^(ω)]=0 b^ω=1i⋅[s^(−ω)−s^(ω)]=2i⋅s^(−ω)s(t)=1N∑ω=0ωN 0⋅cos(ωt) + i⋅(2i⋅s^(−ω))⋅sin(ωt)= 1N∑n=0Ns^(−ω)⋅sin(ωt) {\\displaystyle \\begin{aligned} \\hat{a}_{\\omega} &= 1 \\cdot [\\hat{s}(-\\omega) + \\hat{s}(\\omega)] = 0 \\ \\ \\ \\ \\ \\hat{b}_{\\omega} = \\tfrac{1}{i} \\cdot [\\hat{s}(-\\omega)-\\hat{s}(\\omega)] = \\tfrac{2}{i} \\cdot \\hat{s}(-\\omega) \\\\ s(t) &= \\frac{1}{N} \\sum_{\\omega =0}^{\\omega_N} \\ \\ \\ \\ 0 \\cdot cos(\\omega t) \\ \\ \\ \\ \\ + \\ \\ \\ \\ i \\cdot (\\tfrac{2}{i} \\cdot \\hat{s}(-\\omega)) \\cdot sin(\\omega t) \\\\ &= \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{s}(-\\omega) \\cdot sin(\\omega t) \\\\ \\end{aligned} } a^ωs(t)=1⋅[s^(−ω)+s^(ω)]=0 b^ω=i1⋅[s^(−ω)−s^(ω)]=i2⋅s^(−ω)=N1ω=0∑ωN 0⋅cos(ωt) + i⋅(i2⋅s^(−ω))⋅sin(ωt)= N1n=0∑Ns^(−ω)⋅sin(ωt) 如果我们将 s^(−ω)\\hat{s}(-\\omega)s^(−ω) 的负号划入公式,并将离散级数扩展到原信号函数 s(t)s(t)s(t) 的连续实数空间上以积分形式表示。则 s(t)s(t)s(t) 与 s^(−ω)\\hat{s}(-\\omega)s^(−ω) 的关系就展现为: s(t)=1N∫0Ns^(ω)⋅sin(ωt) dns^(ω)=∫0Ts(t)⋅sin(−ωt) dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N}\\int_{0}^{N} \\hat{s}(\\omega) \\cdot sin(\\omega t) \\ d{n} \\\\ \\hat{s}(\\omega) &= \\int_{0}^{T} s(t) \\cdot sin(-\\omega t) \\ dt \\\\ \\end{aligned} } s(t)s^(ω)=N1∫0Ns^(ω)⋅sin(ωt) dn=∫0Ts(t)⋅sin(−ωt) dt 这就是傅里叶变换的奇函数表达式,也被称为 正弦傅里叶变换(SFT [Sine Fourier Transform])。 同理,如果我们取偶函数,有 aωa_{\\omega}aω 与 bωb_{\\omega}bω 表现为符号相同的纯实数。即: a^ω=1⋅[s^(−ω)+s^(ω)]=2⋅s^(ω) b^ω=1i⋅[s^(−ω)−s^(ω)]=0s(t)=1N∑ω=0ωN 2⋅s^(ω)⋅cos(ωt) + i⋅0⋅sin(ωt)= 1N∑n=0Ns^(ω)⋅cos(ωt) {\\displaystyle \\begin{aligned} \\hat{a}_{\\omega} &= 1 \\cdot [\\hat{s}(-\\omega) + \\hat{s}(\\omega)] = 2 \\cdot \\hat{s}(\\omega) \\ \\ \\ \\ \\ \\hat{b}_{\\omega} = \\tfrac{1}{i} \\cdot [\\hat{s}(-\\omega)-\\hat{s}(\\omega)] = 0 \\\\ s(t) &= \\frac{1}{N} \\sum_{\\omega =0}^{\\omega_N} \\ \\ \\ \\ {2 \\cdot \\hat{s}(\\omega)} \\cdot cos(\\omega t) \\ \\ \\ \\ \\ + \\ \\ \\ \\ i \\cdot 0 \\cdot sin(\\omega t) \\\\ &= \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ 
\\ \\ \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{s}(\\omega) \\cdot cos(\\omega t) \\\\ \\end{aligned} } a^ωs(t)=1⋅[s^(−ω)+s^(ω)]=2⋅s^(ω) b^ω=i1⋅[s^(−ω)−s^(ω)]=0=N1ω=0∑ωN 2⋅s^(ω)⋅cos(ωt) + i⋅0⋅sin(ωt)= N1n=0∑Ns^(ω)⋅cos(ωt) 采用相同处理,有余 弦傅里叶变换(CFT [Cosine Fourier Transform]) 结果如下: s(t)=1N∫0Ns^(ω)⋅cos(ωt) dns^(ω)=∫−T2+T2s(t)⋅cos(−ωt) dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N} \\int_{0}^{N} \\hat{s}(\\omega) \\cdot cos(\\omega t) \\ d{n} \\\\ \\hat{s}(\\omega) &= \\int_{-\\tfrac{T}{2}}^{+\\tfrac{T}{2}} s(t) \\cdot cos(-\\omega t) \\ dt \\\\ \\end{aligned} } s(t)s^(ω)=N1∫0Ns^(ω)⋅cos(ωt) dn=∫−2T+2Ts(t)⋅cos(−ωt) dt 然而工程中的信号并不存在有限周期且并不都能判定奇偶性,这是否意味着我们无法对其进行分解和化简? 答案是否定的。首先来看,针对周期性需要进行的操作。 解构一维信号 - 时频分离(Time-Frequency Separation) 如果我们换个角度就会发现,不存在有限周期只不过是因为周期太长,以至函数周期等于信号完整时长或着趋近无穷而导致的。所以我们分解原函数到对应的复指数函数和,所选择基底复指数函数也趋近于无穷,并使其对应频率从 000 到 ∞\\infty∞ 而周期从极大到极小即可。不过在计算上就需要利用傅立叶变化的空间特征了。 结合上文,记被分解的原信号函数为 f(t)f(t)f(t) 。根据傅立叶基的正交特性,如果存在 F(t){\\mathcal {F}}(t)F(t) 为当前 f(t)f(t)f(t) 的解函数空间,则必然有 f(t)⋅Fω−1(t)f(t) \\cdot {\\mathcal {F}}_{\\omega}^{-1}(t)f(t)⋅Fω−1(t) 内积在时间 ttt 范围为 (0, ∞)(0,\\ \\infty)(0, ∞) 有固定值 f^(ω)\\hat{f}(\\omega)f^(ω) ,使得: f^(ω)=∫0∞f(t)⋅Fω−1(t) dt=∫0∞f(t)⋅e−iωt dt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\int_{0}^{\\infty} f(t) \\cdot {\\mathcal {F}}_{\\omega}^{-1}(t) \\ dt = \\int_{0}^{\\infty} f(t) \\cdot e^{-i \\omega t}\\ dt \\\\ \\end{aligned} } f^(ω)=∫0∞f(t)⋅Fω−1(t) dt=∫0∞f(t)⋅e−iωt dt 以函数空间角度排除 f(t)f(t)f(t) 周期干扰。而复指数波的波函数,顾名思义就是复指数函数,有: f^(ω)=∫−∞+∞aω⋅cos(ωt)+i⋅bω⋅sin(ωt) dt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\int_{-\\infty}^{+\\infty} a_{\\omega} \\cdot cos(\\omega t) + i \\cdot b_{\\omega} \\cdot sin(\\omega t) \\ dt\\\\ \\end{aligned} } f^(ω)=∫−∞+∞aω⋅cos(ωt)+i⋅bω⋅sin(ωt) dt 使 bωb_{\\omega}bω 可取复数域,就可以转换为: f^(ω)=∫−∞+∞aω⋅cos(ωt)+bω⋅sin(ωt) dt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\int_{-\\infty}^{+\\infty} a_{\\omega} \\cdot cos(\\omega t) + b_{\\omega} \\cdot sin(\\omega t) \\ dt\\\\ \\end{aligned} } f^(ω)=∫−∞+∞aω⋅cos(ωt)+bω⋅sin(ωt) dt 由于实际信号并不能严格确定奇偶性,不过对于小于四维的情况下,大多数条件都能保证其本身为实函数(即函数只有实数域取值),因而构成原信号的分离基底函数是存在不同强度和初项的。我们沿用前文中对初相和振幅的定义,记 Fω(t){\\mathcal {F}}_{\\omega}(t)Fω(t) 初相为 ∠ϕω\\angle\\phi_{\\omega}∠ϕω ,振幅为 AωA_{\\omega}Aω ,则有: Fω(t):∠ϕω=arctan(a^ωb^ω) Aω=(a^ω)2+(b^ω)2 {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(t) : \\quad \\angle\\phi_{\\omega} = \\arctan(\\tfrac{\\hat{a}_{\\omega}}{\\hat{b}_{\\omega}}) \\ \\ \\ \\ A_{\\omega} = \\sqrt{ (\\hat{a}_{\\omega}) ^2 + (\\hat{b}_{\\omega}) ^2 } \\\\ \\end{aligned} } Fω(t):∠ϕω=arctan(b^ωa^ω) Aω=√(a^ω)2+(b^ω)2 根据 帕西瓦尔定理(Parseval’s Theorem) 转复数空间,我们会发现 AωA_{\\omega}Aω 就是 f^(ω)\\hat{f}(\\omega)f^(ω) 取 222 范数后的结果,而初项其实就是 f^(ω)\\hat{f}(\\omega)f^(ω) 在 t=0t = 0t=0 时,自身相位在复数空间上与实轴的夹角。即: Fω(t):∠ϕω=∠∣f^(t)∣ =arctan(a^ωb^ω)Aω= ∥f^(t)∥2=(a^ω)2+(b^ω)2 {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(t) &: \\\\ \\angle\\phi_{\\omega} &= \\angle{\\vert \\hat{f}(t) \\vert} \\ = \\arctan(\\tfrac{\\hat{a}_{\\omega}}{\\hat{b}_{\\omega}}) \\\\ A_{\\omega} &=\\ \\ \\Vert \\hat{f}(t) \\Vert _2 =\\sqrt{ (\\hat{a}_{\\omega}) ^2 + (\\hat{b}_{\\omega}) ^2 } \\\\ \\end{aligned} } Fω(t)∠ϕωAω:=∠∣f^(t)∣ =arctan(b^ωa^ω)= ∥f^(t)∥2=√(a^ω)2+(b^ω)2 进而有: Fω(t)=Aω⋅sin(ωt−∠ϕω)=Aω⋅cos(ωt+∠ϕω)=∥f^(t)∥2⋅sin(ωt−∠∣f^(t)∣)=∥f^(t)∥2⋅cos(ωt+∠∣f^(t)∣)f^(ω)=∫0∞f(t)⋅e−iωt dt ⇔ f(t)=1N∫−∞+∞f^(ω)⋅Fω(t) dω {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(t) &= A_{\\omega} \\cdot sin(\\omega t -\\angle\\phi_{\\omega}) = A_{\\omega} \\cdot cos(\\omega t +\\angle\\phi_{\\omega}) \\\\ &= {\\Vert 
\\hat{f}(t) \\Vert _2} \\cdot sin(\\omega t -\\angle{\\vert \\hat{f}(t) \\vert}) = {\\Vert \\hat{f}(t) \\Vert _2} \\cdot cos(\\omega t +\\angle{\\vert \\hat{f}(t) \\vert}) \\\\ \\hat{f}(\\omega) &= \\int_{0}^{\\infty} f(t) \\cdot e^{-i \\omega t}\\ dt \\ \\ \\ \\ \\ \\Leftrightarrow \\ \\ \\ \\ \\ f(t) = \\frac{1}{N} \\int_{-\\infty}^{+\\infty} \\hat{f}(\\omega) \\cdot {\\mathcal {F}}_{\\omega}(t) \\ d \\omega \\\\ \\end{aligned} } Fω(t)f^(ω)=Aω⋅sin(ωt−∠ϕω)=Aω⋅cos(ωt+∠ϕω)=∥f^(t)∥2⋅sin(ωt−∠∣f^(t)∣)=∥f^(t)∥2⋅cos(ωt+∠∣f^(t)∣)=∫0∞f(t)⋅e−iωt dt ⇔ f(t)=N1∫−∞+∞f^(ω)⋅Fω(t) dω 显然,大部分信号都是有限时间下的,且基本都能满足无穷区间的狄利克雷条件,也因此可以使用傅里叶变换分解。 如果频率范围在 ω∈[ω0, ω1]\\omega \\in [\\omega_{0},\\ \\omega_{1}]ω∈[ω0, ω1] ,对于选定的时间点 t=tct = t_ct=tc ,有频率 ω\\omegaω 、原函数 f(t)f(t)f(t) 在 t=tct = t_ct=tc 时的取值 f(tc)f(t_c)f(tc) 、基底函数族 Fω(t){\\mathcal {F}}_{\\omega}(t)Fω(t) 锁定时间 t=tct = t_ct=tc 的变体 Ftc(ω){\\mathcal {F}}_{t_c}(\\omega)Ftc(ω) ,构成该频率范围的 频域投影(FDP [Frequency Domain Projection]); 反之,如果时间范围在 t∈[ t0, t1]t\\in [\\ t_0,\\ \\ t_1]t∈[ t0, t1] ,对于频率范围 ω∈[ω0, ω1]\\omega \\in [\\omega_{0},\\ \\omega_{1}]ω∈[ω0, ω1] ,有时间 ttt 、原函数 f(t)f(t)f(t) 、基底函数族 Fω(t){\\mathcal {F}}_{\\omega}(t) Fω(t),就构成了原函数在该时间范围的 时域投影(TDP [Time Domain Projection])。 两者的区别仅在于观察角度的不同: Frequency Domain Projection: ( ω , f(tc) , Ftc(ω) )Time Domain Projection: ( t , f(t) , Fω(t) )ω∈[ω0, ωn] t ∈[ t0, tn ] {\\displaystyle \\begin{aligned} {Frequency\\ Domain\\ Projection:} &\\ \\ (\\ \\ \\omega\\ ,\\ \\ f(t_c)\\ ,\\ \\ {\\mathcal {F}}_{t_c}(\\omega) \\ \\ ) \\\\ {Time\\ Domain\\ Projection:} &\\ \\ (\\ \\ t\\ \\ ,\\ \\ f(t)\\ \\ ,\\ \\ {\\mathcal {F}}_{\\omega}(t) \\ \\ \\ ) \\\\ {\\omega \\in [\\omega_0,\\ \\omega_n]} \\ \\ \\ \\ & \\ \\ {\\ t\\ \\in [\\ t_0,\\ \\ t_n\\ ]} \\\\ \\end{aligned} } Frequency Domain Projection:Time Domain Projection:ω∈[ω0, ωn] ( ω , f(tc) , Ftc(ω) ) ( t , f(t) , Fω(t) ) t ∈[ t0, tn ] 周期的问题解决了,现在我们能够拿到时频分离(Time-Frequency Separation)的原信号函数信息并可以依此还原信号本身。但积分对于计算机来说任务有些繁重。同时,由于计算机只能处理离散化后的数字信号,因此离散化的算法才能够被计算机有效使用。 所以还需要在此基础上,找到更为便捷的算法实现。 精简运算过程 - 一维离散傅立叶变换(1D-DFT) 如果将积分重新转换为级数形式积化和差表示,并在允许误差范围内取有限子集。那么就能够化解掉大部分运算量,从而得到一个相对理论而言的低时间复杂度算法。这种想法促成了基于计算机运算的一维离散傅立叶(1D-DFT)的诞生。 一维离散傅立叶(1D-DFT [1D-Discrete Fourier Transform])本质上包含了两部分离散化作业,即对时域的离散化(TDD [Time Domain Discrete])和对频域的离散化(FDD [Frequency Domain Discrete])。 时域离散化(TDD) 方面,一维离散傅立叶采用了离散时间傅立叶变化(DTFT [Discrete Time Fourier Transform])中,对时域信号间隔采样的操作。即将: f^(ω)=∫0∞f(t)⋅e−iωt dt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\int_{0}^{\\infty} f(t) \\cdot e^{-i \\omega t}\\ dt \\\\ \\end{aligned} } f^(ω)=∫0∞f(t)⋅e−iωt dt 以时间采样(切片)数量为 n1{n_1}n1 ,转为级数形式: f^(ω)=∑t=t0tn1f(t)⋅e−iωt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\sum_{t = t_0}^{t_{n_1}} f(t) \\cdot e^{-i \\omega t} \\\\ \\end{aligned} } f^(ω)=t=t0∑tn1f(t)⋅e−iωt 打破时间上的连续性。需要注意的是,此时频域仍然是连续的。 频域离散化(FDD) 方面,离散傅立叶做的操作就更为直观了。如果在频率采样时就以离散化的方式采样数据,那得到的频域信息天然就是离散的。同样,从某个时刻 t=tct = t_ct=tc 离散化的频域信息上还原当前实际频率,则也是一个线性求和的过程。因此有: f(t)=1N∫−∞+∞f^(ω)⋅Fω(t) dω {\\displaystyle \\begin{aligned} f(t) = \\frac{1}{N} \\int_{-\\infty}^{+\\infty} \\hat{f}(\\omega) \\cdot {\\mathcal {F}}_{\\omega}(t) \\ d \\omega \\\\ \\end{aligned} } f(t)=N1∫−∞+∞f^(ω)⋅Fω(t) dω 以频率采样(切片)数量为 n2{n_2}n2 ,转为级数形式: f(t)=1n2∑ω=ω0ωn2f^(ω)⋅Fω(t) {\\displaystyle \\begin{aligned} f(t) = \\frac{1}{n_2} \\sum_{\\omega = \\omega_0}^{\\omega_{n_2}} \\hat{f}(\\omega) \\cdot {\\mathcal {F}}_{\\omega}(t) \\\\ \\end{aligned} } f(t)=n21ω=ω0∑ωn2f^(ω)⋅Fω(t) 而随着有限采样,基底函数族 Fω(t){\\mathcal {F}}_{\\omega}(t) Fω(t)$ 构成的解函数空间也是有限维的,即: Fω=[Fω1,Fω2, ... 
,Fωn2] {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\omega_1},{\\mathcal {F}}_{\\omega_2},\\ ...\\ ,{\\mathcal {F}}_{\\omega_{n_2}}] \\\\ \\end{aligned} } Fω=[Fω1,Fω2, ... ,Fωn2] 至此,由时域离散化(TDD)与频域离散化(FDD)共同构成离散傅立叶(DFT)的完整表达如下所示: Fω=[Fω1,Fω2, ... ,Fωn2]f^(ω)=∑t=t0tn1f(t)⋅e−iωt ⇔ f(t)=1n2∑ω=ω0ωn2f^(ω)⋅Fω(t) {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\omega_1},&{\\mathcal {F}}_{\\omega_2},\\ ...\\ ,{\\mathcal {F}}_{\\omega_{n_2}}] \\\\ \\hat{f}(\\omega) = \\sum_{t = t_0}^{t_{n_1}} f(t) \\cdot e^{-i \\omega t} \\ \\ \\ \\ \\ &\\Leftrightarrow \\ \\ \\ \\ \\ f(t) = \\frac{1}{n_2} \\sum_{\\omega = \\omega_0}^{\\omega_{n_2}} \\hat{f}(\\omega) \\cdot {\\mathcal {F}}_{\\omega}(t) \\\\ \\end{aligned} } Fω=[Fω1,f^(ω)=t=t0∑tn1f(t)⋅e−iωt Fω2, ... ,Fωn2]⇔ f(t)=n21ω=ω0∑ωn2f^(ω)⋅Fω(t) 经过离散化后的有限采样更适合计算机有限的算力,因此才能被程序化。不过由于并没有保存连续的完整信息,经过离散傅里叶变换后再还原的数据,相对于采样自然源的原始数据终归还是会有一定损失的。但是由于变换与逆变换,并不会导致解构再还原后的数据存在差异。所以离散傅里叶变换被归类为 有损采样(Lossy Sampling)的无损算法(Lossless Algorithm)。 一维离散傅立叶变换(1D-DFT)的 C 语言实现 既然需要做程序化,那么首先需要将离散傅里叶变换的过程抽象化。理清逻辑思路的同时,方便构造迭代器和代码的处理流水线。这里我们使用伪码表示: /** * 1D-DFT [Discrete Fourier Transform] * [How to Use] * * Fo[T] = {...}; * Fn[N] = {}; * dft_1d(&Fo, &Fn, T, N); * [theorem::definitions] * Fo meanings Original Function * Fn meanings Fourier Basis at [n] * pi meanings π * T meanings Periodic of Fo * N meanings Slice of Frequency * Wn meanings Angular Frequency of Basis Fn is Wn = ((2*pi*n)/T) * [theorem::formula] * Fo[t] = sum_{n=0}^{N-1} x Fn[t] * exp( i * ((2*pi*n)/T) * t), 0 同时,我们还需要提供离散傅里叶变换的逆变换(IDFT [Inverse Discrete Fourier Transform])来使得电脑能够还原信息: /** * 1D-IDFT [Inverse Discrete Fourier Transform] * [How to Use] * * Fo[T] = {}; * Fn[N] = {...}; * dft_1d(&Fo, &Fn, T, N); * [theorem::definitions] * Fo meanings Original Function * Fn meanings Fourier Basis at [n] * pi meanings π * T meanings Periodic of Fo * N meanings Slice of Frequency * Wn meanings Angular Frequency of Basis Fn is Wn = ((2*pi*n)/T) * [theorem::formula] * Fo[t] = sum_{n=0}^{N-1} x Fn[t], 0 现在思路有了,只需要以代码实现即可: #include \"stdio.h\" #include \"math.h\" #define PI 3.1415926f typedef struct FBasis { double re_; double im_; double w_; } FBasis; void dft_1d(double *Fo, FBasis *Fn, size_t T, size_t N) { for (int n = 0; n 写完后简单测试一下: int main(void) { FBasis Fn[6] = {}; double Fo[6] = {1, 2, 3, 4, 5, 6}; double iFo[6] = {}; size_t T = sizeof(Fo) / sizeof(double); size_t N = sizeof(Fn) / sizeof(FBasis); printf(\"\\n Original_data: \\n\"); for (int t = 0; t 得到结果和标准几近相同: Original data: 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 DFT_result: 21.000000 + i 0.000000 -3.000003 + i -5.196152 -3.000002 + i -1.732048 -3.000000 + i -0.000002 -2.999996 + i 1.732057 -2.999979 + i 5.196158 IDFT_result: 1.000003 2.000000 2.999999 3.999999 4.999999 6.000000 运行结束。 到这里,我们已经基本掌握了傅里叶变换原理和最基础的应用。 如果拓展傅里叶变换到相对复杂的二维情况,那么和一维时有哪些不同呢? 
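The C listing for this section was truncated during extraction (the loop bodies after each comparison operator were lost), so the following is only a minimal, self-contained sketch, assuming the trigonometric accumulation described by the formulas above and the `FBasis` / `dft_1d` declarations that survive in the text; the `idft_1d` name and the exact output format are assumptions made for illustration.

```c
#include <stdio.h>
#include <math.h>

#define PI 3.1415926f

/* One Fourier basis sample: accumulated cosine (re_) and sine (im_)
 * strength, plus the angular frequency w_ = 2*pi*n/T of basis n. */
typedef struct FBasis {
    double re_;
    double im_;
    double w_;
} FBasis;

/* 1D-DFT: project the T original samples in Fo onto N discrete bases Fn. */
void dft_1d(double *Fo, FBasis *Fn, size_t T, size_t N) {
    for (size_t n = 0; n < N; ++n) {
        double An = 0.0, Bn = 0.0;
        double Wn = (2.0 * PI / (double)T) * (double)n;  /* angular frequency of basis n */
        for (size_t t = 0; t < T; ++t) {
            An += cos(Wn * (double)t) * Fo[t];           /* cosine (real) strength */
            Bn += sin(Wn * (double)t) * Fo[t];           /* sine (imaginary) strength */
        }
        Fn[n].re_ = An;
        Fn[n].im_ = Bn;
        Fn[n].w_  = Wn;
    }
}

/* 1D-IDFT: rebuild the T time samples from the N basis strengths,
 * dividing by N to undo the forward accumulation. */
void idft_1d(double *Fo, FBasis *Fn, size_t T, size_t N) {
    for (size_t t = 0; t < T; ++t) {
        double sum = 0.0;
        for (size_t n = 0; n < N; ++n) {
            sum += Fn[n].re_ * cos(Fn[n].w_ * (double)t)
                 + Fn[n].im_ * sin(Fn[n].w_ * (double)t);
        }
        Fo[t] = sum / (double)N;
    }
}

int main(void) {
    double  Fo[6]  = {1, 2, 3, 4, 5, 6};
    FBasis  Fn[6]  = {{0}};
    double  iFo[6] = {0};
    size_t  T = sizeof(Fo) / sizeof(Fo[0]);
    size_t  N = sizeof(Fn) / sizeof(Fn[0]);

    dft_1d(Fo, Fn, T, N);
    idft_1d(iFo, Fn, T, N);

    printf("\n DFT_result: \n");
    for (size_t n = 0; n < N; ++n) printf("%f + i %f\n", Fn[n].re_, Fn[n].im_);
    printf("\n IDFT_result: \n");
    for (size_t t = 0; t < T; ++t) printf("%f ", iFo[t]);
    printf("\n");
    return 0;
}
```

Each output coefficient needs one pass over all T input samples, so this direct evaluation costs O(T·N) operations; that quadratic cost is exactly what the fast Fourier transform in 3.1.3 is designed to reduce.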
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_1_2.html":{"url":"Chapter_3/Language/cn/Docs_3_1_2.html","title":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT)","keywords":"","body":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT) 信号学中,将沿空间分布的信号称为二维信号(2D Signal)。图像信号就是二维信号。 二维傅里叶变换,能够将一组二维信号分解到 周期性复平面波(Complex Plane Wave) 构成的三维向量空间。 什么是平面波呢?延空间传播的选定波,若有任意时刻的相位相同的点连接起来得到的波阵面(同相位面)为互相平行的平面,就可以被称为 平面波(Plane Wave)。如果平面波同时满足简谐振动(即以正余弦规律振动)的特征,则可称为 平面简谐波(Plane Harmonic Waves)。复平面波则指的是在复数空间下的平面波。 从一维到二维傅里叶变换(2D-FT) 如果说一维信号是由一组数据延单一方向排布组成的数字序列,那么二维信号就是由一组数据延横向和纵向两个方向排布构成的数字平面。在一维信号处理时,我们将复指数函数分解为一系列一维简谐波的组合。同样的处理方式,我们也可以类比应用在二维信号场景,将构成二维信号的相关复平面波分解为在复数空间下的一系列复平面简谐波的聚合,进而把二维信号以相关强度参数,转化为平面简谐波的叠加表示。 一维信号和二维信号仅仅是维度上的差异。因此,结合向量空间,我们引入波的方向矢量,并取其大小等于当前波的角频率来表示波本身,称为波矢 k⃗{\\vec{k}}k⃗ 。将波矢为 k⃗{\\vec{k}}k⃗ 的平面简谐波,称为 k⃗{\\vec{k}}k⃗ 平面波。 对于周期为 TTT 的一维信号,因为时间只能沿着时间轴正向流动,所以此时的 k⃗{\\vec{k}}k⃗ 不存在方向。其基础波函数的波矢 k⃗{\\vec{k}}k⃗ 只有大小,即 ω=∣k⃗∣\\omega = \\vert {\\vec{k}} \\vertω=∣k⃗∣ 。所以在一维傅里叶变换中,我们只考虑了时间与频率的关系,即一维的时频关系。 对于周期为 TTT 的二维信号,以可变 nnn 等分选取作为基础的复平面波,记波函数为 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) ,则波长(周期)λ=Tn\\lambda = \\tfrac{T}{n}λ=nT ,角频率(角速度)为 ω=∣k⃗∣=2πλ\\omega = \\vert {\\vec{k}} \\vert = \\tfrac{2\\pi}{\\lambda}ω=∣k⃗∣=λ2π 。将 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的传播方向 (u,v)(u,v)(u,v) 限定 u∈[−U2, +U2]u \\in [-\\tfrac{U}{2}, \\ +\\tfrac{U}{2}]u∈[−2U, +2U] ,v∈[−V2, +V2]v \\in [-\\tfrac{V}{2}, \\ +\\tfrac{V}{2}]v∈[−2V, +2V] 的范围。则 (u,v)(u,v)(u,v) 与原点的欧式距离,实际代表的是该方向上的分割强度值 nnn ,有: U2 + V2 = T → (uT)2 + (vT)2 = n {\\displaystyle \\begin{aligned} \\sqrt{U^2 \\ \\ + \\ \\ V^2} \\ = \\ T \\ \\ \\ \\rightarrow \\ \\ \\ \\sqrt{(\\tfrac{u}{T})^2 \\ \\ + \\ \\ (\\tfrac{v}{T})^2} \\ = \\ n \\\\ \\end{aligned} } √U2 + V2 = T → √(Tu)2 + (Tv)2 = n 因此,代表 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的波矢 k⃗=(2π⋅uU , 2π⋅vV)=(ξ, η){\\vec{k}} = (\\tfrac{2 \\pi \\cdot {u}}{U} \\ , \\ \\tfrac{2 \\pi \\cdot {v}}{V} ) = (\\xi, \\ \\eta)k⃗=(U2π⋅u , V2π⋅v)=(ξ, η) ,推得: ω=∣k⃗∣=(2π⋅uU)2+(2π⋅vV)2=2πλ→ξ2 + η2 = ω2 {\\displaystyle \\begin{aligned} &\\omega = \\vert {\\vec{k}} \\vert = \\sqrt{({\\tfrac{2 \\pi \\cdot u}{U}})^2 + ({\\tfrac{2 \\pi \\cdot v}{V}})^2} = \\tfrac{2 \\pi}{\\lambda} \\\\ & \\quad \\rightarrow {\\xi}^2 \\ \\ + \\ \\ {\\eta}^2 \\ = \\ {\\omega}^2 \\\\ \\end{aligned} } ω=∣k⃗∣=√(U2π⋅u)2+(V2π⋅v)2=λ2π→ξ2 + η2 = ω2 Fω(x,y)=eik⃗⋅(x,y)T=ei⋅2π(uUx+vVy)=Fξ(x)⋅Fη(y) {\\displaystyle {\\mathcal {F}_{\\omega}(x,y)} = e^{i \\vec{k} \\cdot (x,y)^T } = e^{i \\cdot {2 \\pi} (\\tfrac{u}{U}x+\\tfrac{v}{V}y)} = {\\mathcal {F}_{\\xi}(x)} \\cdot {\\mathcal {F}_{\\eta}(y)} } Fω(x,y)=eik⃗⋅(x,y)T=ei⋅2π(Uux+Vvy)=Fξ(x)⋅Fη(y) 上式对复平面波 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的拆解,从数理上表明了,Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 是由沿着 xxx 轴方向的一维波 Fξ(x){\\mathcal {F}}_{\\xi}(x)Fξ(x) 和沿着 yyy 轴方向的一维波 Fη(y){\\mathcal {F}}_{\\eta}(y)Fη(y) 两部分构成。其中,ξ=2π⋅uU\\xi = \\tfrac{2\\pi \\cdot u}{U}ξ=U2π⋅u 为 Fξ(x){\\mathcal {F}}_{\\xi}(x)Fξ(x) 的角频率,η=2π⋅vV\\eta = \\tfrac{2\\pi \\cdot v}{V}η=V2π⋅v 为 Fη(y){\\mathcal {F}}_{\\eta}(y)Fη(y) 的角频率。点位 (x,y)(x,y)(x,y) 在二维信号中代表的是实际像素数据在数字平面上的空间位置信息。所以在处理二维傅里叶变换时,我们需要考虑的是平面空间点 P⃗(x,y)\\vec{P}(x,y)P⃗(x,y) 与 k⃗{\\vec{k}}k⃗ 平面波间的关系,即二维的空频关系。 解构二维信号 - 空频分离(Spacial-Frequency Separation) 记原二维信号的函数表达为 f(x,y)f(x,y)f(x,y) ,有任意点 P⃗(x,y)\\vec{P}(x,y)P⃗(x,y) 可取 x∈[0, W]x \\in [0, \\ W]x∈[0, W] , y∈[0, H]y \\in [0, \\ H]y∈[0, H] ,那么对于二维信号来说,周期 T=W2+H2T= \\sqrt{W^2+H^2}T=√W2+H2。保持 u∈[−U2, U2]u \\in [-\\tfrac{U}{2}, \\ 
\\tfrac{U}{2}]u∈[−2U, 2U] 、v∈[−V2, V2]v \\in [-\\tfrac{V}{2}, \\ \\tfrac{V}{2}]v∈[−2V, 2V] 范围,则 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 沿传播方向角频率 (ξ,η)(\\xi, \\eta)(ξ,η) 就有 ξ∈[−π, +π]\\xi \\in [-\\pi, \\ +\\pi]ξ∈[−π, +π] , η∈[−π, +π]\\eta \\in [-\\pi, \\ +\\pi]η∈[−π, +π] 。则由一维拓展至二维傅里叶级数可知, f(x,y)f(x,y)f(x,y) 与波函数 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的分量权重系数 a^ω(u,v){\\hat{a}_{\\omega}} (u, v) a^ω(u,v) 、 b^ω(u,v){\\hat{b}_{\\omega}} (u, v) b^ω(u,v) 存在: f(x,y)=1U⋅V(∑u=0∞∑v=0∞a^ω⋅cos(k⃗⋅P⃗T) + ∑u=0∞∑v=0∞b^ω⋅sin(k⃗⋅P⃗T))a^ω(u,v)=∫0H∫0Wf(x,y)⋅cos(2π⋅(uUx+vVy)) dx dyb^ω(u,v)=∫0H∫0Wf(x,y)⋅sin(2π⋅(uUx+vVy)) dx dy {\\displaystyle \\begin{aligned} f(x, y) &= \\frac{1}{U\\cdot V} (\\sum_{u =0}^{\\infty} \\sum_{v =0}^{\\infty} {\\hat{a}_{\\omega}} \\cdot cos(\\vec{k} \\cdot \\vec{P}^T)\\ \\ \\ + \\ \\ \\sum_{u =0}^{\\infty} \\sum_{v =0}^{\\infty} {\\hat{b}_{\\omega}} \\cdot sin(\\vec{k} \\cdot \\vec{P}^T)) \\\\ {\\hat{a}_{\\omega}} (u, v) &= \\int_{0}^{H} \\int_{0}^{W} f(x,y) \\cdot cos({2 \\pi} \\cdot (\\tfrac{u}{U}x+\\tfrac{v}{V}y)) \\ dx \\ dy \\\\ {\\hat{b}_{\\omega}} (u, v) &= \\int_{0}^{H} \\int_{0}^{W} f(x,y) \\cdot sin({2 \\pi} \\cdot (\\tfrac{u}{U}x+\\tfrac{v}{V}y)) \\ dx \\ dy \\\\ \\end{aligned} } f(x,y)a^ω(u,v)b^ω(u,v)=U⋅V1(u=0∑∞v=0∑∞a^ω⋅cos(k⃗⋅P⃗T) + u=0∑∞v=0∑∞b^ω⋅sin(k⃗⋅P⃗T))=∫0H∫0Wf(x,y)⋅cos(2π⋅(Uux+Vvy)) dx dy=∫0H∫0Wf(x,y)⋅sin(2π⋅(Uux+Vvy)) dx dy 取 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 初相为 ∠ϕω\\angle\\phi_{\\omega}∠ϕω ,振幅为 AωA_{\\omega}Aω ,则仍然有 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的简谐波特征表示: Fω(x,y):∠ϕω=∠∣f^(x,y)∣ =arctan(a^ωb^ω)Aω= ∥f^(x,y)∥2=(a^ω)2+(b^ω)2 {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(x,y) &: \\\\ \\angle\\phi_{\\omega} &= \\angle{\\vert \\hat{f}(x,y) \\vert} \\ = \\arctan(\\tfrac{\\hat{a}_{\\omega}}{\\hat{b}_{\\omega}}) \\\\ A_{\\omega} &=\\ \\ \\Vert \\hat{f}(x,y) \\Vert _2 =\\sqrt{ (\\hat{a}_{\\omega}) ^2 + (\\hat{b}_{\\omega}) ^2 } \\\\ \\end{aligned} } Fω(x,y)∠ϕωAω:=∠∣f^(x,y)∣ =arctan(b^ωa^ω)= ∥f^(x,y)∥2=√(a^ω)2+(b^ω)2 因此,带入 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) ,有 f(x,y)f(x,y)f(x,y) 的二维傅里叶变换展开为: Fω(x,y)=Fω(P⃗)=Aω⋅sin(k⃗⋅P⃗T−∠ϕω)=Aω⋅cos(k⃗⋅P⃗T+∠ϕω)=∥f^(x,y)∥2⋅sin(ω⋅v⃗T−∠∣f^(x,y)∣)=∥f^(x,y)∥2⋅cos(ω⋅v⃗T+∠∣f^(x,y)∣)f^(u,v)=∫0H∫0Wf(x,y)⋅e−i(ux+vy) dx dy⇔f(x,y)=1U⋅V∫−V2+V2∫−U2+U2f^(u,v)⋅Fω(x,y) du dv {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(x,y) &= {\\mathcal {F}}_{\\omega} (\\vec{P}) = A_{\\omega} \\cdot sin(\\vec{k} \\cdot \\vec{P}^T -\\angle\\phi_{\\omega}) = A_{\\omega} \\cdot cos(\\vec{k} \\cdot \\vec{P}^T +\\angle\\phi_{\\omega}) \\\\ &= {\\Vert \\hat{f}(x,y) \\Vert _2} \\cdot sin(\\omega \\cdot \\vec{v}^T -\\angle{\\vert \\hat{f}(x,y) \\vert}) \\\\ &= {\\Vert \\hat{f}(x,y) \\Vert _2} \\cdot cos(\\omega \\cdot \\vec{v}^T +\\angle{\\vert \\hat{f}(x,y) \\vert}) \\\\ \\\\ \\hat{f}(u,v) &= \\int_{0}^{H} \\int_{0}^{W} f(x,y) \\cdot e^{-i (ux+vy)}\\ dx \\ dy \\\\ &\\Leftrightarrow \\\\ f(x,y) &= \\frac{1}{U\\cdot V} \\int_{-\\tfrac{V}{2}}^{+\\tfrac{V}{2}} \\int_{-\\tfrac{U}{2}}^{+\\tfrac{U}{2}} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\ du \\ dv \\\\ \\end{aligned} } Fω(x,y)f^(u,v)f(x,y)=Fω(P⃗)=Aω⋅sin(k⃗⋅P⃗T−∠ϕω)=Aω⋅cos(k⃗⋅P⃗T+∠ϕω)=∥f^(x,y)∥2⋅sin(ω⋅v⃗T−∠∣f^(x,y)∣)=∥f^(x,y)∥2⋅cos(ω⋅v⃗T+∠∣f^(x,y)∣)=∫0H∫0Wf(x,y)⋅e−i(ux+vy) dx dy⇔=U⋅V1∫−2V+2V∫−2U+2Uf^(u,v)⋅Fω(x,y) du dv 一般情况为了方便起见,常取 U=WU = WU=W 、 V=HV = HV=H ,化简分离参数。上式即为二维傅里叶变换的基本形式。 如果波矢范围在 k⃗∈[k0⃗, k1⃗]\\vec{k} \\in [\\vec{k_0},\\ \\vec{k_1}]k⃗∈[k0⃗, k1⃗] ,对于任意数据平面的像素点 P(x,y)P(x,y)P(x,y) ,有频率 ω=∥k⃗∥2\\omega = \\Vert \\vec{k} \\Vert_2ω=∥k⃗∥2 
传播方向 (u,v)(u,v)(u,v) 、基底函数族 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y) Fω(x,y) 的强度系数 f^ω(u,v)\\hat{f}_{\\omega}(u,v)f^ω(u,v) ,构成该波矢范围的 频域投影(FDP [Frequency Domain Projection]); 反之,如果选定像素点 P(x,y)P(x,y)P(x,y) ,对于波矢范围在 k⃗∈[k0⃗, k1⃗]\\vec{k} \\in [\\vec{k_0},\\ \\vec{k_1}]k⃗∈[k0⃗, k1⃗] ,有平面位置 P(x,y)P(x,y)P(x,y) 、原函数值 f(x,y)f(x,y)f(x,y) 、基底函数族 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y) Fω(x,y) ,构成原函数在该空间(二维)范围的 空域投影(SDP [Spacial Domain Projection])。 两者的区别同一维一样,仅在于观察角度的不同: Frequency Domain Projection: ( u , v , f^ω(u,v) )Spacial Domain Projection: ( x , y , f(x,y) , Fω(x,y) )k⃗∈[k0⃗, k1⃗] P⃗(x,y) ∈[ (0, 0) , (W, H) ] {\\displaystyle \\begin{aligned} {Frequency\\ Domain\\ Projection:} &\\ \\ (\\ \\ u\\ \\ ,\\ \\ v\\ \\ ,\\ \\ \\hat{f}_{\\omega}(u,v)\\ \\ ) \\\\ {Spacial\\ Domain\\ Projection:} &\\ \\ (\\ \\ x\\ \\ ,\\ \\ y\\ \\ ,\\ \\ f(x,y)\\ \\ ,\\ \\ {\\mathcal {F}}_{\\omega}(x,y) \\ \\ ) \\\\ {\\vec{k} \\in [\\vec{k_0},\\ \\vec{k_1}]} \\ \\ \\ \\ & \\ \\ {\\ \\vec{P}(x,y)\\ \\in [\\ (0,\\ 0)\\ \\ ,\\ \\ (W,\\ H)\\ ]} \\\\ \\end{aligned} } Frequency Domain Projection:Spacial Domain Projection:k⃗∈[k0⃗, k1⃗] ( u , v , f^ω(u,v) ) ( x , y , f(x,y) , Fω(x,y) ) P⃗(x,y) ∈[ (0, 0) , (W, H) ] 显然,二维和一维情况的差异很明显且必然:二维傅里叶变换下所获的的分离投影结果位于三维欧式空间,而非一维时的平面(二维)。 精简运算过程 - 二维离散傅立叶变换(2D-DFT) 同一维傅里叶变换需要做时域离散化(TDD)和频域离散化(FDD)来精简运算量。二维傅里叶变换由于引入了新的维度,更需要依赖离散化处理,才能被计算机在有限算力的前提下使用。 二维离散傅里叶变换(2D-DFT)分为 空域离散化(SDD [Spacial Domain Discrete]) 和 频域离散化(FDD [Frequency Domain Discrete])。当然,此处的空域为二维空域(平面),是不包含 zzz 轴的。我们将两者结合称为 空频离散化(SFD [Spacial Frequency Discrete])。 如果取任意点 P⃗(x,y)\\vec{P}(x,y)P⃗(x,y) 可取 x∈[0, 1, ... , W]x \\in [0, \\ 1, \\ \\ ...\\ , \\ W]x∈[0, 1, ... , W] , y∈[0, 1, ... , H]y \\in [0, \\ 1, \\ \\ ...\\ , \\ H]y∈[0, 1, ... , H] ,只取整数位置。同时, u∈[−U2, ... , +U2]u \\in [-\\tfrac{U}{2}, \\ \\ ...\\ , \\ +\\tfrac{U}{2}]u∈[−2U, ... , +2U] 、 v∈[−V2, ... , +V2]v \\in [-\\tfrac{V}{2}, \\ \\ ...\\ , \\ +\\tfrac{V}{2}]v∈[−2V, ... , +2V] ,有离散 k⃗∈[k0⃗, k1⃗, ... , kn⃗]\\vec{k} \\in [\\vec{k_0}, \\ \\vec{k_1}, \\ \\ ...\\ , \\ \\vec{k_{n}}]k⃗∈[k0⃗, k1⃗, ... , kn⃗] , n=UV=HWn = UV = HWn=UV=HW ,则: SDD: f^(u,v)=∑x=0W∑y=0Hf(x,y)⋅e−i(ux+vy)FDD: f(x,y)=1U⋅V∑u=−U/2+U/2∑v=−V/2+V/2f^(u,v)⋅Fω(x,y) {\\displaystyle \\begin{aligned} SDD: \\ \\ \\hat{f}(u,v) &= \\sum_{x = 0}^{W} \\sum_{y = 0}^{H} f(x,y) \\cdot e^{-i (ux+vy)} \\\\ FDD: \\ \\ f(x,y) &= \\frac{1}{U\\cdot V} \\sum_{u=-U/2}^{+U/2} \\sum_{v= -V/2}^{+V/2} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\\\ \\end{aligned} } SDD: f^(u,v)FDD: f(x,y)=x=0∑Wy=0∑Hf(x,y)⋅e−i(ux+vy)=U⋅V1u=−U/2∑+U/2v=−V/2∑+V/2f^(u,v)⋅Fω(x,y) 至此,由空域离散化(SDD)与频域离散化(FDD)共同构成二维离散傅立叶(2D-DFT)的完整表达如下所示: Fω=[Fk0⃗,Fk1⃗, ... ,Fkn⃗]f^(u,v)=∑x=0W∑y=0Hf(x,y)⋅e−i(ux+vy)⇔f(x,y)=1U⋅V∑u=−U/2+U/2∑v=−V/2+V/2f^(u,v)⋅Fω(x,y) {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega} &= [{\\mathcal {F}}_{\\vec{k_0}}, {\\mathcal {F}}_{\\vec{k_1}},\\ ...\\ ,{\\mathcal {F}}_{\\vec{k_n}}] \\\\ \\hat{f}(u,v) &= \\sum_{x = 0}^{W} \\sum_{y = 0}^{H} f(x,y) \\cdot e^{-i (ux+vy)} \\\\ &\\Leftrightarrow \\\\ f(x,y) &= \\frac{1}{U\\cdot V} \\sum_{u=-U/2}^{+U/2} \\sum_{v= -V/2}^{+V/2} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\\\ \\end{aligned} } Fωf^(u,v)f(x,y)=[Fk0⃗,Fk1⃗, ... 
,Fkn⃗]=x=0∑Wy=0∑Hf(x,y)⋅e−i(ux+vy)⇔=U⋅V1u=−U/2∑+U/2v=−V/2∑+V/2f^(u,v)⋅Fω(x,y) 利用上式,既可做算法实现。 二维离散傅立叶变换(1D-DFT)的 C 语言实现 第一步还是将二维离散傅立叶变化的过程抽象化。这里依旧采用伪码表示: /** * 2D-DFT [Discrete Fourier Transform] * [How to Use] * * Fo[W][H] = {...}; * Fn[U][V] = {}; * dft_2d(&Fo, &Fn); * [logistic] * { * result[U][V] = []; // as byte 2D-array * // do SDD: * for u in range(NU-Horizontal_Slices) { * for v in range(NV-Vertical_Slices) { * An = 0; Bn = 0; * // do FDD: * for y in Range(Height) { * for x in Range(Wight) { * Wn = (2 * PI) * Vec; * An = Re += Cos(Wn · VecT) * Fo(t); * Bn = Im += Sin(Wn · VecT) * Fo(t); * }} * result[u][v] = Fn.to_complex_angular_form(An, Bn) * }} * return result; * } * @param original_ Original Function input 2D-array * (image include width & height) * @param analyzed_ Fourier Basis info in 2D */ 同时,二维情况也需要提供离散傅里叶变换的逆变换(IDFT [Inverse Discrete Fourier Transform])来使得电脑能够还原信息: /** * 2D-IDFT [Inverse Discrete Fourier Transform] * [How to Use] * * Fo[W][H] = {}; * Fn[U][V] = {...}; * idft_2d(&Fo, &Fn); * [logistic] * { * result[W][H] = []; // as byte 2D-array * // do SDD: * for y in Range(Height) { * for x in Range(Wight) { * Re = 0; Im = 0; * // do FDD: * for u in range(NU-Horizontal_Slices) { * for v in range(NV-Vertical_Slices) { * Wn = (2 * PI) * Vec;; * An = Re * (Fn[n] · VecT); * Bn = Im * (Fn[n] · VecT); * result[t] += Fn[n].to_value(Wn, An, Bn) / (U * V); * }} * } * return result; * } * @param original_ Original Function input 2D-array * (image include width & height) * @param analyzed_ Fourier Basis analyzed info in 2D */ 接下来只需要根据思路做代码实现即可: #include \"stdio.h\" #include \"math.h\" #define PI 3.1415926f typedef struct FBasis { double re_; double im_; double w_[2]; } FBasis; typedef struct Signal2DOriginal { int GW_; int GH_; double *Fo_; } Signal2DOriginal; typedef struct Signal2DAnalyzed { int NU_; int NV_; FBasis *Fn_; } Signal2DAnalyzed; void dft_2d(Signal2DOriginal *original_, Signal2DAnalyzed *analyzed_) { for (int u = 0; u NU_; ++u) { for (int v = 0; v NV_; ++v) { double An = 0; double Bn = 0; double Un = (2 * PI / analyzed_->NU_) * u ; double Vn = (2 * PI / analyzed_->NV_) * v ; for (int y = 0; y GH_; ++y) { for (int x = 0; x GW_; ++x) { An += cos(Un * x + Vn * y) * original_->Fo_[y * original_->GW_ + x]; Bn += sin(Un * x + Vn * y) * original_->Fo_[y * original_->GW_ + x]; } } FBasis e_ = {An, Bn, {Un, Vn}}; analyzed_->Fn_[u * analyzed_->NV_ + v] = e_; } } } void idft_2d(Signal2DOriginal *original_, Signal2DAnalyzed *analyzed_) { for (int y = 0; y GH_; ++y) { for (int x = 0; x GW_; ++x) { for (int u = 0; u NU_; ++u) { for (int v = 0; v NV_; ++v) { FBasis e_ = analyzed_->Fn_[u * analyzed_->NV_ + v]; original_->Fo_[y * original_->GW_ + x] += ( e_.re_ * cos(e_.w_[0] * x + e_.w_[1] * y) + e_.im_ * sin(e_.w_[0] * x + e_.w_[1] * y) ) / (analyzed_->NU_ * analyzed_->NV_); } } } } } 写完后还是需要简单测试一下: int main(void) { double input_data_[36] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; FBasis output_data_[36] = {}; double versed_data_[36] = {}; Signal2DOriginal Fo = { 6, 6, input_data_ }; Signal2DAnalyzed Fn = { 6, 6, output_data_ }; Signal2DOriginal iFo = { 6, 6, versed_data_ }; printf(\"\\n Original_data: \\n\"); for (int y = 0; y 得到结果和标准几近相同: Original_data: 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 2.000000 3.000000 4.000000 5.000000 6.000000 1.000000 3.000000 4.000000 5.000000 6.000000 1.000000 
2.000000 4.000000 5.000000 6.000000 1.000000 2.000000 3.000000 5.000000 6.000000 1.000000 2.000000 3.000000 4.000000 6.000000 1.000000 2.000000 3.000000 4.000000 5.000000 DFT_result: 126.000 + i 0.000 -0.000 + i 0.000 -0.000 + i 0.000 0.000 + i 0.000 0.000 + i 0.000 0.000 + i 0.000 -0.000 + i 0.000 -18.000 + i -31.177 0.000 + i 0.000 0.000 + i -0.000 0.000 + i -0.000 0.000 + i -0.000 -0.000 + i 0.000 0.000 + i 0.000 -18.000 + i -10.392 0.000 + i -0.000 0.000 + i -0.000 0.000 + i -0.000 0.000 + i 0.000 0.000 + i -0.000 0.000 + i -0.000 -18.000 + i 0.000 0.000 + i -0.000 -0.000 + i -0.000 0.000 + i 0.000 0.000 + i -0.000 0.000 + i -0.000 0.000 + i -0.000 -18.000 + i 10.392 -0.000 + i -0.000 0.000 + i 0.000 0.000 + i -0.000 0.000 + i -0.000 -0.000 + i -0.000 -0.000 + i -0.000 -18.000 + i 31.177 IDFT_result: 1.000007 2.000001 2.999999 3.999999 5.000001 6.000003 2.000001 2.999998 3.999998 4.999998 5.999999 1.000000 2.999999 3.999998 4.999997 5.999998 0.999998 2.000000 3.999999 4.999998 5.999998 0.999997 1.999999 3.000001 5.000001 5.999999 0.999998 1.999999 3.000000 4.000003 6.000003 1.000000 2.000000 3.000001 4.000003 5.000005 运行结束。 二维离散傅里叶变换到此结束,那么更多维度的傅里叶变换该怎么处理呢?我们只需要拓展波矢 k⃗{\\vec{k}}k⃗ 的维度即可。而多维和一维、二维情况,在离散傅里叶变换的逻辑流程上并没有不同。但是,随着波矢 k⃗{\\vec{k}}k⃗ 的参数维度扩展,我们发现现有的直接计算法实现的离散傅里叶变换,其算法时间复杂度 O{n2}O\\{ n^2\\}O{n2} 已不足以支撑超过二维参数量后的敏捷计算。因此,我们迫切需要一种更快的代替算法。 这就是促成快速傅立叶蝴蝶法工程化的要素。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_1_3.html":{"url":"Chapter_3/Language/cn/Docs_3_1_3.html","title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","keywords":"","body":"3.1.3 快速傅立叶(FFT [Fast Fourier Transform]) 快速傅立叶是对离散傅立叶的数学逼近。其旨在通过有限点的分布拟合,快速逼近离散傅立叶变换结果。 快速傅立叶变换最早由 高斯(Carl Friedrich Gauss,1777 - 1855) 为了解决天文学中有关于智神星(Pallas)和婚神星(Juno)的位姿计算问题,而在 1805 年提出的 [8] [9] 。不过由于种种意料之外的因素,让该论文并没有被及时的发表。因此,论文在当时也没有获得太多的关注。直到计算机开始兴起,有关傅里叶变换等算法的更为低时间复杂度的要求变的迫切,才让后续研究者们又一次察觉到了这一篇文献(以及包括 19 世纪中叶和 20 世纪初的旁类研究)的贡献 [9] 。 1965 年,来自 IBM 普林斯通实验室的 詹姆士·库利(James Cooley) 和来自普林斯通大学的 约翰·图奇(John Tukey) 教授,联合发表了基于快速傅里叶变换的机器实现 [10] ,首次将该算法迁移到了计算机上。他们的研究提出了,通过采用分治法的思想来减少变换所需步数。这成功的使得,多维信号分析所用的傅立叶算法的时间复杂度算法,降至 。促进了数字信号处理(DSP)和计算机图形学的技术更新 [11] 。所以,为纪念两位的贡献,这套程序化的快速傅里叶变换(FFT [Fast Fourier Transform])方法论,被称为 库利-图奇算法(Cooley-Tukey Algorithm)。库利-图奇算法目标是一维信号,不过高维信号是可以被拆解为低维信号的向量积的,因此 并不影响其泛化。 在库利-图奇算法提出的时候,分治法已经被广泛的用来做计算机数组求最大值(Max)和排序(Sort)的处理当中。虽然离散的数组和周期信号之间,在信息密度和特征上存在较大差异。但如果考虑到周期信号沿传播维度重复,和傅里叶变换傅里叶基的特征,会发现: 如果将一维信号离散傅里叶变换的有限基底函数族 Fω\\mathcal {F}_{\\omega}Fω 构成的傅里叶基看作最小元,那么对其在时域上进行分组重排,也是可行的。从而使信号的一组基底函数基,能够以树状结构分类,并拆解特征表示原信号函数。 这就是库利-图奇算法的关键,在后续的算法的演进过程中逐步被提炼,形成了时域抽取这一核心概念 [11] 。 时域抽取(DIT [Decimation-in-Time]) 时域抽取(DIT [Decimation-in-Time])是从时域(TD [Time Domain])对一维信号进行可逆解构的一种数学工具。 它的工作流包含有两个阶段: 分组离散傅立叶(Grouped DFT) 和 旋转因子转换(Rotation Factor Convert) 时域抽取 - 分组离散傅立叶(Grouped DFT) 分组离散傅立叶(Grouped DFT) 是指,在信号的单个周期 TTT 内,以等间距有限次取 个原始离散采样后。将周期内所有采样点信息以 step=TK=Nstep =\\tfrac {T}{K} = Nstep=KT=N 的步长等分,得到 KKK 组顺序连续的子采样分组,依照组别记为样本子集 [S1,S2, ... ,SK][S_1,S_2,\\ ...\\ , S_K][S1,S2, ... ,SK] 。每组子集都有 Sk∈[fk((k−1)⋅N), fk(k⋅N))S_k \\in [f_k((k-1) \\cdot N),\\ f_k(k \\cdot N))Sk∈[fk((k−1)⋅N), fk(k⋅N)) 的样本取样区间。 此时,记组内索引为 nnn ,有 n∈[1, N]n \\in [1,\\ N]n∈[1, N] 。按照顺序从各组中,取组内索引位置为 nnn 的元素,组成包含数据量为 Fωn\\mathcal {F}_{\\omega_n}Fωn 的基底函数 Fωn\\mathcal {F}_{\\omega_n}Fωn 的波峰数组。可以逐个拟合,得到一组当前一维信号的有限基底函数族 Fω=[Fω1,Fω2, ... ,FωN]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2},\\ ...\\ ,\\mathcal {F}_{\\omega_N}]Fω=[Fω1,Fω2, ... 
,FωN] ,记为当前解的最小傅立叶基。根据一维离散傅立叶变换有: Fω=[Fω1,Fω2, ... ,FωN]T=NKf^(ω)=∑t=0Tf(t)⋅e−iωt ⇔ f(t)=1K∑ω0ωNf^(ω)⋅Fω(t) {\\displaystyle \\begin{aligned} \\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1},\\mathcal {F}_{\\omega_2},& \\ ...\\ ,\\mathcal {F}_{\\omega_N}] \\quad \\quad T = NK \\\\ \\hat{f}(\\omega) = \\sum_{t = 0}^{T} f(t) \\cdot e^{-i \\omega t} \\ \\ \\ \\ \\ &\\Leftrightarrow \\ \\ \\ \\ \\ f(t) = \\frac{1}{K} \\sum_{\\omega_0}^{\\omega_N} \\hat{f}(\\omega) \\cdot \\mathcal {F}_{\\omega}(t) \\\\ \\end{aligned} } Fω=[Fω1,Fω2,f^(ω)=t=0∑Tf(t)⋅e−iωt ... ,FωN]T=NK⇔ f(t)=K1ω0∑ωNf^(ω)⋅Fω(t) 又因 ωn=2πnT{\\omega_n} = \\tfrac{2\\pi n}{T}ωn=T2πn ,强度系数 f^(ω)\\hat{f}(\\omega)f^(ω) 与 f(t)f(t)f(t) 的关系,可以被转换为 f^(n)\\hat{f}(n)f^(n) 与 f(t)f(t)f(t) 的关系: f^(ω)=∑t=0Tf(t)⋅e−iωt→f^(n)=∑t=0Tf(t)⋅e−i2πnTtf(t)=1N∑ω0ωNf^(ω)⋅Fω(t)→f(t)=1N∑n=1Nf^(n)⋅Fω(t)f^(n)=∑t=0Tf(t)⋅e−i2πnTt⇔f(t)=1N∑n=1Nf^(n)⋅Fω(t) {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) = \\sum_{t = 0}^{T} f(t) \\cdot e^{-i \\omega t} &\\rightarrow \\hat{f}(n) =\\sum_{t = 0}^{T} f(t) \\cdot e^{-i \\tfrac{2\\pi n}{T} t } \\\\ f(t) = \\frac{1}{N} \\sum_{\\omega_0}^{\\omega_{N}} \\hat{f}(\\omega) \\cdot \\mathcal {F}_{\\omega}(t) &\\rightarrow f(t) = \\frac{1}{N} \\sum_{n=1}^{N} \\hat{f}(n) \\cdot \\mathcal {F}_{\\omega}(t) \\\\ \\hat{f}(n) =\\sum_{t = 0}^{T} f(t) \\cdot e^{-i \\tfrac{2\\pi n}{T} t } \\quad \\quad &\\Leftrightarrow \\quad \\quad f(t) = \\frac{1}{N} \\sum_{n=1}^{N} \\hat{f}(n) \\cdot \\mathcal {F}_{\\omega}(t) \\end{aligned} } f^(ω)=t=0∑Tf(t)⋅e−iωtf(t)=N1ω0∑ωNf^(ω)⋅Fω(t)f^(n)=t=0∑Tf(t)⋅e−iT2πnt→f^(n)=t=0∑Tf(t)⋅e−iT2πnt→f(t)=N1n=1∑Nf^(n)⋅Fω(t)⇔f(t)=N1n=1∑Nf^(n)⋅Fω(t) 带入 KKK 分组情况( T=NKT = NKT=NK ),上式可化为: f^(n)=∑k=1K∑(k−1)Nt=kN−1f(t)⋅e−i2πnTt⇔f(t)=1N∑n=1Nf^(n)⋅Fω(t) {\\displaystyle \\begin{aligned} \\hat{f}(n) =\\sum_{k=1}^{K}\\sum_{(k-1)N}^{t = kN-1} f(t) \\cdot e^{-i \\tfrac{2\\pi n}{T} t } \\quad \\quad &\\Leftrightarrow \\quad \\quad f(t) = \\frac{1}{N} \\sum_{n=1}^{N} \\hat{f}(n) \\cdot \\mathcal {F}_{\\omega}(t) \\end{aligned} } f^(n)=k=1∑K(k−1)N∑t=kN−1f(t)⋅e−iT2πnt⇔f(t)=N1n=1∑Nf^(n)⋅Fω(t) 即强度系数 f^(n)\\hat{f}(n)f^(n) 存在展开式: f^(n)=∑k=1K∑(k−1)Nt=kN−1f(t)⋅e−i2πnTt=∑t=0N−1f(t)⋅e−i2πtT⋅n+∑t=N2N−1f(t)⋅e−i2πtT⋅n+ ... +∑(K−1)Nt=KN−1f(t)⋅e−i2πtT⋅n=∑t=0N−1f(t)⋅e−i2πtT⋅n+∑t=0N−1f(t+N)⋅e−i2π(t+N)T⋅n+ ... +∑t=0N−1f(t+(K−1)N)⋅e−i2π(t+(K−1)N)T⋅n=∑k=1K∑t=0N−1f(t+(k−1)N)⋅e−i2πtTn⋅e−i2π(k−1)Kn {\\displaystyle \\begin{aligned} \\hat{f}(n) &= \\sum_{k=1}^{K}\\sum_{(k-1)N}^{t = kN-1} f(t) \\cdot e^{-i \\tfrac{2\\pi n}{T} t } \\\\ &= \\sum_{t=0}^{N-1} f(t) \\cdot e^{-i \\tfrac{2\\pi t}{T} \\cdot n } + \\sum_{t=N}^{2N-1} f(t) \\cdot e^{-i \\tfrac{2\\pi t}{T} \\cdot n } + \\ ...\\ + \\sum_{(K-1)N}^{t=KN-1} f(t) \\cdot e^{-i \\tfrac{2\\pi t}{T} \\cdot n } \\\\ &= \\sum_{t=0}^{N-1} f(t) \\cdot e^{-i \\tfrac{2\\pi t}{T} \\cdot n } + \\sum_{t=0}^{N-1} f(t+N) \\cdot e^{-i \\tfrac{2\\pi (t+N)}{T} \\cdot n } + \\ ...\\ + \\sum_{t=0}^{N-1} f(t + (K-1)N) \\cdot e^{-i \\tfrac{2\\pi (t + (K-1)N)}{T} \\cdot n } \\\\ &= \\sum_{k=1}^{K} \\sum_{t=0}^{N-1} f(t+ (k-1)N) \\cdot e^{-i \\tfrac{2\\pi t}{T} n } \\cdot e^{-i \\tfrac{2\\pi (k-1)}{K} n } \\\\ \\end{aligned} } f^(n)=k=1∑K(k−1)N∑t=kN−1f(t)⋅e−iT2πnt=t=0∑N−1f(t)⋅e−iT2πt⋅n+t=N∑2N−1f(t)⋅e−iT2πt⋅n+ ... +(K−1)N∑t=KN−1f(t)⋅e−iT2πt⋅n=t=0∑N−1f(t)⋅e−iT2πt⋅n+t=0∑N−1f(t+N)⋅e−iT2π(t+N)⋅n+ ... +t=0∑N−1f(t+(K−1)N)⋅e−iT2π(t+(K−1)N)⋅n=k=1∑Kt=0∑N−1f(t+(k−1)N)⋅e−iT2πtn⋅e−iK2π(k−1)n 要点就出现在这里,此时,由于有限基底函数族 Fω=[Fω1,Fω2, ... 
,FωN]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2},\\ ...\\ ,\\mathcal {F}_{\\omega_N}]Fω=[Fω1,Fω2, ... ,FωN] 的拟合样本选取自各个分组的对应角标数据,则显然任意 Fωi\\mathcal {F}_{\\omega_i}Fωi 的周期都有 Ti=2πnωi≥NT_i = \\tfrac{2\\pi n}{\\omega_i} \\geq NTi=ωi2πn≥N 且必然有 TimodN=0T_i \\mod N = 0TimodN=0 。因此,强度系数 f^(n)\\hat{f}(n)f^(n) 关于 kkk 的展开式能进一步精简为: f^(n)=∑k=1K(∑t=0N−1f(t+(k−1)N)⋅e−i2πtTn)⋅e−i2π(k−1)Kn=∑k=1Ke−i2π(k−1)Kn⋅[∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn)] {\\displaystyle \\begin{aligned} \\hat{f}(n) &= \\sum_{k=1}^{K} (\\sum_{t=0}^{N-1} f(t+ (k-1)N) \\cdot e^{-i \\tfrac{2\\pi t}{T} n }) \\cdot e^{-i \\tfrac{2\\pi (k-1)}{K} n } \\\\ &= \\sum_{k=1}^{K} e^{-i \\tfrac{2\\pi (k-1)}{K} n } \\cdot [\\sum_{(k-1)N}^{kN-1} \\vert_t \\ f(t) \\cdot \\mathcal {F}_{\\omega}^{-1}(tn) \\quad ] \\\\ \\end{aligned} } f^(n)=k=1∑K(t=0∑N−1f(t+(k−1)N)⋅e−iT2πtn)⋅e−iK2π(k−1)n=k=1∑Ke−iK2π(k−1)n⋅[(k−1)N∑kN−1∣t f(t)⋅Fω−1(tn)] 记 f^k(n)=∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn)\\hat{f}_k(n) =\\sum_{(k-1)N}^{kN-1} \\vert_t \\ f(t) \\cdot \\mathcal {F}_{\\omega}^{-1}(tn)f^k(n)=∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn) ,则 f^k(n)\\hat{f}_k(n)f^k(n) 即为分组样本子集 [S1,S2, ... ,SK][S_1,S_2,\\ ...\\ , S_K][S1,S2, ... ,SK] 在自己的分组样本区间 Sk∈[fk((k−1)⋅N), fk(k⋅N))S_k \\in [f_k((k-1) \\cdot N),\\ f_k(k \\cdot N))Sk∈[fk((k−1)⋅N), fk(k⋅N)) 内,进行离散傅里叶变换的分组强度系数结果。而 e−i2π(k−1)Kne^{-i \\tfrac{2\\pi (k-1)}{K} n }e−iK2π(k−1)n 在样本顺序 nnn 给定时,只与所处分组的组序 kkk 有关,且本身在三角函数空间表现为 n(k−1)n(k-1)n(k−1) 的角度固定值,所以我们记其为旋转因子(Rotation Factor) Rk(n)=e−i2π(k−1)KnR_k(n) = e^{-i \\tfrac{2\\pi (k-1)}{K} n }Rk(n)=e−iK2π(k−1)n 。 将 f^k(n)\\hat{f}_k(n)f^k(n) 、 Rk(n)R_k(n)Rk(n) 带入 f^(n)\\hat{f}(n)f^(n) ,则 f^(n)\\hat{f}(n)f^(n) 最终表现为: R1(n)=1f^(n)=∑k=1KRk(n)⋅f^k(n)=R1(n)⋅f^1(n)+R2(n)⋅f^2(n)+ ... +RK(n)⋅f^K(n)f^(n)=f^1(n)+R2(n)⋅f^2(n)+ ... +RK(n)⋅f^K(n) {\\displaystyle \\begin{aligned} R_1(n) & = 1 \\\\ \\hat{f}(n) &= \\sum_{k=1}^{K} R_k(n) \\cdot \\hat{f}_k(n) = R_1(n) \\cdot \\hat{f}_1(n) + R_2(n) \\cdot \\hat{f}_2(n) + \\ ...\\ + R_K(n) \\cdot \\hat{f}_K(n) \\\\ \\hat{f}(n) &= \\hat{f}_1(n) + R_2(n) \\cdot \\hat{f}_2(n) + \\ ...\\ + R_K(n) \\cdot \\hat{f}_K(n) \\\\ \\end{aligned} } R1(n)f^(n)f^(n)=1=k=1∑KRk(n)⋅f^k(n)=R1(n)⋅f^1(n)+R2(n)⋅f^2(n)+ ... +RK(n)⋅f^K(n)=f^1(n)+R2(n)⋅f^2(n)+ ... 
+RK(n)⋅f^K(n) 上式就是时域抽取(DIT)有关分组离散傅立叶(Grouped DFT)的通用完整过程。单从公式来看,由于切割了样本集,我们只能通过分组离散傅立叶(Grouped DFT)直接求得原一维信号前 NNN 个信号量的傅里叶解。反而因为样本不足的问题,无法直接求得剩余的 (K−1)N(K-1)N(K−1)N 个信号量。 那么我们大费周章的这么做有什么用处呢?原因就在于旋转因子间是存在关系的。 时域抽取 - 旋转因子转换(Rotation Factor Convert) 这个问题,需要从复变函数的三角函数特性来回答。记 Rk(n)R_k(n)Rk(n) 变换到三角函数域,其实部为 aka_kak ,虚部为 bkb_kbk 。则 Rk(n)R_k(n)Rk(n) 可以表示为: Rk(n)=e−i2π(k−1)Kn=ak⋅cos(2π(k−1)Kn)+i⋅bk⋅sin(2π(k−1)Kn) dt {\\displaystyle \\begin{aligned} R_k(n) &= e^{-i \\tfrac{2\\pi (k-1)}{K} n } \\\\ &= a_k \\cdot cos(\\tfrac{2\\pi (k-1)}{K} n) + i \\cdot b_k \\cdot sin(\\tfrac{2\\pi (k-1)}{K} n) \\ dt \\\\ \\end{aligned} } Rk(n)=e−iK2π(k−1)n=ak⋅cos(K2π(k−1)n)+i⋅bk⋅sin(K2π(k−1)n) dt 依此,取 aka_kak 为 yyy 轴、 bkb_kbk 为 xxx 轴。我们假设分组 K=2mK = 2^mK=2m ,信号周期 T=2π⋅MT = 2 \\pi \\cdot MT=2π⋅M 且 Tmod2π=0T \\mod 2\\pi = 0Tmod2π=0 ,有此时步长 N=π2m−1⋅MN = \\tfrac{\\pi}{2^{m-1}} \\cdot MN=2m−1π⋅M 。为便于说明,我们取 M=1M = 1M=1 , m=1m = 1m=1 ,且 n=π6=∠30∘n = \\tfrac{\\pi}{6} = \\angle 30^\\circn=6π=∠30∘ 来进行绘制。实际上 nnn 只能取 [1, N][1, \\ N][1, N] 的整数,但那样会不便于图示,这里取固定角并不影响后续结论。则 Rk(n)R_k(n)Rk(n) 在 akbka_kb_kakbk 构成的平面坐标系上有如下取值范围: 图 3-1 旋转因子的三角函数系取值演示 在图像表示下 Rk(n)R_k(n)Rk(n) 的特性更易察觉,当分组 K=2mK = 2^mK=2m 且 m≥1m \\geq 1m≥1 取整时, 单个 2π2\\pi2π 周期内,以 N=2πKN = \\tfrac{2\\pi}{K}N=K2π 可以分为 2m−12^{m-1}2m−1 组。每组分组都包涵两个子样本集 [Sk ,Sk+2m−1][S_k\\ ,S_{k+2^{m-1}}][Sk ,Sk+2m−1] ,此时,这两个字样本集旋转因子原点对称,有 Rk(n)=−Rk(n+π)n∈[2π(k−1)K, 2πkK]R_k(n) = -R_k(n+\\pi) \\quad n \\in [\\tfrac{2\\pi (k-1)}{K}, \\ \\tfrac{2\\pi k}{K}]Rk(n)=−Rk(n+π)n∈[K2π(k−1), K2πk] 。而对于信号 M>1M > 1M>1 时,间隔为 2π2\\pi2π 的分组有 2M2^M2M 组,且旋转因子取值相同,即 Rk(n)=Rk+2π⋅M(n)R_k(n) = R_{k+2\\pi \\cdot M}(n)Rk(n)=Rk+2π⋅M(n) 。 如果我们取 K=2K = 2K=2 ,即 m=1m = 1m=1 ,对整体信号的 TTT 个样本分为两组,两组原点对称有: f^(n)=f^1(n)+e−iπn⋅f^2(n) =f^1(n)+R2(n)⋅f^2(n)f^(n+π)=f^1(n)+e−iπ(n+π)⋅f^2(n)=f^1(n)−R2(n)⋅f^2(n) {\\displaystyle \\begin{aligned} \\hat{f}(n) &= \\hat{f}_1(n) + e^{-i \\pi n} \\cdot \\hat{f}_2(n) \\quad \\ = \\hat{f}_1(n) + R_2(n) \\cdot \\hat{f}_2(n) \\\\ \\hat{f}(n+\\pi) &= \\hat{f}_1(n) + e^{-i \\pi (n+\\pi)} \\cdot \\hat{f}_2(n) = \\hat{f}_1(n) - R_2(n) \\cdot \\hat{f}_2(n) \\\\ \\end{aligned} } f^(n)f^(n+π)=f^1(n)+e−iπn⋅f^2(n) =f^1(n)+R2(n)⋅f^2(n)=f^1(n)+e−iπ(n+π)⋅f^2(n)=f^1(n)−R2(n)⋅f^2(n) 如果我们取 K=4K = 4K=4 ,即 m=2m = 2m=2 ,对整体信号的 TTT 个样本分为四组,间隔两两原点对称,即相邻组间实虚轴反转,有: Rk(n+π2)=[(k−1)%2]⋅(−1)k−1⋅Rk(n)+[(k−1)%2+1]⋅(−i)k−1⋅Rk(n) {\\displaystyle \\begin{aligned} R_k(n+\\tfrac{\\pi}{2}) = [(k-1)\\%2] \\cdot (-1)^{k-1} \\cdot R_k(n) + [(k-1)\\%2 + 1] \\cdot(-i)^{k-1} \\cdot R_k(n) \\\\ \\end{aligned} } Rk(n+2π)=[(k−1)%2]⋅(−1)k−1⋅Rk(n)+[(k−1)%2+1]⋅(−i)k−1⋅Rk(n) 则 f^(n)\\hat{f}(n)f^(n) 有 n∈[0, π2]n \\in [0, \\ \\tfrac{\\pi}{2}]n∈[0, 2π] 范围的表达式: f^(n)=f^1(n)+ R2(n)⋅f^2(n)+ R3(n)⋅f^3(n)+ R4(n)⋅f^4(n)f^(n+π2)=f^1(n)−iR2(n)⋅f^2(n)− R3(n)⋅f^3(n)+iR4(n)⋅f^4(n)f^(n+π)=f^1(n)− R2(n)⋅f^2(n)+ R3(n)⋅f^3(n)− R4(n)⋅f^4(n)f^(n+3π2)=f^1(n)+iR2(n)⋅f^2(n)− R3(n)⋅f^3(n)−iR4(n)⋅f^4(n) {\\displaystyle \\begin{aligned} \\hat{f}(n) &= \\hat{f}_1(n) + \\ R_2(n) \\cdot \\hat{f}_2(n) + \\ R_3(n) \\cdot \\hat{f}_3(n) + \\ R_4(n) \\cdot \\hat{f}_4(n) \\\\ \\hat{f}(n+\\tfrac{\\pi}{2}) &= \\hat{f}_1(n) - i R_2(n) \\cdot \\hat{f}_2(n) - \\ R_3(n) \\cdot \\hat{f}_3(n) + i R_4(n) \\cdot \\hat{f}_4(n) \\\\ \\hat{f}(n+\\pi) &= \\hat{f}_1(n) - \\ R_2(n) \\cdot \\hat{f}_2(n) + \\ R_3(n) \\cdot \\hat{f}_3(n) - \\ R_4(n) \\cdot \\hat{f}_4(n) \\\\ \\hat{f}(n+\\tfrac{3\\pi}{2})&= \\hat{f}_1(n) + i R_2(n) \\cdot \\hat{f}_2(n) - \\ R_3(n) \\cdot \\hat{f}_3(n) - i R_4(n) \\cdot \\hat{f}_4(n) \\\\ \\end{aligned} } f^(n)f^(n+2π)f^(n+π)f^(n+23π)=f^1(n)+ R2(n)⋅f^2(n)+ 
R3(n)⋅f^3(n)+ R4(n)⋅f^4(n)=f^1(n)−iR2(n)⋅f^2(n)− R3(n)⋅f^3(n)+iR4(n)⋅f^4(n)=f^1(n)− R2(n)⋅f^2(n)+ R3(n)⋅f^3(n)− R4(n)⋅f^4(n)=f^1(n)+iR2(n)⋅f^2(n)− R3(n)⋅f^3(n)−iR4(n)⋅f^4(n) 不论上述哪一种分组方法,我们都可以将求解范围从有限子集 SkS_kSk 中 n∈[2π(k−1)K, 2πkK]n \\in [\\tfrac{2\\pi (k-1)}{K}, \\ \\tfrac{2\\pi k}{K}]n∈[K2π(k−1), K2πk] 的离散傅立叶结果,拓展到完整信号周期 TTT 。而只需要求任意一有限子集 SkS_kSk 的傅立叶基即可。 根据 K=2mK = 2^mK=2m 的不同取值,时域抽取(DIT)过程的时间复杂度,通过计算分片耗时,能够简单得到为 O(K−1Kn⋅log2mn)=O(K−1K⋅2m−1n⋅log2n)O(\\tfrac{K-1}{K}n \\cdot log_{2^m}n) = O(\\tfrac{K-1}{K \\cdot 2^{m-1} }n \\cdot log_2n)O(KK−1n⋅log2mn)=O(K⋅2m−1K−1n⋅log2n) 。 显然,O∣K=2=O(12n⋅log2n)O|_{K=2} =O(\\tfrac{1}{2}n \\cdot log_2n)O∣K=2=O(21n⋅log2n) 、 O∣K=4=O(38n⋅log2n)O|_{K=4} =O(\\tfrac{3}{8}n \\cdot log_2n)O∣K=4=O(83n⋅log2n) 虽然分组间耗时差异不大,但相较于直接对一维信号使用离散傅里叶变换(DFT)的 O(n2)O(n^2)O(n2) 耗时来说,直接减少了一个数量级。这即是快速傅立叶的 “快”。 对于 KKK 取不同值时的时域抽取(DIT),为了做区分,根据 KKK 值的不同被分别称为 双模时域抽取(Radix-2 DIT) 和 四模时域抽取(Radix-4 DIT)。同理,我们将 K=2K = 2K=2 时的库利-图奇算法称为 双模快速傅里叶变换(Radix-2 FFT),将 K=4K = 4K=4 时的库利-图奇算法称为 四模快速傅里叶变换(Radix-4 FFT)。两者差异如上,主要即是在划分导致推算上的不同。 至于为什么快速傅里叶变换又被称为蝴蝶法这点。则和经过时域抽取(DIT)处理后,有限基底函数族 Fω=[Fω1,Fω2, ... ,FωN]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2},\\ ...\\ ,\\mathcal {F}_{\\omega_N}]Fω=[Fω1,Fω2, ... ,FωN] 的对应强度系数 f^(ω)\\hat{f}(\\omega)f^(ω) 与分组 f^k(n)\\hat{f}_k(n)f^k(n) 的换算方式有关。 处理单元最小化 - 交叉求值与“蝴蝶”的由来 以 双模快速傅里叶变换(Radix-2 FFT) 为例。在最简情况下,当样本取 T=2T = 2T=2 ,有 K=2K = 2K=2 且 N=1N = 1N=1 ,基底函数族 Fω=[Fω1,Fω2]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2}]Fω=[Fω1,Fω2] ,此时: ∵f^k(n)=∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn)∴f^(n)=f^1(n) + (−1)n⋅R2(n)⋅f^2(n)=Fω1−1(n)⋅f(0)+ Fω2−1(n)⋅f(1)= f(0) + (−1)n⋅Fω2−1(n)⋅f(1) {\\displaystyle \\begin{aligned} \\because \\hat{f}_k(n) &=\\sum_{(k-1)N}^{kN-1} \\vert_t \\ f(t) \\cdot \\mathcal {F}_{\\omega}^{-1}(tn) \\\\ \\therefore \\hat{f}(n) &=\\quad \\quad \\hat{f}_1(n)\\quad \\ +\\ (-1)^n \\cdot R_2(n) \\cdot \\hat{f}_2(n) \\\\ &= \\mathcal {F}_{\\omega_1}^{-1}(n) \\cdot f(0) + \\quad \\ \\mathcal {F}_{\\omega_2}^{-1}(n) \\cdot f(1) \\\\ &= \\quad \\quad \\ f(0) \\ \\quad +\\ (-1)^n \\cdot \\mathcal {F}_{\\omega_2}^{-1}(n) \\cdot f(1) \\\\ \\end{aligned} } ∵f^k(n)∴f^(n)=(k−1)N∑kN−1∣t f(t)⋅Fω−1(tn)=f^1(n) + (−1)n⋅R2(n)⋅f^2(n)=Fω1−1(n)⋅f(0)+ Fω2−1(n)⋅f(1)= f(0) + (−1)n⋅Fω2−1(n)⋅f(1) 显然,对于足够小的样本,其库利-图奇解的旋转因子 Rk(n)R_k(n)Rk(n) ,就是它所对应的傅里叶基函数与转置因子的乘机,即: Rk(n)=(−1)n⋅Fω2−1(n),k∣n∈int[0,1] R_k(n) = (-1)^n \\cdot \\mathcal {F}_{\\omega_2}^{-1}(n) \\quad , k|n \\in int[0,1] Rk(n)=(−1)n⋅Fω2−1(n),k∣n∈int[0,1] 我们在傅里叶变换章节开始时提到过,傅里叶变换从空间投影变换角度,可以表示为: N⋅F=FωT⋅F=[Fω1Fω2⋮Fωn]⋅[f^1,f^2, ... ,f^n] {\\displaystyle \\begin{aligned} N \\cdot F = {\\mathcal {F}_{\\omega}}^T \\cdot \\mathcal {F} = {\\begin{bmatrix} \\mathcal {F}_{\\omega_1} \\\\ \\mathcal {F}_{\\omega_2} \\\\ \\vdots \\\\ \\mathcal {F}_{\\omega_n} \\end{bmatrix}} \\cdot [\\hat{f}_1,\\hat{f}_2,\\ ...\\ ,\\hat{f}_n] \\\\ \\end{aligned} } N⋅F=FωT⋅F=⎣⎢⎢⎡Fω1Fω2⋮Fωn⎦⎥⎥⎤⋅[f^1,f^2, ... 
,f^n] 那么,在引入了转置因子的情况下,原信号 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的关系就可以被写为: [f(0)f(1)]=[1,+Fω21,−Fω2]⋅[f^(0)f^(1)]=[1,+11,−1]⋅[Fω1,Fω2]⋅[f^(0)f^(1)] {\\displaystyle \\begin{aligned} { \\begin{bmatrix} f(0) \\\\ f(1) \\end{bmatrix} } = { \\begin{bmatrix} 1 \\quad , +\\mathcal{F}_{\\omega_2} \\\\ 1 \\quad , -\\mathcal{F}_{\\omega_2} \\end{bmatrix} } \\cdot { \\begin{bmatrix} \\hat{f}(0) \\\\ \\hat{f}(1) \\end{bmatrix} } = { \\begin{bmatrix} 1 \\quad , & +1 \\\\ 1 \\quad , & -1 \\end{bmatrix} } \\cdot [\\mathcal{F}_{\\omega_1}, \\mathcal{F}_{\\omega_2}] \\cdot { \\begin{bmatrix} \\hat{f}(0) \\\\ \\hat{f}(1) \\end{bmatrix} } \\\\ \\end{aligned} } [f(0)f(1)]=[1,+Fω21,−Fω2]⋅[f^(0)f^(1)]=[1,1,+1−1]⋅[Fω1,Fω2]⋅[f^(0)f^(1)] 而这个过程如果换到拓扑图表示,就是大名鼎鼎的 “蝴蝶” 流造型了 (注意,颜色表示转子输出方向) : 同理,当采用 四模快速傅里叶变换(Radix-4 FFT) 时,有在最简情况下样本取 T=4T = 4T=4 。有 K=4K = 4K=4 且 N=1N = 1N=1 ,基底函数族 Fω=[Fω1,Fω2,Fω3,Fω4]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2}, \\mathcal {F}_{\\omega_3}, \\mathcal {F}_{\\omega_4}]Fω=[Fω1,Fω2,Fω3,Fω4] 。省略同质的推导过程,有原信号 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的关系: [f(0)f(1)f(2)f(3)]=[1,1,1,11,−i,−1,i1,−1,1,−11,i,−1,−i]⋅[Fω1,Fω2,Fω3,Fω4]⋅[f^(0)f^(1)f^(2)f^(3)] {\\displaystyle \\begin{aligned} { \\begin{bmatrix} f(0) \\\\ f(1) \\\\ f(2) \\\\ f(3) \\end{bmatrix} } = { \\begin{bmatrix} 1 ,& &1,& &1,& &1 \\\\ 1 ,& -&i,& -&1,& &i \\\\ 1 ,& -&1,& &1,& -&1 \\\\ 1 ,& &i,& -&1,& -&i \\end{bmatrix} } \\cdot [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2}, \\mathcal {F}_{\\omega_3}, \\mathcal {F}_{\\omega_4}] \\cdot { \\begin{bmatrix} \\hat{f}(0) \\\\ \\hat{f}(1) \\\\ \\hat{f}(2) \\\\ \\hat{f}(3) \\end{bmatrix} } \\\\ \\end{aligned} } ⎣⎢⎢⎡f(0)f(1)f(2)f(3)⎦⎥⎥⎤=⎣⎢⎢⎡1,1,1,1,−−1,i,1,i,−−1,1,1,1,−−1i1i⎦⎥⎥⎤⋅[Fω1,Fω2,Fω3,Fω4]⋅⎣⎢⎢⎡f^(0)f^(1)f^(2)f^(3)⎦⎥⎥⎤ 四模的 “蝴蝶” 流造型如下 (注意,颜色表示前级数据来源) : 可见,单元的最小化抽象是通用的方法论。 对于多样本情况,只需要层层分解组装即可完成整体的快速处理。由于时间差异并不明显,但转置矩阵复杂度差异较大,因此我们一般选择 双模(Radix-2) 简化整体处理过程。 分批处理层级树 - 单元组装与完整流水线 和简单情况不一样的是,更多的样本采样使得我们没办法通过一次计算就得到最终结果。而在之前的推导过程中我们提到,对于不同子样本集抽参求解 f^k(n)\\hat{f}_k(n)f^k(n) 的过程,其本质也是一个傅里叶变换,只不过在解构过程中被我们以整体进行了代指换元。因此,随着 T=2lT = 2^lT=2l 与 K=2mK = 2^mK=2m 的变化,对信号处理的层数 LayerLayerLayer 也会产生变更有: Layer=logK(T)=lm Layer = \\log_{K}(T) = \\frac{l}{m} Layer=logK(T)=ml 假设样本取 T=4T = 4T=4 ,有 K=2K = 2K=2 ,则 N=2N = 2N=2 ,此时所需层数为 Layer=2Layer = 2Layer=2 。根据其上我们的分析可知,存在整合后的基底函数族为: Fω=[Fω1,Fω2]=[Fω11,Fω12,Fω21,Fω22] \\mathcal{F}_{\\omega} = [\\mathcal{F}_{\\omega_1}, \\mathcal{F}_{\\omega_2}] = [\\mathcal{F}_{\\omega_{11}}, \\mathcal{F}_{\\omega_{12}}, \\mathcal{F}_{\\omega_{21}}, \\mathcal{F}_{\\omega_{22}}] Fω=[Fω1,Fω2]=[Fω11,Fω12,Fω21,Fω22] 使得原信号 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的关系为: ∵f^k(n)=∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn)mark:Ri(n)⋅Fωij−1(n)=Rij(n)∣(T=4,K=2)∴f^1(n)= Fω11−1(n)⋅f1(0)+ (−1)n⋅Fω12−1(n)⋅f1(1)= + (−1)n⋅Fω12−1(n)⋅f(2)= DFT(f1(n))f^2(n)= Fω21−1(n)⋅f2(0)+ (−1)n⋅Fω22−1(n)⋅f2(1)= f(1) + (−1)n⋅Fω22−1(n)⋅f(3)= DFT(f2(n))∴f^(n)=[ f^1(n) + (−1)n⋅R2(n)⋅f^2(n) ]∣(T=8,K=2)= R1(n)⋅DFT(f1(n))+ (−1)n⋅R2(n)⋅DFT(f2(n))= R1(n)⋅Fω11−1(n)⋅f(0) + (−1)n⋅R1(n)⋅Fω12−1(n)⋅f(2)+ (−1)n⋅R2(n)⋅f(1)+R2(n)⋅Fω22−1(n)⋅f(3)⇒f^(n)= R11(n)⋅f(0)+ (−1)n⋅R12(n)⋅f(2)+ (−1)n⋅R21(n)⋅f(1)+ R22(n)⋅f(3) {\\displaystyle \\begin{aligned} \\because \\hat{f}_k(n) =&\\sum_{(k-1)N}^{kN-1} \\vert_t \\ f(t) \\cdot \\mathcal{F}_{\\omega}^{-1}(tn) \\quad \\quad \\quad \\quad \\quad \\quad mark: R_i(n) \\cdot \\mathcal{F}_{\\omega_{ij}}^{-1}(n) = R_{ij}(n) \\vert_{(T=4,K=2)} \\\\ \\therefore \\hat{f}_1(n) =& \\ \\mathcal{F}_{\\omega_{11}}^{-1}(n) \\cdot f_1(0) \\quad +\\ (-1)^n \\cdot 
\\mathcal{F}_{\\omega_{12}}^{-1}(n) \\cdot f_1(1) \\\\ =& \\ +\\ (-1)^n \\cdot \\mathcal{F}_{\\omega_{12}}^{-1}(n) \\cdot f(2) \\\\ =& \\ DFT(f_1(n)) \\\\ \\hat{f}_2(n) =& \\ \\mathcal{F}_{\\omega_{21}}^{-1}(n) \\cdot f_2(0) \\quad +\\ (-1)^n \\cdot \\mathcal{F}_{\\omega_{22}}^{-1}(n) \\cdot f_2(1) \\\\ =& \\ f(1) \\ +\\ (-1)^n \\cdot \\mathcal{F}_{\\omega_{22}}^{-1}(n) \\cdot f(3) \\\\ =& \\ DFT(f_2(n)) \\\\ \\\\ \\therefore \\hat{f}(n) =& [\\ \\hat{f}_1(n)\\ +\\ (-1)^n \\cdot R_2(n) \\cdot \\hat{f}_2(n)\\ ]\\vert_{(T=8,K=2)} \\\\ =& \\ R_1(n) \\cdot DFT(f_1(n)) \\quad +\\ (-1)^n \\cdot R_2(n) \\cdot DFT(f_2(n)) \\\\ =& \\ R_1(n) \\cdot \\mathcal{F}_{\\omega_{11}}^{-1}(n) \\cdot f(0) \\ \\ + \\ (-1)^n \\cdot R_1(n) \\cdot \\mathcal{F}_{\\omega_{12}}^{-1}(n) \\cdot f(2) \\quad + \\\\ & \\ (-1)^n \\cdot R_2(n) \\cdot f(1) \\quad + \\quad \\quad R_2(n) \\cdot \\mathcal{F}_{\\omega_{22}}^{-1}(n) \\cdot f(3) \\\\ \\Rightarrow \\\\ \\hat{f}(n) =& \\quad \\quad \\ R_{11}(n) \\cdot f(0)\\quad \\quad +\\ \\quad (-1)^n \\cdot R_{12}(n)\\cdot f(2)\\quad +\\ \\\\ & (-1)^n \\cdot R_{21}(n) \\cdot f(1) \\quad +\\ \\quad \\quad \\quad \\ R_{22}(n)\\cdot f(3) \\\\ \\end{aligned} } ∵f^k(n)=∴f^1(n)===f^2(n)===∴f^(n)===⇒f^(n)=(k−1)N∑kN−1∣t f(t)⋅Fω−1(tn)mark:Ri(n)⋅Fωij−1(n)=Rij(n)∣(T=4,K=2) Fω11−1(n)⋅f1(0)+ (−1)n⋅Fω12−1(n)⋅f1(1) + (−1)n⋅Fω12−1(n)⋅f(2) DFT(f1(n)) Fω21−1(n)⋅f2(0)+ (−1)n⋅Fω22−1(n)⋅f2(1) f(1) + (−1)n⋅Fω22−1(n)⋅f(3) DFT(f2(n))[ f^1(n) + (−1)n⋅R2(n)⋅f^2(n) ]∣(T=8,K=2) R1(n)⋅DFT(f1(n))+ (−1)n⋅R2(n)⋅DFT(f2(n)) R1(n)⋅Fω11−1(n)⋅f(0) + (−1)n⋅R1(n)⋅Fω12−1(n)⋅f(2)+ (−1)n⋅R2(n)⋅f(1)+R2(n)⋅Fω22−1(n)⋅f(3) R11(n)⋅f(0)+ (−1)n⋅R12(n)⋅f(2)+ (−1)n⋅R21(n)⋅f(1)+ R22(n)⋅f(3) 同理,当 T=8T = 8T=8 ,有 K=2K = 2K=2 ,则 N=4N = 4N=4 ,此时所需层数为 Layer=3Layer = 3Layer=3 。存在整合后的基底函数族: Fω=[Fω1,Fω2]=[Fω11,Fω12,Fω21,Fω22]=[Fω111,Fω112,Fω121,Fω122,Fω211,Fω212,Fω221,Fω222] {\\displaystyle \\begin{aligned} \\mathcal{F}_{\\omega} &= [\\mathcal{F}_{\\omega_1}, \\mathcal{F}_{\\omega_2}] \\\\ &= [\\mathcal{F}_{\\omega_{11}}, \\mathcal{F}_{\\omega_{12}}, \\mathcal{F}_{\\omega_{21}}, \\mathcal{F}_{\\omega_{22}}] \\\\ &= [\\mathcal{F}_{\\omega_{111}}, \\mathcal{F}_{\\omega_{112}}, \\mathcal{F}_{\\omega_{121}}, \\mathcal{F}_{\\omega_{122}}, \\mathcal{F}_{\\omega_{211}}, \\mathcal{F}_{\\omega_{212}}, \\mathcal{F}_{\\omega_{221}}, \\mathcal{F}_{\\omega_{222}}] \\\\ \\end{aligned} } Fω=[Fω1,Fω2]=[Fω11,Fω12,Fω21,Fω22]=[Fω111,Fω112,Fω121,Fω122,Fω211,Fω212,Fω221,Fω222] 使得原信号 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的关系为 (省略同质化过程) : f^(n)= R11(n)⋅f^11(n)∣0,4+ (−1)n⋅R12(n)⋅f^12(n)∣1,5+ (−1)n⋅R21(n)⋅f^21(1)∣2,6+ R22(n)⋅f^22(3)∣3,7=[R111(n)⋅f(0) + (−1)n⋅R112(n)⋅f(4)+ R221(n)⋅f(2)+ (−1)n⋅R222(n)⋅f(6)]feven+[R121(n)⋅f(1) + (−1)n⋅R122(n)⋅f(5)+ R321(n)⋅f(3)+ (−1)n⋅R322(n)⋅f(7)]fodds {\\displaystyle \\begin{aligned} \\hat{f}(n) =& \\quad \\quad \\ R_{11}(n) \\cdot \\hat{f}_{11}(n) \\vert_{0,4} \\quad \\quad + \\quad \\quad \\ (-1)^n \\cdot R_{12}(n)\\cdot \\hat{f}_{12}(n) \\vert_{1,5} \\quad \\quad + \\\\ & \\ (-1)^n \\cdot R_{21}(n) \\cdot \\hat{f}_{21}(1) \\vert_{2,6} \\quad + \\quad \\quad \\ R_{22}(n)\\cdot \\hat{f}_{22}(3) \\vert_{3,7} \\\\ =& [ R_{111}(n) \\cdot f(0)\\ + \\ (-1)^n \\cdot R_{112}(n) \\cdot f(4) + \\ R_{221}(n) \\cdot f(2) + \\ (-1)^n \\cdot R_{222}(n) \\cdot f(6) ]_{f_{even}} \\quad + \\\\ & [ R_{121}(n) \\cdot f(1)\\ + \\ (-1)^n \\cdot R_{122}(n) \\cdot f(5) + \\ R_{321}(n) \\cdot f(3) + \\ (-1)^n \\cdot R_{322}(n) \\cdot f(7) ]_{f_{odds}} \\\\ \\end{aligned} } f^(n)== R11(n)⋅f^11(n)∣0,4+ (−1)n⋅R12(n)⋅f^12(n)∣1,5+ (−1)n⋅R21(n)⋅f^21(1)∣2,6+ 
R22(n)⋅f^22(3)∣3,7[R111(n)⋅f(0) + (−1)n⋅R112(n)⋅f(4)+ R221(n)⋅f(2)+ (−1)n⋅R222(n)⋅f(6)]feven+[R121(n)⋅f(1) + (−1)n⋅R122(n)⋅f(5)+ R321(n)⋅f(3)+ (−1)n⋅R322(n)⋅f(7)]fodds 此时的“蝴蝶”流造型,就要复杂一些了 : 从图上可知,每层都可以被分割为 2la−12^{l_a - 1}2la−1 个蝶形单元,其中 lal_ala 为当前层级。而完整的计算,则需要历经共计 2l/m−12^{l/m} - 12l/m−1 个单元才能完成。 如果我们开始就对总样本集 SSS ,按照奇偶样本分为 S1′=[f(0), f(2), f(4), f(6)]S_1^{\\prime} = [f(0),\\ f(2),\\ f(4) ,\\ f(6)]S1′=[f(0), f(2), f(4), f(6)] 和 S2′=[f(1), f(3), f(5), f(7)]S_2^{\\prime} = [f(1),\\ f(3),\\ f(5) ,\\ f(7)]S2′=[f(1), f(3), f(5), f(7)] 这两个子集。使单一分组求单一解,来方便分离的离散傅里叶变换调用。那么整个蝴蝶图就变成如下样子了 (同色线表示相同流向) : 结果同样一致,可见奇偶分割实质上是一个以 K=2K = 2K=2 为步长的抽样再迭代计算的过程。这点也能够从 K=4K = 4K=4 时,四模对原数据取样 T=8T = 8T=8 会使 f(n)f(n)f(n) 被分为: f^(n)=[R11(n)⋅f(0) + (−1)n⋅R12(n)⋅f(4)]f1/4+[R21(n)⋅f(1) + (−1)n⋅R22(n)⋅f(5)]f2/4+[R31(n)⋅f(2) + (−1)n⋅R32(n)⋅f(6)]f3/4+[R41(n)⋅f(3) + (−1)n⋅R42(n)⋅f(7)]f4/4 {\\displaystyle \\begin{aligned} \\hat{f}(n) =& [ R_{11}(n) \\cdot f(0)\\ + \\ (-1)^n \\cdot R_{12}(n) \\cdot f(4) ]_{f_{1/4}} \\quad + \\\\ & [ R_{21}(n) \\cdot f(1)\\ + \\ (-1)^n \\cdot R_{22}(n) \\cdot f(5) ]_{f_{2/4}} \\quad + \\\\ & [ R_{31}(n) \\cdot f(2)\\ + \\ (-1)^n \\cdot R_{32}(n) \\cdot f(6) ]_{f_{3/4}} \\quad + \\\\ & [ R_{41}(n) \\cdot f(3)\\ + \\ (-1)^n \\cdot R_{42}(n) \\cdot f(7) ]_{f_{4/4}} \\\\ \\end{aligned} } f^(n)=[R11(n)⋅f(0) + (−1)n⋅R12(n)⋅f(4)]f1/4+[R21(n)⋅f(1) + (−1)n⋅R22(n)⋅f(5)]f2/4+[R31(n)⋅f(2) + (−1)n⋅R32(n)⋅f(6)]f3/4+[R41(n)⋅f(3) + (−1)n⋅R42(n)⋅f(7)]f4/4 的情况,得到间接的阐明。 因此,我们可以通过封装固定 KKK 时的最小蝶形单元,采用递归的方式来计算 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的相互转换。分组的产生,是由顺序输入在算法作用下经过每层的蝶形单元处理后,导致的必然结果。是一个自然而然的过程而并非强行去做的设定,切勿本末倒置。 而我们期望的是有序的输出,这也就产生了对输入进行排序的要求。 基于数据的优化 - 位反转(Bit Reversal)输入索引重排 经过前面的一系列分析,不难归纳得到:最终算法的输出顺序,是原序列经过 Layer−1Layer - 1Layer−1 层反转的结果。即每个蝶形单元,会反转当前对应字样本周期跨度的一半。 还是采用当 T=8T = 8T=8 ,有 K=2K = 2K=2 时的情形。我们将所有的处理过程排除,以原样本数据序列角标的传递过程来标记处理流,则有: 当代计算机采用的二进制计数,我们将上述样本角标 采用二进制表示,有: 这一现象即被称为 位反转(Bit Reversal)。我们可以利用这一特点,在工程运算过程中每个蝶形单元的数据装配处,以顺序序列对应位反转的角标来取用输入数据,从而保证迭代运算结果的顺序。 一维快速傅立叶变换(1D-FFT)的 C 语言实现 现在,万事俱备。可以进行代码实现了。先用伪码缕清算法程序化思路: /** * 1D-FFT [Fast Fourier Transform] * [How to Use] * * Fo[T] = {...}; * Fn[T] = {}; * fft_1d(&Fo, &Fn, T); * [logistic] * { * result = []; // as byte array * // do Bit-Reversal * Fo_sorted = bit_reversal(Fn, Fn, T); * // do DIT: * for (int layer_at_ = 0; layer_at_ 依然,快速傅立叶变换也需要有逆变换(IDFT [Inverse Fast Fourier Transform]),来帮我们进行数据还原: /** * 1D-IFFT [Inverse Fast Fourier Transform] * [How to Use] * * Fo[T] = {}; * Fn[T] = {...}; * fft_1d(&Fo, &Fn, T); * [logistic] * { * result = []; // as byte array * // do Bit-Reversal * Fo_sorted = bit_reversal(Fn, Fn, T) / T; dont forget divide N(num equal T) [ 到此,快速傅里叶变换的 工程优势 就体现出来了。从上面的工作流可以看出,FFT 和 IFFT 唯一的实现上的不同的地方,就在于两点: 分片计算均值,这个是傅里叶变换的通性; 旋转因子互逆,转换三角函数时的对称性; 这正是我们在之前推倒时,双模快速傅里叶变换(Radix-2 FFT)所利用的最为显著的特征。而其他部分的计算,则可以用相同的流水线进行统一。 所以,一维双模快速傅里叶变换(1D Radix-2 FFT)的工程化,并没有想象中的复杂: #include \"stdio.h\" #include \"math.h\" #define PI 3.1415926f typedef struct Complex { double re_; double im_; Complex operator+(const Complex &b_) const { Complex result_; result_.re_ = re_ + b_.re_; result_.im_ = im_ + b_.im_; return result_; } Complex operator-(const Complex &b_) const { Complex result_; result_.re_ = re_ - b_.re_; result_.im_ = im_ - b_.im_; return result_; } Complex operator*(const Complex &b_) const { Complex result_; result_.re_ = re_ * b_.re_ - im_ * b_.im_; result_.im_ = re_ * b_.im_ + im_ * b_.re_; return result_; } } Rotator, FBasis; void digital_convert(double *digital_, Complex *complex_, size_t size_, bool inverse = false) { if (!inverse) { 
for (int i = 0; i 0) { j = j > 1; } if (j > i) { Complex temp = input_[i]; result_[i] = input_[j]; result_[j] = temp; } } } void butterfly(Complex *target_, int step_, int slice_idx_, bool inverse = false) { int start_at_ = slice_idx_ * 2 * step_; for (int inner_idx_ = 0; inner_idx_ 写完后简单测试一下: int main(void) { FBasis Fn[8] = {}; double Fo[8] = {0, 1, 2, 3, 4, 5, 6, 7}; double iFo[8] = {}; size_t T = sizeof(Fo) / sizeof(double); size_t N = sizeof(Fn) / sizeof(FBasis); printf(\"\\n Original_data: \\n\"); for (int t = 0; t 得到结果和标准基本相同: Original_data: 0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000 FFT_result: 28.000000 + i 0.000000 -4.000001 + i -9.656855 -4.000000 + i -4.000000 -4.000000 + i -1.656854 -4.000000 + i 0.000000 -4.000000 + i 1.656855 -4.000000 + i 4.000000 -3.999999 + i 9.656854 IFFT_result: 0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000 运行结束。 至此,快速傅立叶变换的简单工程化基本完毕。二维情况,可以类比二维离散傅里叶变换的拓展思想,来进行改造。 另外,快速傅立叶变换 并不只有 时域抽取(DIT)、 双模(Radix-2)、四模(Radix-4)等这些处理手段。通用的其他类型,包括并不限于 频域抽取(FIT)、八模(Radix-8)、多模混合(Mixed-Radix)等。但亦可触类旁通。 这些方法共同构成了当今快速傅立叶变换的高性能函数库,甚至 配合硬件的特殊门电路设计,还能够进一步压缩过程中非理论因素的处理耗时。而在工作中,除特殊情况外,通常会在项目允许范畴内引入一些由研究机构校准的快速傅立叶变换函数库,这里按量级列举 三个经典库,以供参考使用之便: 小:Ooura's Mathematical Software Packages. by Takuya Ooura. 中:FXT: a library of algorithms. by Jörg Arndt. 大:FFTW: Fastest Fourier Transform in the West. by Matteo Frigo and Steven G. Johnson. at MIT. Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_1_4.html":{"url":"Chapter_3/Language/cn/Docs_3_1_4.html","title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","keywords":"","body":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach) 2011 年, [12]。 【申请 IEEE 授权中】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_2.html":{"url":"Chapter_3/Language/cn/Docs_3_2.html","title":"3.2 频率信息提取 - 常用滤波算法","keywords":"","body":"3.2 频率信息提取 - 常用滤波算法 上一节中,我们就数字信号处理(DSP)的核心算法,傅里叶变换,进行了详细的说明。而在对二维傅里叶变换进行讲解的时候。细心的读者可能已经发现了 图像在空频分布上的一些特点。在分布的频率(波矢 k⃗{\\vec{k}}k⃗ 的频率 ∣k⃗∣=ω\\vert {\\vec{k}} \\vert = \\omega∣k⃗∣=ω )的占比(强度系数 f^ω(u,v)\\hat{f}_{\\omega}(u,v)f^ω(u,v) )中,低频信号的占比高,而高频信号的占比低。 这一现象产生的原因在于: 当一张图片处于色彩变化大且明显的区域时, k⃗{\\vec{k}}k⃗ 平面波在 uvuvuv 平面上的相邻点,单次数值变化的跨度就越大,直观体现就是波矢 k⃗{\\vec{k}}k⃗ 更长,即频率 ω\\omegaω 更高,波长 λ\\lambdaλ 更短。相反,当图片处于色彩变化相对平稳的区域时,相邻两个像素点的色彩差异就越小,单次数值变化的跨度就越小,对应的波矢 k⃗{\\vec{k}}k⃗ 更短,即频率 ω\\omegaω 更低,波长 λ\\lambdaλ 更长。这种变化明显处,往往是图片中的噪点或物体的轮廓位置。显然,色彩差异较小的相邻像素区域,才是占有图片空间较多的部分。 传统的图像处理,即是对图像频率的处理。其本质上是根据不同的目标,提炼出图像中被认为有用的信息,这一步被称为滤波(Filter)。滤波是对信号已有频率的过滤,通过增强(阻塞/增强阻塞)一部分频段,来达到筛选的效果。 因此,由于信息量的关系,滤波算法更多的使用场景是被运用在已得图像的结果上。相较于一维信号,二维信号明显对算法敏捷程度有更低的容忍度。而直接以傅里叶空频分离(SFS)进行科学处理,依旧会有些臃肿。毕竟非分析场景一般不需要特别高的精度,通常只在意一定范围内的频率特征,且并不会对细部有过多的要求。那么有没有满足条件下,针对目标频段的,更实用的变体呢? 
考虑到简易的滤波手段多为均值与阈限共同作用。从算法层面,优化均值与阈限的求与取,就是切入点。如果可以将算法抽象为足够小的有限参数单元,我们就能够以 卷积核(Convolution Nucleus / Convolution Kernel / Sliding Window / Filter) 数学工具,封装整个运算过程。从而充分的利用现代 GPU 设备进行并行计算,批量处理并降低耗时。 欲期望达成此要求,被抽象的有限参数单元必然不能太复杂。 为了便于演示说明,本节采用 OpenGL 的 GLSL 程序片脚本语言,并使用 WebGL 环境预览,来进行算法的演示工作。其他驱动,如 DirectX 的 HLSL 或 Metal 的 MLSL,皆可参照 GLSL 逻辑达到相同效果。 在线演示 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_2_1.html":{"url":"Chapter_3/Language/cn/Docs_3_2_1.html","title":"3.2.1 高斯滤波(Gauss Filter)","keywords":"","body":"3.2.1 高斯滤波(Gauss Filter) 高斯滤波是我们最常用的一种滤波器。 想要理解高斯滤波的作用,首先需要回顾一下 高斯分布(Gaussian Distribution),即 正态分布(Normal Distribution) 的数学特征。高斯分布公式 : f(x,μ)=12π⋅δe−(x−μ)22⋅δ2 f(x,\\mu) = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta} e ^{-\\tfrac{(x-\\mu)^2}{2 \\cdot \\delta^2}} f(x,μ)=√2π⋅δ1e−2⋅δ2(x−μ)2 其在 xxx 为一维时的平面的对应分布如下: 图 3-2 一维正态分布示意图 从图像可见,高斯分布的 μ\\muμ 决定了分部的中心,而 δ\\deltaδ 决定了形变的剧烈程度。而线下曲线面积,则代表了对应区间段内的取值发生概率。从离散角度则指 x∈int[xc−n2,xc+n2]x \\in int[x_c-\\tfrac{n}2, x_c+\\tfrac{n}2]x∈int[xc−2n,xc+2n] 范围内,有 x=xcx = x_cx=xc 的取值概率为 f(xc)f(x_c)f(xc) 。 记原信号为 S(x)S(x)S(x) 。以 ∣target∣1\\vert target \\vert_1∣target∣1 表示归一化操作,则 ∣∑xc−n/2xc+n/2(f(x)⋅S(x))∣1\\vert {\\sum}_{x_c -n/2}^{x_c+n/2}(f(x) \\cdot S(x)) \\vert_1∣∑xc−n/2xc+n/2(f(x)⋅S(x))∣1 代表在当前给定 (δ,μ)(\\delta, \\mu)(δ,μ) 的高斯分布 f(x,μ)f(x, \\mu)f(x,μ) 下,考虑 x=xcx = x_cx=xc 时左右相邻含 xcx_cxc 在内共 nnn 个节点取值情况的 S(xc)S(x_c)S(xc) 的概率均值。我们记 xcx_cxc 为中心点,数据采样数为 TTT ,有: xc∈int[n2,T−n2],n∈intoddsFn(xc)=∣∑xc−n/2xc+n/2(f(x,xc)⋅S(x))∣1 {\\displaystyle \\begin{aligned} x_c \\in &int [\\tfrac{n}{2}, T-\\tfrac{n}{2}], \\quad n \\in int_{odds} \\\\ \\\\ F_n(x_c) &= \\vert {\\sum}_{x_c -n/2}^{x_c+n/2}(f(x, x_c) \\cdot S(x)) \\vert_1 \\\\ \\end{aligned} } xc∈Fn(xc)int[2n,T−2n],n∈intodds=∣∑xc−n/2xc+n/2(f(x,xc)⋅S(x))∣1 上式中,Fn(xc)F_n(x_c)Fn(xc) 即为一维情况下的 nnn 步滑动窗口,也可以称为 n×1n \\times 1n×1 卷积核。通过沿信号的数据顺序,滑动 Fn(xc)F_n(x_c)Fn(xc) 求取原值 xcx_cxc 替换值的操作。我们可以在一定程度上利用分布的概率关系,以调整 δ\\deltaδ 取值的方式来影响核内相邻数据的波动性,进而影响整体波动性达到滤波目的。 取 δ\\deltaδ 越小,波动性越强越激烈,图片越尖锐;反之 δ\\deltaδ 越大,波动性越弱越平缓,图片越模糊。 一维信号早期常用这种手段来一定程度的进行降噪(现今已被优秀和复杂的多的算法替换了)。而二维信号,即图片,在我们之前讲解傅里叶变化时以提到过,和一维主要差别是在维度上。所以当我们记数据采样数为 (W×H)(W \\times H)(W×H) ,有将 xxx 换为向量 x⃗=(x,y)\\vec{x} = (x,y)x⃗=(x,y) 表示: xc∈int[n2,W−n2],yc∈int[n2,H−n2]n∈intoddsFn(xc⃗)=Fn(xc,yc)=∣∑yc−n/2yc+n/2∑xc−n/2xc+n/2(f(x⃗,xc⃗)⋅S(x⃗))∣1 {\\displaystyle \\begin{aligned} x_c \\in &int [\\tfrac{n}{2}, W-\\tfrac{n}{2}], \\quad y_c \\in int [\\tfrac{n}{2}, H-\\tfrac{n}{2}] \\quad n \\in int_{odds} \\\\ \\\\ F_n(\\vec{x_c}) &= F_n(x_c, y_c) =\\vert {\\sum}_{y_c -n/2}^{y_c+n/2}{\\sum}_{x_c -n/2}^{x_c+n/2}(f(\\vec{x}, \\vec{x_c}) \\cdot S(\\vec{x})) \\vert_1 \\\\ \\end{aligned} } xc∈Fn(xc⃗)int[2n,W−2n],yc∈int[2n,H−2n]n∈intodds=Fn(xc,yc)=∣∑yc−n/2yc+n/2∑xc−n/2xc+n/2(f(x⃗,xc⃗)⋅S(x⃗))∣1 则 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 即为二维情况下的 n×nn \\times nn×n 高斯滤波卷积核。同理,更多维情况只需要扩展参数 x⃗\\vec{x}x⃗ 的向量空间即可。 可是看上去,目前的公式算不上简单。但真的是这样吗? 
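In practice the window F_n(x_c) defined above is just a table of sampled Gaussian weights that is computed and normalized once, then reused for every centre pixel. As a quick illustration of that tabulation step, here is a minimal C sketch mirroring the `calculate_gaussian_kernel` helper that appears later in this section; the name `gaussian_kernel_2d` and its exact signature are assumptions made for illustration.

```c
#include <stdio.h>
#include <math.h>

#define PI 3.14159265358979323846

/* Fill kernel[n*n] with sampled 2D Gaussian weights for a
 * (2*step+1) x (2*step+1) window centred on x_c, then normalize so
 * the weights sum to 1 (the |...|_1 step in the text). */
static void gaussian_kernel_2d(double *kernel, int step, double delta) {
    int    n        = 2 * step + 1;
    double sum      = 0.0;
    double factor_1 = 1.0 / (sqrt(2.0 * PI) * delta);   /* constant prefactor */
    double factor_2 = 1.0 / (2.0 * delta * delta);      /* exponent scale     */

    for (int dy = -step; dy <= step; ++dy) {
        for (int dx = -step; dx <= step; ++dx) {
            double w = factor_1 * exp(-(dx * dx + dy * dy) * factor_2);
            kernel[(dy + step) * n + (dx + step)] = w;
            sum += w;
        }
    }
    for (int i = 0; i < n * n; ++i) {                   /* normalization |.|_1 */
        kernel[i] /= sum;
    }
}

int main(void) {
    double kernel[9];                    /* 3 x 3 window, i.e. step = 1 */
    gaussian_kernel_2d(kernel, 1, 1.0);  /* delta = 1.0                 */
    for (int y = 0; y < 3; ++y) {
        for (int x = 0; x < 3; ++x) printf("%.3f ", kernel[y * 3 + x]);
        printf("\n");
    }
    return 0;
}
```

Because the kernel is normalized to unit sum at the end, the 1/(√(2π)·δ) prefactor cancels out; only the relative exp(-(Δx²+Δy²)/(2δ²)) weights determine the filtered result.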
假设 n=3n = 3n=3 那么 3×33 \\times 33×3 高斯滤波卷积核,实际描述的是 xc⃗\\vec{x_c}xc⃗ 点周围单位距离内,相邻含 xc⃗\\vec{x_c}xc⃗ 在内共 999 个节点的波动关系,有: Fn(xc⃗)=∣∑xySxy⋅f((xc,yc)−[(−1,−1),(0,−1),(1,−1)(−1,0),(0,0),(1,0)(−1,1),(0,1),(1,1)])∣1=∣∑xySxy⋅f(xc⃗−N3×3⃗)∣1 {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) &= \\vert \\sum_{xy} S_{xy} \\cdot f ( (x_c,y_c) - { \\begin{bmatrix} (-1, -1) ,& \\quad (\\quad 0, -1) ,& \\quad (\\quad 1, -1) \\\\ (-1,\\quad 0) ,& \\quad (\\quad 0,\\quad 0) ,& \\quad (\\quad 1,\\quad 0) \\\\ (-1,\\quad 1) ,& \\quad (\\quad 0,\\quad 1) ,& \\quad (\\quad 1,\\quad 1) \\end{bmatrix} }) \\vert_1 \\\\ &= \\vert \\sum_{xy}S_{xy} \\cdot f ( \\vec{x_c} - \\vec{N_{3 \\times 3}} ) \\vert_1 \\\\ \\end{aligned} } Fn(xc⃗)=∣xy∑Sxy⋅f((xc,yc)−⎣⎡(−1,−1),(−1,0),(−1,1),(0,−1),(0,0),(0,1),(1,−1)(1,0)(1,1)⎦⎤)∣1=∣xy∑Sxy⋅f(xc⃗−N3×3⃗)∣1 一般情况,我们不会在单批(single batch)数据处理时,改变 δ\\deltaδ 的取值。假设 δ\\deltaδ 为标准正态分布取值 δ=1\\delta=1δ=1 ,那么 f(x⃗,μ⃗)f(\\vec{x},\\vec{\\mu})f(x⃗,μ⃗) 有: f(x⃗,μ⃗)=12πe−12(x⃗−μ⃗)2 f(\\vec{x},\\vec{\\mu}) = \\frac{1}{\\sqrt{2\\pi}} e ^{-\\tfrac{1}{2}(\\vec{x}-\\vec{\\mu})^2} f(x⃗,μ⃗)=√2π1e−21(x⃗−μ⃗)2 显然, f(x⃗,μ⃗)f(\\vec{x},\\vec{\\mu})f(x⃗,μ⃗) 在 δ\\deltaδ 取固定值的情况下,只和 (x⃗−μ⃗)(\\vec{x}-\\vec{\\mu})(x⃗−μ⃗) 的计算有关。而由于我们取 μ⃗=xc⃗\\vec{\\mu} = \\vec{x_c}μ⃗=xc⃗ ,在 (x⃗−μ⃗)(\\vec{x}-\\vec{\\mu})(x⃗−μ⃗) 的计算中: ∑(x⃗−μ⃗)=∑(x⃗−xc⃗)=N3×3⃗ \\sum(\\vec{x}-\\vec{\\mu}) = \\sum(\\vec{x}-\\vec{x_c}) = \\vec{N_{3 \\times 3}} ∑(x⃗−μ⃗)=∑(x⃗−xc⃗)=N3×3⃗ 正好消除了变化的 x⃗\\vec{x}x⃗ 的部分,因此 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 可以被化简为: Fn(xc⃗)=∣∑xySxy⋅f(xc⃗−N3×3⃗)∣1=∣∑xySxy⋅f(N3×3⃗)∣1=∑xySxy⋅∣(12πe−12(Δx2+Δy2))xy∣1=∑xySxy⋅∣[0.075,0.124,0.0750.124,1.000,0.1240.075,0.124,0.075]∣1 {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) &= \\vert \\sum_{xy}S_{xy} \\cdot f ( \\vec{x_c} - \\vec{N_{3 \\times 3}} ) \\vert_1 = \\vert \\sum_{xy}S_{xy} \\cdot f (\\vec{N_{3 \\times 3}} ) \\vert_1 \\\\ &= \\sum_{xy}S_{xy} \\cdot \\vert ( \\frac{1}{\\sqrt{2\\pi}} e ^{-\\tfrac{1}{2}(\\Delta x^2+\\Delta y^2)} )_{xy} \\vert_1 \\\\ &= \\sum_{xy}S_{xy} \\cdot \\vert { \\begin{bmatrix} 0.075 ,& \\quad 0.124 ,& \\quad 0.075 \\\\ 0.124 ,& \\quad 1.000 ,& \\quad 0.124 \\\\ 0.075 ,& \\quad 0.124 ,& \\quad 0.075 \\end{bmatrix} } \\vert_1 \\\\ \\end{aligned} } Fn(xc⃗)=∣xy∑Sxy⋅f(xc⃗−N3×3⃗)∣1=∣xy∑Sxy⋅f(N3×3⃗)∣1=xy∑Sxy⋅∣(√2π1e−21(Δx2+Δy2))xy∣1=xy∑Sxy⋅∣⎣⎡0.075,0.124,0.075,0.124,1.000,0.124,0.0750.1240.075⎦⎤∣1 我们只需要依次计算卷积核范围内的点,对应信号值与概率相乘之和即可,即: Fn(xc⃗)=∣0.075⋅S(xc−1,yc−1)+0.124⋅S(xc,yc−1) +0.075⋅S(xc+1,yc−1) +0.124⋅S(xc−1,yc)+1.000⋅S(xc,yc)+0.124⋅S(xc+1,yc) +0.075⋅S(xc−1,yc−1)+0.124⋅S(xc,yc+1) +0.075⋅S(xc+1,yc+1)∣1 {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) = \\vert & 0.075 \\cdot S_{(x_c-1,y_c-1)} + 0.124 \\cdot S_{(x_c,y_c-1)}\\ + 0.075 \\cdot S_{(x_c+1,y_c-1)} \\ + \\\\ & 0.124 \\cdot S_{(x_c-1,y_c )}\\quad + 1.000 \\cdot S_{(x_c,y_c)}\\quad + 0.124 \\cdot S_{(x_c+1,y_c)} \\quad \\ + \\\\ & 0.075 \\cdot S_{(x_c-1,y_c-1)} + 0.124 \\cdot S_{(x_c,y_c+1)}\\ + 0.075 \\cdot S_{(x_c+1,y_c+1)} \\vert_1 \\\\ \\end{aligned} } Fn(xc⃗)=∣0.075⋅S(xc−1,yc−1)+0.124⋅S(xc,yc−1) +0.075⋅S(xc+1,yc−1) +0.124⋅S(xc−1,yc)+1.000⋅S(xc,yc)+0.124⋅S(xc+1,yc) +0.075⋅S(xc−1,yc−1)+0.124⋅S(xc,yc+1) +0.075⋅S(xc+1,yc+1)∣1 为了保证输入输出数据一致。根据卷积核的大小,我们还需要在数据的外围补充一圈空值,以保证感受野等大数据源。如果当前需要处理的数据为 (W×H)=(5×5)(W \\times H) = (5 \\times 5)(W×H)=(5×5) ,即总共 252525 个像素的单通道灰度图。经过 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 大小的高斯卷积核处理后,有如下结果: 不难发现上面的 求值过大,这是因为我们 并没有 使用 δ=1.0\\delta = 1.0δ=1.0 时归一化后的高斯算子: f(N3×3⃗)=∣[0.075,0.124,0.0750.124,1.000,0.1240.075,0.124,0.075]∣1=[0.042,0.069,0.0420.069,0.557,0.0690.042,0.069,0.042] 
{\\displaystyle \\begin{aligned} f(\\vec{N_{3 \\times 3}}) &= \\vert { \\begin{bmatrix} 0.075 ,& \\quad 0.124 ,& \\quad 0.075 \\\\ 0.124 ,& \\quad 1.000 ,& \\quad 0.124 \\\\ 0.075 ,& \\quad 0.124 ,& \\quad 0.075 \\end{bmatrix} } \\vert_1 = { \\begin{bmatrix} 0.042 ,& \\quad 0.069 ,& \\quad 0.042 \\\\ 0.069 ,& \\quad 0.557 ,& \\quad 0.069 \\\\ 0.042 ,& \\quad 0.069 ,& \\quad 0.042 \\end{bmatrix} } \\\\ \\end{aligned} } f(N3×3⃗)=∣⎣⎡0.075,0.124,0.075,0.124,1.000,0.124,0.0750.1240.075⎦⎤∣1=⎣⎡0.042,0.069,0.042,0.069,0.557,0.069,0.0420.0690.042⎦⎤ 当然,也可以直接除以 f(N3×3⃗)f(\\vec{N_{3 \\times 3}})f(N3×3⃗) 矩阵的秩,即 ∣f(N3×3⃗)∣δ=1.0=1.796\\vert f(\\vec{N_{3 \\times 3}}) \\vert_{\\delta = 1.0} = 1.796∣f(N3×3⃗)∣δ=1.0=1.796 ,作用在最终结果上。完成这一步后,整个高斯滤波单元才真正封装完毕。 对一张 (W×H)(W \\times H)(W×H) 的图片,单次标准高斯滤波需要经过 O(N)=((W−(n−2))×(H−(n−2))×8)O(N) =((W-(n-2)) \\times (H-(n-2)) \\times 8) O(N)=((W−(n−2))×(H−(n−2))×8) 次加法运算,外加单独进行的一次 n×nn \\times nn×n 卷积核大小的 f(x⃗,μ⃗)f(\\vec{x},\\vec{\\mu})f(x⃗,μ⃗) 归一化概率计算。而通过计算 f(x⃗,μ⃗)f(\\vec{x},\\vec{\\mu})f(x⃗,μ⃗) 得到的 f(N3×3⃗)f(\\vec{N_{3 \\times 3}})f(N3×3⃗) ,在 δ\\deltaδ 发生改变前都可以无限复用。因此,算法非常快捷。 高斯滤波的简易 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装了。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 没有太多操作,因为关键的部分在 像素程序片(Pixel Shader/Fragment Shader) 上: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform mat3 gaussian_matrix; uniform sampler2D target_texture; void main() { vec3 output_; for (int i = 0; i 完成对算法求和过程的迁移。传入的 高斯算子 gaussian_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。由于采用 Web 展示,此处方法以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_gaussian_kernel(step, delta) { let n = step * 2 + 1; let kernel = new Float32Array(n * n); let factor_1 = 1.0 / (Math.sqrt(2.0 * Math.PI) * delta); let factor_2 = 1.0 / (2.0 * delta * delta); let normalize_div = 0; for (let i = 0; i 至此,一个简单但实用的高斯滤波器就完成了。除了上述这种使用卷积核大小一对一采样的方式外,采用单一方向的高斯滤波滑动窗口,如 v⃗n×1=(vx,vy)orient\\vec{v}_{n \\times 1} = (v_x, v_y)_{orient}v⃗n×1=(vx,vy)orient ,也是一种减少采样数量,从而提高运算效率的方式。但由于只有指定方向的颜色关系参与了运算,单一方向高斯滤波,或者说更为通用的是近乎所有单一方向的滤波器,对数据处理后的结果,都只会表现为固定方向的过滤效果。这会使画面显得有些割裂,因此建议慎重使用。 而如果要求在保证滤波效果的同时,还能精简运算。那么我们就需更为快捷且采样更少的高斯单元了。 高斯滤波的线性插值采加速 一种通用的方式,就是在采样时引入 线性插值(Linear Sampling),减少采样次数。我们用 WWW 代表高斯算子,用 Wij=w(x⃗)W_{ij} =w(\\vec{x})Wij=w(x⃗) 代表高斯算子在 x⃗\\vec{x}x⃗ 所处 N3×3⃗\\vec{N_{3 \\times 3}}N3×3⃗ 中位置的对应 fij(N3×3⃗)f_{ij} ( \\vec{N_{3 \\times 3}})fij(N3×3⃗) 值,用 s(x⃗)s(\\vec{x})s(x⃗) 代表 x⃗\\vec{x}x⃗ 在图片中的像素值。则对于采样 3×33 \\times 33×3 的 N3×3⃗\\vec{N_{3 \\times 3}}N3×3⃗ 来说,由差值公式: sdst(x1⃗,x2⃗)=ssrc(x1⃗)⋅wsrc(x1⃗)+ssrc(x2⃗)⋅wsrc(x2⃗)wsrc(x1⃗)+wsrc(x2⃗) {\\displaystyle \\begin{aligned} s_{dst}(\\vec{x_1},\\vec{x_2}) &= \\frac{s_{src}(\\vec{x_1}) \\cdot w_{src}(\\vec{x_1}) + s_{src}(\\vec{x_2}) \\cdot w_{src}(\\vec{x_2})}{w_{src}(\\vec{x_1}) + w_{src}(\\vec{x_2})} \\\\ \\end{aligned} } sdst(x1⃗,x2⃗)=wsrc(x1⃗)+wsrc(x2⃗)ssrc(x1⃗)⋅wsrc(x1⃗)+ssrc(x2⃗)⋅wsrc(x2⃗) 可知,999 次采样能够两两差值,从而减少到只需 555 次实际的纹理数据读。卷积核的采样位置,取四角记为 [C1,C2,C3,C4]=[S(xc−1,yc−1),S(xc−1,yc+1),S(xc+1,yc−1),S(xc+1,yc+1)][C_1, C_2, C_3, C_4] =[S_{(x_c-1,y_c-1)} , S_{(x_c-1,y_c+1)}, S_{(x_c+1,y_c-1)}, 
S_{(x_c+1,y_c+1)}][C1,C2,C3,C4]=[S(xc−1,yc−1),S(xc−1,yc+1),S(xc+1,yc−1),S(xc+1,yc+1)] 和中心 C0=S(xc,yc)C_0 = S_{(x_c,y_c)}C0=S(xc,yc) ,如下: Samplexy⋅[1,0,10,1,01,0,1]=[C1C2C0C3C4] {\\displaystyle \\begin{aligned} Sample_{xy} \\cdot { \\begin{bmatrix} 1 ,& \\quad 0 ,& \\quad 1 \\\\ 0 ,& \\quad 1 ,& \\quad 0 \\\\ 1 ,& \\quad 0 ,& \\quad 1 \\end{bmatrix} } = { \\begin{bmatrix} C_1 & \\quad & \\quad C_2 \\\\ & \\quad C_0 \\\\ C_3 & \\quad & \\quad C_4 \\end{bmatrix} } \\\\ \\end{aligned} } Samplexy⋅⎣⎡1,0,1,0,1,0,101⎦⎤=⎣⎡C1C3C0C2C4⎦⎤ 则 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 就可以表示为: Fn(xc⃗)=W00⋅C1 +W01⋅C12 +W02⋅C2 +W10⋅C13+W11⋅C0 +W12⋅C24 +W20⋅C3 +W21⋅C34 +W22⋅C4=W00⋅C1 +W01⋅W00⋅C1+W02⋅C2W00+W02 +W02⋅C2 +W10⋅W00⋅C1+W20⋅C3W00+W20 +W11⋅C0 +W12⋅W02⋅C2+W22⋅C4W02+W22 +W20⋅C3 +W21⋅W20⋅C3+W22⋅C4W20+W22 +W22⋅C4=(W00 + W00⋅W01W00 + W02+W00⋅W10W00 + W20)⋅C1 +(W02 + W02⋅W01W00 + W02+W02⋅W12W02 + W22)⋅C2 +(W20 + W20⋅W10W00 + W20+W20⋅W21W20 + W22)⋅C3 +(W22 + W22⋅W12W02 + W22+W22⋅W21W20 + W22)⋅C4 +W11⋅C0 {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) =& W_{00} \\cdot C_1 \\ + W_{01} \\cdot C_{12} \\ + W_{02} \\cdot C_{2} \\ \\ + \\\\ & W_{10} \\cdot C_{13} + W_{11} \\cdot C_{0} \\ \\ + W_{12} \\cdot C_{24} \\ + \\\\ & W_{20} \\cdot C_3 \\ + W_{21} \\cdot C_{34} \\ + W_{22} \\cdot C_{4} \\\\ =& W_{00} \\cdot C_1 \\ + W_{01} \\cdot \\tfrac{W_{00} \\cdot C_1 + W_{02} \\cdot C_2}{W_{00} + W_{02} } \\ \\ + \\\\ & W_{02} \\cdot C_{2} \\ + W_{10} \\cdot \\tfrac{W_{00} \\cdot C_1 + W_{20} \\cdot C_3}{W_{00} + W_{20} } \\ \\ + \\\\ & W_{11} \\cdot C_{0} \\ + W_{12} \\cdot \\tfrac{W_{02} \\cdot C_2 + W_{22} \\cdot C_4}{W_{02} + W_{22} } \\ \\ + \\\\ & W_{20} \\cdot C_{3} \\ + W_{21} \\cdot \\tfrac{W_{20} \\cdot C_3 + W_{22} \\cdot C_4}{W_{20} + W_{22} } \\ \\ + \\\\ & W_{22} \\cdot C_{4} \\\\ =& (W_{00}\\ +\\ \\tfrac{W_{00} \\cdot W_{01}}{W_{00}\\ +\\ W_{02}} + \\tfrac{W_{00} \\cdot W_{10}}{W_{00}\\ +\\ W_{20}})\\cdot C_1 \\ + \\\\ & (W_{02}\\ +\\ \\tfrac{W_{02} \\cdot W_{01}}{W_{00}\\ +\\ W_{02}} + \\tfrac{W_{02} \\cdot W_{12}}{W_{02}\\ +\\ W_{22}})\\cdot C_2 \\ + \\\\ & (W_{20}\\ +\\ \\tfrac{W_{20} \\cdot W_{10}}{W_{00}\\ +\\ W_{20}} + \\tfrac{W_{20} \\cdot W_{21}}{W_{20}\\ +\\ W_{22}})\\cdot C_3 \\ + \\\\ & (W_{22}\\ +\\ \\tfrac{W_{22} \\cdot W_{12}}{W_{02}\\ +\\ W_{22}} + \\tfrac{W_{22} \\cdot W_{21}}{W_{20}\\ +\\ W_{22}})\\cdot C_4 \\ + \\\\ & W_{11} \\cdot C_{0} \\\\ \\end{aligned} } Fn(xc⃗)===W00⋅C1 +W01⋅C12 +W02⋅C2 +W10⋅C13+W11⋅C0 +W12⋅C24 +W20⋅C3 +W21⋅C34 +W22⋅C4W00⋅C1 +W01⋅W00+W02W00⋅C1+W02⋅C2 +W02⋅C2 +W10⋅W00+W20W00⋅C1+W20⋅C3 +W11⋅C0 +W12⋅W02+W22W02⋅C2+W22⋅C4 +W20⋅C3 +W21⋅W20+W22W20⋅C3+W22⋅C4 +W22⋅C4(W00 + W00 + W02W00⋅W01+W00 + W20W00⋅W10)⋅C1 +(W02 + W00 + W02W02⋅W01+W02 + W22W02⋅W12)⋅C2 +(W20 + W00 + W20W20⋅W10+W20 + W22W20⋅W21)⋅C3 +(W22 + W02 + W22W22⋅W12+W20 + W22W22⋅W21)⋅C4 +W11⋅C0 看上去很复杂,但取中心点的二维高斯分布,其 fij(N3×3⃗)f_{ij} (\\vec{N_{3 \\times 3}} )fij(N3×3⃗) 的值是随 xc⃗\\vec{x_c}xc⃗ 中心对称的,有: W0=[W11]W1=[W01=W10=W12=W21]W2=[W00=W02=W20=W22] {\\displaystyle \\begin{aligned} W_0 &= [W_{11}] \\\\ W_1 &= [W_{01} = W_{10} = W_{12} = W_{21}] \\\\ W_2 &= [W_{00} = W_{02} = W_{20} = W_{22}] \\\\ \\end{aligned} } W0W1W2=[W11]=[W01=W10=W12=W21]=[W00=W02=W20=W22] 带入到线性插值 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 表达式,则: Fn(xc⃗)=W0⋅C0+[(W1 + W2)⋅(C1 +C2 +C3 +C4 )] {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) =& W_0 \\cdot C_0 +[(W_1\\ +\\ W_2)\\cdot (C_1 \\ + C_2 \\ + C_3 \\ + C_4 \\ )] \\\\ \\end{aligned} } Fn(xc⃗)=W0⋅C0+[(W1 + W2)⋅(C1 +C2 +C3 +C4 )] 当取 δ=1.0\\delta = 1.0δ=1.0 时,三值得到固定的归一化取值 [W0,W1,W2]=[0.557, 0.069, 0.042][W_0,W_1,W_2] = 
[0.557,\\ 0.069,\\ 0.042][W0,W1,W2]=[0.557, 0.069, 0.042] ,而 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 的表达式就只和采样相关了: Fn(xc⃗)=0.557⋅C0 + 0.111⋅(C1 +C2 +C3 +C4 ) {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) = 0.557 \\cdot C_0\\ +\\ 0.111 \\cdot (C_1 \\ + C_2 \\ + C_3 \\ + C_4 \\ ) \\\\ \\end{aligned} } Fn(xc⃗)=0.557⋅C0 + 0.111⋅(C1 +C2 +C3 +C4 ) 所以,插值采样的高斯滤波非常精简。只需要略微调整像素程序片(Pixel Shader/Fragment Shader)的实现,而不需要对其他处理进行改动,就能完成改造: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform mat3 gaussian_matrix; uniform sampler2D target_texture; void main() { float gauss_factor = gaussian_matrix[0][0]+gaussian_matrix[0][1]; vec3 output_; output_ += texture2D(target_texture, fs_texcoord.xy ).rgb * gaussian_matrix[1][1]; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(-1, -1) * pixel_bias).rgb * gauss_factor; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(-1, +1) * pixel_bias).rgb * gauss_factor; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(+1, -1) * pixel_bias).rgb * gauss_factor; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(+1, +1) * pixel_bias).rgb * gauss_factor; gl_FragColor = vec4(output_, 1.0); } 加速后的高斯滤波单元,对一张 (W×H)(W \\times H)(W×H) 图片的处理的理论耗时,减少到了原耗时的 0.625⋅O(N)0.625 \\cdot O(N)0.625⋅O(N) 。采样数也同比减少了 37.5%37.5\\%37.5% 。效果上和直算相比,几乎无差别。 高斯滤波的局限性 由于高斯滤波的通用卷积核是 各向同性(Isotropic) 的,在核范围内的各方向向量与中心点的方差,仅和向量终点与核中心点的相对距离有关。因此,高斯滤波并不是没有弊端的。 我们仍然选择 μ=xc⃗\\mu = \\vec{x_c}μ=xc⃗ 为核中心,假设核范围内有不包含 xc⃗\\vec{x_c}xc⃗ 在内的,总计为 NNN 的 nnn 维向量 x⃗=(x1,x2, ... ,xn)∈Rn\\vec{x} = (x_1,x_2,\\ ...\\ ,x_n) \\in \\mathbb{R}^nx⃗=(x1,x2, ... ,xn)∈Rn 的采样数据 SN={Sx1⃗,Sx2⃗, ... ,SxN⃗}S_N = \\{ S_{\\vec{x_1}} , S_{\\vec{x_2}},\\ ...\\ , S_{\\vec{x_N}} \\}SN={Sx1⃗,Sx2⃗, ... ,SxN⃗} 。将高斯滤波卷积核的离散程度,以非概率密度 协方差矩阵(Covariance Matrix) 的 Mcov(x⃗)M_{cov}(\\vec{x})Mcov(x⃗) 形式表示,记 III 为单位对角矩阵,有: Mcov(x⃗)=1N∑i=1NSxi⃗⋅[(x1−xc1)2(x2−xc2)2...(xn−xcn)2]=∑Δx2⋅I∈Rn×n {\\displaystyle \\begin{aligned} M_{cov}(\\vec{x}) &= \\tfrac{1}{N} \\sum_{i = 1}^{N} S_{\\vec{x_i}} \\cdot { \\begin{bmatrix} (x_1-x_{c1})^2 & \\quad & \\quad & \\quad \\\\ & \\quad (x_2-x_{c2})^2 & \\quad & \\quad \\\\ & \\quad & \\quad ... & \\quad\\\\ & \\quad & \\quad & \\quad (x_n-x_{cn})^2 \\end{bmatrix} } \\\\ &= \\sum \\Delta x^2 \\cdot I \\in \\mathbb{R}^{n \\times n} \\\\ \\end{aligned} } Mcov(x⃗)=N1i=1∑NSxi⃗⋅⎣⎢⎢⎡(x1−xc1)2(x2−xc2)2...(xn−xcn)2⎦⎥⎥⎤=∑Δx2⋅I∈Rn×n 多维高斯的协方差矩阵,只有对角线的 方差(Variance)存在非 000 取值,而衡量参数交叠影响的 协方差(Covariance)皆为 000 值。所以,高斯滤波没有考虑维度方位信息带来的数据间的差异,每一个维度仅对自身属性产生影响。因此,高斯核总是中心对称。 这一特征体现在二维信号的处理上时,就表现为经过高斯滤波处理的图片,轮廓细节会有所丢失(物体更不容易分辨,而非单纯颜色变得规整)。同时,也更容易因为算法导致的频率扰动,产生高频变化规律缺失,像素朝核的外边缘等量的分散运动而出现摩尔纹(Moire Pattern)。毕竟图片的高频部分,才是保存轮廓信息的关键。但高斯滤波本质上却是全通量的概率权重控制。 那么有没有能够在一定程度上,既保留高频细节的同时,又能够相对独立的处理低频波动的算法呢? 
考虑问题主要出现在高斯滤波的各向同性,或许可以通过引入高低频差异修饰滤波器,来达成要求。这种做法被称为 边缘保存(Edge Preserving)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_2_2.html":{"url":"Chapter_3/Language/cn/Docs_3_2_2.html","title":"3.2.2 双边滤波(Bilateral Filter)","keywords":"","body":"3.2.2 双边滤波(Bilateral Filter) 双边滤波(Bilateral Filter) 是在高斯滤波基础上,基于 边缘保存(Edge Preserving) 滤波思想,通过一个 空间域(Spatial Domain/Domain)标准高斯滤波 和 灰度值(Gray Range/Range)朴素高斯分布 的共同作用,形成的 高斯滤波变体。 由于二维信号的高频部分,在灰度通道上体现的更为明确(本质起作用的是物理意义上的光亮度信息,人眼主要通过光亮度差异来感知物体轮廓。光亮度的多种衍生抽象,和相关概念是如何迁移数据化到计算机视觉体系内的,会在本书第三章详细讲解)。所以,双边滤波引入对灰度值的高斯,是期望提取核内灰度变化特征,来得到各频率波的核内密度分布情况。 进而对核内标准高斯滤波像素值概率密度结果进行修饰,得到 带有截面的单向滤波卷积核(Single Orientation Filter)。 图 3-3 双边滤波经过灰度裁剪后,在轮廓边缘处的卷积核示意图 [13] 因此,双边滤波属于 混合高斯卷积核(Combined Gaussian Kernel) 滤波器的一种。我们需要分别计算 空间高斯权重(SGW [Spatial Gaussian Weight]) 和 灰度高斯权重(GGW [Gray Gaussian Weight]) 两部分,并混合权重得到最终的双边滤波矩阵。 双边滤波的混合高斯权重 空间高斯权重(SGW),也被称为 领域权重(Domain Weight),记为 Gs(x⃗,μ⃗)G_s(\\vec{x},\\vec{\\mu})Gs(x⃗,μ⃗) ,有波动参数 δs\\delta_sδs 。其本身代表,以选定中心点 μ⃗=xc⃗\\vec{\\mu} = \\vec{x_c}μ⃗=xc⃗ 与卷积核内相邻点的欧式距离,求得的 二维高斯概率分布 结果。即: Gs(x⃗,xc⃗)=12π⋅δse−(x⃗−xc⃗)22⋅δs2=12π⋅δse−(Δx2+Δy2)2⋅δs2 {\\displaystyle \\begin{aligned} G_s(\\vec{x},\\vec{x_c}) = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta_s} e ^{-\\tfrac{(\\vec{x}-\\vec{x_c})^2}{2 \\cdot {\\delta_s}^2}} = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta_s} e ^{-\\tfrac{(\\Delta x^2+\\Delta y^2)}{2 \\cdot {\\delta_s}^2}} \\\\ \\end{aligned} } Gs(x⃗,xc⃗)=√2π⋅δs1e−2⋅δs2(x⃗−xc⃗)2=√2π⋅δs1e−2⋅δs2(Δx2+Δy2) 灰度高斯权重(GGW),也被称为 尺度权重(Range Weight),记为 Gr(x⃗,μ⃗)G_r(\\vec{x},\\vec{\\mu})Gr(x⃗,μ⃗) ,有波动参数 δr\\delta_rδr 。其本身代表,以选定中心点 μ⃗=xc⃗\\vec{\\mu} = \\vec{x_c}μ⃗=xc⃗ 灰度 gray(xc⃗)gray(\\vec{x_c})gray(xc⃗) 与卷积核内相邻点灰度 gray(x⃗)gray(\\vec{x})gray(x⃗) 的方差,求得的 一维高斯概率分布 结果。记 S(x)={r,g,b}S(x) = \\{r,g,b \\}S(x)={r,g,b} 有: Gr(x⃗,xc⃗)=12π⋅δre−(gray(x⃗)−gray(xc⃗))22⋅δr2=12π⋅δre−(Δr2+Δg2+Δb2)2⋅δr2 {\\displaystyle \\begin{aligned} G_r(\\vec{x},\\vec{x_c}) = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta_r} e ^{-\\tfrac{(gray(\\vec{x})-gray(\\vec{x_c}))^2}{2 \\cdot {\\delta_r}^2}} = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta_r} e ^{-\\tfrac{(\\Delta r^2+\\Delta g^2 +\\Delta b^2)}{2 \\cdot {\\delta_r}^2}} \\\\ \\end{aligned} } Gr(x⃗,xc⃗)=√2π⋅δr1e−2⋅δr2(gray(x⃗)−gray(xc⃗))2=√2π⋅δr1e−2⋅δr2(Δr2+Δg2+Δb2) 以 ∣target∣1\\vert target \\vert_1∣target∣1 表示归一化操作,记混合高斯权重为 W(x⃗,μ⃗)W(\\vec{x},\\vec{\\mu})W(x⃗,μ⃗) ,则: W(x⃗,xc⃗)=∣Gs(x⃗,xc⃗)⋅Gr(x⃗,xc⃗)∣1 {\\displaystyle \\begin{aligned} W(\\vec{x},\\vec{x_c}) &= \\vert G_s(\\vec{x},\\vec{x_c}) \\cdot G_r(\\vec{x},\\vec{x_c}) \\vert_1 \\\\ \\end{aligned} } W(x⃗,xc⃗)=∣Gs(x⃗,xc⃗)⋅Gr(x⃗,xc⃗)∣1 由于,空间高斯权重其实就是标准高斯滤波权重,因此 ∣Gs(x⃗,μ⃗)∣1=f(Nn×n⃗)\\vert G_s(\\vec{x},\\vec{\\mu}) \\vert_1 = f( \\vec{N_{n \\times n}} )∣Gs(x⃗,μ⃗)∣1=f(Nn×n⃗) 。我们沿用上节高斯滤波的设定,取用 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 大小卷积核,滤波函数记为 Bn(xc⃗)B_n(\\vec{x_c})Bn(xc⃗) ,则: Bn(xc⃗)=∣∑xySxy⋅W(xc⃗−N3×3⃗)∣1=∣∑xySxy⋅W(N3×3⃗)∣1=∣∑xySxyGs(x⃗,xc⃗)⋅Gr(x⃗,xc⃗)∣1=∣∑xySxyf(x⃗,xc⃗)∣1⋅[Gr(x⃗,xc⃗)∑Gr(xc⃗)]∈R3×3Bn(xc⃗)=Fn(xc⃗)⋅∣Gr(xc⃗)∣1∈R3×3 {\\displaystyle \\begin{aligned} B_n(\\vec{x_c}) &= \\vert \\sum_{xy}S_{xy} \\cdot W( \\vec{x_c} - \\vec{N_{3 \\times 3}} ) \\vert_1 = \\vert \\sum_{xy}S_{xy} \\cdot W( \\vec{N_{3 \\times 3}} ) \\vert_1 \\\\ &= \\vert \\sum_{xy}S_{xy} G_s(\\vec{x},\\vec{x_c}) \\cdot G_r(\\vec{x},\\vec{x_c}) \\vert_1 \\\\ &= \\vert \\sum_{xy}S_{xy} f(\\vec{x},\\vec{x_c}) \\vert_1 \\cdot [\\frac{ G_r(\\vec{x},\\vec{x_c}) }{\\sum G_r(\\vec{x_c})}] \\in \\mathbb{R}^{3 \\times 3} \\\\ 
B_n(\\vec{x_c}) &= F_n(\\vec{x_c}) \\cdot \\vert G_r(\\vec{x_c}) \\vert_1 \\in \\mathbb{R}^{3 \\times 3} \\\\ \\end{aligned} } Bn(xc⃗)Bn(xc⃗)=∣xy∑Sxy⋅W(xc⃗−N3×3⃗)∣1=∣xy∑Sxy⋅W(N3×3⃗)∣1=∣xy∑SxyGs(x⃗,xc⃗)⋅Gr(x⃗,xc⃗)∣1=∣xy∑Sxyf(x⃗,xc⃗)∣1⋅[∑Gr(xc⃗)Gr(x⃗,xc⃗)]∈R3×3=Fn(xc⃗)⋅∣Gr(xc⃗)∣1∈R3×3 而 ∑Gr(xc⃗)\\sum G_r(\\vec{x_c})∑Gr(xc⃗) 就是一维高斯曲线的线下面积,有 ∑Gr(xc⃗)=1\\sum G_r(\\vec{x_c}) = 1∑Gr(xc⃗)=1 ,所以: Bn(xc⃗)=Fn(xc⃗)⋅Gr(xc⃗)∈R3×3 {\\displaystyle \\begin{aligned} B_n(\\vec{x_c}) &= F_n(\\vec{x_c}) \\cdot G_r(\\vec{x_c}) \\in \\mathbb{R}^{3 \\times 3} \\\\ \\end{aligned} } Bn(xc⃗)=Fn(xc⃗)⋅Gr(xc⃗)∈R3×3 上式中 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 即为高斯滤波核函数 。 可见,适用于高斯滤波 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 的快速算法,同样也适用于双边滤波 Bn(xc⃗)B_n(\\vec{x_c})Bn(xc⃗) 。 为什么通过核内频率采用朴素高斯分布,能够达到裁切的目的呢?这是因为,当卷积核目标中心点处于图像中物体的轮廓位置附近时,卷积核内的频率分布会出现相对非轮廓区域更为强烈的波动。 而高斯分布,即正态分布,恰恰是一种常用的放缩范围内数据波动的手段。 在标准高斯滤波中,我们通过多维高斯,粗浅的处理了整体数据上的波动性。这种处理方式,相当于将图像经过二维傅里叶变换得到的空域(SD)数据和频域(FD)数据,统一按照全通道空域的像素均值分布情况进行了概率平均。忽略了频域本身所具有的实际意义。而灰度值高斯的作用,就是 间接 的达成抽象频域数据波动特征的目的。 通过降低 δr\\delta_rδr 取值,放大核内频率差异情况。增强高频部分的权重,衰减低频占比。因此,对于双边滤波来说:在满足取 δd\\delta_dδd 越小,波动性越强越激烈,图片越尖锐;反之 δd\\delta_dδd 越大,波动性越弱越平缓,图片越模糊的同时;取 δr\\delta_rδr 越大,高低频差异缩减,边缘越模糊;反之 δr\\delta_rδr 越小,高低频差异被放大,边缘越清晰。 双边滤波的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装了。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 没有太多操作,因为关键的部分在 像素程序片(Pixel Shader/Fragment Shader) 上: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform mat3 gaussian_matrix; uniform float gaussian_range; uniform sampler2D target_texture; float variance(vec3 c1, vec3 c2){ vec3 temp = c2 - c1; return temp[0] * temp[0] + temp[1] * temp[1] + temp[2] * temp[2]; } void main() { vec3 output_; vec4 color_center = texture2D(target_texture, fs_texcoord.xy); for (int i = 0; i 完成对算法求和过程的迁移。传入的 高斯算子 gaussian_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。而 灰度高斯权重 gaussian_range 涉及到实际采样,需要直接传入。由于采用 Web 展示,此处方法以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_gaussian_kernel(step, delta) { let n = step * 2 + 1; let kernel = new Float32Array(n * n); let factor_1 = 1.0 / (Math.sqrt(2.0 * Math.PI) * delta); let factor_2 = 1.0 / (2.0 * delta * delta); let normalize_div = 0; for (let i = 0; i 如上,双边滤波需要固定计算的部分,和标准高斯滤波并无不同。工程中,仅在像素程序片的实现上存在差异。 同理,双边滤波也是可以使用 线性插值(Linear Sampling) 代替部分采样,来进行加速。和标准高斯滤波一样,只需要略微调整像素程序片(Pixel Shader/Fragment Shader)的实现: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform mat3 gaussian_matrix; uniform float gaussian_range; uniform sampler2D target_texture; float variance(vec3 c1, vec3 c2){ vec3 temp = c2 - c1; return temp[0] * temp[0] + temp[1] * temp[1] + temp[2] * temp[2]; } void main() { vec4 color_center = texture2D(target_texture, fs_texcoord.xy); float gauss_factor = gaussian_matrix[0][0]+gaussian_matrix[0][1]; vec3 output_ = texture2D(target_texture, fs_texcoord.xy ).rgb * gaussian_matrix[1][1]; for (int i = 0; i 至此,一个标准双边滤波器,和它的线性采样快速版就完成了。 双边滤波的局限性 双边滤波是否彻底的解决了高斯滤波的局限性问题呢?答案是解决了 一部分。 引入高低频分布密度权重,虽然能够处理图像中物体轮廓边缘模糊现象,达到强度可控的 边缘保存(Edge Preserving)。但由于灰度高斯权重,单一维度单一方向梯度的特点。在利用双边滤波增强高频波权重的同时,也会 增大由标准高斯滤波高频分散运动带来的干扰。这反而会让增强边缘细节过程中产生的 
摩尔纹(Moire Pattern)更加显著。 为处理这个问题,我们相对放松对算力的限制。一个可行的方案是在标准高斯滤波的基础上,通过使用多个方向梯度共同作用,重新构造一个满足 非各向同性(Not Isotropic) 条件的滤波单元 (毕竟非全方位的梯度差异,还无法满足各向异性条件),来保存和引入核内像素移动和频率波传导关系。使我们能够对核内像素所占均值比重进行更为合理的分配,起到缓解效果。 这种多梯度的方式,会增强算法对图像边缘的处理能力,保存边缘的同时增强细节。因此也被称为 边缘锐化(Edge Sharpening)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_2_3.html":{"url":"Chapter_3/Language/cn/Docs_3_2_3.html","title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","keywords":"","body":"3.2.3 拉普拉斯滤波(Laplacian Filter) 拉普拉斯滤波(Laplacian Filter) 是一种基于二阶微分方程的差异扩大化算子(Operator)。其不仅可以从灰度出发用于物体的 边缘锐化(Edge Sharpening),也可以应用于全通道的色彩变化增强,即 广义锐化(Sharpening)。 数学上,一阶微分能够突出原函数连续变化的幅度特征(即原函数斜率),二阶微分则进一步扩大了对这种变化趋势(即导数的斜率)的描述。而基于多参数的二阶偏导数方程,在展示参数本身对趋势影响的同时,也能够说明两两参数间的影响关系。 由于是对趋势的求导,以离散数据逼近信号的二阶微分方程,只需要使用目标相邻采样做差值计算即可,且并不会影响周边点各自的趋势判断。正好符合目标情况卷积核,对核内关系闭环和抗干扰的要求。所以,拉普拉斯滤波以卷积核中心点构建包含全部方向参数(Orient Axis)的平面坐标系,核内采样求得中心点突变权重的二阶导数展式。用它增强核内数据中心的突变特征。 二维拉普拉斯滤波核 对于二维信号,即图片信号,来说。拉普拉斯卷积核只有 xyxyxy 两个方向参数。记原信号为 S(x)S(x)S(x) ,原信号的二阶导数为 ∇2S(x)\\nabla^2 S(x)∇2S(x) 。仍然取用大小 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 ,中心点 xc⃗\\vec{x_c}xc⃗ 的卷积核,记边缘检测拉普帕斯滤波核函数为 Lp(xc⃗)\\mathcal{L}_p(\\vec{x_c})Lp(xc⃗) ,则: Lp(xc⃗)=−K⋅∇2S(xc⃗) {\\displaystyle \\begin{aligned} \\mathcal{L}_p(\\vec{x_c}) = -K \\cdot \\nabla^2 S(\\vec{x_c}) \\\\ \\end{aligned} } Lp(xc⃗)=−K⋅∇2S(xc⃗) 考虑到需要调节边缘检测强弱。我们采用强度因子 K∈(−∞,+∞)K \\in (-\\infty, +\\infty)K∈(−∞,+∞) 作为权重,以便进行敏感度控制。 则 KKK 取正值时增强, KKK 取负值时衰减, 绝对值 ∣K∣\\vert K \\vert∣K∣ 大小表示放缩强度。 记核函数为 Ln(xc⃗)\\mathcal{L}_n(\\vec{x_c})Ln(xc⃗) ,有: Ln(xc⃗)=S(xc⃗)+Lp(xc⃗)=S(xc⃗)−K⋅∇2S(xc⃗) {\\displaystyle \\begin{aligned} \\mathcal{L}_n(\\vec{x_c}) =& S(\\vec{x_c}) + \\mathcal{L}_p(\\vec{x_c}) \\\\ =& S(\\vec{x_c}) - K \\cdot \\nabla^2 S(\\vec{x_c}) \\\\ \\end{aligned} } Ln(xc⃗)==S(xc⃗)+Lp(xc⃗)S(xc⃗)−K⋅∇2S(xc⃗) 若 Lp(xc⃗)\\mathcal{L}_p(\\vec{x_c})Lp(xc⃗) 不计算偏导数在内,即 只处理轴方向二阶导数。我们就可以得到 双通(2-Way)拉普拉斯核 : ∇2S(x)=d2S(xc⃗)dxc⃗2=∂2S∂x2+∂2S∂y2=S(x−1, y) − 2⋅S(x,y) + S(x+1, y) + S(x,y−1) − 2⋅S(x,y) + S(x,y+1)Lp(xc⃗)=−K⋅∑xySxy⋅[0, 1, 01,−4, 10, 1, 0]Ln(xc⃗)=−K⋅∑xySxy⋅[0, 1, 01,−4, 10, 1, 0] + S(xc⃗) {\\displaystyle \\begin{aligned} \\nabla^2 S(x) =& \\tfrac{\\mathrm{d}^2 S(\\vec{x_c})}{\\mathrm{d}{\\vec{x_c}^2}} = \\tfrac{ \\partial^2 S}{\\partial x^2} + \\tfrac{ \\partial^2 S}{\\partial y^2} \\\\ =& S(x-1,\\ y)\\ -\\ 2 \\cdot S(x,y)\\ +\\ S(x+1,\\ y)\\ +\\ \\\\ & S(x,y-1)\\ -\\ 2 \\cdot S(x,y)\\ +\\ S(x,y+1) \\\\ \\mathcal{L}_p(\\vec{x_c}) =& -K \\cdot \\sum_{xy}S_{xy} \\cdot { \\begin{bmatrix} 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\\\ 1 ,& \\quad -4 ,& \\quad \\ \\ 1 \\\\ 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\end{bmatrix} }\\\\ \\mathcal{L}_n(\\vec{x_c}) =& - K \\cdot \\sum_{xy}S_{xy} \\cdot { \\begin{bmatrix} 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\\\ 1 ,& \\quad -4 ,& \\quad \\ \\ 1 \\\\ 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\end{bmatrix} }\\ +\\ S(\\vec{x_c}) \\\\ \\end{aligned} } ∇2S(x)==Lp(xc⃗)=Ln(xc⃗)=dxc⃗2d2S(xc⃗)=∂x2∂2S+∂y2∂2SS(x−1, y) − 2⋅S(x,y) + S(x+1, y) + S(x,y−1) − 2⋅S(x,y) + S(x,y+1)−K⋅xy∑Sxy⋅⎣⎡0,1,0, 1,−4, 1, 0 1 0⎦⎤−K⋅xy∑Sxy⋅⎣⎡0,1,0, 1,−4, 1, 0 1 0⎦⎤ + S(xc⃗) 若 Lp(xc⃗)\\mathcal{L}_p(\\vec{x_c})Lp(xc⃗) 包含对角方向 的影响,即处理偏导数情况,我们就可以得到 四通(4-Way)拉普拉斯核 : ∇2S(x)=d2S(xc⃗)dxc⃗2=∂2S∂x2+∂2S∂x∂y+∂2S∂y∂x+∂2S∂y2=S(x−1, y+0) − 2⋅S(x, y) + S(x+1, y+0) + S(x−1, y−1) − 2⋅S(x, y) + S(x+1, y+1) + S(x+1, y−1) − 2⋅S(x, y) + S(x−1, y+1) + S(x+0, y−1) − 2⋅S(x, y) + S(x+0, y+1) Lp(xc⃗)=−K⋅∑xySxy⋅[1, 1, 11,−8, 11, 1, 1]Ln(xc⃗)=−K⋅∑xySxy⋅[1, 1, 11,−8, 11, 1, 1] + S(xc⃗) {\\displaystyle 
\\begin{aligned} \\nabla^2 S(x) =& \\tfrac{\\mathrm{d}^2 S(\\vec{x_c})}{\\mathrm{d}{\\vec{x_c}^2}} = \\tfrac{ \\partial^2 S}{\\partial x^2} + \\tfrac{ \\partial^2 S}{\\partial x \\partial y} + \\tfrac{ \\partial^2 S}{\\partial y \\partial x} + \\tfrac{ \\partial^2 S}{\\partial y^2} \\\\ =& S(x-1,\\ y+0)\\ -\\ 2 \\cdot S(x,\\ y)\\ +\\ S(x+1,\\ y+0)\\ +\\ \\\\ & S(x-1,\\ y-1)\\ -\\ 2 \\cdot S(x,\\ y)\\ +\\ S(x+1,\\ y+1)\\ +\\ \\\\ & S(x+1,\\ y-1)\\ -\\ 2 \\cdot S(x,\\ y)\\ +\\ S(x-1,\\ y+1)\\ +\\ \\\\ & S(x+0,\\ y-1)\\ -\\ 2 \\cdot S(x,\\ y)\\ +\\ S(x+0,\\ y+1)\\ \\\\ \\mathcal{L}_p(\\vec{x_c}) =& -K \\cdot \\sum_{xy}S_{xy} \\cdot { \\begin{bmatrix} 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad -8 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 \\end{bmatrix} }\\\\ \\mathcal{L}_n(\\vec{x_c}) =& - K \\cdot \\sum_{xy}S_{xy} \\cdot { \\begin{bmatrix} 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad -8 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 \\end{bmatrix} }\\ +\\ S(\\vec{x_c}) \\\\ \\end{aligned} } ∇2S(x)==Lp(xc⃗)=Ln(xc⃗)=dxc⃗2d2S(xc⃗)=∂x2∂2S+∂x∂y∂2S+∂y∂x∂2S+∂y2∂2SS(x−1, y+0) − 2⋅S(x, y) + S(x+1, y+0) + S(x−1, y−1) − 2⋅S(x, y) + S(x+1, y+1) + S(x+1, y−1) − 2⋅S(x, y) + S(x−1, y+1) + S(x+0, y−1) − 2⋅S(x, y) + S(x+0, y+1) −K⋅xy∑Sxy⋅⎣⎡1,1,1, 1,−8, 1, 1 1 1⎦⎤−K⋅xy∑Sxy⋅⎣⎡1,1,1, 1,−8, 1, 1 1 1⎦⎤ + S(xc⃗) 显然,四通拉普拉斯对中心点突变特征能有更好的提炼。如果需要对更多方向进行评估,则需要增大核面积。根据拉普拉斯二阶微分自身的特性可知,大小为 n×nn \\times nn×n 的卷积核,可选评估方向为 2(n−1)2(n-1)2(n−1) 个,相应的需求采样也会成倍扩增。且增大采样面积仅仅是预先提炼出,中心点周边的相邻点的突变情况。用这些点的加权增强值来计算中心点加权增强值。所以,更大的拉普拉斯核只是利用了小核的富集,反而并不一定能够得到更优秀的筛选结果(比如单核内波动,具有复杂高低差变化时)。因此,为了相对保证结果的稳定性,我们一般不会采用超过 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 大小的拉普拉斯卷积核。 拉普拉斯滤波的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装了。 如果是 边缘锐化(Edge Sharpening) 的场景,数据只采用灰度值处理即可。对于 原色格式(Primaries Format)为 CIE RGB 1931 色彩空间 的数据,可按下式用 RGB 快速换算: Grey=0.299⋅R + 0.587⋅G + 0.114⋅B Grey = 0.299 \\cdot R\\ +\\ 0.587 \\cdot G\\ +\\ 0.114 \\cdot B Grey=0.299⋅R + 0.587⋅G + 0.114⋅B 此处演示为了便于说明和展示,选择采用更广泛的适用范围,针对广义锐化(Sharpening)构造像素全通道采样的拉普拉斯滤波器。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 没有太多操作,因为关键的部分在 像素程序片(Pixel Shader/Fragment Shader) 上。依据双通还是四通做一下区分。我们采用两种实现,双通情况下直接计算,有: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform vec2 pixel_bias; uniform mat3 laplacian_matrix; uniform sampler2D target_texture; void main() { vec3 output_; output_ += texture2D(target_texture, fs_texcoord.xy).rgb * ((only_edge? 
0.0 : 1.0) + laplacian_matrix[1][1]); output_ += texture2D(target_texture, fs_texcoord.xy + vec2(-1, -1) * pixel_bias).rgb * laplacian_matrix[0][0]; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(-1, +1) * pixel_bias).rgb * laplacian_matrix[2][0]; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(+1, -1) * pixel_bias).rgb * laplacian_matrix[0][2]; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(+1, +1) * pixel_bias).rgb * laplacian_matrix[2][2]; gl_FragColor = vec4(output_, 1.0); } 四通则采用 for 循环实现,传入双通的 拉普拉斯算子 laplacian_matrix 即可兼容,有: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform vec2 pixel_bias; uniform mat3 laplacian_matrix; uniform sampler2D target_texture; void main() { vec3 output_ = only_edge? vec3(0) : texture2D(target_texture, fs_texcoord.xy).rgb; for (int i = 0; i 上述程序片中,我们通过 only_edge 开关 控制是否只获取边缘信息。而传入的 拉普拉斯算子 laplacian_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。由于采用 Web 展示,此处方法以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_laplacian_kernel(step, way_count, str_factor) { let n = step * 2 + 1; let max_way = (n - 1) * 2; let cur_way = Math.min(way_count, max_way); let way_step = Math.floor(max_way / cur_way); let kernel = new Float32Array(n * n); for (let i = 0; i 至此,双通和四通的标准拉普拉斯广义锐化滤波器程序片就完成了。 拉普拉斯滤波的局限性 从卷积核可以看出,拉普拉斯滤波仍然是固定梯度的。但是否启用对角元素(Diagonal Elements)对卷积核特性还是会有较大的影响的。 双通拉普拉斯,只对于横纵方向上的数据敏感,构成的卷积核为 非各向同性(Not Isotropic) 卷积核。但是在有权重的方向上,数据变化梯度(Gradient)却是等大的。因此,双通拉普拉斯也 非各向异性(Not Anisotropic)。 四通拉普拉斯,由于引入对角线方向代表的 45∘45^{\\circ}45∘ 、 135∘135^{\\circ}135∘ 、 225∘225^{\\circ}225∘ 、 315∘315^{\\circ}315∘ 的计算,使 3×33 \\times 33×3 核心相邻元素所含所有方向上的梯度都成为等大参考值,因此,四通拉普拉斯的卷积核,为 各向同性(Isotropic) 卷积核。 所以,虽然四通拉普拉斯能够更好的提取临界边缘特征,但也会同步的保留并增强高频扰动,从而在结果中留存更多的高频噪音。双通则要相对好一些,但相应的临界特征提取能力也变得更弱。不过,若是能够提升数据源的质量,通过 先行降噪(NRF [Noise Reduction First]) 过滤部分干扰。那么理论上,最终提取产物的质量也会有一定程度的提升。马尔滤波(Marr Filter) 就是对此方向的探索。 同时,拉普拉斯滤波 并非是脱离中心参考值的边缘锐化(Edge Sharpening)算法,对于一些复杂的边缘位置波动情况,会有 边缘扩散(Edge Spread) 的风险。且由于 包含高权重的中心值参与了计算过程,使得拉普拉斯滤波对噪声非常敏感,从而极易丢失边缘方向信息,最终导致检测得到的边缘不连续。基于该情况,部分后续的改进算法采用了 *去中心化(Center Insensitive) 思想,来一定程度上避免问题发生。比如, 索贝尔滤波(Sobel Filter)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_2_4.html":{"url":"Chapter_3/Language/cn/Docs_3_2_4.html","title":"3.2.4 马尔滤波(Marr Filter)","keywords":"","body":"3.2.4 马尔滤波(Marr Filter) 马尔滤波(Marr Filter) 是拉普拉斯滤波采用 先行降噪(NRF [Noise Reduction First]) 的改进算法。利用高斯滤波对频率波动性的处理能力,对图片的高频信息进行模糊过滤。再行使标准拉普拉斯边缘检测,筛选突变明显的剩余高频部分并增强,达到更好的效果 [14] 。 因此马尔滤波也被称为 拉普拉斯-高斯滤波(LoG [Laplacian of Gaussian]),或 马尔-希德雷斯算法(Marr–Hildreth Algorithm)。还是以 ∣target∣1\\vert target \\vert_1∣target∣1 表示归一化操作。我们记高斯滤波核函数为 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) ,记 LoG 的边缘检测核函数为 LoGp(xc⃗){LoG}_p(\\vec{x_c})LoGp(xc⃗) ,有: LoGn(xc⃗)=Lp(xc⃗)∣Fn=−K⋅∇2Fn(xc⃗) {\\displaystyle \\begin{aligned} {LoG}_n(\\vec{x_c}) =& \\mathcal{L}_p(\\vec{x_c})|_{F_n} = -K \\cdot \\nabla^2 F_n(\\vec{x_c}) \\\\ \\end{aligned} } LoGn(xc⃗)=Lp(xc⃗)∣Fn=−K⋅∇2Fn(xc⃗) 其中 KKK 是我们取来控制强度的强度因子,展开简化上式有: LoGp(xc⃗)=−K⋅∑xySxy⋅∣(−1πδ4⋅[1−(Δx2+Δy2)2⋅δ2]⋅e−(Δx2+Δy2)2⋅δ2)xy∣1 {\\displaystyle \\begin{aligned} {LoG}_p(\\vec{x_c}) =& -K \\cdot \\sum_{xy}S_{xy} \\cdot \\vert ( -\\tfrac{1}{\\pi \\delta ^4} \\cdot [1- \\tfrac{(\\Delta x^2+\\Delta y^2)}{2 \\cdot \\delta ^2}] \\cdot e ^{-\\tfrac{(\\Delta x^2+\\Delta y^2)}{2 \\cdot \\delta ^2}})_{xy} \\vert_1 \\\\ 
\\end{aligned} } LoGp(xc⃗)=−K⋅xy∑Sxy⋅∣(−πδ41⋅[1−2⋅δ2(Δx2+Δy2)]⋅e−2⋅δ2(Δx2+Δy2))xy∣1 显然,LoGp(xc⃗){LoG}_p(\\vec{x_c})LoGp(xc⃗) 也满足高斯滤波的特性,在 δ\\deltaδ 确定的情况下具有固定大小的算子。如果选用的高斯核大小为 3×33 \\times 33×3 ,则考虑到最大程度生效的感受野大小,算法的卷积核必须得保证有至少 n×n≥3×3n \\times n \\geq 3 \\times 3n×n≥3×3 的取值。但也不能太大。如果超过核心高斯算子大小的 555 倍,即 n×n≥15×15n \\times n \\geq 15 \\times 15n×n≥15×15 时,会非常容易产生采样元素的过度富集,导致边缘取值偏移和过曝问题。 因此,一般而言 LoGn(xc⃗){LoG}_n(\\vec{x_c})LoGn(xc⃗) 算子的大小会取奇数范围 n×n∈[5×5, 11×11]∣oddn \\times n \\in [5 \\times 5, \\ 11 \\times 11]|_{odd}n×n∈[5×5, 11×11]∣odd , 记为 MLoGM_{LoG}MLoG 。 为了便于说明,我们采用 n×n=9×9n \\times n = 9 \\times 9n×n=9×9 的核大小做计算。当 δ=1.4\\delta = 1.4δ=1.4 且 K=1.0K = 1.0K=1.0 时,未归一化的 MLoGM_{LoG}MLoG 可算得为: MLoG∣δ=1.4K=1.0=[0, 1, 1, 2, 2, 2, 1, 1, 01, 2, 4, 5, 5, 5, 4, 2, 11, 4, 5, 3, 0, 3, 5, 4, 12, 5, 3,−12,−24,−12, 3, 5, 22, 5, 0,−24,−40,−24, 0, 5, 22, 5, 3,−12,−24,−12, 3, 5, 21, 4, 5, 3, 0, 3, 5, 4, 11, 2, 4, 5, 5, 5, 4, 2, 10, 1, 1, 2, 2, 2, 1, 1, 0]9×9 {\\displaystyle \\begin{aligned} M_{LoG}|_{\\delta=1.4}^{K=1.0} =& { \\begin{bmatrix} 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\\\ 1 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 0 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 1 \\\\ 2 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 3 ,& \\quad -12 ,& \\quad -24 ,& \\quad -12 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 2 \\\\ 2 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 0 ,& \\quad -24 ,& \\quad -40 ,& \\quad -24 ,& \\quad \\ \\ 0 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 2 \\\\ 2 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 3 ,& \\quad -12 ,& \\quad -24 ,& \\quad -12 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 2 \\\\ 1 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 0 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 1 \\\\ 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\end{bmatrix} } _{9 \\times 9} \\\\ \\end{aligned} } MLoG∣δ=1.4K=1.0=⎣⎢⎢⎢⎢⎢⎢⎢⎢⎢⎢⎢⎢⎡0,1,1,2,2,2,1,1,0, 1, 2, 4, 5, 5, 5, 4, 2, 1, 1, 4, 5, 3, 0, 3, 5, 4, 1, 2, 5, 3,−12,−24,−12, 3, 5, 2, 2, 5, 0,−24,−40,−24, 0, 5, 2, 2, 5, 3,−12,−24,−12, 3, 5, 2, 1, 4, 5, 3, 0, 3, 5, 4, 1, 1, 2, 4, 5, 5, 5, 4, 2, 1, 0 1 1 2 2 2 1 1 0⎦⎥⎥⎥⎥⎥⎥⎥⎥⎥⎥⎥⎥⎤9×9 此时,有 LoGn(xc⃗)∣δ=1.4K=1.0{LoG}_n(\\vec{x_c})|_{\\delta=1.4}^{K=1.0}LoGn(xc⃗)∣δ=1.4K=1.0 可表示如下: LoGn(xc⃗)∣δ=1.4=∑xySxy⋅∣(MLoG∣δ=1.4K=1.0)∣1∈R9×9 {\\displaystyle \\begin{aligned} {LoG}_n(\\vec{x_c})|_{\\delta=1.4} =& \\sum_{xy}S_{xy} \\cdot \\vert (M_{LoG}|_{\\delta=1.4}^{K=1.0}) \\vert_1 \\in \\mathbb{R}^{9 \\times 9} \\\\ \\end{aligned} } LoGn(xc⃗)∣δ=1.4=xy∑Sxy⋅∣(MLoG∣δ=1.4K=1.0)∣1∈R9×9 除了采样不占优势外,马尔滤波核本身在确定 δ\\deltaδ 取值后并不复杂。考虑到最小采样成本,我们一般取用 5×55 \\times 55×5 大小的卷积核。且不建议对马尔滤波核使用线性采样简化运算,否则会扩大误差。 马尔滤波的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + 
vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 程序化马尔滤波的关键处理部分,依旧在 像素程序片(Pixel Shader/Fragment Shader)上和 CPU 的马尔算子的计算上。我们先看像素程序片(Pixel Shader/Fragment Shader)是怎么实现的: precision mediump float; const int n = 5; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform float marr_matrix[n * n]; uniform sampler2D target_texture; void main() { vec3 output_; for (int i = 0; i 完全就是高斯的像素程序片。或者说,对于以矩阵形式传入的固定算子,在程序片的实现上都是可以复用的。因此,如果遇到类似场景,此类程序片也可以考虑合并或者同态转换。 而传入的 马尔算子 marr_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_marr_kernel(step, delta) { let n = step * 2 + 1; let kernel = new Float32Array(n * n); let factor_1 = 1.0 / (Math.PI * Math.pow(delta, 4)); // trick: normalized skip let factor_2 = 1.0 / (2.0 * delta * delta); let normalize_div = 0; for (let i = 0; i 至此,简易马尔滤波器程序片就完成了。 马尔滤波的局限性 马尔滤波最大问题就在于采样数上。但如果不考虑采样的消耗,其本身也并非毫无缺点。 虽然马尔滤波因 具有对信号数据所携带高频干扰(即高频噪声)的一定抗性,使得算法结果相较于拉普拉斯滤波而言,有较大的改善。但却不能避免非各向异性(Not Anisotropic)引入并增强摩尔纹的缺点。 且马尔滤波更容易受没有针对中心高权重进行处理,而采用大卷积核进一步 增加了中心占比 的影响,出现 边缘扩散 和 非连续 的问题。 不过在取 δ1.0\\delta δ1.0 时,利用高斯算法对波动性的削弱,马尔滤波能够在抑制噪音的同时,进行有限程度并考虑相邻波动特征的边缘增强。这让马尔滤波配合原始数据下,能够达到更自然的滤波效果。所以,我们一般不采用马尔滤波检测边缘,而是使用其处理广义锐化场景。 马尔滤波的广义锐化应用 马尔滤波在广义锐化下的核函数是怎样的呢?参考拉普拉斯滤波,我们只需要替换掉权重部分即可: Ln(xc⃗)=S(xc⃗)+LoGn(xc⃗)=S(xc⃗)−K⋅∇2Fn(xc⃗) {\\displaystyle \\begin{aligned} \\mathcal{L}_n(\\vec{x_c}) =& S(\\vec{x_c}) + {LoG}_n(\\vec{x_c}) \\\\ =& S(\\vec{x_c}) -K \\cdot \\nabla^2 F_n(\\vec{x_c}) \\\\ \\end{aligned} } Ln(xc⃗)==S(xc⃗)+LoGn(xc⃗)S(xc⃗)−K⋅∇2Fn(xc⃗) 这里已经有一些复合函数的感觉了。如果我们将数据源 S(xc⃗)S(\\vec{x_c})S(xc⃗) 更换为高斯滤波结果,为区别于 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) ,这里我们记为 Gn(xc⃗)G_n(\\vec{x_c})Gn(xc⃗) 。则整个处理函数就成为了,在高斯模糊的基础上再行锐化,达到模糊着色面,增强轮廓边缘的效果。此时的核函数为: Ln(xc⃗)=Gn(xc⃗)−K⋅∇2Fn(xc⃗) {\\displaystyle \\begin{aligned} \\mathcal{L}_n(\\vec{x_c}) =& G_n(\\vec{x_c}) -K \\cdot \\nabla^2 F_n(\\vec{x_c}) \\\\ \\end{aligned} } Ln(xc⃗)=Gn(xc⃗)−K⋅∇2Fn(xc⃗) 以此类推,我们也可以将数据源 S(xc⃗)S(\\vec{x_c})S(xc⃗) 换成其他滤波的结果,将马尔滤波(进一步衍生到所有可行的滤波函数)作为后级处理,构建连续的滤波处理流水线。这种思想,即是 滤波链路(Filter Chain) 技术的概念起源。 所以,应用于锐化的马尔滤波链路,也被称为 马尔锐化(Marr Sharpening),或简称为 朴素锐化(Simple Sharpening) 算法。 马尔锐化的 GLSL 渲染程序片 根据上文的分析,马尔锐化包含两部分:前级数据 和 后级数据。前级数据用于内容主体,后级数据用于叠加锐化。这里我们取用可配置是否采用高斯模糊,作为可选前级数据的程序片方案,对已实现的马尔滤波进行改造。 由于顶点程序片仍然可以被沿用,此处我们单独来看 像素程序片(Pixel Shader/Fragment Shader) 该怎么定义: precision mediump float; const int n = 3; const int m = 5; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform bool marr_blur; uniform vec2 pixel_bias; uniform float gaussian_matrix[n * n]; uniform float marr_matrix[m * m]; uniform float str_factor; uniform sampler2D target_texture; vec3 gauss_operation() { vec3 output_; for (int i = 0; i 显然,作为前级输入的高斯滤波,其滤波核大小并不一定需要和后级处理核大小保持一致。我们依旧采用 强度参数 str_factor,对锐化介入的强度进行了直接调控。而传入的 高斯算子 gaussian_matrix 、 马尔算子 marr_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_gaussian_kernel(step, delta) { let n = step * 2 + 1; let kernel = new Float32Array(n * n); let factor_1 = 1.0 / (Math.sqrt(2.0 * Math.PI) * delta); let factor_2 = 1.0 / (2.0 * delta * delta); let normalize_div = 0; for (let i = 0; i 至此,马尔锐化基本完成。 看来更稳定的边缘检测,还是需要依赖去中心化的索贝尔滤波了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 
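另外,正文中 calculate_marr_kernel 的循环体在排版中有所截断,这里按上文 LoG 公式给出一种可能的补全示意(变量命名与归一化方式均为假设,仅供参照,非原文实现):

// calculate_marr_kernel 的一种可能补全:按 LoG 公式逐点求值后整体归一化。
// step 为半径(核边长 n = 2*step + 1),delta 为 δ。
function calculateMarrKernel(step, delta) {
  const n = step * 2 + 1;
  const kernel = new Float32Array(n * n);
  const factor1 = 1.0 / (Math.PI * Math.pow(delta, 4));
  const factor2 = 1.0 / (2.0 * delta * delta);
  let normalizeDiv = 0;
  for (let i = 0; i < n; i++) {
    for (let j = 0; j < n; j++) {
      const dx = j - step;
      const dy = i - step;
      const r2 = dx * dx + dy * dy;
      // LoG: -1/(πδ^4) · [1 - r²/(2δ²)] · e^{-r²/(2δ²)}
      const value = -factor1 * (1.0 - r2 * factor2) * Math.exp(-r2 * factor2);
      kernel[i * n + j] = value;
      normalizeDiv += Math.abs(value); // LoG 算子和接近 0,此处以绝对值和归一化,仅为一种选择
    }
  }
  for (let k = 0; k < kernel.length; k++) {
    kernel[k] /= normalizeDiv;
  }
  return kernel;
}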
"},"Chapter_3/Language/cn/Docs_3_2_5.html":{"url":"Chapter_3/Language/cn/Docs_3_2_5.html","title":"3.2.5 索贝尔滤波(Sobel Filter)","keywords":"","body":"3.2.5 索贝尔滤波(Sobel Filter) 索贝尔滤波(Sobel Filter) 是由 斯坦福人工智能实验室(SAIL [Stanford Artificial Intelligence Laboratory]) 的 艾尔文·索贝尔(Irwin Sobel,1940 - present) 和 格雷·费尔德曼(Gary Feldman,1942 - present) 于 1968 年提出的一种用于 边缘检测(Edge Detection) 的 去中心化(Center Insensitive)一阶离散微分算子 [15] 。 通过在构建 3×33 \\times 33×3 卷积核中,对横纵两个方向距离中心点不同偏移的相邻点,采用不同的方位权重占比的方式,针对性的计算边缘变化影响。其实,是将平面点漂移的方向向量,拆解为以卷积核中心点构建的 xyxyxy 坐标系下的方向分量。通过抽象方向分量的 一维简易高斯分布(1D Simple Gaussian Distribution) 密度函数到方差同位表示,来记录中心点的运动情况。而核内不同取值,则代表垂直于该取值方向的分量高斯分布函数切片,占当前相位的百分比( 归一化后 )。 因此,仍然取用大小 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 ,中心点 xc⃗\\vec{x_c}xc⃗ 的卷积核。记原信号为 S(x)S(x)S(x) ,边缘检测索贝尔滤波核函数为 Sp(xc⃗)\\mathcal{S}_p(\\vec{x_c})Sp(xc⃗) ,则: Sp(xc⃗)=K⋅Gx2+Gy2 {\\displaystyle \\begin{aligned} \\mathcal{S}_p(\\vec{x_c}) =& K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\end{aligned} } Sp(xc⃗)=K⋅√Gx2+Gy2 横向 xxx 轴方向的滤波核函数 GxG_xGx 为: Gx(xc⃗)=Kx⋅[+1, 0, −1+2, 0, −2+1, 0, −1]⋅∑xyxc⃗Sxy∈R3×3 {\\displaystyle \\begin{aligned} G_x(\\vec{x_c}) =& K_x \\cdot { \\begin{bmatrix} +1 ,& \\ \\ 0 ,& \\ \\ -1 \\\\ +2 ,& \\ \\ 0 ,& \\ \\ -2 \\\\ +1 ,& \\ \\ 0 ,& \\ \\ -1 \\end{bmatrix} } \\cdot \\sum_{xy}^{\\vec{x_c}}S_{xy} \\in \\mathbb{R}^{3 \\times 3} \\\\ \\end{aligned} } Gx(xc⃗)=Kx⋅⎣⎡+1,+2,+1, 0, 0, 0, −1 −2 −1⎦⎤⋅xy∑xc⃗Sxy∈R3×3 横向 yyy 轴方向的滤波核函数 GyG_yGy 为: Gy(xc⃗)=Ky⋅[+1, +2, +10, 0,0−1, −2, −1]⋅∑xyxc⃗Sxy∈R3×3 {\\displaystyle \\begin{aligned} G_y(\\vec{x_c}) =& K_y \\cdot { \\begin{bmatrix} +1 ,& \\ +2 ,& \\ +1 \\\\ 0 ,& \\ \\ 0 ,& \\quad 0 \\\\ -1 ,& \\ -2 ,& \\ -1 \\end{bmatrix} } \\cdot \\sum_{xy}^{\\vec{x_c}}S_{xy} \\in \\mathbb{R}^{3 \\times 3} \\\\ \\end{aligned} } Gy(xc⃗)=Ky⋅⎣⎡+1,0,−1, +2, 0, −2, +10 −1⎦⎤⋅xy∑xc⃗Sxy∈R3×3 从上式可知,强度系数 KKK 可以拆分到 xyxyxy 各自方向的子核中,记为 K⃗=(Kx,Ky)\\vec{K} = (K_x,K_y)K⃗=(Kx,Ky) 。则,当 K⃗=(0, 1)\\vec{K} = (0,\\ 1)K⃗=(0, 1) 时 Sp(xc⃗)=K⋅Gy(xc⃗)\\mathcal{S}_p(\\vec{x_c}) = K \\cdot G_y(\\vec{x_c})Sp(xc⃗)=K⋅Gy(xc⃗) 只保留纵向滤波结果,当 K⃗=(1, 0)\\vec{K} = (1,\\ 0)K⃗=(1, 0) 时 Sp(xc⃗)=K⋅Gx(xc⃗)\\mathcal{S}_p(\\vec{x_c}) = K \\cdot G_x(\\vec{x_c})Sp(xc⃗)=K⋅Gx(xc⃗) 只保留横向滤波结果。不过,一般情况下我们不会只进行单边检测,因此方便起见还是采用在整体滤波结果上进行强度控制,即使用 K∈RK \\in \\mathbb{R}K∈R 来调整。 显然,索贝尔滤波是同时具有 梯度方向(Orientate) 和 强度(Magnitude) 的。记方向为 Θ\\ThetaΘ ,强度为 AAA 。则有: A=∣Sp(xc⃗)∣=K⋅Gx2+Gy2Θ=∠Sp(xc⃗) =atan2(Gy, Gx) {\\displaystyle \\begin{aligned} A =& \\vert {\\mathcal{S}_p(\\vec{x_c})} \\vert = K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\Theta =& \\angle \\mathcal{S}_p(\\vec{x_c})\\ = {atan2}(G_y,\\ G_x)\\\\ \\end{aligned} } A=Θ=∣Sp(xc⃗)∣=K⋅√Gx2+Gy2∠Sp(xc⃗) =atan2(Gy, Gx) 此时,有 LoGn(xc⃗)∣δ=1.4K=1.0{LoG}_n(\\vec{x_c})|_{\\delta=1.4}^{K=1.0}LoGn(xc⃗)∣δ=1.4K=1.0 可表示如下: LoGn(xc⃗)∣δ=1.4=∑xySxy⋅∣(MLoG∣δ=1.4K=1.0)∣1∈R9×9 {\\displaystyle \\begin{aligned} {LoG}_n(\\vec{x_c})|_{\\delta=1.4} =& \\sum_{xy}S_{xy} \\cdot \\vert (M_{LoG}|_{\\delta=1.4}^{K=1.0}) \\vert_1 \\in \\mathbb{R}^{9 \\times 9} \\\\ \\end{aligned} } LoGn(xc⃗)∣δ=1.4=xy∑Sxy⋅∣(MLoG∣δ=1.4K=1.0)∣1∈R9×9 因此,用索贝尔滤波也可以得到图像中心像素的 运动漂移信息,可用于 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 中获取像素点梯度矢量的计算方法。此部分我们在随后的章节中进行。 那么,基于索贝尔滤波的边界检测该怎样实现呢? 
索贝尔滤波的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 程序化索贝尔滤波的关键处理部分,依旧在 像素程序片(Pixel Shader/Fragment Shader)上和 CPU 的索贝尔算子的计算上。我们先看像素程序片(Pixel Shader/Fragment Shader)是怎么实现的: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform vec2 pixel_bias; uniform mat3 sobel_matrix_x; uniform mat3 sobel_matrix_y; uniform sampler2D target_texture; void main() { vec3 output_ = only_edge? vec3(0) : texture2D(target_texture, fs_texcoord.xy).rgb; vec3 color_center_x; vec3 color_center_y; for (int i = 0; i 我们依旧采用 强度参数 str_factor,对锐化介入的强度进行直接调控。而传入的 索贝尔算子分为两个方向记为 sobel_matrix_x 和 sobel_matrix_y。同 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_sobel_kernel(use_horizontal, str_factor) { let kernel = new Float32Array(use_horizontal ? [ +1.0, 0.0, -1.0, +2.0, 0.0, -2.0, +1.0, 0.0, -1.0 ] : [ +1.0, +2.0, +1.0, 0.0, 0.0, 0.0, -1.0, -2.0, -1.0 ]) for (let i = 0; i 至此,简易索贝尔滤波器程序片就完成了。 索贝尔滤波的局限性 虽然索贝尔滤波通过去中心化检测目标像素点周边的运动情况,检测结果也 相对准确,并摆脱了 由卷积核中心权值造成像素富集而导致对干扰抗性较弱的问题。但也正因此 进一步扩大了边缘扩散(Edge Spread)的风险。且当物体轮廓处的灰度(光亮度)变化过于发散时,算法会有一定程度的丢失,即 对抗弱边缘(Weak Edge)的能力较差。 不过,这些缺点在只需要边缘位置的情况下,可以通过 阈值限定二值化(Thresholding) 来得到一定程度的改善( 这种做法经常出现在机器学习的数据前处理过程中 )。由于一般音视频工程并不会需要如此精度,考虑到索贝尔滤波的快捷、简单、高效和高干扰抗性的特点,算法本身常被用于各种场景下的 边缘数据提取 和 像素信息预测 过程。但本身不适合(也不应该)作为噪音抑制算法使用。 经过几个滤波算法的辨析,我们发现想要真正的有效抑制噪音,达到自然模糊且边缘保存的目的,单纯以多 非各向异性 滤波器组合的形式,还是很难得到同 各向异性 滤波算法相同的效果。 当然,不同的算法各有自身的优势,并非是独一的非此即彼的对立关系。作为工程师,在不同需求下,还是要灵活取用和组合达成所求。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_2_6.html":{"url":"Chapter_3/Language/cn/Docs_3_2_6.html","title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","keywords":"","body":"3.2.6 各向异性扩散(Anisotropic Diffusion) 【待补充】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_3.html":{"url":"Chapter_3/Language/cn/Docs_3_3.html","title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","keywords":"","body":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理 在本节之前,本书已经讲解了如何分离的处理 一维动态音频 和 二维静态图片 信号。如果我们 将一系列图片以时间轴串联,就得到一组由二维静态信号按序构成的二维动态信号。这种类型的信号,被称为 视频流(Visual Stream)。 相较于一维信号,静态二维信号本就具有 信息密度高 的特征。而动态化则会进一步 加剧 其对 算力资源 的消耗。不经合适的方法控制数据,将会产生大量的 冗余信息。 严重不利于数据的保存、传输和处理。 考虑到被采样的运动物体,其前后总是存在时序关联性的客观事实。视频流作为观察物体得到的数据载体,相邻的两个时间节点采样图片,像素值上必然也可以抽象出相应运动特征的 位移向量投影,得到 关联前后数据的变化关系。借此,工程上就可以利用像素的漂移情况,来筛选出未发生改变的数据,从而复用前值以求降低不必要计算和更新,减少消耗。 为此,需要对 运动区域进行检测,并提取运动矢量信息。 在线演示 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_3_1.html":{"url":"Chapter_3/Language/cn/Docs_3_3_1.html","title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","keywords":"","body":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 在前文中,我们提到了索贝尔滤波(Sobel Filter)卷积核对中心点周边方向信息的提炼,可以被用来获取方向梯度直方图的梯度矢量计算中。那么什么是方向梯度直方图呢? 方向梯度直方图最早的 概念原型(Prototype) 来自于 罗伯特·麦康纳尔(Robert K. 
McConnell) 在 1986 年申请的有关模式识别专利中,对 视野(FoV [Field of View]) 方向性输入产生输出结果差异的判断过程。并于 1994 年 三菱电子研究实验室(Mitsubishi Electric Research Laboratories) 在手势识别应用的区域检测过程中,首次总结为当前称谓 [16] 。最终经过 2005 年 CVPR 顶会参会论文验证,重新确认了 HOG 在动态检测上的高适配度,才开始被人熟知 [17] 。 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 是对用于提炼并描述区域范围内像素漂移情况方法论的概念抽象。是对过程的抽象,而非对结果的抽象。由于本身最终运算能够表示为处理单元形式,因而属于 特征描述算子(Feature Descriptor) 的一种。整体思想则是在单元间隔均匀的卷积核内,使用重叠的局部梯度提炼算法并 录表统计归一化(Normalization),以取得中心点变化方向矢量。方法常结合 阈值限定(Thresholding) 筛选结果,提高运动预测的准确度。 显然,方向梯度直方图并不只适用于索贝尔, 只要能够提供中心点周边梯度变化的大小和方向的算子,都可以被应用于 HOG 的求解中。一个方向梯度直方图是否优秀,最大的影响点就在于梯度提炼的是否精准。 HOG 的标准处理流 HOG 有一套相对固定的标准过程的。基本可以按照如下顺序进行: 数据优化,通过滤波算法(如高斯滤波),减少干扰信息并增强灰度(光亮度)对比; 梯度计算,通过梯度滤波器(如索贝尔滤波)提取图像每个像素的梯度矢量; 分组抽象,指定梯度矢量采样卷积核范围,即分组(Cell) 矢量合并,将分组内所有像素的梯度矢量,以方向投票统计合并权重,获取 HOG 块归一化,指定块大小(由分组为单位),整合 HOG 统计结果并归一化快内分组权重 五个步骤,共同构成了方向梯度直方图方法论本身。 且四五两步概念不同,但密不可分。 数据优化 数据优化的目的是为了增强光亮度变化差异,并减少干扰噪声,从而更好的保存并放大像素梯度变化情况。我们记原信号为 S(x)S(x)S(x) ,记经过滤波降噪和修饰后的灰度(光亮度)数据为 Sg(x)S_g(x)Sg(x) 。从 S(x)S(x)S(x) 到 Sg(x)S_g(x)Sg(x) 的处理过程就不再赘述(见滤波,类比处理)。记经过优化函数 Og(x)O_g(x)Og(x) 处理,以 Sg(x)S_g(x)Sg(x) 获取的优化结果为 So(x)S_o(x)So(x) 。那么,相对简单的处理方式,就是直接对 Sg(x)S_g(x)Sg(x) 进行 伽马矫正(Gamma Correction)来得到 So(x)S_o(x)So(x) 。取伽马因子为 γ\\gammaγ ,矫正系数(Adjust Factor)为 AAA (一般情况 A=1.0A = 1.0A=1.0 为常量),有: Og(x)=Gamma(S)=A⋅S(x)γ {\\displaystyle \\begin{aligned} O_g(x) =& Gamma(S) = A \\cdot S(x)^{\\gamma} \\\\ \\end{aligned} } Og(x)=Gamma(S)=A⋅S(x)γ 伽马矫正(Gamma Correction) 本是用于应对,早期 阴极射线管(CRT [Cathode Ray Tube])显示器 的电子偏转特征,引入的采样源数据非线性转换算法。传统的 CRT 显示器在显示时就会完成对偏转数据的自然逆向过程,而在 液晶显示器(LCD [Liquid Crystal Display]) 上,则需要 主动的实现这一反向运算,否则会面临数据亮度过爆的问题。 由于采样时采用 γ1\\gamma γ1 应用于数据修正, 所以 γ1\\gamma γ1 时的 γ\\gammaγ 值被称为 编码伽马值(Encoding Gamma)。相应的,γ>1\\gamma > 1γ>1 时的 γ\\gammaγ 值被称为 解码伽马值(Decoding Gamma)。而采样到还原的过程中,对伽马矫正的不同运用被分别称为 伽马编码(Gamma Encode) 和 伽马解码(Gamma Decode)。 图 3-4 原数据经过伽马编解码(伽马矫正)的还原过程示意图 伽马矫正本身的作用正是针对原图色彩通道数据,进行非线性的映射。衍生为对图片整体光亮度的调节,因此在灰度值上的体现最为明显。我们利用这种特性,来增强图片的对比信息,放大像素梯度变化。 这一步,通常取用 γ∈[0.45, 1.25]\\gamma \\in [0.45,\\ 1.25]γ∈[0.45, 1.25] 区间内的值,或 γ=0.5\\gamma = 0.5γ=0.5 的原论文推荐值来进行修正。得到用于后续处理的灰度数据源 So(x)S_o(x)So(x) 。 梯度计算 在经过优化得到高对比度的 灰度(光亮度)图 后,就可以利用一些方向梯度卷积核算法,来计算每一个像素点光亮度变换的梯度矢量了。 此时应用边缘检测索贝尔滤波,目的同 HOG 的默认设定中,采用横纵方向均取 单一中线 的简化 普雷维特算子(Prewitt Operator),以求取梯度 方向(Orientate) 和 强度(Magnitude) 的作用一致。显然,并不只有索贝尔算法或普雷维特算法,适用于方向梯度直方图中梯度矢量的计算。只要能够提供中心点周边梯度变化的大小和方向的算子,都可以被应用于 HOG 的此步的求解计算中。 我们记方向为 Θ\\ThetaΘ ,强度为 AAA ,横向 xxx 轴方向的滤波核函数 GxG_xGx ,纵向 yyy 轴方向的滤波核函数 GyG_yGy 。强度系数 KKK 为同态值 K=Kx=KyK= K_x = K_yK=Kx=Ky$ 。此处不含推导展示结论。 记 边缘检测普雷维特滤波核函数 为 Pp(xc⃗)\\mathcal{P}_p(\\vec{x_c})Pp(xc⃗) ,有: Gx=Kx⋅[+1,0,−1] ⋅So(xc⃗)3×1Gy=Ky⋅[+1,0,−1]T⋅So(xc⃗)1×3A=∣Pp(xc⃗)∣=K⋅Gx2+Gy2Θ=∠Pp(xc⃗) =atan2(Gy, Gx) {\\displaystyle \\begin{aligned} G_x =& K_x \\cdot { \\begin{bmatrix} +1 ,& \\quad 0 ,& \\quad -1 \\end{bmatrix} } \\ \\cdot S_o(\\vec{x_c})^{3 \\times 1} \\\\ G_y =& K_y \\cdot { \\begin{bmatrix} +1 ,& \\quad 0 ,& \\quad -1 \\end{bmatrix} ^{T} } \\cdot S_o(\\vec{x_c})^{1 \\times 3} \\\\ A =& \\vert {\\mathcal{P}_p(\\vec{x_c})} \\vert = K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\Theta =& \\angle \\mathcal{P}_p(\\vec{x_c})\\ = {atan2}(G_y,\\ G_x)\\\\ \\end{aligned} } Gx=Gy=A=Θ=Kx⋅[+1,0,−1] ⋅So(xc⃗)3×1Ky⋅[+1,0,−1]T⋅So(xc⃗)1×3∣Pp(xc⃗)∣=K⋅√Gx2+Gy2∠Pp(xc⃗) =atan2(Gy, Gx) 记 边缘检测索贝尔滤波核函数 为 Sp(xc⃗)\\mathcal{S}_p(\\vec{x_c})Sp(xc⃗) ,有: Gx=Kx⋅[+1,0, −1+2,0, −2+1,0, −1]⋅So(xc⃗)3×3Gy=Ky⋅[+1, +2, +1 0,0,0−1, −2, −1]⋅So(xc⃗)3×3A=∣Sp(xc⃗)∣=K⋅Gx2+Gy2Θ=∠Sp(xc⃗) =atan2(Gy, Gx) {\\displaystyle \\begin{aligned} G_x =& K_x \\cdot { \\begin{bmatrix} +1 ,& \\quad \\quad 0 ,& \\quad \\ -1 \\\\ +2 ,& \\quad 
\\quad 0 ,& \\quad \\ -2 \\\\ +1 ,& \\quad \\quad 0 ,& \\quad \\ -1 \\end{bmatrix} } \\cdot S_o(\\vec{x_c})^{3 \\times 3} \\\\ G_y =& K_y \\cdot { \\begin{bmatrix} +1 ,& \\quad \\ +2 ,& \\quad \\ +1 \\\\ \\ \\ \\ 0 ,& \\quad \\quad 0 ,& \\quad \\quad 0 \\\\ -1 ,& \\quad \\ -2 ,& \\quad \\ -1 \\end{bmatrix} } \\cdot S_o(\\vec{x_c})^{3 \\times 3} \\\\ A =& \\vert {\\mathcal{S}_p(\\vec{x_c})} \\vert = K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\Theta =& \\angle \\mathcal{S}_p(\\vec{x_c})\\ = {atan2}(G_y,\\ G_x)\\\\ \\end{aligned} } Gx=Gy=A=Θ=Kx⋅⎣⎡+1,+2,+1,0,0,0, −1 −2 −1⎦⎤⋅So(xc⃗)3×3Ky⋅⎣⎡+1, 0,−1, +2,0, −2, +10 −1⎦⎤⋅So(xc⃗)3×3∣Sp(xc⃗)∣=K⋅√Gx2+Gy2∠Sp(xc⃗) =atan2(Gy, Gx) 更明确的,当我们采用不同算法进行梯度计算时,梯度提炼的结果,将会在较大程度上影响最终得到的方向梯度直方图。是需要更准确、更快捷,还是需要高抗性、低波动,应以实际工程角度考量。根据具体需要来采用不同的边缘检测算法。 而梯度方向和强度的计算则可统一为共识: A=K⋅Gx2+Gy2Θ=∠ [tan−1(GyGx)] {\\displaystyle \\begin{aligned} A =& K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\Theta =& \\angle \\ [{tan^{-1}}(\\tfrac{G_y}{G_x})] \\\\ \\end{aligned} } A=Θ=K⋅√Gx2+Gy2∠ [tan−1(GxGy)] 称为 通用卷积核梯度矢量公式(Formula of Kernel Gradient Vector)。 经过此步计算后,灰度数据源 So(x)S_o(x)So(x) 的输入就被转换为原信号为 S(x)S(x)S(x) 的所有像素点,梯度方向数据集 Θ(x)\\Theta(x)Θ(x) 和 梯度强度数据集 A(x)A(x)A(x) 。不过此时的数据量相对较大,不便于计算处理,还需 简化信息量。 分组抽象 & 矢量合并 分组抽象的目的是为了提炼每个像素点的数据,汇总分组内逐个像素特征到分组整体的单元特征。 由于原有梯度方向的平面完整性,以 Θ\\ThetaΘ 范围即便只限定为整数角,也包含 [0∘, 360∘)[0^{\\circ},\\ 360^{\\circ})[0∘, 360∘) 共 360360360 个取值。 这样造成的数据膨胀,不利于有限算力的处理。 因此,以尽可能不损失方向包含实际意义为前提, 将角度按照权重分割 来表示原梯度包含信息,是个不错的办法。 假设我们将 [0∘, 360∘)[0^{\\circ},\\ 360^{\\circ})[0∘, 360∘) 按照 ∠Θ=[Θ0 , ... , Θθ−1]\\angle \\Theta = [\\Theta_0\\ ,\\ ...\\ ,\\ \\Theta_{\\theta-1}]∠Θ=[Θ0 , ... , Θθ−1] 的边界角度,拆分为 θ\\thetaθ 个指定方向。记存在像素点 xc⃗\\vec{x_c}xc⃗ 的梯度 G⃗(xc⃗)=(Ac, Θc)\\vec{G}(\\vec{x_c}) = (A_c,\\ \\Theta_c)G⃗(xc⃗)=(Ac, Θc) 的方向落于角度区间 [Θa, Θb)[\\Theta_a,\\ \\Theta_b)[Θa, Θb) 内,有: Ac=Wa⋅Aa+Wb⋅AbΘc=Wa⋅Θa+Wb⋅ΘbWa=Θc−ΘaΘb−ΘaWb=Θb−ΘcΘb−Θa {\\displaystyle \\begin{aligned} A_c = W_a & \\cdot A_a + W_b \\cdot A_b \\\\ \\Theta_c = W_a & \\cdot \\Theta_a + W_b \\cdot \\Theta_b \\\\ W_a = \\frac{\\Theta_c - \\Theta_a}{\\Theta_b - \\Theta_a} & \\quad \\quad W_b = \\frac{\\Theta_b - \\Theta_c}{\\Theta_b - \\Theta_a} \\\\ \\end{aligned} } Ac=WaΘc=WaWa=Θb−ΘaΘc−Θa⋅Aa+Wb⋅Ab⋅Θa+Wb⋅ΘbWb=Θb−ΘaΘb−Θc 其中 Wa+Wb=1W_a + W_b = 1Wa+Wb=1 ,按照权重 WaW_aWa 、 WbW_bWb 即可拆分 G⃗(xc⃗)\\vec{G}(\\vec{x_c})G⃗(xc⃗) 数据到 Θa\\Theta_aΘa 、 Θb\\Theta_bΘb 角度分量混合表示。记两个角度方向的分量分别为 Ga⃗\\vec{G_a}Ga⃗ 、 Gb⃗\\vec{G_b}Gb⃗ ,则: Ga⃗=(Wa⋅Ac, Wa⋅Θc)Gb⃗=(Wb⋅Ac, Wa⋅Θc)G⃗(xc⃗)=Ga⃗+Gb⃗ {\\displaystyle \\begin{aligned} \\vec{G_a} =& (W_a \\cdot A_c ,\\ W_a \\cdot \\Theta_c) \\\\ \\vec{G_b} =& (W_b \\cdot A_c ,\\ W_a \\cdot \\Theta_c) \\\\ \\vec{G}(\\vec{x_c}) &= \\vec{G_a} + \\vec{G_b} \\\\ \\end{aligned} } Ga⃗=Gb⃗=G⃗(xc⃗)(Wa⋅Ac, Wa⋅Θc)(Wb⋅Ac, Wa⋅Θc)=Ga⃗+Gb⃗ 显然,以 ∠Θ=[Θ0 , ... , Θθ−1]\\angle \\Theta = [\\Theta_0\\ ,\\ ...\\ ,\\ \\Theta_{\\theta-1}]∠Θ=[Θ0 , ... , Θθ−1] 指定方向的矢量合形式表示, G⃗(xc⃗)\\vec{G}(\\vec{x_c})G⃗(xc⃗) 除了 Θa\\Theta_aΘa 、 Θb\\Theta_bΘb 角度外,其余角度分量为 000 , 有: G⃗(xc⃗)=∠Θ(0, ... ,Wa,Wb, ... ,0) {\\displaystyle \\begin{aligned} \\vec{G}(\\vec{x_c}) &= \\angle \\Theta(0, \\ ...\\ , W_a, W_b,\\ ...\\ ,0) \\\\ \\end{aligned} } G⃗(xc⃗)=∠Θ(0, ... ,Wa,Wb, ... ,0) 由于不需要考虑反向的数据还原,核内采样按照 ∠Θ=[Θ0 , ... , Θθ−1]\\angle \\Theta = [\\Theta_0\\ ,\\ ...\\ ,\\ \\Theta_{\\theta-1}]∠Θ=[Θ0 , ... 
, Θθ−1] 的边界角度的方向矢量合形式求和,即可完成分组内的特征整合。记得到分组的 θ\\thetaθ 维特征向量 Cell⃗\\vec{Cell}Cell⃗ ,则: Cell⃗=∑∠G⃗(xc⃗) {\\displaystyle \\begin{aligned} \\vec{Cell} &= \\sum \\angle \\vec{G}(\\vec{x_c}) \\\\ \\end{aligned} } Cell⃗=∑∠G⃗(xc⃗) 那么现在的问题就是如何分组,或者分为几组了。 当采样核为 n×nn \\times nn×n 时,我们取边界整数点出发过核心 (12n, 12n)(\\tfrac {1}{2}n,\\ \\tfrac {1}{2}n)(21n, 21n) 的连线,加上对角线一起作为分组分割线。 由任意两条相邻分割线间的夹角,构成以核心为原点的角度分组。 所以, ∠Θ=[Θ0 , ... , Θθ−1]\\angle \\Theta = [\\Theta_0\\ ,\\ ...\\ ,\\ \\Theta_{\\theta-1}]∠Θ=[Θ0 , ... , Θθ−1] 代表的正是分割线角度。因此,当不区分夹角及其对角方向时,中心角能够分为 θ=n+1\\theta = n + 1θ=n+1 组,称为 无符号梯度(Unsigned Gradient) 分组。当考虑夹角与对角方向互反时,中心角能够分为 θ=2(n+1)\\theta = 2(n+1)θ=2(n+1) 组,称为 有符号梯度(Signed Gradient) 分组。 采样核一般为 n×n=8×8n \\times n = 8 \\times 8n×n=8×8 大小,此时无符号梯度以方向标记,可分为 999 组即: ∠Θ=[0∘, 20∘, 40∘, 60∘, 80∘, 100∘, 120∘, 140∘, 160∘] {\\displaystyle \\begin{aligned} \\angle \\Theta =& [0^{\\circ},\\ 20^{\\circ},\\ 40^{\\circ},\\ 60^{\\circ},\\ 80^{\\circ},\\ 100^{\\circ},\\ 120^{\\circ},\\ 140^{\\circ},\\ 160^{\\circ}] \\\\ \\end{aligned} } ∠Θ=[0∘, 20∘, 40∘, 60∘, 80∘, 100∘, 120∘, 140∘, 160∘] 而有符号梯度则可分为 181818 组: ∠Θ=[∠Θlt∠Θrb]=[0∘,20∘,40∘,60∘,80∘,100∘,120∘,140∘,160∘180∘,200∘,220∘,240∘,260∘,280∘,300∘,320∘,340∘] {\\displaystyle \\begin{aligned} \\angle \\Theta = \\begin{bmatrix} &\\angle \\Theta_{lt} \\\\ &\\angle \\Theta_{rb} \\end{bmatrix} = \\begin{bmatrix} 0^{\\circ},& 20^{\\circ},& 40^{\\circ},& 60^{\\circ},& 80^{\\circ},& 100^{\\circ},& 120^{\\circ},& 140^{\\circ},& 160^{\\circ} \\\\ 180^{\\circ},& 200^{\\circ},& 220^{\\circ},& 240^{\\circ},& 260^{\\circ},& 280^{\\circ},& 300^{\\circ},& 320^{\\circ},& 340^{\\circ} \\end{bmatrix} \\end{aligned} } ∠Θ=[∠Θlt∠Θrb]=[0∘,180∘,20∘,200∘,40∘,220∘,60∘,240∘,80∘,260∘,100∘,280∘,120∘,300∘,140∘,320∘,160∘340∘] 以无符号梯度的 999 组分组为例,统计只需累计入组即可: 图 3-5 核大小 8x8 的无符号梯度(Unsigned Gradient)分组示意图 随后依次统计分组的采样核内数据。上图数据统计结果如下(概略图): 图 3-6 无符号梯度分组的单组采样核内统计结果示意直方图 统计完毕时,特征向量 Cell⃗\\vec{Cell}Cell⃗ 随即生成完毕。我们以 WθW_{\\theta}Wθ 表示分组的特征向量,在方向 θ\\thetaθ 上的强度大小(即此方向矢量的秩),则对于无符号梯度(Unsigned Gradient)分组: Cell⃗=∑∠G⃗(xc⃗)=Θ⃗(W0∘, ... ,W160∘)∈R9×1 {\\displaystyle \\begin{aligned} \\vec{Cell} &= \\sum \\angle \\vec{G}(\\vec{x_c}) = \\vec{\\Theta}(W_{0^{\\circ}}, \\ ...\\ , W_{160^{\\circ}}) \\in \\mathbb{R}^{9 \\times 1} \\\\ \\end{aligned} } Cell⃗=∑∠G⃗(xc⃗)=Θ⃗(W0∘, ... ,W160∘)∈R9×1 同样,对有符号梯度(Signed Gradient)分组: Cell⃗=∑∠G⃗(xc⃗)=Θ⃗(W0∘, ... ,W160∘, ... ,W340∘)∈R18×1 {\\displaystyle \\begin{aligned} \\vec{Cell} &= \\sum \\angle \\vec{G}(\\vec{x_c}) = \\vec{\\Theta}(W_{0^{\\circ}}, \\ ...\\ , W_{160^{\\circ}}, \\ ...\\ , W_{340^{\\circ}}) \\in \\mathbb{R}^{18 \\times 1} \\\\ \\end{aligned} } Cell⃗=∑∠G⃗(xc⃗)=Θ⃗(W0∘, ... ,W160∘, ... ,W340∘)∈R18×1 至此,完成分组提炼。 这种对数据梯度的蒸馏手段非常重要,因为它不只可以运用于物体识别等情况的中间步骤,也可以被运用于粗糙的运动特征检测。 而从分组的数据得来的分组特征,还需要归一化才能被有效使用。 块归一化 由于分组内梯度矢量的分解叠加有可能会使某个方向上的梯度强度 远超其他方向,因而造成该方向上的灰度(光亮度)变化会极大的影响结果。 这样的影响当然是有利的,但无法相对统一的权重,也会给处理带来大量的不确定性。 如图例: 图 3-7 块归一化说明图例(数据源) 取绿色框中以 n×n=8×8n \\times n = 8 \\times 8n×n=8×8 采样核,经过前几步以无符号梯度(Unsigned Gradient)方式处理,会得到的四个分组: 图 3-8 图例(数据源)绿色框中四个分组特征向量直方图表示 如果能够将这种变化趋势原封不动的保存下来,并缩小尺度到统一标准,就可以实现即保证特征不被不必要的削减,也有足够一致的度量衡。 因此,归一化就是解决办法。 归一化(Normalization) 是将目标数据集,按照总体权重等比放缩到指定区间范围的一种数学工具。通常我们选取当前采样分组包含的数据,即为归一化的目标数据集。组与组间独立归一化。但 块归一化(Block Normalization) 和一般情况下不完全一样,是以 块(Block) 为样本源而非 组(Cell) 样本源本身,来进行归一化处理的。 什么是块(Block)呢? 
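在回答这个问题之前,先把上面分组内逐像素投票的计算用代码固定下来。下面是一段 JavaScript 示意(直方图结构与函数名均为假设),按与相邻两条分割线的夹角比例拆分梯度强度,对应正文中按 W_a、W_b 拆分的思路:

// 单个像素梯度按无符号 9 分组投票:Θ 先折算到 [0°, 180°),
// 再把强度 A 按比例分给左右两个相邻分组(越靠近的分组分得越多)。
function voteUnsignedBins(histogram, magnitude, thetaRad) {
  const binCount = 9;
  const binWidth = 180 / binCount; // 20°
  let deg = (thetaRad * 180 / Math.PI) % 180;
  if (deg < 0) deg += 180;
  const lower = Math.floor(deg / binWidth);   // 左侧分割线编号
  const upper = (lower + 1) % binCount;       // 右侧分割线编号(160° 与 0° 首尾相邻)
  const ratio = (deg - lower * binWidth) / binWidth;
  histogram[lower] += magnitude * (1 - ratio);
  histogram[upper] += magnitude * ratio;
  return histogram;
}

// 用法示意:把一个 8x8 分组内全部像素的 (A, Θ) 依次投入同一个直方图,
// 即可得到该分组的 9 维特征向量 Cell。
const cell = new Float32Array(9);
voteUnsignedBins(cell, 0.8, Math.PI / 6); // A = 0.8, Θ = 30°,权重均分给 20° 与 40° 两组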
块(Block)是对于由一系列分组(Cell)按照一定规则(例如四叉树、标准单元等)组合构成的分组并集单元的称谓。 是组的集合。对块的分法有各种形式,但在方向梯度直方图中,使用的是一种直接切入的固定设置。记块大小为 N×NN \\times NN×N ,块的最小单位为组,则取 N×N=2×2N \\times N = 2 \\times 2N×N=2×2 的固定大小组采样,构成 HOG 的分块。即图例中的绿色方块: 图 3-9 图例(数据源)块划分单一块示意图 同分组一样,分块的目的也是为了更好的将特征数据进行汇总。只不过分块时的基础单元,从分组时的像素梯度矢量,变为了分组特征向量。记分块为 BlockBlockBlock ,分块特征向量为 Block⃗\\vec{Block}Block⃗ 。仍以 ∣target∣1\\vert target \\vert_1∣target∣1 表示归一化操作,有: Block⃗=∣[Cell⃗1, Cell⃗2, Cell⃗3, Cell⃗4]∣1∈R(N×N)⋅θ×1 {\\displaystyle \\begin{aligned} \\vec{Block} &= \\vert [\\vec{Cell}_1,\\ \\vec{Cell}_2,\\ \\vec{Cell}_3,\\ \\vec{Cell}_4] \\vert_1 \\in \\mathbb{R}^{(N \\times N) \\cdot \\theta \\times 1} \\\\ \\end{aligned} } Block⃗=∣[Cell⃗1, Cell⃗2, Cell⃗3, Cell⃗4]∣1∈R(N×N)⋅θ×1 可见,在 2×22 \\times 22×2 大小的固定分块下,分块特征向量 的维度即为分组特征向量方向的 444 倍,即 (N×N)⋅θ(N \\times N) \\cdot \\theta(N×N)⋅θ 。如果我们采用 L-2 归一化(即 L2范数)处理,记归一化因子为 L2L_2L2 ,则: L2=∣Cell⃗1∣2+ ∣Cell⃗2∣2+ ∣Cell⃗3∣2+ ∣Cell⃗4∣2=∑(∣∠G⃗1∣2+ ∣∠G⃗2∣2+ ∣∠G⃗3∣2+ ∣∠G⃗4∣2)Block⃗=1L2[Cell⃗1, Cell⃗2, Cell⃗3, Cell⃗4]∈R(N×N)⋅θ×1 {\\displaystyle \\begin{aligned} L_2 &= \\sqrt{|\\vec{Cell}_1 |^2+\\ |\\vec{Cell}_2 |^2+\\ |\\vec{Cell}_3 |^2+\\ |\\vec{Cell}_4 |^2} \\\\ &= \\sqrt{\\sum (| \\angle \\vec{G}_1|^2 +\\ | \\angle \\vec{G}_2|^2 +\\ | \\angle \\vec{G}_3|^2 +\\ | \\angle \\vec{G}_4|^2 )} \\\\ \\vec{Block} &= \\frac{1}{L_2}[\\vec{Cell}_1,\\ \\vec{Cell}_2,\\ \\vec{Cell}_3,\\ \\vec{Cell}_4] \\in \\mathbb{R}^{(N \\times N) \\cdot \\theta \\times 1} \\end{aligned} } L2Block⃗=√∣Cell⃗1∣2+ ∣Cell⃗2∣2+ ∣Cell⃗3∣2+ ∣Cell⃗4∣2=√∑(∣∠G⃗1∣2+ ∣∠G⃗2∣2+ ∣∠G⃗3∣2+ ∣∠G⃗4∣2)=L21[Cell⃗1, Cell⃗2, Cell⃗3, Cell⃗4]∈R(N×N)⋅θ×1 那么,对图例中的分组进行块归一化到 [0, 1][0,\\ 1][0, 1] 区间,所得如下: 图 3-10 图例(数据源)绿色框对应块的块归一化特征向量结果 之后,按照块大小为步长,对全图分块计算即可得到输入图片的方向梯度直方图运算结果。达成对图片整体和分块区域的运动检测目的。 那么,在具体实践中是怎么做的呢? 同前文中对滤波的处理方法类似,对于此类存在核操作流的方法论,为了充分利用 GPU 并行计算能力,通用思路仍然是抽象为可执行的渲染程序片来交由 GPU 加速。 以索贝尔梯度计算 HOG 的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 程序化 HOG 的关键处理部分,依旧在 像素程序片(Pixel Shader/Fragment Shader) 上。相比之前对于滤波算法的实现,这里 显然复杂得多 : precision mediump float; const float PI = 3.1415927; const int n = 8; const int N = 2; const int SIZE_CV = (n + 1); const int SIZE_BV = /*N * N **/ SIZE_CV; // for orientation weight sum const float ANGLE_GAP = 20.0 * PI / 180.0; const vec3 ANGLE_0 = vec3(cos(ANGLE_GAP * 0.0), sin(ANGLE_GAP * 0.0), 100); // x=cos y=sin z=cot const vec3 ANGLE_20 = vec3(cos(ANGLE_GAP * 1.0), sin(ANGLE_GAP * 1.0), 2.74747742); const vec3 ANGLE_40 = vec3(cos(ANGLE_GAP * 2.0), sin(ANGLE_GAP * 2.0), 1.19175359); const vec3 ANGLE_60 = vec3(cos(ANGLE_GAP * 3.0), sin(ANGLE_GAP * 3.0), 0.57735027); const vec3 ANGLE_80 = vec3(cos(ANGLE_GAP * 4.0), sin(ANGLE_GAP * 4.0), 0.17632698); const vec3 ANGLE_100 = vec3(cos(ANGLE_GAP * 5.0), sin(ANGLE_GAP * 5.0), -0.17632698); const vec3 ANGLE_120 = vec3(cos(ANGLE_GAP * 6.0), sin(ANGLE_GAP * 6.0), -0.57735027); const vec3 ANGLE_140 = vec3(cos(ANGLE_GAP * 7.0), sin(ANGLE_GAP * 7.0), -1.19175359); const vec3 ANGLE_160 = vec3(cos(ANGLE_GAP * 8.0), sin(ANGLE_GAP * 8.0), -2.74747742); const vec3 ANGLE_180 = vec3(cos(ANGLE_GAP * 9.0), sin(ANGLE_GAP * 9.0), -100); const float CELL_TILE_SIZE = 8.0; //pixels const float BLOCK_TILE_SIZE = 2.0; //cells const float HOG_TILE_SIZE = 16.0; //pixels(n*N) 
const float HOG_SHAFT_LENGTH = 14.0; const float HOG_SHAFT_THICKNESS = 0.5; const float HOG_SHAFT_HEAD_RATE = 64.0; const vec3 HOG_COLOR = vec3(1.0, 1.0, 0.0); const float HOG_MIN_MAGNITUDE = 0.1; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform vec2 pixel_bias; uniform mat3 sobel_matrix_x; uniform mat3 sobel_matrix_y; uniform float hog_magnitude_limit; uniform sampler2D target_texture; /* Simple Grey */ float grey(vec3 c) { return 0.299 * c[0] + 0.587 * c[1] + 0.114 * c[2]; } /* Calucate HOG Orient-hog Density (pixel by pixel) */ float hog_density(vec2 target_coord, vec3 field_vector) { vec2 ori_pos = target_coord.xy / pixel_bias; vec2 tile_center = (floor(ori_pos / HOG_TILE_SIZE) + 0.5) * HOG_TILE_SIZE; float magnitude = abs(field_vector.z); if (magnitude > max(HOG_MIN_MAGNITUDE, hog_magnitude_limit)) { float distance = clamp(magnitude * HOG_SHAFT_LENGTH, 0.1, HOG_SHAFT_LENGTH); vec2 normalizer = normalize(field_vector.xy); vec2 tile_offset = ori_pos - tile_center; float density = HOG_SHAFT_THICKNESS / HOG_SHAFT_HEAD_RATE - max( abs(dot(tile_offset, vec2(+normalizer.y, -normalizer.x))), abs(dot(tile_offset, vec2(+normalizer.x, +normalizer.y))) - distance ); return clamp(1.0 + density, 0.0, 1.0); } return 0.0; } /* Calucate Sobel Field at target center */ vec3 sobel_edge_detection(vec2 target_coord) { float gradient_center_x; float gradient_center_y; for (int i = 0; i = seek_to && seek_to >= ANGLE_20.z){ wight_as = abs((seek_to - ANGLE_0.z)/(ANGLE_20.z - ANGLE_0.z)); result[0][0] += field_vector[2] *wight_as; result[0][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_20.z>= seek_to && seek_to >= ANGLE_40.z){ wight_as = abs((seek_to - ANGLE_20.z)/(ANGLE_40.z - ANGLE_20.z)); result[0][1] += field_vector[2] * wight_as; result[0][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_40.z>= seek_to && seek_to >= ANGLE_60.z){ wight_as = abs((seek_to - ANGLE_40.z)/(ANGLE_60.z - ANGLE_40.z)); result[0][2] += field_vector[2] * wight_as; result[1][0] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_60.z>= seek_to && seek_to >= ANGLE_80.z){ wight_as = abs((seek_to - ANGLE_60.z)/(ANGLE_80.z - ANGLE_60.z)); result[1][0] += field_vector[2] * wight_as; result[1][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_80.z>= seek_to && seek_to >= ANGLE_100.z){ wight_as = abs((seek_to - ANGLE_80.z)/(ANGLE_100.z - ANGLE_80.z)); result[1][1] += field_vector[2] * wight_as; result[1][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_100.z>= seek_to && seek_to >= ANGLE_120.z){ wight_as = abs((seek_to - ANGLE_100.z)/(ANGLE_120.z - ANGLE_100.z)); result[1][2] += field_vector[2] * wight_as; result[2][0] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_120.z>= seek_to && seek_to >= ANGLE_140.z){ wight_as = abs((seek_to - ANGLE_120.z)/(ANGLE_140.z - ANGLE_120.z)); result[2][0] += field_vector[2] * wight_as; result[2][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_140.z>= seek_to && seek_to >= ANGLE_160.z){ wight_as = abs((seek_to - ANGLE_140.z)/(ANGLE_160.z - ANGLE_140.z)); result[2][1] += field_vector[2] * wight_as; result[2][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_160.z>= seek_to && seek_to >= ANGLE_180.z){ wight_as = abs((seek_to - ANGLE_160.z)/(ANGLE_180.z - ANGLE_160.z)); result[2][2] += field_vector[2] * wight_as; result[0][0] += field_vector[2] * (1.0 - wight_as); } } } } return result; } /* Calucate Block Feature at target center */ float block_feature_extraction(vec2 target_coord) { float 
orient_hog_density = 0.0; float block_feature_vector[SIZE_BV]; vec2 cell_bias = vec2(n, n) * pixel_bias; mat3 cell_lt = cell_feature_extraction(target_coord); mat3 cell_rt = cell_feature_extraction(target_coord + vec2(cell_bias.x, 0.0)); mat3 cell_lb = cell_feature_extraction(target_coord + vec2(0.0, cell_bias.y)); mat3 cell_rb = cell_feature_extraction(target_coord + cell_bias); float normalization_factor = 0.0; for (int i = 0; i 样例采用 单一流水线过程,我们将几个关键流程节点封装为方法,实现了 HOG 的处理。相对于顶点程序片,像素程序片不太容易理解,还需分步拆开解读。 HOG 片元着色器(Fragment Shader)的细节拆解 首先需要在处理前,进行一部分方法和常量准备。这些 前置工作包含两个部分。 第一部分由纯常量构成。用于辅助实现 方向梯度直方图(HOG)算法 中,各个步骤所使用到的关键恒定参数,有: const float PI = 3.1415927; const int n = 8; const int N = 2; const int SIZE_CV = (n + 1); const int SIZE_BV = /*N * N **/ SIZE_CV; // for orientation weight sum const float ANGLE_GAP = 20.0 * PI / 180.0; const vec3 ANGLE_0 = vec3(cos(ANGLE_GAP * 0.0), sin(ANGLE_GAP * 0.0), 100); // x=cos y=sin z=cot const vec3 ANGLE_20 = vec3(cos(ANGLE_GAP * 1.0), sin(ANGLE_GAP * 1.0), 2.74747742); const vec3 ANGLE_40 = vec3(cos(ANGLE_GAP * 2.0), sin(ANGLE_GAP * 2.0), 1.19175359); const vec3 ANGLE_60 = vec3(cos(ANGLE_GAP * 3.0), sin(ANGLE_GAP * 3.0), 0.57735027); const vec3 ANGLE_80 = vec3(cos(ANGLE_GAP * 4.0), sin(ANGLE_GAP * 4.0), 0.17632698); const vec3 ANGLE_100 = vec3(cos(ANGLE_GAP * 5.0), sin(ANGLE_GAP * 5.0), -0.17632698); const vec3 ANGLE_120 = vec3(cos(ANGLE_GAP * 6.0), sin(ANGLE_GAP * 6.0), -0.57735027); const vec3 ANGLE_140 = vec3(cos(ANGLE_GAP * 7.0), sin(ANGLE_GAP * 7.0), -1.19175359); const vec3 ANGLE_160 = vec3(cos(ANGLE_GAP * 8.0), sin(ANGLE_GAP * 8.0), -2.74747742); const vec3 ANGLE_180 = vec3(cos(ANGLE_GAP * 9.0), sin(ANGLE_GAP * 9.0), -100); const float CELL_TILE_SIZE = 8.0; //pixels const float BLOCK_TILE_SIZE = 2.0; //cells 第二部分则包含常量和辅助方法。用于辅助 HOG 最终结果的图像化显示,有: const float CELL_TILE_SIZE = 8.0; //pixels const float BLOCK_TILE_SIZE = 2.0; //cells const float HOG_TILE_SIZE = 16.0; //pixels(n*N) const float HOG_SHAFT_LENGTH = 14.0; const float HOG_SHAFT_THICKNESS = 0.5; const float HOG_SHAFT_HEAD_RATE = 64.0; const vec3 HOG_COLOR = vec3(1.0, 1.0, 0.0); const float HOG_MIN_MAGNITUDE = 0.1; /* Simple Grey */ float grey(vec3 c) { return 0.299 * c[0] + 0.587 * c[1] + 0.114 * c[2]; } /* Calucate HOG Orient-hog Density (pixel by pixel) */ float hog_density(vec2 target_coord, vec3 field_vector) { vec2 ori_pos = target_coord.xy / pixel_bias; vec2 tile_center = (floor(ori_pos / HOG_TILE_SIZE) + 0.5) * HOG_TILE_SIZE; float magnitude = abs(field_vector.z); if (magnitude > max(HOG_MIN_MAGNITUDE, hog_magnitude_limit)) { float distance = clamp(magnitude * HOG_SHAFT_LENGTH, 0.1, HOG_SHAFT_LENGTH); vec2 normalizer = normalize(field_vector.xy); vec2 tile_offset = ori_pos - tile_center; float density = HOG_SHAFT_THICKNESS / HOG_SHAFT_HEAD_RATE - max( abs(dot(tile_offset, vec2(+normalizer.y, -normalizer.x))), abs(dot(tile_offset, vec2(+normalizer.x, +normalizer.y))) - distance ); return clamp(1.0 + density, 0.0, 1.0); } return 0.0; } 灰度(光亮度)值采用 BT.601 的狭隘区间(Narrow Range) 标准快速计算,运用中也可以替换为均值(部分场景)或根据情况更换其他标准( 如 RGB数据 非采样得原始数据的标准原色格式而来,则因根据转换前的传输格式来选择配套的规格,见上一章)。 注意以 HOG_[xx] 为格式的常量。这些常量被用于计算,上屏显示的无符号梯度(Unsigned Gradient)对应方向上的权重柱形轴。 柱形轴过分块中心,轴的长度和颜色的深浅(即能量密度)代表归一化后的权重大小。而方法计算所得 density 则为当前像素点对应块内位置的能量密度值。显然,密度值只有在轴方向上才存在有效值。另一方面,较小的能量密度也不具有代表性,需要通过 阈值限定进行过滤,此处采用 max(HOG_MIN_MAGNITUDE, hog_magnitude_limit) 进行设置。 准备完成后,就该正式流程的处理了。这里的封装思路,是以 生成的最小结果单元为分割依据 进行的。所以,将 HOG 步骤方法封为一下三个: sobel_edge_detection 针对 像素点(Pixel)梯度矢量 的 索贝尔边界检测 /* Calucate Sobel Field at target center */ vec3 
sobel_edge_detection(vec2 target_coord) { float gradient_center_x; float gradient_center_y; for (int i = 0; i cell_feature_extraction 针对 分组(Cell)特征提取 为结果的 矢量统计合并 /* Calucate Cell Feature at target center */ mat3 cell_feature_extraction(vec2 target_coord) { mat3 result; float bias_unit = float(n-1)/2.0; vec2 ori_pos = target_coord.xy / pixel_bias; vec2 cell_center = (floor(ori_pos / CELL_TILE_SIZE) + 0.5) * CELL_TILE_SIZE; float normalization_factor = 0.0; for (int i = 0; i = seek_to && seek_to >= ANGLE_20.z){ wight_as = abs((seek_to - ANGLE_0.z)/(ANGLE_20.z - ANGLE_0.z)); result[0][0] += field_vector[2] *wight_as; result[0][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_20.z>= seek_to && seek_to >= ANGLE_40.z){ wight_as = abs((seek_to - ANGLE_20.z)/(ANGLE_40.z - ANGLE_20.z)); result[0][1] += field_vector[2] * wight_as; result[0][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_40.z>= seek_to && seek_to >= ANGLE_60.z){ wight_as = abs((seek_to - ANGLE_40.z)/(ANGLE_60.z - ANGLE_40.z)); result[0][2] += field_vector[2] * wight_as; result[1][0] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_60.z>= seek_to && seek_to >= ANGLE_80.z){ wight_as = abs((seek_to - ANGLE_60.z)/(ANGLE_80.z - ANGLE_60.z)); result[1][0] += field_vector[2] * wight_as; result[1][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_80.z>= seek_to && seek_to >= ANGLE_100.z){ wight_as = abs((seek_to - ANGLE_80.z)/(ANGLE_100.z - ANGLE_80.z)); result[1][1] += field_vector[2] * wight_as; result[1][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_100.z>= seek_to && seek_to >= ANGLE_120.z){ wight_as = abs((seek_to - ANGLE_100.z)/(ANGLE_120.z - ANGLE_100.z)); result[1][2] += field_vector[2] * wight_as; result[2][0] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_120.z>= seek_to && seek_to >= ANGLE_140.z){ wight_as = abs((seek_to - ANGLE_120.z)/(ANGLE_140.z - ANGLE_120.z)); result[2][0] += field_vector[2] * wight_as; result[2][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_140.z>= seek_to && seek_to >= ANGLE_160.z){ wight_as = abs((seek_to - ANGLE_140.z)/(ANGLE_160.z - ANGLE_140.z)); result[2][1] += field_vector[2] * wight_as; result[2][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_160.z>= seek_to && seek_to >= ANGLE_180.z){ wight_as = abs((seek_to - ANGLE_160.z)/(ANGLE_180.z - ANGLE_160.z)); result[2][2] += field_vector[2] * wight_as; result[0][0] += field_vector[2] * (1.0 - wight_as); } } } } return result; } block_feature_extraction 针对 分块(Block)特征提取 为结果的 块归一化 /* Calucate Block Feature at target center */ float block_feature_extraction(vec2 target_coord) { float orient_hog_density = 0.0; float block_feature_vector[SIZE_BV]; vec2 cell_bias = vec2(n, n) * pixel_bias; mat3 cell_lt = cell_feature_extraction(target_coord); mat3 cell_rt = cell_feature_extraction(target_coord + vec2(cell_bias.x, 0.0)); mat3 cell_lb = cell_feature_extraction(target_coord + vec2(0.0, cell_bias.y)); mat3 cell_rb = cell_feature_extraction(target_coord + cell_bias); float normalization_factor = 0.0; for (int i = 0; i 考虑到思路连贯性,样例中的实现将所有步骤放在一张纹理过程中处理,且没有对核计算做优化。这会导致每个像素都存在一次 HOG 计算金字塔,而按理来说 一个块内并不需要重复计算。样例中相当于将块内运算重复了 16×1616 \\times 1616×16 次,极大的增加了消耗。 因此,在实际应用中,需要对上文的实现进行改造。 把文中程序片内的各个步骤的方法,分配到不同阶的程序片中,并优化纹理过程。 之后才能被更为高效的予以运用。介于骨干并无不同,此处就不再展开赘述。 经过处理后的最终结果,以能量密度的形式附加到当前像素点的色彩值上,实现最终的图形化展示: void main() { vec3 output_ = only_edge? 
vec3(0) : texture2D(target_texture, fs_texcoord.xy).rgb; float orient_hog_density = block_feature_extraction(fs_texcoord.xy); vec3 hogs_ = orient_hog_density * HOG_COLOR; gl_FragColor = vec4(output_ + hogs_, 1.0); } 现在,整个 HOG 的简易程序片就完成了。 到此为止,方向梯度直方图技术可以初步应用于音视频当中了。 虽然在上文样例的渲染程序片实现过程中,但从普遍意义上来讲,HOG 仍然属于相对高消耗的算法, HOG 提供的方法论更多被应用在 编解码规格制定的时域冗余处理 上。其本身具有一定的 硬件门槛。 HOG 最终产物的用处 假设输入帧长宽为 W×H=256×256W \\times H = 256 \\times 256W×H=256×256 。按照前文采用块大小 2×22 \\times 22×2 ,分组大小 8×88 \\times 88×8 进行处理,则得到方向梯度直方图最终输出结果为包含 16×16=25616 \\times 16 = 25616×16=256 个块特征向量的数据集合。每一个块特征向量由 (2×2)⋅9=36(2 \\times 2) \\cdot 9 = 36(2×2)⋅9=36 维(参数)构成。为了方便描述,我们将输出数据集称为 HOG 数据帧。 HOG 数据帧(HOG Frame)更多被作为经过特征提取后的预处理输入数据,传入目标物体检测等人工智能计算机视觉方向的算法模型。 通过模型获取的物体识别结果后,再利用训练好的目标跟踪模型,或传统目标跟踪算法(诸如:核卷积滤波(KCF [Kernelized Correlation Filter])[18] 、MOSSE 算法等)等,来获取视频流中运动物体在时序上的关联性。 那么,用于判断目标检测结果是否准确的方法,也就是目标检测模型的 损失函数(Loss Function) 是什么呢? Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_3_2.html":{"url":"Chapter_3/Language/cn/Docs_3_3_2.html","title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","keywords":"","body":"3.3.2 朴素目标检测结果度量 - IoU & GIoU 考虑到算法本身需要作为目标检测结果准确性的衡量标准,并用于模型的计算过程。所以不能采用较高复杂程度的算法。而 交并比(IoU [Intersection over Union]) 计算作为相对简单的区域检测算法,则是可被采用的不错方案。 交并比顾名思义,即为交集和并集的比值。 只不过这里的交集和并集,指的是 预测结果(Prediction)对应的预测框(Anchor Box)和标注框(Ground Truth)的交集与并集,记为 I=IntersectionI = IntersectionI=Intersection 和 U=UnionU = UnionU=Union 。 图 3-11 原论文中交并比示意图[19] 如图所示,交并比公式非常简洁(注意 并非 IoU Loss ),可记为: IoU=Intersection(Anchor, Truth)Union(Anchor, Truth)=IU {\\displaystyle \\begin{aligned} IoU &= \\frac{Intersection(Anchor,\\ Truth)}{Union(Anchor,\\ Truth)} = \\frac{I}{U} \\\\ \\end{aligned} } IoU=Union(Anchor, Truth)Intersection(Anchor, Truth)=UI 而根据交并比设计的损失函数,就是交并比损失函数(IoU Loss)。 同其他有关深度学习领域,针对损失函数提出的算法理论一致。IoU Loss 在模型中同样存在两项应用,分别为 前向预测(Forward Prediction) 和 反向传播(Backward Propagation)。 即标准损失的计算和模型梯度迭代的加速。 交并比损失函数(IoU Loss) 根据原论文的设计,IoU 前向扩散作用在 ReLU 激活函数层(ReLU Layer)后,以代替传统物体识别模型采用的 L2L_2L2 损失函数,判断待筛选的预测框是否命中。由于始终有 IoU∈[0, 1]IoU \\in [0, \\ 1]IoU∈[0, 1] ,交并比损失函数可被认为是 p(IoU=1)=1p(IoU = 1) = 1p(IoU=1)=1 的 特殊交叉熵损失函数(cross-entropy Loss),有: IoU Loss=−p⋅ln(IoU)−(1−p)⋅ln(1−IoU)∣p(IoU=1)=1=−ln(IoU) {\\displaystyle \\begin{aligned} IoU \\ \\mathcal{L}oss &= -p \\cdot ln(IoU) - (1 - p) \\cdot ln(1-IoU) | \\quad p(IoU = 1) = 1 \\\\ &= -ln(IoU) \\\\ \\end{aligned} } IoU Loss=−p⋅ln(IoU)−(1−p)⋅ln(1−IoU)∣p(IoU=1)=1=−ln(IoU) 带入交并比实际值,有: IoU Loss=−lnIntersection(Anchor, Truth)Union(Anchor, Truth)=−lnIU {\\displaystyle \\begin{aligned} IoU \\ \\mathcal{L}oss &= -ln \\frac{Intersection(Anchor,\\ Truth)}{Union(Anchor,\\ Truth)} = -ln \\frac{I}{U} \\\\ \\end{aligned} } IoU Loss=−lnUnion(Anchor, Truth)Intersection(Anchor, Truth)=−lnUI 此即为 交并比损失函数。由于 IoU∈[0, 1]IoU \\in [0, \\ 1]IoU∈[0, 1] 有 −ln(IoU)≈1−IoU-ln(IoU) \\approx 1-IoU−ln(IoU)≈1−IoU ,考虑到计算便利性,在条件范围内常用差值代替对数计算。即: IoU Loss≈1−IoUIoU∈[0, 1] {\\displaystyle \\begin{aligned} IoU \\ \\mathcal{L}oss &\\approx 1-IoU \\quad IoU \\in [0, \\ 1] \\\\ \\end{aligned} } IoU Loss≈1−IoUIoU∈[0, 1] 相比 L2L_2L2 损失函数的简单区域差值来衡量命中的方式, IoU 考虑到了 预测框与标准框的平面空间位置关系,并通过对位置的衡量 锁定了两者间的平面位姿独立优化,因而具有更贴合客观的代表性。且在交叉熵类型损失函数(详见下一章)的特性作用下,结果落于单位量化的百分比区间,利于阈值衡量和操作之便。 交并比损失函数(IoU Loss)的反向传播(Backward Propagation) 反向传播(Backward Propagation) 简单来说,是通过当前学习到的参数在参数空间内指定方向的运动趋势,来反相强化或衰减该方向上的参数权重,进而达到更快使模型拟合的数学方法论统称。自 杰弗里·辛顿(Geoffrey Hinton,“深度学习之父”,当代人工智能领域三巨头之一) 教授提出并汇总这一概念以来,持续的被作为深度学习根基理论之一,应用在各类算法的学习过程中。 如果从物理学角度来看,把参与训练的相关模型参数的权重向量比作速度,那么,损失函数的反向传播,就相当于 
速度在各个方向上的某一时刻的加速度。所以,其影响的是权重在方向上的迭代步长变化,即为优化算法的输出。 交并比损失函数的反向传播,为便于称呼,简称 反向交并比(Backward IoU/ IoU Back)。取图 3.3.2-1 说明,记预测框为 x=(xl,xt,xr,xb)x = (x_l, x_t, x_r, x_b)x=(xl,xt,xr,xb) 面积为 XXX ,标注框为 x~=(x~l,x~t,x~r,x~b)\\tilde{x} = (\\tilde{x}_l, \\tilde{x}_t, \\tilde{x}_r, \\tilde{x}_b)x~=(x~l,x~t,x~r,x~b) 面积为 X~\\tilde{X}X~ ,则反向交并比可表示为: IoU Back=∂L∂x=I⋅(∇xX−∇xI)−U⋅∇xIU2⋅IoU=1U⋅∇xX − U+IUI⋅∇xI {\\displaystyle \\begin{aligned} IoU\\ \\mathcal{B}ack &= \\frac{\\partial \\mathcal{L}}{\\partial x} = \\frac{I \\cdot (\\nabla_xX - \\nabla_xI) - U \\cdot \\nabla_xI}{U^2 \\cdot IoU} \\\\ &= \\tfrac{1}{U} \\cdot \\nabla_xX \\ - \\ \\tfrac{U+I}{UI} \\cdot \\nabla_xI \\\\ \\end{aligned} } IoU Back=∂x∂L=U2⋅IoUI⋅(∇xX−∇xI)−U⋅∇xI=U1⋅∇xX − UIU+I⋅∇xI 其中, ∇xX\\nabla_xX∇xX 是 预测框面积关于位置的偏导数(Partial Derivative), ∇xI\\nabla_xI∇xI 是 交集区域面积关于位置的偏导数,有: Iw=min(xl, x~l)+min(xr, x~r)Ih=min(xt, x~t)+min(xb, x~b)∇xX={∂X∂xt(or ∂xb)=xl+xr∂X∂xl(or ∂xr)=xt+xb∇xI={∂I∂xt(or ∂xb)={Iw, if(xtx~t or xbx~b)0,otherwise∂I∂xl(or ∂xr)={Ih, if(xlx~l or xrx~r)0,otherwise {\\displaystyle \\begin{aligned} I_w &= min(x_l,\\ \\tilde{x}_l) + min(x_r,\\ \\tilde{x}_r) \\\\ I_h &= min(x_t,\\ \\tilde{x}_t) + min(x_b,\\ \\tilde{x}_b) \\\\ \\nabla_xX &= { \\begin{cases} \\frac{\\partial X}{\\partial x_t( \\mathbf{or}\\ \\partial x_b)} = x_l + x_r \\\\ \\frac{\\partial X}{\\partial x_l( \\mathbf{or}\\ \\partial x_r)} = x_t + x_b \\end{cases} } \\\\ \\nabla_xI &= { \\begin{cases} \\frac{\\partial I}{\\partial x_t( \\mathbf{or}\\ \\partial x_b)} = { \\begin{cases} I_w &, \\ if ( x_t IwIh∇xX∇xI=min(xl, x~l)+min(xr, x~r)=min(xt, x~t)+min(xb, x~b)=⎩⎪⎨⎪⎧∂xt(or ∂xb)∂X=xl+xr∂xl(or ∂xr)∂X=xt+xb=⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧∂xt(or ∂xb)∂I={Iw0, if(xtx~t or xbx~b),otherwise∂xl(or ∂xr)∂I={Ih0, if(xlx~l or xrx~r),otherwise 带入求得 IoU BackIoU\\ \\mathcal{B}ackIoU Back 值,作用于 优化算法的梯度变换,如 自适应动量算法(Adam) 等。来发挥相应作用。 交并比损失函数(IoU Loss)的简单 C++ 语言实现 到这里,我们就可以根据基本情况来做一下交并比的代码实现了。由于需要进行一些基本的矩阵运算,我们选择采用引入 轻量级的 GLM(GL Mathematics) 开源库,来协助完成基本工作。 我们选择 GLM 库的原因,是因为其可以通过纯粹的包含头文件的方式,简便轻巧的启动包含基本图形矩阵数据结构和方法的完整库功能。在其开源协议保证下,非常适合运用于大部分工程项目。如果需要也可以自己分装部分算法和操作。例如在某些场景下,我们需要计算物体体积方块区域,到视窗平面上的投影位置: #include #include \"stdio.h\" #include \"math.h\" typedef glm::vec2 Vector_2f; typedef glm::vec3 Vector_3f; typedef glm::vec4 Vector_4f; typedef glm::mat2 Matrix_2x2f; typedef glm::mat3 Matrix_3x3f; typedef glm::mat4 Matrix_4x4f; #define XC_PI 3.14159265358979323846 #define XC_RADIAN(d_) (XC_PI * d_ / 180.0f) #define XC_VECTOR_NORMALIZE(v_) glm::normalize(v_) #define XC_VECTOR_CROSS(vl_, vr_) glm::cross(vl_, vr_) #define XC_VECTOR_DOT(vl_, vr_) glm::dot(vl_, vr_) #define XC_MATRIX_INVERSE(m_) glm::inverse(m_) #define XC_MATRIX_TRANSPOSE(m_) glm::transpose(m_) #define XC_MATRIX_DOT(ml_, mr_) dot_m4x4(ml_, mr_) #define XC_V4_M44_DOT(vl_, mr_) dot_v4_m4x4(vl_, mr_) Vector_4f dot_v4_m4x4(Vector_4f v4_, Matrix_4x4f m4x4_) { return m4x4_[0] * v4_[0] + m4x4_[1] * v4_[1] + m4x4_[2] * v4_[2] + m4x4_[3] * v4_[3]; } Matrix_4x4f dot_m4x4(Matrix_4x4f ml_, Matrix_4x4f mr_) { Matrix_4x4f result_; result_[0] = mr_[0] * ml_[0][0] + mr_[1] * ml_[0][1] + mr_[2] * ml_[0][2] + mr_[3] * ml_[0][3]; result_[1] = mr_[0] * ml_[1][0] + mr_[1] * ml_[1][1] + mr_[2] * ml_[1][2] + mr_[3] * ml_[1][3]; result_[2] = mr_[0] * ml_[2][0] + mr_[1] * ml_[2][1] + mr_[2] * ml_[2][2] + mr_[3] * ml_[2][3]; result_[3] = mr_[0] * ml_[3][0] + mr_[1] * ml_[3][1] + mr_[2] * ml_[3][2] + mr_[3] * ml_[3][3]; return result_; } 此处我们简单的实现了两个快速算法,用于协助我们完成目标 4×14 \\times 14×1 向量与 4×44 \\times 44×4 矩阵的点乘,和两个 4×44 \\times 44×4 矩阵的点乘。 其实类似的快速算法已在库内有封装,此处仅是用于说明 GLM 的一些基本用法。 
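为了更直观地体现上述 GLM 辅助方法所服务的场景(即前文提到的"把物体体积方块区域投影到视窗平面"),下面补充一个最小示意:用 glm::perspective 与 glm::lookAt 构造投影、观察矩阵,再把包围盒的某个角点投影到视窗坐标。其中 project_to_viewport 等名称与具体参数均为演示用的假设写法,并非固定接口;最后一步的矩阵与向量乘法也可等效替换为前文封装的 XC_V4_M44_DOT 宏:

```cpp
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <cstdio>

// 演示用(假设命名):把世界坐标系中的一点投影到 W x H 的视窗平面上
// 流程为 裁剪空间 -> 透视除法得到 NDC -> [-1, 1] 区间换算到视窗像素坐标
static glm::vec2 project_to_viewport(const glm::vec3 &world_pos,
                                     const glm::mat4 &view,
                                     const glm::mat4 &projection,
                                     float viewport_w, float viewport_h) {
    glm::vec4 clip = projection * view * glm::vec4(world_pos, 1.0f); // 裁剪空间坐标
    glm::vec3 ndc  = glm::vec3(clip) / clip.w;                       // 透视除法 -> NDC
    return glm::vec2((ndc.x * 0.5f + 0.5f) * viewport_w,
                     (ndc.y * 0.5f + 0.5f) * viewport_h);
}

int main() {
    glm::mat4 projection = glm::perspective(glm::radians(60.0f), 16.0f / 9.0f, 0.1f, 100.0f);
    glm::mat4 view       = glm::lookAt(glm::vec3(0.0f, 0.0f, 5.0f),   // 相机位置
                                       glm::vec3(0.0f, 0.0f, 0.0f),   // 观察目标
                                       glm::vec3(0.0f, 1.0f, 0.0f));  // 上方向
    glm::vec3 box_corner(1.0f, 1.0f, 0.0f);                           // 包围盒的一个角点
    glm::vec2 p = project_to_viewport(box_corner, view, projection, 1920.0f, 1080.0f);
    printf("projected: (%.1f, %.1f)\n", p.x, p.y);
    return 0;
}
```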
不过,对于交并比的代码工程化来说,并不需要这么复杂: #include #include \"stdio.h\" #include \"math.h\" typedef glm::vec2 Vector_2f; typedef glm::vec4 Vector_4f; bool static IoU_simple(Vector_4f anchor_box_, Vector_4f ground_box_, float threshold_ = 0.8f) { float M_area_, T_area_, I_area_, U_area_; float IoU_mark_; { Vector_2f I_lt = { MAX(anchor_box_[0], ground_box_[0]), MAX(anchor_box_[1], ground_box_[1]) }; Vector_2f I_rb = { MIN(anchor_box_[2], ground_box_[2]), MIN(anchor_box_[3], ground_box_[3]) }; if (I_rb.x threshold_); } 上面的简短过程,就是整个交并比的 C++ 语言封装了。可见易于迁移。 IoU 的缺点与 GIoU 的改进 交并比损失函数并非是没有缺陷的。 一个显而易见的问题就是 IoU 无法评估预测框和标注框无交集区域时,预测框的优劣程度(梯度消失)。 这所造成的直接问题就是,当 无交集情况出现,我们将无法只通过 IoU 损失函数,来使预测框快速的向标注框方向运动。从而导致数据浪费并产生不准确的结果,且有可能使模型陷入局部解而导致停滞。 2019 年的 CVPR 上,来自斯坦福大学的研究团队以交并比为基础,提出了 IoU 的改进版 通用交并比(GIoU [Generalized Intersection over Union])算法 [20] 。解决了无交集的判断问题。 GIoU 采用的处理办法为,在原有 IoU 计算的基础上,引入预测框与标注框区域所构成的最小外接矩形,即 两者的最小外接闭包(smallest enclosing convex) 参与损失函数计算,来辅助量化两者之间的远近到权重迭代中, 记为 C=ConvexC = ConvexC=Convex 。 图 3-12 红框即为 IoU 图例中,I 和 U 的最小外接矩形 改进后的通用交并比公式 同样非常简洁 (注意 并非 GIoU Loss ),可记为: GIoU=IoU−∣C−(A∪B)∣∣C∣=IoU−∣C−U∣∣C∣ {\\displaystyle \\begin{aligned} GIoU &= IoU - \\frac{|C - (A \\cup B)|}{|C|} = IoU - \\frac{|C - U|}{|C|} \\\\ \\end{aligned} } GIoU=IoU−∣C∣∣C−(A∪B)∣=IoU−∣C∣∣C−U∣ 从公式可知,当 预测框与标注框不存在交集时, U=∣A∪B∣=0→IoU=0U = |A \\cup B| = 0 \\rightarrow IoU = 0U=∣A∪B∣=0→IoU=0 有: GIoU=IoU−C−0C=−1 {\\displaystyle \\begin{aligned} GIoU &= IoU - \\frac{C-0}{C} = -1 \\\\ \\end{aligned} } GIoU=IoU−CC−0=−1 当 预测框与标注框完全重合时, I=∣A∩B∣=∣A∪B∣=U→IoU=1I = |A \\cap B| = |A \\cup B| = U \\rightarrow IoU = 1I=∣A∩B∣=∣A∪B∣=U→IoU=1 有: GIoU=IoU−C−UC=IoU−0C=1 {\\displaystyle \\begin{aligned} GIoU &= IoU - \\frac{C-U}{C} = IoU - \\frac{0}{C} = 1 \\\\ \\end{aligned} } GIoU=IoU−CC−U=IoU−C0=1 基于此,GIoU 的取值范围为 GIoU∈[−1, +1]GIoU \\in [-1, \\ +1]GIoU∈[−1, +1] 。 通用交并比损失函数(GIoU Loss) GIoU 本质是一种对 IoU 算法的 泛化补充,所以在损失函数 GIoU LossGIoU \\ \\mathcal{L}ossGIoU Loss 的表达上,直接采用 GIoU 代替 IoU 作为影响因子即可。有: GIoU Loss=−ln(GIoU)≈1−GIoUGIoU∈[−1, 1] {\\displaystyle \\begin{aligned} GIoU \\ \\mathcal{L}oss & = -ln(GIoU) \\approx 1-GIoU \\quad GIoU \\in [-1, \\ 1] \\\\ \\end{aligned} } GIoU Loss=−ln(GIoU)≈1−GIoUGIoU∈[−1, 1] 同理,记 ∇xX\\nabla_xX∇xX 是预测框面积关于位置的偏导数(Partial Derivative), ∇xX~\\nabla_x\\tilde{X}∇xX~ 是标注框面积关于位置的偏导数(Partial Derivative), ∇xI\\nabla_xI∇xI 是交集区域面积关于位置的偏导数,有: GIoU Back=∂L∂x=∂LIoU∂x+∂LUoC∂x=I⋅(∇xX−∇xI)−U⋅∇xIU2⋅IoU+U⋅(∇xX+∇xX~)−C⋅(∇xX−∇xI)C⋅U=1U⋅∇xX − U+IUI⋅∇xI + 1U⋅∇xI +1C⋅∇xX~ − C−UCU⋅∇xX=1C⋅∇xX − 1I⋅∇xI + 1C⋅∇xX~ {\\displaystyle \\begin{aligned} GIoU\\ \\mathcal{B}ack &= \\frac{\\partial \\mathcal{L}}{\\partial x} = \\frac{\\partial \\mathcal{L}_{IoU}}{\\partial x} + \\frac{\\partial \\mathcal{L}_{UoC}}{\\partial x} \\\\ &= \\frac{I \\cdot (\\nabla_xX - \\nabla_xI) - U \\cdot \\nabla_xI}{U^2 \\cdot IoU} + \\frac{U \\cdot (\\nabla_xX + \\nabla_x\\tilde{X}) - C \\cdot (\\nabla_xX - \\nabla_xI)}{C \\cdot U} \\\\ &= \\tfrac{1}{U} \\cdot \\nabla_xX \\ - \\ \\tfrac{U+I}{UI} \\cdot \\nabla_xI \\ + \\ \\tfrac{1}{U} \\cdot \\nabla_xI \\ + \\tfrac{1}{C} \\cdot \\nabla_x\\tilde{X} \\ - \\ \\tfrac{C-U}{CU} \\cdot \\nabla_xX \\\\ &= \\tfrac{1}{C} \\cdot \\nabla_xX \\ - \\ \\tfrac{1}{I} \\cdot \\nabla_xI \\ + \\ \\tfrac{1}{C} \\cdot \\nabla_x\\tilde{X} \\\\ \\end{aligned} } GIoU Back=∂x∂L=∂x∂LIoU+∂x∂LUoC=U2⋅IoUI⋅(∇xX−∇xI)−U⋅∇xI+C⋅UU⋅(∇xX+∇xX~)−C⋅(∇xX−∇xI)=U1⋅∇xX − UIU+I⋅∇xI + U1⋅∇xI +C1⋅∇xX~ − CUC−U⋅∇xX=C1⋅∇xX − I1⋅∇xI + C1⋅∇xX~ 而 标注框在单次迭代中是常量值,即 ∇xX~=0\\nabla_x\\tilde{X} = 0∇xX~=0 代入: GIoU Back=1C⋅∇xX − 1I⋅∇xI {\\displaystyle \\begin{aligned} GIoU\\ \\mathcal{B}ack &= 
\\tfrac{1}{C} \\cdot \\nabla_xX \\ - \\ \\tfrac{1}{I} \\cdot \\nabla_xI \\end{aligned} } GIoU Back=C1⋅∇xX − I1⋅∇xI 显然 GIoU 的反向传播计算相比 IoU 更为快捷有效。这也是其 通用性 的体现之一。 通用交并比损失函数(GIoU Loss)的简单 C++ 语言实现 万事具备,现在只需要代码实现 GIoU 算法即可,仍然非常便捷。只需在原 IoU 算法上补充改进部分即可: #include #include \"stdio.h\" #include \"math.h\" typedef glm::vec2 Vector_2f; typedef glm::vec4 Vector_4f; bool static GIoU_simple(Vector_4f anchor_box_, Vector_4f ground_box_, float threshold_ = 0.8f) { float M_area_, T_area_, I_area_, U_area_, C_area_; float IoU_mark_, GIoU_mark_; { Vector_2f I_lt = { MAX(anchor_box_[0], ground_box_[0]), MAX(anchor_box_[1], ground_box_[1]) }; Vector_2f I_rb = { MIN(anchor_box_[2], ground_box_[2]), MIN(anchor_box_[3], ground_box_[3]) }; if (I_rb.x threshold_); } 完成 GIoU 算法的程序化封装。 GIoU 的缺点与 IoU 算法族的发展 那么,GIoU 算法是否依旧存在缺陷呢? 虽然 GIoU 可以适度的缓解无交集情况的梯度消失问题,但 并不能加速当预测框完整包含标注框时的梯度迭代。此时 GIoU 算法,会因为最小外接矩形等同于并集 的缘故,退化为 IoU 算法。从而无法起到有向加速梯度趋向更贴合标注大小的目的。 图 3-13 预测框(绿)包含标注框时 GIoU 退化为 IoU 示意图[20] 针对这种情形,后续的一些研究试图通过引入 框中心点(DIoU [Distance-IoU]) [21] ,结合 长宽一致性(CIoU [Complete-IoU]) [21] ,并在中心点基础上 进一步优化损失函数的设计(EIoU [Efficient-IoU]) [22] 来解决此问题。虽然取得了不错的效果,但算法复杂度也有较大变化,考虑到实际工程情况取舍可以酌情选用,本书不再展开讲解。 几种算法的对比结果如表 《当前主流 IoU 算法族基于 COCO val-2017 数据集的对比结果》 所示 [22],仅供参考: 进行到这里,在一些耗时训练之后,我们就能够得到一个静态的物体识别算法模型了。 由于静态模型不需要持续迭代,通过直接取模型参数或者接入其他成型的推理引擎,即可完成对指定关注物体的识别操作。 需要注意的是,目前训练所得的 简易模型,还不能在 不经过辅助方法 的情况下,自主完成锁定需要检测的物体。模型只能用于判断某一个给定检测范围(检测框)内的数据,是否属于被用于训练录入的标签物体,并给出命中率。 因此,依旧需要人为提供用于辅助锁定检测目标的方法。 配合检测所得命中率经过阈值筛选最终结果,得到其所处像素位置。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_3_3.html":{"url":"Chapter_3/Language/cn/Docs_3_3_3.html","title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","keywords":"","body":"3.3.2 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window) 分步滑动窗口(Simple Sliding Window) 是一种常用的辅助锁定检测目标的手段。其优势在于,简单易行且精度可控。 作为一个经典的工具范式,分步滑动窗口被广泛应用于深度学习相关的特征提取、语义分割、物体检测、物体识别等各种场合。前面的章节中使用 HOG 提取特征向量时的卷积核操作,其中卷积核就可以被认为是一个步长等于窗口大小的滑动窗口。本质上,滑动窗口和卷积核只是不同视角下对同种数学工具的不同描述而已。 滑动窗口 实则为一个泛化的概念,而称谓上的差异,主要体现在狭义的分步滑动窗口更注重强调概念上的步长选择。例如中科院就从变步长角度出发,提出了一种基于滑动窗口捕获图像信息的分批量化整合空间结构局部相关性的视觉 Transformer 基础模型 SimViT [23] 。滑动窗口之名,仅用于区分关注点的差异,可见一斑。 图 3-14 滑动窗口在 SimViT 中的运用[23] 我们日常工程中,在已经有可以被部署的物体检测模型阶段之后,可用滑动窗口锁定随时间轴变化而发生运动的目标。 方法本身有三个关键概念,分别是:窗口大小(Window Size) 、滑动步长(Sliding Step) 、采样层级(Sample Level)。 窗口大小(Window Size) 即 滑动窗口的空间属性,等价于卷积核大小的意义。在二维情况下通常指由 宽(Width)和高(Height)组成的矩形所围成的闭包内区域,记为 Size=(W,H)Size = (W,H)Size=(W,H) 。 滑动步长(Sliding Step) 即 滑动窗口的运动属性,代表窗口在维度空间内的移动状态。在二维情况下则分为 横向(Horizontal) 和 纵向(Vertical) 两个方向。一般在大多数工程场景下,都会选择 速度为常量取值的匀速步长(Uniform Step),且 优先横向扫描(Transverse Scaning),记为 Step=(u,v)Step = (u,v)Step=(u,v) 。 采样层级(Sample Level) 即 原数据的缩放(提取)层级,如 SimViT 的图例中,就可以被认为在窗口大小恒定情况下,利用 MCSA 注意力激励算法向上采样,构建了双层(2-Level)的变步长滑动窗口单元,记为 Level=(l)Level = (l)Level=(l) ,有: Level=(l)=Subsampling+Upsampling+1 Level = (l) = Subsampling + Upsampling + 1 Level=(l)=Subsampling+Upsampling+1 由于本身是通过设定大小的窗口 滑动筛选过滤,因此窗口的大小是否 贴合被检测目标的大小,会较大程度上影响最终判定结果。但也需要均衡算力消耗。假设当前用于检测的图像大小为 (Img_W, Img_H)(Img\\_W,\\ Img\\_H)(Img_W, Img_H) ,一套工程上的经验方法计算方式如下: WParams={Size=(W,H)=(⌊Img_W2⌋+1, ⌊Img_H2⌋+1)Step=(u,v)=(Img_Wlv⋅W, Img_Hlv⋅H)Level=(lv),lv∈[1, 3] {\\displaystyle \\begin{aligned} {WParams} = { \\begin{cases} Size &= (W,H) = ( \\lfloor \\tfrac{Img\\_W}{2} \\rfloor + 1,\\ \\lfloor \\tfrac{Img\\_H}{2} \\rfloor + 1) \\\\ Step &= (u,v) = ( \\tfrac{Img\\_W}{lv \\cdot W},\\ \\tfrac{Img\\_H}{lv \\cdot H}) \\\\ Level &= (lv) ,\\quad lv \\in [1,\\ 3 ] \\end{cases} } \\\\ \\end{aligned} } 
WParams=⎩⎪⎨⎪⎧SizeStepLevel=(W,H)=(⌊2Img_W⌋+1, ⌊2Img_H⌋+1)=(u,v)=(lv⋅WImg_W, lv⋅HImg_H)=(lv),lv∈[1, 3] 代入图像大小获得配置,来快速获取包含完整被检测物体的闭包,方便模型处理得到目标实际区域,并工程缩减模型的输入。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_4.html":{"url":"Chapter_3/Language/cn/Docs_3_4.html","title":"3.4 空域冗余控制 - 基础光流算法与色度压缩","keywords":"","body":"3.4 空域冗余控制 - 基础光流算法与色度压缩 介于上一节分析的时域冗余性质可以得知,时空本身就是紧密相联的。时域冗余的压缩,主要体现于从覆盖整个数据过程的更广视角,来处理宏观上的实际物理物体运动所产生的信息。所以这里的 时域(Time Domain)冗余,指的是 广时空域(Full Spatiotemporal Domain)物体冗余(Objects Redundancy)。而我们 这里所指的空域(Spacial Domain)冗余,可以认为是 相较于时域(Time Domain)的整个数据过程的广度,在单一极短(如前后几帧)的范围内,更细节的像素运动情况的处理,即 狭时空域(Narrow Spatiotemporal Domain)像素冗余(Pixels Redundancy)。 依赖新兴的人工智能方面的运用。广时空域冗余的处理当下虽处于起步阶段,但在标准工程层面探索,如新一代的编解码规格(VVC、MPAI 等)制定获得时续具有关联性的运动区域信息中,已有提案。虽然目前还无法确定最终是否会被采用。其所代表的新一代编解码规格对时域冗余的处理思路,仍然可被有效的借鉴于后续标准确立。这也意味着,传统编解码手段的未来发展方向,需要与人工智能领域在更为基础的方面相结合。必然不可避免需要多级模型的联动。 显而易见,为了保证多级模型的效率,大多数诸如 HOG 在内的一二维信号数据的前处理工作,就需要在模型外解决。而以往这些处理,仅被用于在应用层的具体某些功能过程(比如人脸识别、特征点蒙皮等)的数据准备工作,并未触及到编解码工程的核心区域(不过现在已有一些编解码框架,在利用了这些特性来做相关实践了),因此总是以单元化的单个功能的形式出现。在利用模型针对时域(广)压缩的可能性出现后,部分模型处理结果的简单重复判断过程,可以结合空域频域(如光流运动检测、频域动态分析等)的其他手段,转为由量化的传统算法单元达成。届时整体前后向反馈的系统化工作,会需要提升到音视频工程层面来协助解决。直至模型的推理引擎或算法对应算子的工程标准能够一定程度的统一,从而作为基础功能的一部分,下沉至整体编解码器的规格配置。而这将是一个漫长的过程。 所以,当下必不可少的, 会要求音视频工程师对深度学习(DL [Deep Learning])为代表的机器学习,有一定程度的基础了解和认知。 本书会在第四章节,对这部分的基础知识进行阐述。而现在,让我们回到剩余的域中冗余处理。 空域(指狭时空域,之后若无特别说明则统一按此简化表述) 和频域冗余,在编解码中已有更为成熟的方法论积累。 空域冗余目前的主流处理思路,是在传统块矢量预测、运动补偿的基础上,从更精细的尺度,基于对近似像素前后相邻时间段内的漂移情况分析来进行一定程度的预估。通过块内运动矢量来测算一段时间内,指定空间范围像素亮度值(灰度值)变化。从而使之只需要保存矢量信息,即可适当完成空域信息的还原。 在分块上基于运动矢量推导,而像素则常采用光流法完成。分块处理和规格强相关,我们将在后续编解码规格分析中再行展开。现在让我们只关注细部。 那么什么是 光流(Optical Flow) 和 光流法(Methods of Optical Flow) 呢? 
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_4_1.html":{"url":"Chapter_3/Language/cn/Docs_3_4_1.html","title":"3.4.1 传统光流法(Classic Optical Flow Methods)","keywords":"","body":"3.4.1 传统光流法(Classic Optical Flow Methods) 在 计算机视觉(Computer Vision) 体系中,光流(Optical Flow) 指的是场景内像素点在观察者所观察场景空间视角下的瞬时相对运动速度。 光流法(Methods of Optical Flow) 即是利用场景序列间的像素时域运动与相邻像素相关性变化,构建前后场景间像素对应关系的数学模型,完成像素运动信息推算的方法。 光流是一个基于观察者的相对概念,并不能完全覆盖真实的物体运动情况。在由二维图像按时序组成的视频中,采样自原三维空间的抽象像素,其三维运动矢量会被投影到观察者的视窗平面上,转为运动矢量在视窗平面上的二维投影矢量。因此,为了便于区分,往往将原三维空间三维运动矢量全体组成的矢量空间称为 三维光流场(3D Optical Flow Field),简称 光动场(OMF [Optical Momentum Field])。而把视窗平面全体投影矢量构成的矢量平面称为 二维光流场(2D Optical Flow Field),简称 光流场(OFF [Optical Flow Field])。 观察者、光动场、光流场三者的关系如下图所示: 图 3-15 观察者、光动场、光流场投射变化视图[24] 在使用光流法前,首先需要量化光流的表达。 工程上通常选用生物光学的 梯度光流公式族(Gradient-Based Optical Flow Method) 来作为衡量光流的基本数学描述。因此,梯度光流法也被称为 基本光流法(Baseline Methods of Optical Flow) [25] 。 基本光流公式(Basic Gradient-Based Optical Formula) 基本光流公式(Basic Gradient-Based Optical Formula) 也称为 基本光流约束(Basic Optical Constraint),是所有传统梯度光流法的根基,提供了光流与光流场在时序上的基础关系,并构建了通用的基本假设。分别是: 灰度不变假设,即时域稳定,每一个像素点,灰度值不随时间发生改变; 光流场内可导,即空域稳定,每一个像素与其相邻区域,像素的光流场变化是连续的; 这两个假设决定了在此条件下,每个光动场内抽象像素和其投影光流场内像素,在光流运动上的时空稳定性。 记在 ttt 时刻的某位于 p=(x, y)p = (x,\\ y)p=(x, y) 的像素点,存在平面瞬时速度 v⃗=(u, v)\\vec{v} = (u,\\ v)v⃗=(u, v) 即光流。取 I(p, t)I(p,\\ t)I(p, t) 代表对应像素点的灰度值,则根据条件,单位时间变化有: I(p, t)=I(p+v⃗, t+1) {\\displaystyle \\begin{aligned} I(p,\\ t) = I(p + \\vec{v},\\ t+1) \\\\ \\end{aligned} } I(p, t)=I(p+v⃗, t+1) 当 不限制时间流向,自 ttt 时刻经历 Δt\\Delta tΔt 到 t1=t+Δtt_1 = t + \\Delta tt1=t+Δt ,存在 I(p, t)=I(p+v⃗, t+Δt)I(p,\\ t) = I(p + \\vec{v},\\ t + \\Delta t) I(p, t)=I(p+v⃗, t+Δt) 有: {I(x, y, t)=I(x+Δx,y+Δy,t+Δt)v⃗(u, v)=(ΔxΔt, ΔyΔt) {\\displaystyle \\begin{aligned} { \\begin{cases} I(x,\\ y,\\ t) &= I (x + \\Delta x,y + \\Delta y,t + \\Delta t) \\\\ \\vec{v}(u,\\ v) & = (\\tfrac{\\Delta x}{\\Delta t}, \\ \\tfrac{\\Delta y}{\\Delta t}) \\end{cases} } \\\\ \\end{aligned} } {I(x, y, t)v⃗(u, v)=I(x+Δx,y+Δy,t+Δt)=(ΔtΔx, ΔtΔy) 则根据 泰勒级数(Taylor series) 展开,有: I(x+Δx,y+Δy,t+Δt)=I(x, y, t) + ∂I∂x⋅Δx + ∂I∂y⋅Δy + ∂I∂t⋅Δt + ε=I(x, y, t) + ∂I∂x⋅u⋅Δt + ∂I∂y⋅v⋅Δt + ∂I∂t⋅Δt + ε {\\displaystyle \\begin{aligned} I (x + \\Delta x,y + \\Delta y,t + \\Delta t) &= I(x,\\ y,\\ t) \\ +\\ \\tfrac{ \\partial I}{\\partial x} \\cdot \\Delta x \\ +\\ \\tfrac{ \\partial I}{\\partial y} \\cdot \\Delta y \\ +\\ \\tfrac{ \\partial I}{\\partial t} \\cdot \\Delta t \\ +\\ \\varepsilon \\\\ &= I(x,\\ y,\\ t) \\ +\\ \\tfrac{ \\partial I}{\\partial x} \\cdot u \\cdot \\Delta t \\ +\\ \\tfrac{ \\partial I}{\\partial y} \\cdot v \\cdot \\Delta t \\ +\\ \\tfrac{ \\partial I}{\\partial t} \\cdot \\Delta t \\ +\\ \\varepsilon \\\\ \\end{aligned} } I(x+Δx,y+Δy,t+Δt)=I(x, y, t) + ∂x∂I⋅Δx + ∂y∂I⋅Δy + ∂t∂I⋅Δt + ε=I(x, y, t) + ∂x∂I⋅u⋅Δt + ∂y∂I⋅v⋅Δt + ∂t∂I⋅Δt + ε 其中 ε\\varepsilonε 为泰勒展式的高阶无穷小项,它代表了影响灰度不变假设中灰度值的实际样本噪音和量化引入误差,纳入负号,显然我们期望: ∂I∂x⋅u⋅Δt + ∂I∂y⋅v⋅Δt + ∂I∂t⋅Δt + ε=0⇒ε=∂I∂x⋅u + ∂I∂y⋅v + ∂I∂t→0 {\\displaystyle \\begin{aligned} \\tfrac{ \\partial I}{\\partial x} \\cdot u \\cdot \\Delta t \\ +\\ &\\tfrac{ \\partial I}{\\partial y} \\cdot v \\cdot \\Delta t \\ +\\ \\tfrac{ \\partial I}{\\partial t} \\cdot \\Delta t \\ +\\ \\varepsilon = 0 \\\\ &\\Rightarrow \\\\ \\varepsilon =\\tfrac{ \\partial I}{\\partial x} \\cdot u \\ &+\\ \\tfrac{ \\partial I}{\\partial y} \\cdot v \\ +\\ \\tfrac{ \\partial I}{\\partial t} \\rightarrow 0 \\\\ \\end{aligned} } ∂x∂I⋅u⋅Δt + ε=∂x∂I⋅u ∂y∂I⋅v⋅Δt + ∂t∂I⋅Δt + ε=0⇒+ 
∂y∂I⋅v + ∂t∂I→0 上式中 ∂I∂x\\tfrac{ \\partial I}{\\partial x}∂x∂I 、 ∂I∂y\\tfrac{ \\partial I}{\\partial y}∂y∂I 、 ∂I∂t\\tfrac{ \\partial I}{\\partial t}∂t∂I 是 I(p, t)=I(x, y, t)I(p,\\ t) = I(x,\\ y,\\ t)I(p, t)=I(x, y, t) 分别在三个参数方向的偏导数,记 ∇xI=∂I∂x\\nabla_xI = \\tfrac{ \\partial I}{\\partial x}∇xI=∂x∂I 、 ∇yI=∂I∂y\\nabla_yI = \\tfrac{ \\partial I}{\\partial y}∇yI=∂y∂I 、 ∇tI=∂I∂t\\nabla_t I = \\tfrac{ \\partial I}{\\partial t}∇tI=∂t∂I 。则原等式就相当于: ε=I′(x, y)⋅v⃗ + ∇tI=∇pI⋅v⃗ + ∇tI→0 {\\displaystyle \\begin{aligned} \\varepsilon = I{'}(x,\\ y) \\cdot \\vec{v} \\ +\\ \\nabla_t I = \\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I \\rightarrow 0 \\\\ \\end{aligned} } ε=I′(x, y)⋅v⃗ + ∇tI=∇pI⋅v⃗ + ∇tI→0 这就是 基本光流公式 了。 可见当 ttt 确定时,想要求得指定像素点 ppp 的光流 v⃗=(u, v)\\vec{v} = (u,\\ v)v⃗=(u, v) ,单凭基本约束是不够的。因此,必须通过其他的方式引入新的约束条件来进行光流的求解。最容易联想到的,就是通过已有的空域图像信息来进行限制。由此,根据采用空域信息量的方法,传统梯度光流法被分为了 稠密光流法(Dense Optical Flow Methods) 和 稀疏光流法(Sparse Optical Flow Methods)。 稠密光流法(Dense Optical Flow Methods),即 全局光流法(Global Optical Flow Methods),指引入的补充约束需要计算场内所有像素点情况。 稀疏光流法(Sparse Optical Flow Methods),指引入的补充约束只需要计算部分像素区域的光流信息,即可达成约束要求的光流法。 经典稠密光流法的代表是 Horn–Schunck 光流算法,经典稀疏光流法的代表是 Lucas-Kanade 光流算法。 Horn–Schunck 梯度光流法(Horn–Schunck Method) 1981 年,麻省理工计算机实验室的 贝尔特霍尔德·霍恩(Berthold K.P. Horn,1943~Present) 和 布莱恩·舒克(Brian G. Schunck),在基本光流约束的前提下,提出了单帧光流场内光流全局光滑变化的假设 [26] 。 该假设认为,若光流场内 任意一点的光流 与 临近点的光流 变化都是光滑的,则存在能够 描述全场能量的单帧光流场能量函数,使得该时间段的场内能量变化 小值稳定。即对原光流场内可导假设进行了补充,使其建立了范围覆盖到整个场内像素的宏微观光流变化,与全抽象能量场能量强度间的关系。 这一补充假设也被称为 光流平滑约束(Optical Flow Smoothness Constraint),或 Horn–Schunck 约束。由于需要对整个场内的所有像素点光流进行计算,从而获取能量函数求最小值,方法被归类为稠密光流法。 数学上可以通过对 v⃗\\vec{v}v⃗ 求 p=(x, y)p = (x,\\ y)p=(x, y) 的二阶偏导数趋向无穷小来逼近无突变情况,构建平滑程度表示,有: {∇p2u=∂2u∂x2 + ∂2u∂y2∇p2v=∂2v∂x2 + ∂2v∂y2∇p2v⃗=∇p2u + ∇p2v→0 {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\nabla^2_p u &= \\tfrac{ \\partial^2 u}{\\partial x^2} \\ +\\ \\tfrac{ \\partial^2 u}{\\partial y^2} \\\\ \\nabla^2_p v &= \\tfrac{ \\partial^2 v}{\\partial x^2} \\ +\\ \\tfrac{ \\partial^2 v}{\\partial y^2} \\end{cases} } \\\\ &\\nabla^2_p \\vec{v} = \\nabla^2_p u \\ +\\ \\nabla^2_p v \\rightarrow 0 \\\\ \\end{aligned} } {∇p2u∇p2v=∂x2∂2u + ∂y2∂2u=∂x2∂2v + ∂y2∂2v∇p2v⃗=∇p2u + ∇p2v→0 而 ∇p2u\\nabla^2_p u∇p2u 、 ∇p2v\\nabla^2_p v∇p2v 则可以通过 拉普拉斯展式,利用周边像素点光流求逼近值的方式获取 [27] 。 图 3-16 Horn–Schunck 法采用的中心光流平滑度逼近卷积核[27] 有: ∇p2v⃗=∇p2u + ∇p2v=∑xyv⃗xy⋅[112, 16, 11216,−1, 16112, 16, 112]=(u¯ − u)2 + (v¯ − v)2 {\\displaystyle \\begin{aligned} \\nabla^2_p \\vec{v} &= \\nabla^2_p u \\ +\\ \\nabla^2_p v \\\\ &= \\sum_{xy}\\vec{v}_{xy} \\cdot { \\begin{bmatrix} \\tfrac{1}{12} ,& \\quad \\ \\ \\tfrac{1}{6} ,& \\quad \\ \\ \\tfrac{1}{12} \\\\ \\tfrac{1}{6} ,& \\quad -1 ,& \\quad \\ \\ \\tfrac{1}{6} \\\\ \\tfrac{1}{12} ,& \\quad \\ \\ \\tfrac{1}{6} ,& \\quad \\ \\ \\tfrac{1}{12} \\end{bmatrix} } \\\\ &= (\\bar{u} \\ -\\ u)^2 \\ +\\ (\\bar{v} \\ -\\ v)^2 \\end{aligned} } ∇p2v⃗=∇p2u + ∇p2v=xy∑v⃗xy⋅⎣⎡121,61,121, 61,−1, 61, 121 61 121⎦⎤=(u¯ − u)2 + (v¯ − v)2 那么,指定 εc2\\varepsilon_c^2εc2 为光流平滑约束的 L2L_2L2 误差代表值,则: εc2=(u¯ − u)2 + (v¯ − v)2→0 {\\displaystyle \\begin{aligned} \\varepsilon_c^2 = (\\bar{u} \\ -\\ u)^2 \\ +\\ (\\bar{v} \\ -\\ v)^2 \\rightarrow 0 \\\\ \\end{aligned} } εc2=(u¯ − u)2 + (v¯ − v)2→0 结合基本约束条件,针对像素点 ppp 的光流 v⃗=(u, v)\\vec{v} = (u,\\ v)v⃗=(u, v) 求解,就有两个约束条件了: {ε=∇pI⋅v⃗ + ∇tI→0εc2=(u¯ − u)2 + (v¯ − v)2→0 {\\displaystyle \\begin{aligned} { \\begin{cases} \\varepsilon = \\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I \\rightarrow 0 \\\\ \\varepsilon_c^2 = (\\bar{u} \\ -\\ u)^2 \\ +\\ (\\bar{v} \\ -\\ v)^2 \\rightarrow 0 \\end{cases} } \\\\ 
\\end{aligned} } {ε=∇pI⋅v⃗ + ∇tI→0εc2=(u¯ − u)2 + (v¯ − v)2→0 至此,假设当前时间 ttt 有全光流场能量 EEE ,引入光滑因子 α\\alphaα 构建能量函数。问题随即转换为,求满足约束的 (u, v)(u,\\ v)(u, v) 值,使得 EEE 最小: E=∫∫(ε2 + α2εc2) dxdy=∫∫[∇pI⋅v⃗ + ∇tI + α2∇p2u + α2∇p2v] dxdy→min {\\displaystyle \\begin{aligned} E &= \\int\\int (\\varepsilon^2 \\ +\\ \\alpha^2 \\varepsilon_c^2) \\ dxdy \\\\ &= \\int\\int [\\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I \\ +\\ \\alpha^2 \\nabla^2_p u \\ +\\ \\alpha^2 \\nabla^2_p v] \\ dxdy \\\\ &\\rightarrow min \\end{aligned} } E=∫∫(ε2 + α2εc2) dxdy=∫∫[∇pI⋅v⃗ + ∇tI + α2∇p2u + α2∇p2v] dxdy→min 显然,当 EEE 取得最小时: {∂E∂u=2⋅(∇pI⋅v⃗ + ∇tI)⋅∇xI − 2α2(u¯ − u)=0∂E∂v=2⋅(∇pI⋅v⃗ + ∇tI)⋅∇yI − 2α2(v¯ − v)=0 {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\tfrac{ \\partial E}{\\partial u} = 2 \\cdot (\\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I) \\cdot \\nabla_xI \\ -\\ 2\\alpha^2 (\\bar{u} \\ -\\ u) = 0\\\\ \\tfrac{ \\partial E}{\\partial v} = 2 \\cdot (\\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I) \\cdot \\nabla_yI \\ -\\ 2\\alpha^2 (\\bar{v} \\ -\\ v) = 0 \\end{cases} } \\\\ \\end{aligned} } {∂u∂E=2⋅(∇pI⋅v⃗ + ∇tI)⋅∇xI − 2α2(u¯ − u)=0∂v∂E=2⋅(∇pI⋅v⃗ + ∇tI)⋅∇yI − 2α2(v¯ − v)=0 进一步对两侧同求 ppp 的二阶导可化为: {(α2 + ∇xI2 + ∇yI2)⋅(u¯ − u)=∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)(α2 + ∇xI2 + ∇yI2)⋅(v¯ − v)=∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI) {\\displaystyle \\begin{aligned} &{ \\begin{cases} (\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2) \\cdot (\\bar{u} \\ -\\ u) = \\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u} \\ +\\ \\nabla_yI \\cdot \\bar{v} \\ +\\ \\nabla_t I) \\\\ (\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2) \\cdot (\\bar{v} \\ -\\ v) = \\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u} \\ +\\ \\nabla_yI \\cdot \\bar{v} \\ +\\ \\nabla_t I) \\end{cases} } \\\\ \\end{aligned} } {(α2 + ∇xI2 + ∇yI2)⋅(u¯ − u)=∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)(α2 + ∇xI2 + ∇yI2)⋅(v¯ − v)=∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI) 即: {(u − u¯)=−∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)α2 + ∇xI2 + ∇yI2(v − v¯)=−∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)α2 + ∇xI2 + ∇yI2 {\\displaystyle \\begin{aligned} &{ \\begin{cases} (u \\ -\\ \\bar{u}) = - \\frac{\\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u} \\ +\\ \\nabla_yI \\cdot \\bar{v} \\ +\\ \\nabla_t I)}{\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2} \\\\ (v \\ -\\ \\bar{v}) = - \\frac{\\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u} \\ +\\ \\nabla_yI \\cdot \\bar{v} \\ +\\ \\nabla_t I)}{\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2} \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎨⎪⎪⎧(u − u¯)=−α2 + ∇xI2 + ∇yI2∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)(v − v¯)=−α2 + ∇xI2 + ∇yI2∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI) 但由于启动时 v⃗p=(u, v)\\vec{v}_p = (u,\\ v)v⃗p=(u, v) 实际是未知的,而 avg(v⃗p)=(u¯, v¯)avg(\\vec{v}_p) = (\\bar{u},\\ \\bar{v})avg(v⃗p)=(u¯, v¯) 也是未知的。因此,我们需要将计算转换为由前一次结果驱动的向后迭代运算进行。 通过 克拉默法则(Cramer's Rule) 可知,位于第 n+1 次迭代的像素点 p = (x,\\ y) 光流 v⃗n+1\\vec{v}_{n+1}v⃗n+1 取值,与第 nnn 次迭代时,对应相同像素点 p=(x, y)p = (x,\\ y)p=(x, y) 所处卷积核的光流均值 avg(v⃗n)=(u¯n, v¯n)avg(\\vec{v}_n) = (\\bar{u}_n,\\ \\bar{v}_n)avg(v⃗n)=(u¯n, v¯n) 存在关系: {un+1=u¯n−∇xI⋅(∇xI⋅u¯n + ∇yI⋅v¯n + ∇tI)α2 + ∇xI2 + ∇yI2vn+1=v¯n−∇xI⋅(∇xI⋅u¯n + ∇yI⋅v¯n + ∇tI)α2 + ∇xI2 + ∇yI2 {\\displaystyle \\begin{aligned} &{ \\begin{cases} u_{n+1} = \\bar{u}_n - \\frac{\\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u}_n \\ +\\ \\nabla_yI \\cdot \\bar{v}_n \\ +\\ \\nabla_t I)}{\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2} \\\\ v_{n+1} = \\bar{v}_n - \\frac{\\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u}_n \\ +\\ \\nabla_yI \\cdot \\bar{v}_n \\ +\\ \\nabla_t I)}{\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2} \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎨⎪⎪⎧un+1=u¯n−α2 + 
∇xI2 + ∇yI2∇xI⋅(∇xI⋅u¯n + ∇yI⋅v¯n + ∇tI)vn+1=v¯n−α2 + ∇xI2 + ∇yI2∇xI⋅(∇xI⋅u¯n + ∇yI⋅v¯n + ∇tI) 上式即是 HS 法的核心光流推到公式 了。 当设置好启动时的 avg(v⃗0)=(u¯0, v¯0)avg(\\vec{v}_0) = (\\bar{u}_0,\\ \\bar{v}_0)avg(v⃗0)=(u¯0, v¯0) 初始值,就可以迭代获取后续帧内的像素光流场情况了。 一般取启动帧所有像素点 avg(v⃗0)=(0, 0)avg(\\vec{v}_0) = (0,\\ 0)avg(v⃗0)=(0, 0) 。 可见 Horn–Schunck 算法是需要逐个像素参与核运算,且保存完整前值的历史算法。 Lucas-Kanade 梯度光流法(Lucas-Kanade Method) 1981 年同年,在 HS 法提出的近乎相同时间,当时还在 卡内基梅隆大学(Carnegie-Mellon University) 计算机学院的 布鲁斯·卢卡斯(Bruce D. Lucas) 和 金出武雄(Takeo Kanade,1945~Present) 教授,共同提出了 Lucas-Kanade 光流法,同样试图借此完成对基础光流约束的补充,使得能够预测光流场情况 [26] 。 和 HS 法纯粹对空域的关注不同,LK 法细化基础光流约束中的时空稳定条件 [28] : 时域上,LK 法提出了 像素微位移假设。假设认为图像像素位置随时间变化是连续的,进而才能够求的像素光流和时间之间的偏导关系; 空域上,LK 法提出了 空间趋同性假设。假设认为场景中相同表面的相邻像素点运动模式是趋同的,且由光动场到光流场投影后,其光流情况也是保持了这一性质。 这两个补充条件,让 LK 法定义的整个场景时空,任意一点和其相邻空间都是时空连续的。 这使我们可以将有关全图逐个像素点光流时空关系的推导,通过分割整体图像的像素点集合,转换为不同像素点子集构成的对应分块(卷积核),以核内区域为单元的光流时空关系推导。从点对点,变为了区域对区域。 基于此,在核心位置 c=(x, y)c = (x,\\ y)c=(x, y) 和所处时刻 ttt 已知的情况下,核内区域光流场内所有像素的光流可以被认为是一个相同值 v⃗=(u, v)\\vec{v} = (u,\\ v)v⃗=(u, v) 。且必然有区域内,基础约束条件 ε=∇cI⋅v⃗ + ∇tI\\varepsilon = \\nabla_c I \\cdot \\vec{v} \\ +\\ \\nabla_t Iε=∇cI⋅v⃗ + ∇tI 的高阶无穷小 ε=0\\varepsilon = 0ε=0 成立。 记当前图像大小为 W×HW \\times HW×H ,有 n×nn \\times nn×n 大小分块(卷积核),全图光流场面临的计算量会降为对 N=W/n×H/nN = W/n \\times H/nN=W/n×H/n 个窗口核心光流的推算。记 m=n2m = n^2m=n2 ,则存在核内方程组: {∇cI11⋅v⃗ + ∇tI11=0∇cI12⋅v⃗ + ∇tI12=0⋯∇cInn⋅v⃗ + ∇tInn=0⇒{∇xI1⋅u + ∇yI1⋅v = −∇tI1∇xI2⋅u + ∇yI2⋅v = −∇tI2⋯∇xIm⋅u + ∇yIm⋅v = −∇tIm {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\nabla_c I_{11} \\cdot \\vec{v} \\ +\\ \\nabla_t I_{11} = 0 \\\\ \\nabla_c I_{12} \\cdot \\vec{v} \\ +\\ \\nabla_t I_{12} = 0 \\\\ \\cdots \\\\ \\nabla_c I_{nn} \\cdot \\vec{v} \\ +\\ \\nabla_t I_{nn} = 0 \\end{cases} \\quad \\Rightarrow \\quad \\begin{cases} \\nabla_x I_1 \\cdot u \\ +\\ \\nabla_y I_1 \\cdot v \\ =\\ -\\nabla_t I_1 \\\\ \\nabla_x I_2 \\cdot u \\ +\\ \\nabla_y I_2 \\cdot v \\ =\\ -\\nabla_t I_2 \\\\ \\cdots \\\\ \\nabla_x I_m \\cdot u \\ +\\ \\nabla_y I_m \\cdot v \\ =\\ -\\nabla_t I_m \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎪⎨⎪⎪⎪⎧∇cI11⋅v⃗ + ∇tI11=0∇cI12⋅v⃗ + ∇tI12=0⋯∇cInn⋅v⃗ + ∇tInn=0⇒⎩⎪⎪⎪⎨⎪⎪⎪⎧∇xI1⋅u + ∇yI1⋅v = −∇tI1∇xI2⋅u + ∇yI2⋅v = −∇tI2⋯∇xIm⋅u + ∇yIm⋅v = −∇tIm 即: [∑∇xIm, ∑∇yIm][uv]=[∑−∇tIm] {\\displaystyle \\begin{aligned} \\begin{bmatrix} \\sum \\nabla_x I_m , \\ \\sum \\nabla_y I_m \\end{bmatrix} \\begin{bmatrix} u \\\\ v \\end{bmatrix} = \\begin{bmatrix} \\sum -\\nabla_t I_m \\end{bmatrix} \\\\ \\end{aligned} } [∑∇xIm, ∑∇yIm][uv]=[∑−∇tIm] 记 Mc=[∑∇xIm, ∑∇yIm]M_c =\\begin{bmatrix} \\sum \\nabla_x I_m , \\ \\sum \\nabla_y I_m \\end{bmatrix}Mc=[∑∇xIm, ∑∇yIm] , Mt=[∑−∇tIm]M_t =\\begin{bmatrix} \\sum -\\nabla_t I_m \\end{bmatrix}Mt=[∑−∇tIm] ,则: v⃗=[uv]=(McT⋅Mc)−1⋅McT⋅Mt=[∑(∇xIm)2, ∑∇xIm⋅∇yIm∑∇xIm⋅∇yIm, ∑(∇yIm)2]−1[∑∇xIm⋅∇tIm∑∇xIm⋅∇tIm] {\\displaystyle \\begin{aligned} \\vec{v} &= \\begin{bmatrix} u \\\\ v \\end{bmatrix} = ({M_c}^T \\cdot M_c)^{-1} \\cdot {M_c}^T \\cdot M_t \\\\ &= \\begin{bmatrix} &\\sum (\\nabla_x I_m)^2 &, \\ \\sum \\nabla_x I_m \\cdot \\nabla_y I_m \\\\ &\\sum \\nabla_x I_m \\cdot \\nabla_y I_m &, \\ \\sum (\\nabla_y I_m)^2 \\end{bmatrix}^{-1} \\begin{bmatrix} \\sum \\nabla_x I_m \\cdot \\nabla_t I_m \\\\ \\sum \\nabla_x I_m \\cdot \\nabla_t I_m \\end{bmatrix} \\end{aligned} } v⃗=[uv]=(McT⋅Mc)−1⋅McT⋅Mt=[∑(∇xIm)2∑∇xIm⋅∇yIm, ∑∇xIm⋅∇yIm, ∑(∇yIm)2]−1[∑∇xIm⋅∇tIm∑∇xIm⋅∇tIm] 上式即是 LK 法的核心光流推到公式 了。 可见 Lucas-Kanade 算法,属于只需要启动(且不用初始化),就能够在分块(卷积核)内自行完成核心光流保存的自适应循环算法。 从物理角度理解,式子中的 ∇xIm\\nabla_x I_m∇xIm 、 ∇yIm\\nabla_y I_m∇yIm 、 ∇tIm\\nabla_t I_m∇tIm ,是分块 mmm 内像素 ppp 的灰度值 III ,对其所处全图像素位置 
p=(x, y)p = (x,\\ y)p=(x, y) 和时间参数 ttt 方向的变化趋势,即 灰度加速度。鉴于完备的灰度数据,加速度可以利用动量算法结合牛顿法等方式逼近,快速的从帧变化中取得。那么对光流 v⃗\\vec{v}v⃗ 的求解就成为了 简单的数值计算问题。 对比 HS 稠密光流和 LK 稀疏光流经典算法,显然 LK 在工程场景中更具优势。 同样,以 LK 算法为代表的稀疏光流法,由于其本身占用数据量和算力远远小于稠密光流法的缘故,得到了更为广泛的工程运用。尤其是 LK 算法本身,凭借高可控和简单的特性,被大量使用在如今的编解码器技术上。例如空域冗余压缩所采用的双向光流等算法,就可以被认为是从 LK 算法衍生出的实际运用产物。而稠密光流法,目前还停留在单帧分析等场景,不过考虑到深度学习带来的变革,利用稠密光流的思想来训练光流约束模型,并引入新一代音视频编解码过程,也从另一个角度开始发挥稠密光流法的工程价值。 但不论是哪一种类型的光流法,基于学术需求和面向工程要求的精度还是有极大的差异的。传统音视频工程对于效率要求高,而精度要求相对较低,我们需要 更快速 的处理方式。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_4_2.html":{"url":"Chapter_3/Language/cn/Docs_3_4_2.html","title":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow])","keywords":"","body":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow]) 双向光流预测值修正,简称 双向光流预测(BDOF [Bi-Directional Optical Flow]),最早在 H.265 的二版规格,由三星工程师以编码压缩补充手段的方式提出 [29] 。在 VVC 的初版制定过程中,贡献者们通过对算法层面的优化,提升了 BDOF 处理单元的性能。随 VVC 被采纳为 H.266 规格一起,作为标准的一部分被收录其中。 双向光流预测是以 LK 光流法的约束条件为基础,提出的一种亮度值推理算法。方法在编解码过程中以 LK 微位移假设为基,限制所有前后向预测帧(B帧)的选取,必须保持 当前帧(Current Frame) 与前后两帧在相同位置处的光流成 等大反向关系(Reverse Equality)。 通过这一联系,BDOF 在已知时间流向(即视频向前、向后)时,可以通过前向帧和期望预测方向的下一个关联帧,推导出当前帧的实际光流场变化情况。进而在无保存当前帧数据的前提下,求得当前帧的实际灰度值(亮度参考值)。 对于采用具有线性色彩空间映射关系的规格,依赖线性转换保证了关于灰度的推理,这时 BDOF 也可以适用在各自的原色格式(RGB)的数据通道上。但由于视频传输中,一般不直接采用会造成大量数据浪费的原色格式,所以,BDOF 只被用来对传输格式(YUV)代表亮度值的 Y 通道数据,进行冗余控制。 本质上,双向光流预测是个类似二次牛顿法的逼近求解过程。根据镜像的特性,推导可转为线性求中值(对应的交点最小值)。如下图所示: 图 3-17 BDOF 构建参考对称光流示意图[29] 假设,当前临近三帧有需要推算分块 mmm 范围内像素点 p=(x, y)p = (x,\\ y)p=(x, y) 的灰度。 按时序方向(视屏正常播放方向,图中由下而上) 的前向帧(过去帧)为 R0R_0R0 有块灰度值 I0I_0I0 集、当前帧为 RcR_cRc 有块灰度值 IcI_cIc 集、后向帧(未来帧)为 R1R_1R1 有块灰度值 I1I_1I1 集。根据 LK 的局部光流趋同性,分块 mmm 范围内像素点的光流相等,可记 R0R_0R0 光流 v⃗A\\vec{v}_Av⃗A , R1R_1R1 光流 v⃗B\\vec{v}_Bv⃗B 。 由于人为的有 R0R_0R0 、 R1R_1R1 的光流在 RcR_cRc 镜像对称,如果记 R0R_0R0 光流 v⃗A=(Vx, Vy)\\vec{v}_A =(V_x,\\ V_y)v⃗A=(Vx, Vy) ,则 R1R_1R1 光流 v⃗B=(−Vx, −Vy)\\vec{v}_B =(-V_x,\\ -V_y)v⃗B=(−Vx, −Vy) ,即 v⃗B=−v⃗A\\vec{v}_B = -\\vec{v}_Av⃗B=−v⃗A 。 那么,将关系代入 LK 条件下的基础光流公式,存在块间光流满足: {+∇xI0⋅Vx + ∇yI0⋅Vy + ε = −∇tI0−∇xI1⋅Vx − ∇yI1⋅Vy + ε = −∇tI1 {\\displaystyle \\begin{aligned} &{ \\begin{cases} +\\nabla_x I_0 \\cdot V_x \\ +\\ \\nabla_y I_0 \\cdot V_y \\ +\\ \\varepsilon \\ =\\ -\\nabla_t I_0 \\\\ -\\nabla_x I_1 \\cdot V_x \\ -\\ \\nabla_y I_1 \\cdot V_y \\ +\\ \\varepsilon \\ =\\ -\\nabla_t I_1 \\end{cases} } \\\\ \\end{aligned} } {+∇xI0⋅Vx + ∇yI0⋅Vy + ε = −∇tI0−∇xI1⋅Vx − ∇yI1⋅Vy + ε = −∇tI1 因为从 R0→Rc→R1R_0 \\rightarrow R_c \\rightarrow R_1R0→Rc→R1 只 推移单位时间,所以有关时间单位导数近似: {∇tI0 = I0 − Ic∇tI1 = I1 − Ic⇒∇tI0−∇tI1 = ΔI {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\nabla_t I_0 \\ =\\ I_0 \\ -\\ I_c \\\\ \\nabla_t I_1 \\ =\\ I_1 \\ -\\ I_c \\end{cases} } \\Rightarrow \\nabla_t I_0 - \\nabla_t I_1 \\ =\\ \\Delta I \\\\ \\end{aligned} } {∇tI0 = I0 − Ic∇tI1 = I1 − Ic⇒∇tI0−∇tI1 = ΔI 则三者间的光流关系可化为: {I0 − Ic + ∇xI0⋅Vx + ∇yI0⋅Vy + ε = 0I1 − Ic − ∇xI1⋅Vx − ∇yI1⋅Vy + ε = 0 {\\displaystyle \\begin{aligned} &{ \\begin{cases} I_0 \\ -\\ I_c \\ +\\ \\nabla_x I_0 \\cdot V_x \\ +\\ \\nabla_y I_0 \\cdot V_y\\ +\\ \\varepsilon \\ =\\ 0 \\\\ I_1 \\ -\\ I_c \\ -\\ \\nabla_x I_1 \\cdot V_x \\ -\\ \\nabla_y I_1 \\cdot V_y \\ +\\ \\varepsilon \\ =\\ 0 \\end{cases} } \\\\ \\end{aligned} } {I0 − Ic + ∇xI0⋅Vx + ∇yI0⋅Vy + ε = 0I1 − Ic − ∇xI1⋅Vx − ∇yI1⋅Vy + ε = 0 未知量有 IcI_cIc 和 (Vx, Vy)(V_x,\\ V_y)(Vx, Vy) 三个,是无法单独依赖上方的方程组,只通过两个约束获取的。 不过,块的光流 仍然 是满足 LK 约束,而 LK 法提供了对光流相对独立的预估,配合背景有: {v⃗A=[+Vx+Vy]=(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0v⃗B=[−Vx−Vy]=(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1 {\\displaystyle \\begin{aligned} 
&{ \\begin{cases} \\vec{v}_A = \\begin{bmatrix} +V_x \\\\ +V_y \\end{bmatrix} = ({M_{c0}}^T \\cdot M_{c0})^{-1} \\cdot {M_{c0}}^T \\cdot M_{t0} \\\\ \\vec{v}_B = \\begin{bmatrix} -V_x \\\\ -V_y \\end{bmatrix} = ({M_{c1}}^T \\cdot M_{c1})^{-1} \\cdot {M_{c1}}^T \\cdot M_{t1} \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎨⎪⎪⎧v⃗A=[+Vx+Vy]=(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0v⃗B=[−Vx−Vy]=(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1 即: [VxVy]=12[(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0+(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1]=[(Mc0T⋅Mc0)−1⋅Mc0T2⋅Mt0+(Mc1T⋅Mc1)−1⋅Mc1T2⋅Mt1] {\\displaystyle \\begin{aligned} \\begin{bmatrix} V_x \\\\ V_y \\end{bmatrix} &= \\tfrac{1}{2}[({M_{c0}}^T \\cdot M_{c0})^{-1} \\cdot {M_{c0}}^T \\cdot M_{t0} + ({M_{c1}}^T \\cdot M_{c1})^{-1} \\cdot {M_{c1}}^T \\cdot M_{t1}] \\\\ &= [\\tfrac{({M_{c0}}^T \\cdot M_{c0})^{-1} \\cdot {M_{c0}}^T}{2} \\cdot M_{t0} + \\tfrac{({M_{c1}}^T \\cdot M_{c1})^{-1} \\cdot {M_{c1}}^T}{2} \\cdot M_{t1}] \\\\ \\end{aligned} } [VxVy]=21[(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0+(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1]=[2(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0+2(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1] 而同理于时域梯度的差值近似。对于分块 mmm 范围内像素点 p=(x, y)p = (x,\\ y)p=(x, y) 的空域灰度梯度,也可近似换算为: {∇xI0 = I0(x+1) − I0(x−1)2∇yI0 = I0(y+1) − I0(y−1)2∇xI1 = I1(x+1) − I1(x−1)2∇yI1 = I1(y+1) − I1(y−1)2⇒{∇xI0+∇xI1 = Δavg(Ix)=ΔIx¯∇yI0+∇yI1 = Δavg(Iy)=ΔIy¯∇xI0−∇xI1 = avg(ΔIx)=ΔIx¯∇xI0−∇xI1 = avg(ΔIy)=ΔIy¯ {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\nabla_x I_0 \\ =\\ \\frac{I_0(x+1) \\ -\\ I_0(x-1)}{2} \\\\ \\nabla_y I_0 \\ =\\ \\frac{I_0(y+1) \\ -\\ I_0(y-1)}{2} \\\\ \\nabla_x I_1 \\ =\\ \\frac{I_1(x+1) \\ -\\ I_1(x-1)}{2} \\\\ \\nabla_y I_1 \\ =\\ \\frac{I_1(y+1) \\ -\\ I_1(y-1)}{2} \\end{cases} } \\Rightarrow { \\begin{cases} \\nabla_x I_0 + \\nabla_x I_1 \\ =\\ \\Delta avg(I_x) = \\Delta \\bar{I_x} \\\\ \\nabla_y I_0 + \\nabla_y I_1 \\ =\\ \\Delta avg(I_y) = \\Delta \\bar{I_y} \\\\ \\nabla_x I_0 - \\nabla_x I_1 \\ =\\ avg(\\Delta I_x) = \\bar{\\Delta I_x} \\\\ \\nabla_x I_0 - \\nabla_x I_1 \\ =\\ avg(\\Delta I_y) = \\bar{\\Delta I_y} \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎪⎪⎧∇xI0 = 2I0(x+1) − I0(x−1)∇yI0 = 2I0(y+1) − I0(y−1)∇xI1 = 2I1(x+1) − I1(x−1)∇yI1 = 2I1(y+1) − I1(y−1)⇒⎩⎪⎪⎪⎨⎪⎪⎪⎧∇xI0+∇xI1 = Δavg(Ix)=ΔIx¯∇yI0+∇yI1 = Δavg(Iy)=ΔIy¯∇xI0−∇xI1 = avg(ΔIx)=ΔIx¯∇xI0−∇xI1 = avg(ΔIy)=ΔIy¯ 代入样本梯度到 Mc=[∑∇xIm, ∑∇yIm]M_c =\\begin{bmatrix} \\sum \\nabla_x I_m , \\ \\sum \\nabla_y I_m \\end{bmatrix}Mc=[∑∇xIm, ∑∇yIm] ,Mt=[∑−∇tIm]M_t =\\begin{bmatrix} \\sum -\\nabla_t I_m \\end{bmatrix}Mt=[∑−∇tIm] ,展开可得 (Vx, Vy)(V_x,\\ V_y)(Vx, Vy) 取值: [VxVy]=[∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔI)−∑(ΔIx¯ΔI)⋅∑ΔIy¯2∑ΔIx¯2⋅∑ΔIy¯2−∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔIx¯)∑(ΔIx¯ΔIy¯)⋅∑(ΔIx¯ΔI)−∑(ΔIy¯ΔI)⋅∑ΔIx¯2∑ΔIx¯2⋅∑ΔIy¯2−∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔIx¯)] {\\displaystyle \\begin{aligned} \\begin{bmatrix} V_x \\\\ V_y \\end{bmatrix} &= \\begin{bmatrix} \\frac{\\sum (\\Delta \\bar{I_x} \\Delta \\bar{I_y} ) \\cdot \\sum (\\Delta \\bar{I_y} \\Delta I) - \\sum (\\Delta \\bar{I_x} \\Delta I ) \\cdot \\sum \\Delta \\bar{I_y}^2} {\\sum \\Delta \\bar{I_x}^2 \\cdot \\sum \\Delta \\bar{I_y}^2 - \\sum (\\Delta \\bar{I_x} \\Delta \\bar{I_y}) \\cdot \\sum (\\Delta \\bar{I_y} \\Delta \\bar{I_x}) } \\\\ \\frac{\\sum (\\Delta \\bar{I_x} \\Delta \\bar{I_y} ) \\cdot \\sum (\\Delta \\bar{I_x} \\Delta I) - \\sum (\\Delta \\bar{I_y} \\Delta I ) \\cdot \\sum \\Delta \\bar{I_x}^2} {\\sum \\Delta \\bar{I_x}^2 \\cdot \\sum \\Delta \\bar{I_y}^2 - \\sum (\\Delta \\bar{I_x} \\Delta \\bar{I_y}) \\cdot \\sum (\\Delta \\bar{I_y} \\Delta \\bar{I_x}) } \\end{bmatrix} \\\\ \\end{aligned} } 
[VxVy]=⎣⎢⎢⎡∑ΔIx¯2⋅∑ΔIy¯2−∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔIx¯)∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔI)−∑(ΔIx¯ΔI)⋅∑ΔIy¯2∑ΔIx¯2⋅∑ΔIy¯2−∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔIx¯)∑(ΔIx¯ΔIy¯)⋅∑(ΔIx¯ΔI)−∑(ΔIy¯ΔI)⋅∑ΔIx¯2⎦⎥⎥⎤ 现在,只有 IcI_cIc 是未知的了,而可取范围在分块 mmm 之内时,对于任意块内点 Ic=IpI_c = I_pIc=Ip 。 代入原方程组即可,有: Ic = I0 + I1 + (∇xI0−∇xI1)⋅Vx + (∇yI0−∇yI1)⋅Vy2 + ε= I0 + I1 + ΔIx¯⋅Vx + ΔIy¯⋅Vy2 + εIc =Ipp(x, y)∈m {\\displaystyle \\begin{aligned} I_c \\ &=\\frac{\\ I_0 \\ +\\ I_1 \\ +\\ (\\nabla_x I_0 - \\nabla_x I_1) \\cdot V_x \\ +\\ (\\nabla_y I_0 - \\nabla_y I_1) \\cdot V_y}{2} \\ +\\ \\varepsilon \\\\ &=\\frac{\\ I_0 \\ +\\ I_1 \\ +\\ \\bar{\\Delta I_x} \\cdot V_x \\ +\\ \\bar{\\Delta I_y} \\cdot V_y}{2} \\ +\\ \\varepsilon \\\\ I_c \\ &=I_p \\quad \\quad p(x,\\ y) \\in m \\end{aligned} } Ic Ic =2 I0 + I1 + (∇xI0−∇xI1)⋅Vx + (∇yI0−∇yI1)⋅Vy + ε=2 I0 + I1 + ΔIx¯⋅Vx + ΔIy¯⋅Vy + ε=Ipp(x, y)∈m 式子中的 ε\\varepsilonε 为误差修正值,一般取 ε=0.5\\varepsilon = 0.5ε=0.5 。 如是,双向光流预测的基本原理,数理推导佐证完毕。 可见,BDOF 的算力消耗重点是在有关 (Vx, Vy)(V_x,\\ V_y)(Vx, Vy) 的求解上。所以,工程化会采用小于当前分块的子块大小做卷积核,使用近似求解快速计算。当然也可以在满足精度要求下,通过模型化解决,思路类似于光流补帧的数据预处理。而由于涉及到规格中的不少工程处理技巧,有关 BDOF 标准化的部分,我们留到 H.266 规格详解时再行展开。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_4_3.html":{"url":"Chapter_3/Language/cn/Docs_3_4_3.html","title":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow])","keywords":"","body":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow]) BDOF 技术的引入,让音视频编解码工程能够进一步提高传输过程的数据压缩比。但由于仍然依托于分块和分块内小块(也是前文的梯度卷积核),当出现块的偏移、扭转、错切等情况时,像素位置的微小变动则会被此类变化成倍的放大误差。所以,还需要 适当的修正。 我们知道,音视频编解码规格(如 H.264、H.265、H.266)中,分块的子块也是存在类似的情况的。我们为了处理问题,采用的是 基于控制点运动矢量(CMVP [Control Point Motion Vector])的子块仿射运动补偿(AMC [Affine Motion Compensation]),并在 H.266 中根据目标子块大小衍生出了 高级运动矢量预测(AMVP [Advanced Motion Vector Prediction])的仿射模式,和 混合预测(Merge)的仿射模式。通俗理解,即通过相邻帧的相同块内子块的仿射变换,来映射原子块区域的对应关系。 但子块控制点的运动是远大于像素运动的,那么同样的情况发生在更小的尺度上,是否还能达到效果呢? 
答案是可以的。 在 LK 条件下局部光流趋同性,决定了像素光流的差分补偿对分块只需要单次计算即可。那么对于子块来说,只用在原有仿射运动补偿(AMC)的基础上,对块内像素额外附加 光流补偿值(OFC [Optical Flow Compensation]) 即可。 记分块 mmm 有,中心点 KxyK_{xy}Kxy 在全图的绝对像素位置 Kxy=(Kx, Ky)K_{xy} = (K_x,\\ K_y)Kxy=(Kx, Ky) 的子块 kkk 。存在子块内相对位置为 pij=(i, j)p_{ij} = (i,\\ j)pij=(i, j) 的像素点 pijp_{ij}pij 。由于子块内是不存在时差的,即时间残差 ∇tI=0\\nabla_t I = 0∇tI=0 存在,则记 pijp_{ij}pij 的子块内光流补偿值(OFC)是 ΔIp\\Delta I_pΔIp ,根据基础光流公式就有: ΔIp=∇pI⋅Δv⃗p + ∇tI=∇pI⋅Δv⃗p {\\displaystyle \\begin{aligned} \\Delta I_p = \\nabla_p I \\cdot \\Delta \\vec{v}_p \\ +\\ \\nabla_t I = \\nabla_p I \\cdot \\Delta \\vec{v}_p \\\\ \\end{aligned} } ΔIp=∇pI⋅Δv⃗p + ∇tI=∇pI⋅Δv⃗p 其中, Δv⃗p=(ΔVi, ΔVj)\\Delta \\vec{v}_p = (\\Delta V_i,\\ \\Delta V_j)Δv⃗p=(ΔVi, ΔVj) 即是点 pijp_{ij}pij 在子块 kkk 内的光流偏移,这个值相对子块内部中心 KijK_{ij}Kij ,在分块 mmm 内子块无相对变化情况时,是个恒定值,有: ΔKp=pij−Kij=(Δi, Δj)=Δij {\\displaystyle \\begin{aligned} \\Delta K_p = p_{ij}-K_{ij} = (\\Delta i,\\ \\Delta j) = \\Delta_{ij} \\\\ \\end{aligned} } ΔKp=pij−Kij=(Δi, Δj)=Δij 而根据仿射变换特点,当分块 mmm 发生仿射变换,其每个子块 kkk 的像素点内部光流偏移矢量,也会发生 等效于块中心运动补偿 的仿射变换。 因此,假设分块 mmm 块运动采用左上、右上、左下的三点定位(即标准三控制点),记帧 R0R_0R0 到帧 R1R_1R1 有块三点定位运动矢量分别为 MV⃗0\\vec{MV}_0MV⃗0 、 MV⃗1\\vec{MV}_1MV⃗1 、 MV⃗2\\vec{MV}_2MV⃗2 如下: 图 3-18 PROF 子块光流与块运动矢量示意图 [30] 假设分块 mmm 大小为 Mw×MhM_w \\times M_hMw×Mh ,则有块从帧 R0R_0R0 到帧 R1R_1R1 的位姿仿射变换矩阵 AAA 使得: Δv⃗p=A⋅ΔKp=A⋅Δij=[MV1,x−MV0,xMw,MV2,x−MV0,xMhMV1,y−MV0,yMw,MV2,y−MV0,yMh]⋅[ΔiΔj] {\\displaystyle \\begin{aligned} \\Delta \\vec{v}_p &= A \\cdot \\Delta K_p = A \\cdot \\Delta_{ij} \\\\ &= \\begin{bmatrix} &\\frac{MV_{1,x} - MV_{0,x}}{M_w} &, \\quad \\frac{MV_{2,x} - MV_{0,x}}{M_h} \\\\ &\\frac{MV_{1,y} - MV_{0,y}}{M_w} &, \\quad \\frac{MV_{2,y} - MV_{0,y}}{M_h} \\end{bmatrix} \\cdot \\begin{bmatrix} \\Delta i \\\\ \\Delta j \\end{bmatrix} \\end{aligned} } Δv⃗p=A⋅ΔKp=A⋅Δij=⎣⎢⎡MwMV1,x−MV0,xMwMV1,y−MV0,y,MhMV2,x−MV0,x,MhMV2,y−MV0,y⎦⎥⎤⋅[ΔiΔj] 而 ∇pI\\nabla_p I∇pI 可由子块 LK 计算等效获取,有: Ip(i, j)=Ip(x+Δi, y+Δj)∇pI(i, j)=(∇iIp, ∇jIp)={∇iIp = Ip(i+1) − Ip(i−1)2∇jIp = Ip(j+1) − Ip(j−1)2 {\\displaystyle \\begin{aligned} I_p(i,\\ j) &= I_p(x+\\Delta i,\\ y+ \\Delta j) \\\\ \\nabla_p I(i,\\ j) &= (\\nabla_iI_p,\\ \\nabla_jI_p) = { \\begin{cases} \\nabla_i I_p \\ =\\ \\frac{I_p(i+1) \\ -\\ I_p(i-1)}{2} \\\\ \\nabla_j I_p \\ =\\ \\frac{I_p(j+1) \\ -\\ I_p(j-1)}{2} \\end{cases} } \\\\ \\end{aligned} } Ip(i, j)∇pI(i, j)=Ip(x+Δi, y+Δj)=(∇iIp, ∇jIp)=⎩⎪⎨⎪⎧∇iIp = 2Ip(i+1) − Ip(i−1)∇jIp = 2Ip(j+1) − Ip(j−1) 所以,子块内像素的最终亮度 I^p\\hat{I}_pI^p 取值为: I^p=Ip(x, y) + ΔIp(i, j)=∇pI(i, j)⋅Δv⃗p≈Ip(x, y) + ∇iIp⋅ΔVi + ∇jIp⋅ΔVj {\\displaystyle \\begin{aligned} \\hat{I}_p &= I_p (x,\\ y) \\ +\\ \\Delta I_p (i,\\ j) = \\nabla_p I (i,\\ j) \\cdot \\Delta \\vec{v}_p \\\\ &\\approx I_p (x,\\ y) \\ +\\ \\nabla_i I_p \\cdot \\Delta V_i \\ +\\ \\nabla_j I_p \\cdot \\Delta V_j \\\\ \\end{aligned} } I^p=Ip(x, y) + ΔIp(i, j)=∇pI(i, j)⋅Δv⃗p≈Ip(x, y) + ∇iIp⋅ΔVi + ∇jIp⋅ΔVj 上式中的 IpI_pIp 即像素点 pij=Kxy+Δij=(x+Δi, y+Δj)p_{ij} = K_{xy} + \\Delta_{ij} = (x+\\Delta i,\\ y+ \\Delta j)pij=Kxy+Δij=(x+Δi, y+Δj) 的分块 mmm 内实际亮度预测值,可通过 BDOF 求得,也可以采用其他传统块推理方式获取。根据 PROF 的修正,BDOF 推算所得像素点的亮度将更为准确,进而在 提高压缩程度(以子块为最小压缩单位的块内冗余压缩)的同时,保证了灰度(亮度值)数据还原效果。 以上我们介绍的,就是光流法在音视频编解码过程中较为粗浅的基本应用了。这些数学工具已经通过标准化,被嵌入到了 H.266/VVC 规格中,并在同期其他竞争规格(如 AV1)的最新标准里逐步推广。而光流法的引入,无疑进一步缩减了传统音视频和机器学习之间的工程鸿沟。在可预见的未来,人工智能模型流水线和编解码器必然会有更深入的融合,在技术层面形成一套全新的顶层设计。这种趋势,作为音视频开发者,是不应该忽视的。 回到当前话题,在依靠光流法处理了传输格式的亮度狭时空域冗余数据后,如果能够在纯空域上,同时对随亮度传输的色度信息进行一定程度的压缩,就能更好的降低数据成本,并提升色彩还原程度,支撑更广的色域选择了。 这就是色度缩放亮度映射技术的由来。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 
14:09:50 "},"Chapter_3/Language/cn/Docs_3_4_4.html":{"url":"Chapter_3/Language/cn/Docs_3_4_4.html","title":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling])","keywords":"","body":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling]) 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling]) 技术,是一类纯粹的空域数据处理技术,即本身不涉及时域相关性,直接针对像素原值的冗余分离手段。传统音视频编解码中,包括大部分帧内预测工具、帧内编码条带分块、色度重建等,严格来说都属于这种类型。 LMCS 最早引入自 H.266/VVC 标准中,用于编解码环路滤波阶段 [31] 。通过建立从传输格式对应存储位深(Bit Depth),到色度和亮度实际可取值范围间的线性转换放缩,来提高针对 标准动态范围(SDR [Standard Dynamic Range]) 和 高动态范围(HDR [High Dynamic Range]) 视频的支持,提升编解码性能。 这是一种基于物理存储方式和实际规格约束的差异,以直接操作空域到数据的映射关系,来间接降低信息熵的一种技术(有别于熵编码技术族对存储的信息熵直接衰减)。 LMCS 由两个组件构成,分别是:分段线性(Piecewise Linear)亮度映射(LM [Luma Mapping]) 和 依赖亮度(Luma-Dependent )色度残差缩放(CRS [Chroma Residue Scaling]),简称为 亮度映射(LM) 和 色度缩放(CS)。前者精简格式,后者压缩数据。 分段线性亮度映射 分段线性亮度映射,即 亮度映射(LM) 的基本目的,是为了方便规格支持的传输格式,在数据存储格式(Data Format)和格式空间(Format Space)原值之间的非对称相互转换。 例如,H.266 中采用 ITU-R BT.2100 的色彩转换规格标准,并兼容 ITU-R BT.709 等其他的的历史转换规格。其中,ITU-R BT.2100 提供了 10-bit YUV 存储格式,而老规格中亦然也有一系列 8-bit YUV 存储格式。同时,YUV 本身亦是具有两种基本有效范围,即 狭隘区间(Narrow Range) 和 完整区间(Full Range)。这些不同的 YUV 格式和区间,虽然各自的原色空间色域表示存在范围差异,但由于传输格式都采用同一套 CIE YUV 色彩空间衡量,因此在颜色的传输格式取值上是互通的,差异只在于存储格式的存储范围上,即两个线性区间的映射。 所以,理论上可以 只采用一个转换标准的传输格式,就能通过数据存储范围的线性转换,实现对所有规格下标准的兼容。 假设目标 YUV 规格亮度存储值为 IoutI_{out}Iout 存储取值范围为 Iout∈[Minout, Maxout]I_{out} \\in [Min_{out},\\ Max_{out}]Iout∈[Minout, Maxout] ,当前输入 YUV 规格亮度存储值为 IinI_{in}Iin 存储取值范围为 Iin∈[Minin, Maxin]I_{in} \\in [Min_{in},\\ Max_{in}]Iin∈[Minin, Maxin] ,则: Iout=Maxout−MinoutMaxin−Minin⋅(Iin−Minin)+Minin {\\displaystyle \\begin{aligned} I_{out} &= \\frac{Max_{out} - Min_{out}}{Max_{in} - Min_{in}} \\cdot (I_{in} - Min_{in}) + Min_{in} \\\\ \\end{aligned} } Iout=Maxin−MininMaxout−Minout⋅(Iin−Minin)+Minin 不过在实际使用过程中,因为均色问题(详见第二章)仍在 CIE YUV 标准空间上存在,亮度值本身在整个色域范围并不均匀,使得亮度值(灰度值)转换到存储值后,存储值也保留了这种性质。这在存储格式和格式空间一致的情况下,由于互为逆变换的缘故,并不存在转换误差。但当两者不一致时,非对称转换非互逆,则会产生误差,并随传输格式的原色格式还原而扩大。如果我们为了保证完美映射,则需要引入复杂的计算,不利于像素通道级别的处理过程。 这个问题,亮度映射提出以 分段牛顿法对亮度存储值取值范围处理,即采用分段线性映射来减小误差水平到可接受的范围,并降低算力消耗。 我们一般将原有亮度值对应的可取范围称为原区域,而在此之上分割得到的每个子段,被称为 子区域。记 原区域码字长(Code Words) 为 CWtotalCW_{total}CWtotal 个,而位于索引 i∈Z[0, N−1]i \\in \\mathbb{Z}[0 ,\\ N - 1]i∈Z[0, N−1] 位置的子区域 CWiCW_{i}CWi 的码字长为 CWbin[i]CW_{bin}[i]CWbin[i] 个,均值为 avg(CWbin)avg(CW_{bin})avg(CWbin) 。 码字(Code Word) 即来自哈夫曼编码数据传输中,所指代的有意义代表值,此处则相当于一个范围内有效的灰度值。则: CWbin[i]=round(CWtotaliend−istart+1)=round(CWtotalindexavail) {\\displaystyle \\begin{aligned} CW_{bin}[i] &= round \\begin{pmatrix} \\frac{CW_{total}}{i_{end} - i_{start} + 1} \\end{pmatrix} = round \\begin{pmatrix} \\frac{CW_{total}}{index_{avail}} \\end{pmatrix} \\\\ \\end{aligned} } CWbin[i]=round(iend−istart+1CWtotal)=round(indexavailCWtotal) 其中, iendi_{end}iend 、 istarti_{start}istart 是实际可用于存放数据子区域上下限的索引的,而 indexavailindex_{avail}indexavail 即为有效索引的数目。 注意分段码字长和存储格式位深(Bit Depth)并无强相关。若非要建立联系,则两者的关联只相关于取值范围。取 存储格式位长 为 DFbitsDF_{bits}DFbits 位(bit),保护位等效(非整)占用 indexsafeindex_{safe}indexsafe 个索引数目,有: avg(CWbin)=2DFbitsN=2DFbitsindexavail+indexsafe {\\displaystyle \\begin{aligned} avg(CW_{bin}) &= \\frac{2^{DF_{bits}}}{N} = \\frac{2^{DF_{bits}}}{index_{avail} +index_{safe}}\\\\ \\end{aligned} } avg(CWbin)=N2DFbits=indexavail+indexsafe2DFbits 例如,当采用 狭隘区间的 10-bit YUV 存储格式时,由于高低电平保护区域的存在,亮度值能够取值的范围其实是 I∈[64, 940]I \\in [64,\\ 940]I∈[64, 940] ,而等效到亮度可用的子区域索引上就相当于只有 Z[1, 14]\\mathbb{Z}[1,\\ 14]Z[1, 14] 可用。那么,就有 avg(CWbin)=64avg(CW_{bin}) = 64avg(CWbin)=64 ,子区域划分如图: 图 3-19 位深 10-bit 亮度映射码字子区域分段示意图(无修正) 则原线性转换就有分段表示: Iout=Maxout[i]−Minout[i]Maxin[i]−Minin[i]⋅(Iin−Minin[i])+Minin[i],i∈Z[istart, 
iend] {\\displaystyle \\begin{aligned} I_{out} &= \\frac{Max_{out}[i] - Min_{out}[i]}{Max_{in}[i] - Min_{in}[i]} \\cdot (I_{in} - Min_{in}[i]) + Min_{in}[i] \\quad , i \\in \\mathbb{Z}[i_{start} ,\\ i_{end}]\\\\ \\end{aligned} } Iout=Maxin[i]−Minin[i]Maxout[i]−Minout[i]⋅(Iin−Minin[i])+Minin[i],i∈Z[istart, iend] 即,输入和输出的一一对应分段映射。 现在,基本的分段构建完毕,在数据还原程度上有了可行的保证。但是,这一系列操作除了提供兼容性便利外,在数据量上却是无衰减的,所以 对空域冗余的压缩没有太大的帮助。 因此,具体采用过程中还要根据情况,从码字方面进行数据优化。 依赖亮度色度残差缩放 依赖亮度色度残差缩放,即 色度缩放(CS),顾名思义需要依靠亮度码字子区域划分后的分片进行放缩。不过这种放缩和亮度映射不太一样的一点在于, 它甚至并不和物理意义浅关联,而是存粹作为数据上的处理,来进行的数量级上的放缩。当然,色度本身是有意义的,这点不能混淆。 色度缩放依旧采用了码字分段处理,为了匹配亮度值对应码字区域的变化强度,分段即与亮度取值范围子区域 CWiCW_{i}CWi 码字的划分一致。以此计算分段内常量的 色度缩放因子(Chrome Scale Factor),来对 CWiCW_{i}CWi 内色度进行统一处理。 记 CWiCW_{i}CWi 子区域,编码阶段 色度缩放因子(Chrome Scaling Factor)为 Senc[i]S_{enc}[i]Senc[i] ,解码阶段 色度缩放因子为 Sdec[i]S_{dec}[i]Sdec[i] ,显然 Sdec[i]=Senc[i]−1S_{dec}[i] = {S_{enc}[i]}^{-1}Sdec[i]=Senc[i]−1 。若记区域内对应某采样(像素点)亮度 IinI_{in}Iin 的色度值(如采用 YUV 则是其 UV 分量,独立计算)为 CinC_{in}Cin ,而输出存储值(传输值)亮度 IoutI_{out}Iout 的色度值为 CoutC_{out}Cout ,则: {Cout=Cin ⋅Senc[i]Cin=Cout⋅Sdec[i] {\\displaystyle \\begin{aligned} \\begin{cases} C_{out} &= C_{in} \\ \\cdot S_{enc}[i] \\\\ C_{in} &= C_{out} \\cdot S_{dec}[i] \\end{cases} \\\\ \\end{aligned} } {CoutCin=Cin ⋅Senc[i]=Cout⋅Sdec[i] 而 Senc[i]S_{enc}[i]Senc[i] 和亮度保证相同的放缩比,有: Senc[i]=Maxout[i]−Minout[i]+ΔCRSMaxin[i]−Minin[i]=Sdec[i]−1 {\\displaystyle \\begin{aligned} S_{enc}[i] &= \\frac{Max_{out}[i] - Min_{out}[i] + \\Delta CRS}{Max_{in}[i] - Min_{in}[i]} = {S_{dec}[i]}^{-1} \\\\ \\end{aligned} } Senc[i]=Maxin[i]−Minin[i]Maxout[i]−Minout[i]+ΔCRS=Sdec[i]−1 其中, ΔCSR\\Delta CSRΔCSR 即为色度残差修正值,这个量为一个查表或其他方式处理的外部传参。虽然理论上, ΔCSR\\Delta CSRΔCSR 可以通过在 LMCS 过程中,以计算当前帧分块局部色度残差,或全局残差均值来代替,但这种做法消耗太多不必要算力而不太可取。另外,考虑到 ΔCSR\\Delta CSRΔCSR 在编解码中是个相对常用的概念,可以通过其他模块或方法解决,因此一般 不会在 LMCS 里进行处理。 此处我们认为 ΔCSR\\Delta CSRΔCSR 为一个色度放缩修正常量即可。可见色度缩放因子在子区域 CWiCW_{i}CWi 确认的情况下,是一个 固定值。 现在,LMCS 的理论准备就绪了。我们来看这种纯粹的规格技术是怎么运用的。即,子区域码字修正过程。 LMCS 技术在 SDR 和 HDR-HLG 格式中的应用 我们在对图片进行信息分离和提取时了解到,从频域来看,光亮度(灰度值)变化较大,且对亮度精度要求高的部分,一般在低频轮廓区域出现,占用整体数据量比例较小。而光亮度差异较小,变化平滑,且精度要求低的部分,往往是高频区域,占有大量的数据。此时,如果从光亮度数据,即空域角度出发,低频区域内的 局部亮度方差(Local Spatial Variance) 和高频区域相比,与 全局平均空域亮度方差(Global Average Spatial Variance) 的平均平方误差(MSE [Mean-Square Error])则会更大。 通过这一点,我们能够可以在一定程度上,只通过空域亮度数据,就确认是否是低频或高频区域,从而为其分配更少或更多的码字。使得对精度要求高的低频分割更精细,码字分片信息密度更高。而高频则更粗粒度,码字分片信息密度更低。提高精度并减少不必要的数据占用。 那么用于统计局部方差的样本区域该怎么选择呢?在 H.266/VVC 标准的执行委员会联合视频探索小组(JVET [Joint Video Exploration Team]) 推荐的 VVC 验证模型(VTM [VVC Test Model])官方工程实践里,仍然采用了基本卷积核(此处即代指正方形的无权重采样窗口),这种便于 GPU 加速改造的方式来进行中心点周边一定区域的关联性采样。 记 局部方差采样核(Local Variance Kernel) 为 LVKpLVK_{p}LVKp ,简称 方差核,中心为 p=(x, y)p = (x,\\ y)p=(x, y) ,窗口为 K×KK \\times KK×K 大小。取当前帧画面大小为 W×HW \\times HW×H ,有经验取值: K=floor(min(W, H)240)⋅2+1 {\\displaystyle \\begin{aligned} K &= floor \\begin{pmatrix} \\frac{min(W,\\ H)}{240} \\end{pmatrix} \\cdot 2 + 1 \\\\ \\end{aligned} } K=floor(240min(W, H))⋅2+1 则, LVKpLVK_{p}LVKp 对应核心点 ppp 的局部亮度方差 VarpVar_{p}Varp 为: Varp=1K2∑(Ik−Ip)2 {\\displaystyle \\begin{aligned} Var_p &= \\frac{1}{K^2} \\sum (I_k - I_p)^2 \\\\ \\end{aligned} } Varp=K21∑(Ik−Ip)2 于是,只要 确定当前各个分片的平均样本均值情况,就可以进行修正了。 另一个耗时位置在于亮度均方误(MSE)与全局差值比的计算,一个比较鲁棒的实现是,通过求取落于当前码字分段内,包含样本的 平均对数方差(Average Log Variance) 来代替处理,记为 Varavg[i]Var_{avg}[i]Varavg[i] ,有: Varavg[i]=∑log(Varp+1.0)Count[i] {\\displaystyle \\begin{aligned} Var_{avg}[i] &= \\frac{\\sum log(Var_p + 1.0)}{Count[i]} \\\\ \\end{aligned} } Varavg[i]=Count[i]∑log(Varp+1.0) 其中, Count[i]Count[i]Count[i] 为当前码字分段所包含的样本(即亮度落于区段内的像素点)总数。 而我们需要统一衡量所有码字分片的情况,因此需要归一化处理。记归一化后对应分片的平均对数方差为 
Norm[i]Norm[i]Norm[i] ,则: Norm[i]=Varavg[i]⋅N∑Varavg[i] {\\displaystyle \\begin{aligned} Norm[i] &= Var_{avg}[i] \\cdot \\frac{N}{\\sum Var_{avg}[i]} \\\\ \\end{aligned} } Norm[i]=Varavg[i]⋅∑Varavg[i]N 至此,我们即可根据归一化的 Norm[i]Norm[i]Norm[i] 取值,开展对当前帧的码字分片进行修正的工作了。取修正补偿为 Δ1[i]\\Delta_1[i]Δ1[i] 和 Δ2[i]\\Delta_2[i]Δ2[i] ,记码字分段子区域 CWiCW_{i}CWi 的包含的样本,占总样本比例为 Hist[i]Hist[i]Hist[i] ,且强制 Hist[i]∈[0, 0.4]Hist[i] \\in [0,\\ 0.4]Hist[i]∈[0, 0.4] 经验范围( 避免失衡 ),有: Hist[i]=max(min(0.0, Count[i]∑Count[i]), 0.4)Δ={Δ1[i]=round(10⋅Hist[i])Δ2[i]=round(20⋅Hist[i])∈Z {\\displaystyle \\begin{aligned} Hist[i] &= max(min(0.0,\\ \\frac{Count[i]}{\\sum Count[i]}),\\ 0.4) \\\\ \\Delta = &\\begin{cases} \\Delta_1[i] &= round(10 \\cdot Hist[i]) \\\\ \\Delta_2[i] &= round(20 \\cdot Hist[i]) \\end{cases} \\quad \\in \\mathbb{Z} \\\\ \\end{aligned} } Hist[i]Δ==max(min(0.0, ∑Count[i]Count[i]), 0.4){Δ1[i]Δ2[i]=round(10⋅Hist[i])=round(20⋅Hist[i])∈Z 则最终修正后的码字长 CWbin^[i]\\hat{CW_{bin}}[i]CWbin^[i] 与原长 CWbin[i]CW_{bin}[i]CWbin[i] 的关系为: CWbin^[i]={CWbin[i],Norm[i]=1.0CWbin[i]+Δ1[i], 0.8≤Norm[i]0.9CWbin[i]+Δ2[i], 0.0≤Norm[i]0.8CWbin[i]−Δ1[i], 1.1≤Norm[i]1.2CWbin[i]−Δ2[i], 1.2≤Norm[i] {\\displaystyle \\begin{aligned} \\hat{CW_{bin}}[i] & = { \\begin{cases} CW_{bin}[i] \\quad &, Norm[i] = 1.0 \\\\ CW_{bin}[i] + \\Delta_1[i] \\quad &,\\ 0.8 \\le Norm[i] CWbin^[i]=⎩⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎧CWbin[i]CWbin[i]+Δ1[i]CWbin[i]+Δ2[i]CWbin[i]−Δ1[i]CWbin[i]−Δ2[i],Norm[i]=1.0, 0.8≤Norm[i]0.9, 0.0≤Norm[i]0.8, 1.1≤Norm[i]1.2, 1.2≤Norm[i] 以新分片码字长度 CWbin^[i]\\hat{CW_{bin}}[i]CWbin^[i] 更新子区域 CWiCW_{i}CWi 后,在将修正后的码字范围,代入色度自适应处理,就组成了最终修正标准(注意只有输出码字子区域需要修正),只展示编码阶段,解码取逆运算: CWin[i]∈[Minin[i], Maxin[i]]=[Maxin[i−1]+1, Minin[i]+CWbin[i]]CWout[i]∈[Minout[i], Maxout[i]]=[Maxout[i−1]+1, Minout[i]+CWbin^[i]]Colorout={Iout=Maxout[i]−Minout[i]Maxin[i]−Minin[i]⋅(Iin−Minin[i])+Minin[i]Cout=Cin ⋅Maxout[i]−Minout[i]+ΔCRSMaxin[i]−Minin[i],i∈Z[istart, iend] {\\displaystyle \\begin{aligned} CW_{in}[i] &\\in [Min_{in}[i],\\ Max_{in}[i]] = [Max_{in}[i-1]+1,\\ Min_{in}[i]+ CW_{bin}[i]] \\\\ CW_{out}[i] &\\in [Min_{out}[i],\\ Max_{out}[i]] = [Max_{out}[i-1]+1,\\ Min_{out}[i]+\\hat{CW_{bin}}[i]] \\\\ Color_{out} & = { \\begin{cases} I_{out} &= \\frac{Max_{out}[i] - Min_{out}[i]}{Max_{in}[i] - Min_{in}[i]} \\cdot (I_{in} - Min_{in}[i]) + Min_{in}[i] \\\\ C_{out} &= C_{in} \\ \\cdot \\frac{Max_{out}[i] - Min_{out}[i] + \\Delta CRS}{Max_{in}[i] - Min_{in}[i]} \\end{cases} } \\quad , i \\in \\mathbb{Z}[i_{start} ,\\ i_{end}] \\\\ \\end{aligned} } CWin[i]CWout[i]Colorout∈[Minin[i], Maxin[i]]=[Maxin[i−1]+1, Minin[i]+CWbin[i]]∈[Minout[i], Maxout[i]]=[Maxout[i−1]+1, Minout[i]+CWbin^[i]]=⎩⎪⎪⎨⎪⎪⎧IoutCout=Maxin[i]−Minin[i]Maxout[i]−Minout[i]⋅(Iin−Minin[i])+Minin[i]=Cin ⋅Maxin[i]−Minin[i]Maxout[i]−Minout[i]+ΔCRS,i∈Z[istart, iend] 两式结合,即是 LMCS 关于 SDR 和 HDR-HLG 格式的修正公式。 依旧选 狭隘区间的 10-bit YUV 存储格式 取均匀样本为例,修正后的结果如下: 图 3-20 位深 10-bit 亮度映射码字子区域分段示意图(修正后) 当然,这一套修正方式,是针对 SDR 和 HDR-HLG 格式采用的 峰值信噪比(PSNR [Peak Signal-to-Noise Ratio]) 指标考核方式进行的。对于采用 加权峰值信噪比(wPSNR [weighted Peak Signal-to-Noise Ratio]) 指标考核的 HDR-PQ 格式,则需要另外的处理流程。具体本书不再行展开,感兴趣可参阅原 H.266/VVC 的 LMCS 补充意见稿 [31] 。 可见偏重于工程规格依赖的技术,和基于现实观察的理论进行迁移的技术,在实践上还是有较大处理细节关注点上的差异的。前者更注重和具体规格设置的匹配(如 LMCS 等),因此相对局限。而后者则更在意规律性质的还原(如 HOG、BDOF 等),对比之下更为通用。同时,前者理论约束较多会比较繁琐,但实现起来的复杂程度和最终效果,却会有较大的波动,即可以非常简单,也可以充满策略。 毕竟对于规格而言,重要的在于规定与限制,以便统一实现。但具体实现的过程,就因设计和目标而异了。 相对于空域两者皆有的情况,频域冗余处理则更偏重依赖传统数学工具,来达成压缩效果。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 
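为便于对照上文 LMCS 的分段线性亮度映射与色度缩放因子公式,这里给出一个不含码字修正策略的最小 C++ 示意。假设输入输出均为 10-bit 狭隘区间、各子区域码字长已给定;build_segments、lmcs_forward_luma、chroma_scale_factor 等名称均为演示用的假设写法,并非 VTM 的实际接口,且忽略了色度残差修正量 ΔCRS:

```cpp
#include <cstdio>
#include <vector>

// 码字分段子区域,对应上文的 [Min_in[i], Max_in[i]] 与 [Min_out[i], Max_out[i]]
struct LmcsSegment {
    int min_in, max_in;    // 输入码字子区域
    int min_out, max_out;  // 输出码字子区域(码字长可被修正)
};

// 由每段码字长构建分段表:Min[i] = Max[i-1] + 1,Max[i] = Min[i] + CW_bin[i] - 1
static std::vector<LmcsSegment> build_segments(const std::vector<int> &cw_in,
                                               const std::vector<int> &cw_out,
                                               int range_start /* 如狭隘区间的 64 */) {
    std::vector<LmcsSegment> segs;
    int in_base = range_start, out_base = range_start;
    for (size_t i = 0; i < cw_in.size(); ++i) {
        LmcsSegment s;
        s.min_in  = in_base;  s.max_in  = in_base  + cw_in[i]  - 1;
        s.min_out = out_base; s.max_out = out_base + cw_out[i] - 1;
        segs.push_back(s);
        in_base  = s.max_in  + 1;
        out_base = s.max_out + 1;
    }
    return segs;
}

// 分段线性亮度映射:在命中的子区域内做线性放缩,映射到对应输出子区域
static int lmcs_forward_luma(int luma_in, const std::vector<LmcsSegment> &segs) {
    for (const LmcsSegment &s : segs) {
        if (luma_in >= s.min_in && luma_in <= s.max_in) {
            float slope = float(s.max_out - s.min_out) / float(s.max_in - s.min_in);
            return int(s.min_out + slope * float(luma_in - s.min_in) + 0.5f);
        }
    }
    return luma_in; // 落在保护电平等区域外时原样返回
}

// 色度缩放因子:与亮度子区域保持相同的放缩比(编码端,解码端取其倒数)
static float chroma_scale_factor(int luma_in, const std::vector<LmcsSegment> &segs) {
    for (const LmcsSegment &s : segs) {
        if (luma_in >= s.min_in && luma_in <= s.max_in) {
            return float(s.max_out - s.min_out) / float(s.max_in - s.min_in);
        }
    }
    return 1.0f;
}

int main() {
    // 狭隘区间 10-bit:可用亮度约从 64 起,示意性地划分 14 个等长子区域(每段 64 个码字)
    std::vector<int> cw_in(14, 64);
    std::vector<int> cw_out(14, 64);
    cw_out[3] += 10; cw_out[10] -= 10;  // 模拟按局部方差修正后的码字再分配
    std::vector<LmcsSegment> segs = build_segments(cw_in, cw_out, 64);

    int y_in = 300;
    printf("luma %d -> %d, chroma scale = %.3f\n",
           y_in, lmcs_forward_luma(y_in, segs), chroma_scale_factor(y_in, segs));
    return 0;
}
```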
"},"Chapter_3/Language/cn/Docs_3_5.html":{"url":"Chapter_3/Language/cn/Docs_3_5.html","title":"3.5 频域冗余控制 - 基础变换编码","keywords":"","body":"3.4 空域冗余控制 - 基础光流算法与色度压缩 频域冗余目前仍然采用的是一些传统方式,近些年还没有太大的突破。而工程中对频域冗余的控制,确切的来说,是指从频域角度,对 残差信号(Residual Singnal) 进行频域分离后再 压缩所得数据,以富集变换信息,减小存储空间由于波动数据的不集中分布,而产生存储冗余的过程。 不过需要注意的是,频域冗余并不产生自被采样物理对象客观真实世界下的 原始信息(Original Infomation),而是来自不规律的数字信号的分散占用,导致的高熵存储。 因此,分离规律归类,提纯存储数据,并适当滤掉部分高频数据,才是降低频域冗余的关键。我们选择从帧数据的频域进行切入,即是利用空频分离(SFS)后,从频域能够直观体现数据密度的特点,来更好辅助应用中对数据进行的压缩处理。配合量化、熵编码等其他手段,降低原信息量级。 而这,便需要使用到傅立叶变换,及其衍生自同体系下的信息分离手段了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_5_1.html":{"url":"Chapter_3/Language/cn/Docs_3_5_1.html","title":"3.5.1 整数离散正余弦变换(DST/DCT)","keywords":"","body":"3.5.1 整数离散正余弦变换(IDST/IDCT) 整数离散正余弦变换(IDST/IDCT),顾名思义,就是将原本作用于浮点域的离散正余弦变换(DST/DCT),通过适当放缩量化到整数域进行。 在本章开始时,我们曾花了大量篇幅讲解信号分析的核心算法, 傅立叶变换(Fourier Transform),并简短的辨析了一维/二维离散傅立叶变换(1D/2D-DFT)。 回顾前文。有提到,如果取任意点 P⃗(x,y)\\vec{P}(x,y)P⃗(x,y) 可取 x∈[0, 1, ⋯, W]x \\in [0, \\ 1, \\ \\cdots , \\ W]x∈[0, 1, ⋯, W] , y∈[0, 1, ⋯, H]y \\in [0, \\ 1, \\ \\cdots , \\ H]y∈[0, 1, ⋯, H] ,只取整数位置。同时, u∈[−U2, ⋯, +U2]u \\in [-\\tfrac{U}{2}, \\ \\cdots , \\ +\\tfrac{U}{2}]u∈[−2U, ⋯, +2U] 、 v∈[−V2, ⋯, +V2]v \\in [-\\tfrac{V}{2}, \\ \\cdots , \\ +\\tfrac{V}{2}]v∈[−2V, ⋯, +2V] ,有离散 k⃗∈[k0⃗, k1⃗, ⋯, kn⃗]\\vec{k} \\in [\\vec{k_0}, \\ \\vec{k_1}, \\ \\cdots, \\ \\vec{k_{n}}]k⃗∈[k0⃗, k1⃗, ⋯, kn⃗] , n=UV=HWn = UV = HWn=UV=HW ,则: SDD: f^(u,v)=∑x=0W∑y=0Hf(x,y)⋅e−i(ux+vy)FDD: f(x,y)=1U⋅V∑u=−U/2+U/2∑v=−V/2+V/2f^(u,v)⋅Fω(x,y) {\\displaystyle \\begin{aligned} SDD: \\ \\ \\hat{f}(u,v) &= \\sum_{x = 0}^{W} \\sum_{y = 0}^{H} f(x,y) \\cdot e^{-i (ux+vy)} \\\\ FDD: \\ \\ f(x,y) &= \\frac{1}{U\\cdot V} \\sum_{u=-U/2}^{+U/2} \\sum_{v= -V/2}^{+V/2} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\\\ \\end{aligned} } SDD: f^(u,v)FDD: f(x,y)=x=0∑Wy=0∑Hf(x,y)⋅e−i(ux+vy)=U⋅V1u=−U/2∑+U/2v=−V/2∑+V/2f^(u,v)⋅Fω(x,y) 即由空域离散化(SDD)与频域离散化(FDD)共同构成空频离散化(SFD [Spacial Frequency Discrete])表达的 二维离散傅立叶(2D-DFT),如下所示: Fω=[Fk0⃗,Fk1⃗,⋯,Fkn⃗]f^(u,v)=∑x=0W∑y=0Hf(x,y)⋅e−i(ux+vy) ⇔ f(x,y)=1U⋅V∑u=−U/2+U/2∑v=−V/2+V/2f^(u,v)⋅Fω(x,y) {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\vec{k_0}},&{\\mathcal {F}}_{\\vec{k_1}},\\cdots,{\\mathcal {F}}_{\\vec{k_n}}] \\\\ \\hat{f}(u,v) = \\sum_{x = 0}^{W} \\sum_{y = 0}^{H} f(x,y) \\cdot e^{-i (ux+vy)} \\ \\ \\ \\ \\ \\Leftrightarrow & \\ \\ \\ \\ \\ f(x,y) = \\frac{1}{U\\cdot V} \\sum_{u=-U/2}^{+U/2} \\sum_{v= -V/2}^{+V/2} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\\\ \\end{aligned} } Fω=[Fk0⃗,f^(u,v)=x=0∑Wy=0∑Hf(x,y)⋅e−i(ux+vy) ⇔Fk1⃗,⋯,Fkn⃗] f(x,y)=U⋅V1u=−U/2∑+U/2v=−V/2∑+V/2f^(u,v)⋅Fω(x,y) 虽然当时,并没有约束复平面波 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 波矢 k⃗{\\vec{k}}k⃗ 的方向,即方向可以是平面内任意角度与大小。但对于周期(范围)确定情况下,构成傅立叶变换的基底函数族 Fω=[Fk0⃗, Fk1⃗,⋯,Fkn⃗]{\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\vec{k_0}},\\ {\\mathcal {F}}_{\\vec{k_1}},\\cdots,{\\mathcal {F}}_{\\vec{k_n}}]Fω=[Fk0⃗, Fk1⃗,⋯,Fkn⃗] ,基底函数(即原函数拆解的目标平面波组)的选取,却是可以被 一定程度约束的。 如果我们约束,取周期 T=2πnT = 2 \\pi nT=2πn 的标准正余弦函数(Sine/Cosine),按照 四分之一周期 的步长 Step=π2Step = \\tfrac{\\pi}2{}Step=2π 偏移得到的 Fξ(x){\\mathcal {F}}_{\\xi}(x)Fξ(x) 和 Fη(y){\\mathcal {F}}_{\\eta}(y)Fη(y) 构成波矢 k⃗{\\vec{k}}k⃗ 。选取沿着 xxx 轴方向的一维波 Fξ(x){\\mathcal {F}}_{\\xi}(x)Fξ(x) 和沿着 yyy 轴方向的一维波 Fη(y){\\mathcal {F}}_{\\eta}(y)Fη(y) 组成的 16n16^n16n 个定向复平面波 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 集合,为当前函数的基底函数族。 那么,我们就能够在 补齐周期数据 后,使用 快速傅立叶变换(FFT) 来求解了。 
但这样的做法,适用于分析,却并不适合冗余处理场景。 即使运用快速傅立叶变换,也仍然会有较大的算力消耗。且由于完整作用于任意数据源信号,所以不能保证基底函数族整体层面的规律性,从而无法提炼出统一的矩阵化算子。这让直接使用传统分析算法的方式,在 GPU 加速方面尽显劣势。 考虑到冗余压缩,并不要求保证数据帧完整不可分的输入,且精度也相对分析场景要求较低。如果能够适当的利用指数函数三角函数化,其本身的周期规律和标准化约束,建立基底整体的规律性,来契合傅立叶变换的性质。就能够在消减不必要参数(常量固定)并限定生效范围后,实现对离散傅立叶变化的常量化矩阵运算。建立卷积核,加速压缩过程。 因此,首选的出发点,就是 泛化离散正余弦变换(DST/DCT)到任何已知周期(范围)的数据信号源。 离散正余弦变换(DST/DCT)的泛化 沿用前文设定,记构成原信号函数 s(t)s(t)s(t) 的复指数函数 Sω(t){\\mathcal {S}}_{\\omega}(t)Sω(t) 有角频率(角速度)为 ωn=2πnT{\\omega_n} = \\tfrac{2\\pi n}{T}ωn=T2πn 。有傅立叶函数: s(t)=1N∑n=0Na^ω⋅cos(ωt)+i⋅b^ω⋅sin(ωt)a^ω=s^(−ω)+s^(ω) b^ω=1i⋅(s^(−ω)−s^(ω)) {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{a}_{\\omega} \\cdot cos(\\omega t) + i \\cdot \\hat{b}_{\\omega} \\cdot sin(\\omega t)\\\\ \\hat{a}_{\\omega} &= \\hat{s}(-\\omega) + \\hat{s}(\\omega) \\ \\ \\ \\ \\ \\hat{b}_{\\omega} = \\tfrac{1}{i} \\cdot (\\hat{s}(-\\omega)-\\hat{s}(\\omega)) \\\\ \\end{aligned} } s(t)a^ω=N1n=0∑Na^ω⋅cos(ωt)+i⋅b^ω⋅sin(ωt)=s^(−ω)+s^(ω) b^ω=i1⋅(s^(−ω)−s^(ω)) 按约束条件,信号函数波长 T=2πT = 2 \\piT=2π 做步长 Step=π2Step = \\tfrac{\\pi}{2}Step=2π 的可变 n∈[0, N−1]n \\in [0, \\ N - 1]n∈[0, N−1] 等分,使复指数函数 Sω(t)=Sω(n){\\mathcal {S}}_{\\omega}(t) = {\\mathcal {S}}_{\\omega}(n)Sω(t)=Sω(n) 。则存在 k∈[0, N−1]k \\in [0, \\ N-1]k∈[0, N−1] 有 ωn=2πnT=2πkN=ωk{\\omega_n} = \\tfrac{2\\pi n}{T} = \\tfrac{2\\pi k}{N} = {\\omega_k}ωn=T2πn=N2πk=ωk 简化表示为 ω{\\omega}ω ,可对原式做三角函数离散化处理(详细推导回顾本章首节)。 当输入信号满足奇函数特性时,可得 标准正弦的离散正弦变换(DST)的傅立叶展式 为: s(n)=1N∑k=0N−1s^(k)⋅sin(2πnNk)s^(k)=∑n=0N−1s(n)⋅sin(−2πnNk) {\\displaystyle \\begin{aligned} s(n) &= \\frac{1}{N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot sin(\\tfrac{2 \\pi n}{N} k) \\\\ \\hat{s}(k) &= \\sum_{n = 0}^{N-1} s(n) \\cdot sin(-\\tfrac{2 \\pi n}{N} k ) \\\\ \\end{aligned} } s(n)s^(k)=N1k=0∑N−1s^(k)⋅sin(N2πnk)=n=0∑N−1s(n)⋅sin(−N2πnk) 当输入信号满足偶函数特性时,有 标准余弦的离散余弦变换(DCT)的傅立叶展式 为: s(n)=1N∑k=0N−1s^(k)⋅cos(2πnNk)s^(k)=∑n=0N−1s(n)⋅cos(−2πnNk) {\\displaystyle \\begin{aligned} s(n) &= \\frac{1}{N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot cos(\\tfrac{2 \\pi n}{N} k) \\\\ \\hat{s}(k) &= \\sum_{n = 0}^{N-1} s(n) \\cdot cos(-\\tfrac{2 \\pi n}{N} k ) \\\\ \\end{aligned} } s(n)s^(k)=N1k=0∑N−1s^(k)⋅cos(N2πnk)=n=0∑N−1s(n)⋅cos(−N2πnk) 但是,自然信号是不分奇偶的,想要将公式适用范围扩大,就需要根据正余弦傅立叶变换要求,对输入信号进行不改变原始数据的扩充调整。根据选择作为基底的标准函数正余弦的差异,人为构造 满足条件输入的方法论,被分为 离散正弦变换(DST)分解 和 离散余弦变换(DCT)分解,两套实现。 假设原信号函数 s(t)=s(n)s(t) = s(n)s(t)=s(n) 在 n∈Z[0, N−1]n \\in \\mathbb{Z} [0, \\ N - 1]n∈Z[0, N−1] 的各节点位置,有样本采样 S∈[S0, SN−1]S \\in [S_0, \\ S_{N - 1}]S∈[S0, SN−1] ,取 N=4N = 4N=4 模拟最小子块(即实际技术被使用时的通用情况)。如图: 图 3-21 事例样本取值与切片索引关系图示 当目标分解为 DST 时,我们需要平移原数据 +32Step+\\tfrac{3}{2} Step+23Step 个步长,并补充中心原点 O0O_0O0 后,再做基于中心原点 O0=(0, 0)O_0 = (0,\\ 0)O0=(0, 0) 的映射。如此才能保证,补充的映射数据和旧数据,能够组成新的等步长数据组,满足离散化的处理条件。得到如下新集合(蓝色为补充数据,红色为原数据): 图 3-22 事例样本目标 DST 补充后与切片索引关系图示 新的样本集,数据量较原有数据翻了一倍多。但只有 轴正向的取值有意义。所以,采用 DST 类型分解,在扩充后,周期跨度都变为了 T=2N+1T= 2N + 1T=2N+1 ,且原离散展式 只有 n∈[1, N]n \\in [1, \\ N]n∈[1, N] 的部分是有效的。我们可以将偏移的 +1×Step+1 \\times Step+1×Step 划到式中处理,则 nnn 的取值范围就仍然可以保持为 n∈Z[0, N−1]n \\in \\mathbb{Z} [0, \\ N - 1]n∈Z[0, N−1] 。 不过考虑到 DST 目标是为了处理奇数阶信号源分解,为避免 sin(0)=0sin(0)=0sin(0)=0 值无意义的问题,会取 k∈[1, N]k \\in [1, \\ N]k∈[1, N] 的范围,并选用标准正弦向左移动 −12π-\\tfrac{1}{2} \\pi−21π 的偏移作为 基底正弦族。因此,为了统一,对 nnn 采用直接包含偏移 +1×Step+1 \\times Step+1×Step 的取值,使得 nnn 有 n∈Z[1, N]n \\in \\mathbb{Z} [1, \\ N]n∈Z[1, N] 。需要注意这个细节差异。 当目标分解为 DCT 时,需要在基于 y=s(n)y=s(n)y=s(n) 轴对称前,先行平移元数据 +12Step+\\tfrac{1}{2} Step+21Step 个步长。得到如下新集合(蓝色为补充数据,红色为原数据): 图 3-23 事例样本目标 DCT 补充后与切片索引关系图示 新的样本集,数据量较原有数据翻了一倍。同样只有 xxx 轴正向的取值有意义。所以,采用 DCT 类型分解,在扩充后,周期跨度都变为了 T=2NT= 2NT=2N ,且原离散展式 只有 n∈[12, 
N−12]n \\in [\\tfrac{1}{2}, \\ N - \\tfrac{1}{2}]n∈[21, N−21] 的部分是有效的。而由于非整数索引 nnn 不利于匹配原值,我们将偏移的 +12Step+\\tfrac{1}{2} Step+21Step 划到式中处理,则 nnn 的取值范围就仍然可以保持为 n∈Z[0, N−1]n \\in \\mathbb{Z} [0, \\ N - 1]n∈Z[0, N−1] 。 于是,结合两种分解,有: DST:{s(n)=12N+1∑k=1Ns^(k)⋅sin(2π(k−12)2N+1n)=12N+1∑k=1N−(−12N+1⋅s^(k))⋅sin(πn(2k−1)2N+1)s^(k)=2⋅∑n=1Ns(n)⋅sin(−2π(k−12)2N+1n)=2⋅∑n=1Ns(n)⋅sin(−πn(2k−1)2N+1)DCT:{s(n)=12N∑k=0N−1s^(k)⋅cos(2π(n+12)2Nk)=12N∑k=0N−1(12N⋅s^(k))⋅cos(π(2n+1)k2N)s^(k)=2⋅∑n=0N−1s(n+12)⋅cos(−2π(n+12)2Nk)=2⋅∑n=0N−1s(2n+12)⋅cos(π(2n+1)k2N) {\\displaystyle \\begin{aligned} DST:& { \\begin{cases} s(n) &= \\frac{1}{2N+1}\\sum_{k = 1}^{N} \\hat{s}(k) \\cdot sin(\\tfrac{2 \\pi (k-\\tfrac{1}{2})}{2N+1} n) = \\sqrt{\\frac{1}{2N+1}} \\sum_{k = 1}^{N} -(-\\sqrt{\\frac{1}{2N+1}} \\cdot \\hat{s}(k)) \\cdot sin( \\tfrac{\\pi n (2k-1)}{2N+1} ) \\\\ \\hat{s}(k) &= 2 \\cdot \\sum_{n = 1}^{N} s(n) \\cdot sin(-\\tfrac{2 \\pi (k-\\tfrac{1}{2})}{2N+1} n ) = 2 \\cdot \\sum_{n = 1}^{N} s(n) \\cdot sin(-\\tfrac{\\pi n (2k-1)}{2N+1} ) \\end{cases} } \\\\ DCT:& { \\begin{cases} s(n) &= \\frac{1}{2N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot cos(\\tfrac{2 \\pi (n+\\tfrac{1}{2})}{2N} k) = \\sqrt{\\frac{1}{2N}} \\sum_{k = 0}^{N-1} (\\sqrt{\\frac{1}{2N}} \\cdot \\hat{s}(k)) \\cdot cos( \\tfrac{\\pi (2n+1) k}{2N} ) \\\\ \\hat{s}(k) &= 2 \\cdot \\sum_{n = 0}^{N-1} s(n+\\tfrac{1}{2}) \\cdot cos(-\\tfrac{2 \\pi (n+\\tfrac{1}{2})}{2N} k ) = 2 \\cdot \\sum_{n = 0}^{N-1} s(\\tfrac{2n+1}{2}) \\cdot cos(\\tfrac{\\pi (2n+1) k}{2N} ) \\end{cases} } \\\\ \\end{aligned} } DST:DCT:⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧s(n)s^(k)=2N+11k=1∑Ns^(k)⋅sin(2N+12π(k−21)n)=√2N+11k=1∑N−(−√2N+11⋅s^(k))⋅sin(2N+1πn(2k−1))=2⋅n=1∑Ns(n)⋅sin(−2N+12π(k−21)n)=2⋅n=1∑Ns(n)⋅sin(−2N+1πn(2k−1))⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧s(n)s^(k)=2N1k=0∑N−1s^(k)⋅cos(2N2π(n+21)k)=√2N1k=0∑N−1(√2N1⋅s^(k))⋅cos(2Nπ(2n+1)k)=2⋅n=0∑N−1s(n+21)⋅cos(−2N2π(n+21)k)=2⋅n=0∑N−1s(22n+1)⋅cos(2Nπ(2n+1)k) 不过,由于 DCT 采用了 非整数步长,当 k=0k=0k=0 时并不一定有拟合的曲线使得 s^(0)=0\\hat{s}(0) = 0s^(0)=0 ,且 偶函数特点使 s^(0)\\hat{s}(0)s^(0) 在上式中被重复计算,因此需要针对变换后的 s(n)s(n)s(n) 剔除一次的 s^(0)\\hat{s}(0)s^(0) 均值累积,所以: DCT∣k=0:{s(n)=1N⋅s^(0)+12N∑k=1N−1(12N⋅s^(k))⋅cos(π(2n+1)k2N)=12N(22N⋅s^(0))+∑k=1N−1(12N⋅s^(k))⋅cos(π(2n+1)k2N))s^(k)=2⋅∑n=0N−1s(2n+12)⋅cos(π(2n+1)k2N) {\\displaystyle \\begin{aligned} DCT|_{k = 0}:& { \\begin{cases} s(n) &= \\frac{1}{N}\\cdot \\hat{s}(0)+ \\sqrt{\\frac{1}{2N}} \\sum_{k = 1}^{N-1} (\\sqrt{\\frac{1}{2N}} \\cdot \\hat{s}(k)) \\cdot cos( \\tfrac{\\pi (2n+1) k}{2N} ) \\\\ &= \\sqrt{\\frac{1}{2N}} ( \\frac{2}{\\sqrt{2N}} \\cdot \\hat{s}(0)) + \\sum_{k = 1}^{N-1} (\\sqrt{\\frac{1}{2N}} \\cdot \\hat{s}(k)) \\cdot cos( \\tfrac{\\pi (2n+1) k}{2N} ) ) \\\\ \\hat{s}(k) &= 2 \\cdot \\sum_{n = 0}^{N-1} s(\\tfrac{2n+1}{2}) \\cdot cos(\\tfrac{\\pi (2n+1) k}{2N} ) \\end{cases} } \\\\ \\end{aligned} } DCT∣k=0:⎩⎪⎪⎪⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎪⎪⎪⎧s(n)s^(k)=N1⋅s^(0)+√2N1k=1∑N−1(√2N1⋅s^(k))⋅cos(2Nπ(2n+1)k)=√2N1(√2N2⋅s^(0))+k=1∑N−1(√2N1⋅s^(k))⋅cos(2Nπ(2n+1)k))=2⋅n=0∑N−1s(22n+1)⋅cos(2Nπ(2n+1)k) 上式中,对原信号函数 s(n)s(n)s(n) 的 DST 均值常量 12N+1\\frac{1}{2N+1}2N+11 拆解为 (12N+1)2\\begin{pmatrix} \\sqrt{\\frac{1}{2N+1}} \\end{pmatrix} ^2(√2N+11)2 两部分,而 DCT 均值常量 12N\\frac{1}{2N}2N1 拆解为 (12N)2\\begin{pmatrix} \\sqrt{\\frac{1}{2N}} \\end{pmatrix} ^2(√2N1)2 两部分。其目的是为了,通过分别分配到各自展开式和傅立叶解上,来保证工程化后的算子,在 正逆运算上的统一。 因此,我们取: DST:Xk=−12N+1⋅s^(k)=12N+1⋅s^(−k)DCT:Xk=12N⋅s^(k)&X0=22N⋅s^(k) {\\displaystyle \\begin{aligned} DST:& X_k = -\\sqrt{\\frac{1}{2N+1}} \\cdot \\hat{s}(k) = \\sqrt{\\frac{1}{2N+1}} \\cdot \\hat{s}(-k) \\\\ DCT:& X_k = \\frac{1}{\\sqrt{2N}} \\cdot \\hat{s}(k) \\quad \\& \\quad X_0 = 
\\frac{2}{\\sqrt{2N}} \\cdot \\hat{s}(k) \\\\ \\end{aligned} } DST:DCT:Xk=−√2N+11⋅s^(k)=√2N+11⋅s^(−k)Xk=√2N1⋅s^(k)&X0=√2N2⋅s^(k) 代入即可得到,原 离散正弦变换(DST)的工程表达式 : k∈[1, N]n∈[1, N]DST:{Sn=12N+1∑k=1NXk⋅sin(πn(2k−1)2N+1)Xk=22N+1⋅∑n=1NSn⋅sin(πn(2k−1)2N+1) {\\displaystyle \\begin{aligned} &k \\in [1,\\ N] \\quad \\quad n \\in [1,\\ N] \\\\ DST:& { \\begin{cases} S_n &= \\frac{1}{\\sqrt{2N+1}} \\sum_{k = 1}^{N} X_k \\cdot sin( \\tfrac{\\pi n(2k-1) }{2N+1} ) \\\\ X_k &= \\frac{2}{\\sqrt{2N+1}} \\cdot \\sum_{n = 1}^{N} S_n \\cdot sin(\\tfrac{\\pi n(2k-1)}{2N+1} ) \\end{cases} } \\\\ \\end{aligned} } DST:k∈[1, N]n∈[1, N]⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧SnXk=√2N+11k=1∑NXk⋅sin(2N+1πn(2k−1))=√2N+12⋅n=1∑NSn⋅sin(2N+1πn(2k−1)) 和,原 离散余弦变换(DCT)的工程表达式 为: k∈[0, N−1]n∈[0, N−1]DCT:{Sn=12N∑k=0N−1Xk⋅cos(π(2n+1)k2N)Xk=22N⋅∑n=0N−1Sn⋅cos(π(2n+1)k2N) ,k≥1Xk=2⋅22N⋅∑n=0N−1Sn⋅cos(π(2n+1)k2N) ,k=0 {\\displaystyle \\begin{aligned} &k \\in [0,\\ N-1] \\quad \\quad n \\in [0,\\ N - 1] \\\\ DCT:& { \\begin{cases} S_n &= \\frac{1}{\\sqrt{2N}} \\sum_{k = 0}^{N-1} X_k \\cdot cos( \\tfrac{\\pi (2n+1) k}{2N} ) \\\\ X_k &= \\frac{2}{\\sqrt{2N}} \\cdot \\sum_{n = 0}^{N-1} S_n \\cdot cos(\\tfrac{\\pi (2n+1) k}{2N} )\\ , k \\ge 1 \\\\ X_k &= \\frac{2 \\cdot 2}{\\sqrt{2N}} \\cdot \\sum_{n = 0}^{N-1} S_n \\cdot cos(\\tfrac{\\pi (2n+1) k}{2N} )\\ , k = 0 \\end{cases} } \\\\ \\end{aligned} } DCT:k∈[0, N−1]n∈[0, N−1]⎩⎪⎪⎪⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎪⎪⎪⎧SnXkXk=√2N1k=0∑N−1Xk⋅cos(2Nπ(2n+1)k)=√2N2⋅n=0∑N−1Sn⋅cos(2Nπ(2n+1)k) ,k≥1=√2N2⋅2⋅n=0∑N−1Sn⋅cos(2Nπ(2n+1)k) ,k=0 这就是信号处理上经常使用的,泛化离散正余弦变换公式组。 从上面的过程中可以发现,我们在傅立叶基底函数族的选取上,实际限定了函数的相位、周期,并约束了原信号的特性。如果在初始相位和原信号特性上做调整,最终的结果也会有所差异。从数学工具角度来看,这种变化 最终会产生 8 种 DST 和 8 种 DCT 的变体,以分别应对实虚部奇偶阶数和初始相位不同时的快速计算。但由于工程化上需要力求简洁和相似(形似)的表达。因此,相对于其他几种的组合,我们最终采用的公式组中的两类,来用于各自条件输入的统一处理。 现在,GPU 加速的理论已准备就绪,我们来看算子是怎么获取的。 整数离散正弦变换(IDST)的 GPU 矩阵算子 首先,将离散正弦变换扩展到二维情况,有: k(u,v)&p(x,y)∈[(1, 1), (N, N)]DST:Xk(u,v)=(22N+1)2⋅∑p=(1,1)(N,N)Sp(x,y)⋅sin(2u−12N+1πx)⋅sin(2v−12N+1πy) {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(1,\\ 1),\\ (N,\\ N)] \\\\ DST: X_k(u,v) &= \\begin{pmatrix} \\frac{2}{\\sqrt{2N+1}} \\end{pmatrix} ^2 \\cdot \\sum_{p = (1,1)}^{(N,N)}S_p(x,y) \\cdot sin(\\tfrac{2u-1}{2N+1} \\pi x) \\cdot sin(\\tfrac{2v-1}{2N+1} \\pi y) \\\\ \\end{aligned} } DST:Xk(u,v)k(u,v)&p(x,y)∈[(1, 1), (N, N)]=(√2N+12)2⋅p=(1,1)∑(N,N)Sp(x,y)⋅sin(2N+12u−1πx)⋅sin(2N+12v−1πy) 考虑可构成卷积核的子块最小大小为 4×44 \\times 44×4 ,则有 N=4N=4N=4 使上式变为: k(u,v)&p(x,y)∈[(1, 1), (4, 4)]DST:Xk(u,v)=49⋅∑p=(1,1)(4,4)Sp(x,y)⋅sin(2u−19πx)⋅sin(2v−19πy) {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(1,\\ 1),\\ (4,\\ 4)] \\\\ DST: X_k(u,v) &= \\frac{4}{9} \\cdot \\sum_{p = (1,1)}^{(4,4)}S_p(x,y) \\cdot sin(\\tfrac{2u-1}{9} \\pi x) \\cdot sin(\\tfrac{2v-1}{9} \\pi y) \\\\ \\end{aligned} } DST:Xk(u,v)k(u,v)&p(x,y)∈[(1, 1), (4, 4)]=94⋅p=(1,1)∑(4,4)Sp(x,y)⋅sin(92u−1πx)⋅sin(92v−1πy) 如此,就可以矩阵表示 4×44 \\times 44×4 的 DST 变化为: DST4×4:Xk(u,v)∣v=KDST⋅Sp(x,y)=[23⋅∑v=14(23⋅∑u=14sin(2u−19πx))⋅sin(2v−19πy)]⋅Sp(x,y) {\\displaystyle \\begin{aligned} DST_{4 \\times 4}: \\\\ X_k(u,v)|_v &= K_{DST} \\cdot S_p(x, y) \\\\ &= \\begin{bmatrix} &\\frac{2}{3} \\cdot \\sum_{v=1}^4 \\begin{pmatrix} \\frac{2}{3} \\cdot \\sum_{u=1}^4 sin(\\tfrac{2u-1}{9} \\pi x) \\end{pmatrix} \\cdot sin(\\tfrac{2v-1}{9} \\pi y) \\end{bmatrix} \\cdot S_p(x, y) \\\\ \\end{aligned} } DST4×4:Xk(u,v)∣v=KDST⋅Sp(x,y)=[32⋅v=1∑4(32⋅u=1∑4sin(92u−1πx))⋅sin(92v−1πy)]⋅Sp(x,y) 即有: 
KDST=23[sin(19π),sin(29π),sin(39π),sin(49π)sin(39π),sin(69π),sin(99π),sin(129π)sin(59π),sin(109π),sin(159π),sin(209π)sin(79π),sin(149π),sin(219π),sin(289π)]=23[sin(19π),sin(29π),sin(39π),sin(49π)sin(39π),sin(39π),0,−sin(39π)sin(49π),−sin(19π),−sin(39π),sin(29π)sin(29π),−sin(49π),sin(39π),−sin(19π)] {\\displaystyle \\begin{aligned} K_{DST}&= \\frac{2}{3} \\begin{bmatrix} &sin(\\tfrac{1}{9}\\pi) &, \\quad sin(\\tfrac{2}{9}\\pi) &, \\quad sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{4}{9}\\pi) \\\\ &sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{6}{9}\\pi) &, \\quad sin(\\tfrac{9}{9}\\pi) &, \\quad sin(\\tfrac{12}{9}\\pi) \\\\ &sin(\\tfrac{5}{9}\\pi) &, \\quad sin(\\tfrac{10}{9}\\pi) &, \\quad sin(\\tfrac{15}{9}\\pi) &, \\quad sin(\\tfrac{20}{9}\\pi) \\\\ &sin(\\tfrac{7}{9}\\pi) &, \\quad sin(\\tfrac{14}{9}\\pi) &, \\quad sin(\\tfrac{21}{9}\\pi) &, \\quad sin(\\tfrac{28}{9}\\pi) \\end{bmatrix} \\\\ &= \\frac{2}{3} \\begin{bmatrix} &sin(\\tfrac{1}{9}\\pi) &, \\quad sin(\\tfrac{2}{9}\\pi) &, \\quad sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{4}{9}\\pi) \\\\ &sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{3}{9}\\pi) &, \\quad \\quad 0 &, -sin(\\tfrac{3}{9}\\pi) \\\\ &sin(\\tfrac{4}{9}\\pi) &, -sin(\\tfrac{1}{9}\\pi) &, -sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{2}{9}\\pi) \\\\ &sin(\\tfrac{2}{9}\\pi) &, -sin(\\tfrac{4}{9}\\pi) &, \\quad sin(\\tfrac{3}{9}\\pi) &, -sin(\\tfrac{1}{9}\\pi) \\end{bmatrix} \\end{aligned} } KDST=32⎣⎢⎢⎡sin(91π)sin(93π)sin(95π)sin(97π),sin(92π),sin(96π),sin(910π),sin(914π),sin(93π),sin(99π),sin(915π),sin(921π),sin(94π),sin(912π),sin(920π),sin(928π)⎦⎥⎥⎤=32⎣⎢⎢⎡sin(91π)sin(93π)sin(94π)sin(92π),sin(92π),sin(93π),−sin(91π),−sin(94π),sin(93π),0,−sin(93π),sin(93π),sin(94π),−sin(93π),sin(92π),−sin(91π)⎦⎥⎥⎤ 其中, KDSTK_{DST}KDST 就是 DST 的卷积核算子,但目前还是 浮点数的形式。浮点数矩阵不利于 GPU 算力的节省,因此还需要整数化。考虑 KDSTK_{DST}KDST 本身作用在实际像素取值上,而像素值的数据格式是以整数形式离散化存储的,具有位深数据范围中值记为常量 DDD 。 比如,8-bit 位深格式可取范围为 [0, 255][0,\\ 255][0, 255] ,就有 D=128D=128D=128 取值。我们可以利用这一特点来对原数据进行放缩,并四舍五入取整。 记整数化后的 KDSTK_{DST}KDST 为 K^DST\\hat{K}_{DST}K^DST 则: K^DST≈[29,55,74,8474,74,0,−7484,−29,−74,5555,−84,74,−29]=D⋅KDST {\\displaystyle \\begin{aligned} \\hat{K}_{DST}&\\approx \\begin{bmatrix} &29 &, \\quad 55 &, \\quad 74 &, \\quad 84 \\\\ &74 &, \\quad 74 &, \\quad 0 &, -74 \\\\ &84 &, -29 &, -74 &, \\quad 55 \\\\ &55 &, -84 &, \\quad 74 &, -29 \\end{bmatrix} = D \\cdot K_{DST} \\\\ \\end{aligned} } K^DST≈⎣⎢⎢⎡29748455,55,74,−29,−84,74,0,−74,74,84,−74,55,−29⎦⎥⎥⎤=D⋅KDST 原 DST 的算子,即可以转化为如下表示: Xk(u,v)∣v=1D⋅D⋅KDST⋅Sp(x,y)=1D⋅K^DST⋅Sp(x,y) {\\displaystyle \\begin{aligned} X_k(u,v)|_v &= \\frac{1}{D} \\cdot D \\cdot K_{DST} \\cdot S_p(x, y) \\\\ &= \\frac{1}{D} \\cdot \\hat{K}_{DST} \\cdot S_p(x, y) \\\\ \\end{aligned} } Xk(u,v)∣v=D1⋅D⋅KDST⋅Sp(x,y)=D1⋅K^DST⋅Sp(x,y) 当然,这里单独计算了分离后波矢 k⃗=(u,v){\\vec{k}}=(u,v)k⃗=(u,v) 对应平面波的权重 Xk(u,v)X_k(u,v)Xk(u,v) ,那么对于整个 4×44 \\times 44×4 区域所有的平面波权重(即傅立叶解)就有 等价矩阵 : Xk∣4×4=(1D)2⋅K^DST⋅Sp∣4×4⋅K^DSTT {\\displaystyle \\begin{aligned} X_k|_{4 \\times 4} &= \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot \\hat{K}_{DST} \\cdot S_p|_{4 \\times 4} \\cdot {\\hat{K}_{DST}}^T \\\\ \\end{aligned} } Xk∣4×4=(D1)2⋅K^DST⋅Sp∣4×4⋅K^DSTT 精简一下,即可写为: Xk=(1D)2⋅K^DST⋅Sp⋅K^DSTT {\\displaystyle \\begin{aligned} X_k &= \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot \\hat{K}_{DST} \\cdot S_p\\cdot {\\hat{K}_{DST}}^T \\\\ \\end{aligned} } Xk=(D1)2⋅K^DST⋅Sp⋅K^DSTT 这个即为 整数正弦变化(IDST)核心公式,而 K^DST\\hat{K}_{DST}K^DST 则被称为 整数正弦变化的基本算子(IDST Opt)。显然,在已知 SpS_pSp 和存储范围 DDD 的情况下,还是非常容易求得 XkX_kXk 的。而对应的 GPU 程序片也很简单,基本可当作滑动窗口移动步长 K=4K = 
4K=4 的固定算子乘法运算,就不再复写了。 整数离散正弦变换(IDST)的 GPU 矩阵算子 同理于 IDST,虽然 整数离散余弦变换(IDCT) 的切入理论,和 IDST 有一些不同。但最终的算子区别仅在于取值上。 仍然需要,将离散正弦变换扩展到二维情况。有: k(u,v)&p(x,y)∈[(0, 0), (N−1, N−1)]εk∣k=(0,0)=12&εk∣k≠(0,0)=1DCT:Xk(u,v)=(2⋅εk2N)2⋅∑p=(0,0)(N−1,N−1)Sp(x,y)⋅cos(2x+12Nπu)⋅cos(2y+12Nπv) {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(0,\\ 0),\\ (N-1,\\ N-1)] \\\\ &\\varepsilon_k|_{k=(0,0)} = \\frac{1}{\\sqrt{2}} \\quad \\& \\quad \\varepsilon_k|_{k \\ne (0,0)}=1 \\\\ DCT: X_k(u,v) &= \\begin{pmatrix} \\frac{2 \\cdot \\varepsilon_k}{\\sqrt{2N}} \\end{pmatrix} ^2 \\cdot \\sum_{p = (0,0)}^{(N-1,N-1)}S_p(x,y) \\cdot cos(\\tfrac{2x+1}{2N} \\pi u) \\cdot cos(\\tfrac{2y+1}{2N} \\pi v) \\\\ \\end{aligned} } DCT:Xk(u,v)k(u,v)&p(x,y)∈[(0, 0), (N−1, N−1)]εk∣k=(0,0)=√21&εk∣k≠(0,0)=1=(√2N2⋅εk)2⋅p=(0,0)∑(N−1,N−1)Sp(x,y)⋅cos(2N2x+1πu)⋅cos(2N2y+1πv) 依然,考虑可构成卷积核的子块最小大小为 4×44 \\times 44×4 ,则有 N=4N=4N=4 使上式变为: k(u,v)&p(x,y)∈[(0, 0), (3, 3)]εk∣k=(0,0)=12&εk∣k≠(0,0)=1DCT:Xk(u,v)=(εk2)2⋅∑p=(0,0)(3,3)Sp(x,y)⋅cos(2x+18πu)⋅cos(2y+12Nπv) {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(0,\\ 0),\\ (3,\\ 3)] \\\\ &\\varepsilon_k|_{k=(0,0)} = \\frac{1}{\\sqrt{2}} \\quad \\& \\quad \\varepsilon_k|_{k \\ne (0,0)}=1 \\\\ DCT: X_k(u,v) &= \\begin{pmatrix} \\frac{\\varepsilon_k}{\\sqrt{2}} \\end{pmatrix} ^2 \\cdot \\sum_{p = (0,0)}^{(3,3)}S_p(x,y) \\cdot cos(\\tfrac{2x+1}{8} \\pi u) \\cdot cos(\\tfrac{2y+1}{2N} \\pi v) \\\\ \\end{aligned} } DCT:Xk(u,v)k(u,v)&p(x,y)∈[(0, 0), (3, 3)]εk∣k=(0,0)=√21&εk∣k≠(0,0)=1=(√2εk)2⋅p=(0,0)∑(3,3)Sp(x,y)⋅cos(82x+1πu)⋅cos(2N2y+1πv) 如此,就可以矩阵表示 4×44 \\times 44×4 的 DCT 变化为: DCT4×4:Xk(u,v)∣v=KDCT⋅Sp(x,y)=[εv2⋅∑y=03(εu2⋅∑x=03cos(2x+18πu))⋅cos(2y+18πv)]⋅Sp(x,y)=12⋅[εv⋅∑y=0312⋅(εu⋅∑x=03cos(2x+18πu))⋅cos(2y+18πv)]⋅Sp(x,y)εk∣k=(0,0)=12&εk∣k≠(0,0)=1 {\\displaystyle \\begin{aligned} DCT_{4 \\times 4}: \\\\ X_k(u,v)|_v &= K_{DCT} \\cdot S_p(x, y) \\\\ &= \\begin{bmatrix} &\\frac{\\varepsilon_v}{\\sqrt{2}} \\cdot \\sum_{y=0}^3 \\begin{pmatrix} \\frac{\\varepsilon_u}{\\sqrt{2}} \\cdot \\sum_{x=0}^3 cos(\\tfrac{2x+1}{8} \\pi u) \\end{pmatrix} \\cdot cos(\\tfrac{2y+1}{8} \\pi v) \\end{bmatrix} \\cdot S_p(x, y) \\\\ &= \\frac{1}{\\sqrt{2}} \\cdot \\begin{bmatrix} &\\varepsilon_v \\cdot \\sum_{y=0}^3 \\frac{1}{\\sqrt{2}} \\cdot \\begin{pmatrix} \\varepsilon_u \\cdot \\sum_{x=0}^3 cos(\\tfrac{2x+1}{8} \\pi u) \\end{pmatrix} \\cdot cos(\\tfrac{2y+1}{8} \\pi v) \\end{bmatrix} \\cdot S_p(x, y) \\\\ \\varepsilon_k|_{k=(0,0)} &= \\frac{1}{\\sqrt{2}} \\quad \\& \\quad \\varepsilon_k|_{k \\ne (0,0)}=1 \\\\ \\end{aligned} } DCT4×4:Xk(u,v)∣vεk∣k=(0,0)=KDCT⋅Sp(x,y)=[√2εv⋅y=0∑3(√2εu⋅x=0∑3cos(82x+1πu))⋅cos(82y+1πv)]⋅Sp(x,y)=√21⋅[εv⋅y=0∑3√21⋅(εu⋅x=0∑3cos(82x+1πu))⋅cos(82y+1πv)]⋅Sp(x,y)=√21&εk∣k≠(0,0)=1 即有: KDCT=12[12cos(08π),12cos(08π),12cos(08π),12cos(08π)cos(18π),cos(38π),cos(58π),cos(78π)cos(28π),cos(68π),cos(108π),cos(148π)cos(38π),cos(98π),cos(158π),cos(218π)]=12[12,12,12,12cos(18π),cos(38π),cos(38π),−cos(18π)cos(28π),−cos(28π),−cos(28π),cos(28π)cos(38π),−cos(18π),cos(18π),−cos(38π)] {\\displaystyle \\begin{aligned} K_{DCT}&= \\frac{1}{\\sqrt{2}} \\begin{bmatrix} &\\frac{1}{\\sqrt{2}} cos(\\tfrac{0}{8}\\pi) &, \\quad \\frac{1}{\\sqrt{2}} cos(\\tfrac{0}{8}\\pi) &, \\quad \\frac{1}{\\sqrt{2}} cos(\\tfrac{0}{8}\\pi) &, \\quad \\frac{1}{\\sqrt{2}} cos(\\tfrac{0}{8}\\pi) \\\\ &cos(\\tfrac{1}{8}\\pi) &, \\quad cos(\\tfrac{3}{8}\\pi) &, \\quad cos(\\tfrac{5}{8}\\pi) &, \\quad cos(\\tfrac{7}{8}\\pi) \\\\ &cos(\\tfrac{2}{8}\\pi) &, \\quad cos(\\tfrac{6}{8}\\pi) &, \\quad cos(\\tfrac{10}{8}\\pi) &, \\quad 
cos(\\tfrac{14}{8}\\pi) \\\\ &cos(\\tfrac{3}{8}\\pi) &, \\quad cos(\\tfrac{9}{8}\\pi) &, \\quad cos(\\tfrac{15}{8}\\pi) &, \\quad cos(\\tfrac{21}{8}\\pi) \\end{bmatrix} \\\\ &= \\frac{1}{\\sqrt{2}} \\begin{bmatrix} &\\frac{1}{\\sqrt{2}} &, \\quad \\frac{1}{\\sqrt{2}} &, \\quad \\frac{1}{\\sqrt{2}} &, \\quad \\frac{1}{\\sqrt{2}} \\\\ &cos(\\tfrac{1}{8}\\pi) &, \\quad cos(\\tfrac{3}{8}\\pi) &, \\quad cos(\\tfrac{3}{8}\\pi) &, -cos(\\tfrac{1}{8}\\pi) \\\\ &cos(\\tfrac{2}{8}\\pi) &, -cos(\\tfrac{2}{8}\\pi) &, -cos(\\tfrac{2}{8}\\pi) &, \\quad cos(\\tfrac{2}{8}\\pi) \\\\ &cos(\\tfrac{3}{8}\\pi) &, -cos(\\tfrac{1}{8}\\pi) &, \\quad cos(\\tfrac{1}{8}\\pi) &, -cos(\\tfrac{3}{8}\\pi) \\end{bmatrix} \\end{aligned} } KDCT=√21⎣⎢⎢⎢⎡√21cos(80π)cos(81π)cos(82π)cos(83π),√21cos(80π),cos(83π),cos(86π),cos(89π),√21cos(80π),cos(85π),cos(810π),cos(815π),√21cos(80π),cos(87π),cos(814π),cos(821π)⎦⎥⎥⎥⎤=√21⎣⎢⎢⎢⎡√21cos(81π)cos(82π)cos(83π),√21,cos(83π),−cos(82π),−cos(81π),√21,cos(83π),−cos(82π),cos(81π),√21,−cos(81π),cos(82π),−cos(83π)⎦⎥⎥⎥⎤ 依然取位深数据范围中值记为常量 DDD 。有 D=128D=128D=128 对应 8-bit 位深格式 [0, 255][0,\\ 255][0, 255] 的可取范围,使得我们能够将结果矩阵整数化处理。记整数化后的 KDCTK_{DCT}KDCT 为 K^DCT\\hat{K}_{DCT}K^DCT 则: K^DCT≈[64,64,64,6483,36,−36,−8364,−64,−64,6436,−83,83,−36]=D⋅KDCT {\\displaystyle \\begin{aligned} \\hat{K}_{DCT}&\\approx \\begin{bmatrix} &64 &, \\quad 64 &, \\quad 64 &, \\quad 64 \\\\ &83 &, \\quad 36 &, -36 &, -83 \\\\ &64 &, -64 &, -64 &, \\quad 64 \\\\ &36 &, -83 &, \\quad 83 &, -36 \\end{bmatrix} = D \\cdot K_{DCT} \\\\ \\end{aligned} } K^DCT≈⎣⎢⎢⎡64836436,64,36,−64,−83,64,−36,−64,83,64,−83,64,−36⎦⎥⎥⎤=D⋅KDCT 原 DCT 的算子,即可以转化为如下表示: Xk(u,v)∣v=1D⋅D⋅KDCT⋅Sp(x,y)=1D⋅K^DCT⋅Sp(x,y) {\\displaystyle \\begin{aligned} X_k(u,v)|_v &= \\frac{1}{D} \\cdot D \\cdot K_{DCT} \\cdot S_p(x, y) \\\\ &= \\frac{1}{D} \\cdot \\hat{K}_{DCT} \\cdot S_p(x, y) \\\\ \\end{aligned} } Xk(u,v)∣v=D1⋅D⋅KDCT⋅Sp(x,y)=D1⋅K^DCT⋅Sp(x,y) 当然,这里单独计算了分离后波矢 k⃗=(u,v){\\vec{k}}=(u,v)k⃗=(u,v) 对应平面波的权重 Xk(u,v)X_k(u,v)Xk(u,v) ,那么对于整个 4×44 \\times 44×4 区域所有的平面波权重(即傅立叶解)就有 等价矩阵 : Xk∣4×4=(1D)2⋅K^DCT⋅Sp∣4×4⋅K^DCTT {\\displaystyle \\begin{aligned} X_k|_{4 \\times 4} &= \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot \\hat{K}_{DCT} \\cdot S_p|_{4 \\times 4} \\cdot {\\hat{K}_{DCT}}^T \\\\ \\end{aligned} } Xk∣4×4=(D1)2⋅K^DCT⋅Sp∣4×4⋅K^DCTT 精简一下,即可写为: Xk=(1D)2⋅K^DCT⋅Sp⋅K^DCTT {\\displaystyle \\begin{aligned} X_k &= \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot \\hat{K}_{DCT} \\cdot S_p\\cdot {\\hat{K}_{DCT}}^T \\\\ \\end{aligned} } Xk=(D1)2⋅K^DCT⋅Sp⋅K^DCTT 这个即为 整数余弦变化(IDCT)核心公式,而 K^DCT\\hat{K}_{DCT}K^DCT 则被称为 整数余弦变化的基本算子(IDCT Opt)。同样,在已知 SpS_pSp 和存储范围 DDD 的情况下,还是非常容易求得 XkX_kXk 的。而对应的 GPU 程序片也很简单,基本可当作滑动窗口移动步长 StepK=4Step_K = 4StepK=4 的固定算子乘法运算,就不再复写了。 现在汇总两者所述,对于整数离散正余弦变换(IDST/IDCT)的同理性,我们将 K^DST\\hat{K}_{DST}K^DST 与 K^DCT\\hat{K}_{DCT}K^DCT 统一称为 K^\\hat{K}K^ 矩阵,即 整数离散正余弦变换算子(IDST/IDCT Opt)。而 K^\\hat{K}K^ 的取值,显然和位深(Bit Depth)是强相关的。只有确定位深情况,才有固定的 K^\\hat{K}K^ 矩阵。 因此,当存储格式(Data Format)位深为 8-bit 时目标 4×44 \\times 44×4 大小,整合后的公式如下 : K^DST≈[29,55,74,8474,74,0,−7484,−29,−74,5555,−84,74,−29],K^DCT≈[64,64,64,6483,36,−36,−8364,−64,−64,6436,−83,83,−36]Xk=(1D)2⋅K^⋅Sp⋅K^T {\\displaystyle \\begin{aligned} \\hat{K}_{DST} \\approx \\begin{bmatrix} &29 &, \\quad 55 &, \\quad 74 &, \\quad 84 \\\\ &74 &, \\quad 74 &, \\quad 0 &, -74 \\\\ &84 &, -29 &, -74 &, \\quad 55 \\\\ &55 &, -84 &, \\quad 74 &, -29 \\end{bmatrix} , \\quad & \\hat{K}_{DCT} \\approx \\begin{bmatrix} &64 &, \\quad 64 &, \\quad 64 &, \\quad 64 \\\\ &83 &, \\quad 36 &, -36 &, -83 \\\\ &64 &, -64 &, -64 &, \\quad 
64 \\\\ &36 &, -83 &, \\quad 83 &, -36 \\end{bmatrix} \\\\ X_k = \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot & \\hat{K} \\cdot S_p\\cdot \\hat{K}^T \\\\ \\end{aligned} } K^DST≈⎣⎢⎢⎡29748455,55,74,−29,−84,74,0,−74,74,84,−74,55,−29⎦⎥⎥⎤,Xk=(D1)2⋅K^DCT≈⎣⎢⎢⎡64836436,64,36,−64,−83,64,−36,−64,83,64,−83,64,−36⎦⎥⎥⎤K^⋅Sp⋅K^T 整合后的两种变化中, K^DCT\\hat{K}_{DCT}K^DCT 会将卷积核范围内大部分 低频信息 对应基底的 分离权重,富集到结果矩阵 XkX_kXk 的 左上角 ;而 K^DST\\hat{K}_{DST}K^DST 会将卷积核范围内大部分 低频信息 对应基底的 分离权重,富集到结果矩阵 XkX_kXk 的 右上角。而低频权重所对应的高残差区域,才是原始图像最关键的轮廓数据。因此,对于压缩场景,考虑到数据存储惯性,采用 K^DCT\\hat{K}_{DCT}K^DCT 得到关键权重值 Xk(0,0)X_k(0,0)Xk(0,0) 的方式更为合适。而 K^DST\\hat{K}_{DST}K^DST 则由于取用的基底函数类型,决定了其更适合平滑波动区域的数据处理,例如轮廓内的相对均匀填充部分。 我们通常将 K^DCT\\hat{K}_{DCT}K^DCT 得到的 Xk(0,0)X_k(0,0)Xk(0,0) 称为 直流系数(DC [Direct Coefficient]),而把 XkX_kXk 其余位置的基底函数权重值,称为 交流系数(AC [Alternating Coefficient)。 数据还原时,通过矩阵逆运算求得常量矩阵 K^−1\\hat{K}^{-1}K^−1 ,随后代入 Sp=D2⋅K^−1⋅Xk⋅K^−1TS_p = D^2 \\cdot \\hat{K}^{-1} \\cdot X_k\\cdot {\\hat{K}^{-1}}^TSp=D2⋅K^−1⋅Xk⋅K^−1T 式中还原原值。而对于其它类型的三角基底函数,和不同的目标窗口大小(常用为 2n2^n2n , 取 n=2,3,4,5n=2,3,4,5n=2,3,4,5 ),使用基本公式代入,并按照上述推导类比处理,即可获取对应算子。 这就是最终主流的,整数离散正余弦变换。之于其它的 DST/DCT 共计 16 种类型,皆在特殊条件下起相关作用,被运用到针对子块的数据分离过程中。当然,推理过程依旧一致,只不过部分性质存在不同,如 DCT-8 就无法利用周期性来根据已知算子直接类推,每个不同的大小,都需要重新计算,这里不另作展开。 而对于整数离散正余弦变换本身来说,我们常用它来初步完成对子块内高低频数据的分离汇总,即对数据的分离归类。借此,方便后续在频域上,根据提纯结果进行压缩处理。对于其它位深取值,则根据 K^=D⋅K\\hat{K}=D \\cdot KK^=D⋅K 计算即可,而 KKK 在窗口大小不变(即基底函数族固定)情况下,不会发生变化,可认为是一个常数矩阵。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_5_2.html":{"url":"Chapter_3/Language/cn/Docs_3_5_2.html","title":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform])","keywords":"","body":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform]) 除了整数离散正余弦变换(IDST/IDCT)外,在早期的规格中(如 H.264)还会使用一种,被称为 沃尔什-哈达玛变换(WHT [Walsh-Hadamard Transform]) 的离散傅立叶的变换变体算法,来代为进一步的富集频域信息。 在之前的傅立叶变换运用中,我们大都选择三角函数或近似拟合,来作为基底函数进行分解。考虑到傅立叶函数,只从周期性上,对基底函数族和目标函数进行了约束。我们是不是可以选择一种,类似于自然排序线性组合的周期性函数,代替正余弦处理,从而获取更符合数据物理介质存储(媒介传输)状态(0/1 双模态)的变换过程呢? 
答案是可以的。虽然,从某种意义上,哈达玛变换相当于取用了只拟合正余弦函数极值的特殊函数。但哈达玛变换(WHT)依旧被认为是此类 非三角函数离散傅立叶变换,简称 非三角函数变换(或非正/余弦变换),的经典代表之一。 考虑周期 T=2nT=2^nT=2n 分段函数: f(x)=(−1)⌊xT⌋=(−1)⌊x2n⌋ {\\displaystyle \\begin{aligned} f(x)= & (-1)^{ \\lfloor \\tfrac{x}{T} \\rfloor } = (-1)^{ \\lfloor \\tfrac{x}{2^n} \\rfloor } \\\\ \\end{aligned} } f(x)=(−1)⌊Tx⌋=(−1)⌊2nx⌋ 记周期 T=2n=NT=2^n =NT=2n=N 的原信号函数 s(t)s(t)s(t) 以 f(x)f(x)f(x) 函数族构成基底。根据 傅立叶级数 有: s(n)=1N∑k=0N−1s^(k)⋅(−1)nks^(k)=∑n=0N−1s(n)⋅(−1)nk {\\displaystyle \\begin{aligned} s(n) &= \\frac{1}{N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot (-1)^{nk} \\\\ \\hat{s}(k) &= \\sum_{n = 0}^{N-1} s(n) \\cdot (-1)^{nk} \\\\ \\end{aligned} } s(n)s^(k)=N1k=0∑N−1s^(k)⋅(−1)nk=n=0∑N−1s(n)⋅(−1)nk 这即是 哈达玛变换的基础公式。 不同于 DST/DCT 需要进行泛化后,才能运用到工程之中的情况。哈达玛变换由于 f(x)f(x)f(x) 本身为偶函数,且始终只有实部的特性,可以直接在原有无扩充的数据集上使用。因此,假设原信号函数 s(t)=s(n)s(t) = s(n)s(t)=s(n) 在 n∈Z[0, N−1]n \\in \\mathbb{Z} [0, \\ N - 1]n∈Z[0, N−1] 的各节点位置,有样本采样 S∈[S0, SN−1]S \\in [S_0, \\ S_{N - 1}]S∈[S0, SN−1] 。则: WHT:{s(n)=1N∑k=0N−1s^(k)⋅(−1)nk=1N⋅∑k=0N−1(1N⋅s^(k))⋅(−1)nks^(k)=∑n=0N−1s(n+12)⋅(−1)nk=∑n=0N−1s(n)⋅(−1)nk {\\displaystyle \\begin{aligned} WHT:& { \\begin{cases} s(n) &= \\frac{1}{N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot (-1)^{nk} = \\sqrt{\\frac{1}{N}} \\cdot \\sum_{k = 0}^{N-1} \\begin{pmatrix} \\sqrt{\\frac{1}{N}} \\cdot \\hat{s}(k) \\end{pmatrix} \\cdot (-1)^{nk} \\\\ \\hat{s}(k) &= \\sum_{n = 0}^{N-1} s(n+\\tfrac{1}{2}) \\cdot (-1)^{nk} = \\sum_{n = 0}^{N-1} s(n) \\cdot (-1)^{nk} \\end{cases} } \\\\ \\end{aligned} } WHT:⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧s(n)s^(k)=N1k=0∑N−1s^(k)⋅(−1)nk=√N1⋅k=0∑N−1(√N1⋅s^(k))⋅(−1)nk=n=0∑N−1s(n+21)⋅(−1)nk=n=0∑N−1s(n)⋅(−1)nk 即: k∈[0, N−1]n∈[0, N−1]WHT:{Sn=1N∑k=0N−1Xk⋅(−1)nkXk=1N⋅∑n=0N−1Sn⋅(−1)nk {\\displaystyle \\begin{aligned} &k \\in [0,\\ N-1] \\quad \\quad n \\in [0,\\ N - 1] \\\\ WHT:& { \\begin{cases} S_n &= \\frac{1}{\\sqrt{N}} \\sum_{k = 0}^{N-1} X_k \\cdot (-1)^{nk} \\\\ X_k &= \\frac{1}{\\sqrt{N}} \\cdot \\sum_{n = 0}^{N-1} S_n \\cdot (-1)^{nk} \\end{cases} } \\\\ \\end{aligned} } WHT:k∈[0, N−1]n∈[0, N−1]⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧SnXk=√N1k=0∑N−1Xk⋅(−1)nk=√N1⋅n=0∑N−1Sn⋅(−1)nk 扩展到 二维 情况,有: k(u,v)&p(x,y)∈[(0, 0), (N−1, N−1)]WHT:Xk(u,v)=(1N)2⋅∑p=(0,0)(N−1,N−1)Sp(x,y)⋅(−1)xu⋅(−1)yv {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(0,\\ 0),\\ (N-1,\\ N-1)] \\\\ WHT: & X_k(u,v) = \\begin{pmatrix} \\frac{1}{\\sqrt{N}} \\end{pmatrix} ^2 \\cdot \\sum_{p = (0,0)}^{(N-1,N-1)}S_p(x,y) \\cdot (-1)^{xu} \\cdot (-1)^{yv} \\\\ \\end{aligned} } WHT:k(u,v)&p(x,y)∈[(0, 0), (N−1, N−1)]Xk(u,v)=(√N1)2⋅p=(0,0)∑(N−1,N−1)Sp(x,y)⋅(−1)xu⋅(−1)yv 如此,就可以矩阵表示 WHT 变化为: Xk=KWHT⋅Sp⋅KWHTT {\\displaystyle \\begin{aligned} X_k &= K_{WHT} \\cdot S_p \\cdot {K_{WHT}}^{T} \\\\ \\end{aligned} } Xk=KWHT⋅Sp⋅KWHTT 其中: KWHT=[1N⋅∑y=0N−1(1N⋅∑x=0N−1(−1)xu)⋅(−1)yv]=1N⋅[(−1)i⋅j]N×N=KWHTT=KWHT−1 {\\displaystyle \\begin{aligned} K_{WHT} &= \\begin{bmatrix} \\frac{1}{\\sqrt{N}} \\cdot \\sum_{y = 0}^{N-1} \\begin{pmatrix} \\frac{1}{\\sqrt{N}} \\cdot \\sum_{x = 0}^{N-1} (-1)^{xu} \\end{pmatrix} \\cdot (-1)^{yv} \\end{bmatrix} \\\\ &= \\frac{1}{\\sqrt{N}} \\cdot \\begin{bmatrix} & (-1)^{i \\cdot j} \\end{bmatrix} _{N \\times N} \\\\ &= {K_{WHT}}^{T} = {K_{WHT}}^{-1} \\\\ \\end{aligned} } KWHT=[√N1⋅y=0∑N−1(√N1⋅x=0∑N−1(−1)xu)⋅(−1)yv]=√N1⋅[(−1)i⋅j]N×N=KWHTT=KWHT−1 所以,我们通常记 NNN 阶哈达玛矩阵为 HN=[(−1)i⋅j]N×N=N⋅KWHTH_N = \\begin{bmatrix} (-1)^{i \\cdot j} \\end{bmatrix} _{N \\times N} = \\sqrt{N} \\cdot K_{WHT}HN=[(−1)i⋅j]N×N=√N⋅KWHT ,原式即可简化为: Xk=1N⋅H⋅Sp⋅H {\\displaystyle \\begin{aligned} X_k &= \\frac{1}{N} \\cdot H \\cdot S_p \\cdot H \\\\ \\end{aligned} } Xk=N1⋅H⋅Sp⋅H 显然,对于 H2NH_{2N}H2N 与 
HNH_NHN 有关系: H2N=[HN ,HNHN ,−HN] {\\displaystyle \\begin{aligned} H_{2N} &= \\begin{bmatrix} & H_N \\ , & H_N \\\\ & H_N \\ , -& H_N \\end{bmatrix}\\\\ \\end{aligned} } H2N=[HN ,HN ,−HNHN] 常用的哈达玛变换算子主要有 3 种,分别是: H2=[1 ,11 ,−1]H4=[H2 ,H2H2 ,−H2]=[1,1 ,1 ,11, −1 ,1 , −11,1 , −1 , −11, −1 , −1 ,1]H8=[H4 ,H4H4 ,−H4]=[H2,H2 ,H2 ,H2H2, −H2 ,H2 , −H2H2,H2 , −H2 , −H2H2, −H2 , −H2 ,H2] {\\displaystyle \\begin{aligned} H_2 &= \\begin{bmatrix} & 1 \\ , & 1 \\\\ & 1 \\ , -& 1 \\end{bmatrix} \\\\ H_4 &= \\begin{bmatrix} & H_2 \\ , & H_2 \\\\ & H_2 \\ , -& H_2 \\end{bmatrix}= \\begin{bmatrix} & 1 , & \\quad 1 \\ , & \\quad 1 \\ , & \\quad 1 \\\\ & 1 , & \\ -1 \\ , & \\quad 1 \\ , & \\ -1 \\\\ & 1 , & \\quad 1 \\ , & \\ -1 \\ , & \\ -1 \\\\ & 1 , & \\ -1 \\ , & \\ -1 \\ , & \\quad 1 \\end{bmatrix}\\\\ H_8 &= \\begin{bmatrix} & H_4 \\ , & H_4 \\\\ & H_4 \\ , -& H_4 \\end{bmatrix} = \\begin{bmatrix} & H_2 , & \\quad H_2 \\ , & \\quad H_2 \\ , & \\quad H_2 \\\\ & H_2 , & \\ -H_2 \\ , & \\quad H_2 \\ , & \\ -H_2 \\\\ & H_2 , & \\quad H_2 \\ , & \\ -H_2 \\ , & \\ -H_2 \\\\ & H_2 , & \\ -H_2 \\ , & \\ -H_2 \\ , & \\quad H_2 \\end{bmatrix}\\\\ \\end{aligned} } H2H4H8=[1 ,1 ,−11]=[H2 ,H2 ,−H2H2]=⎣⎢⎢⎡1,1,1,1,1 , −1 ,1 , −1 ,1 ,1 , −1 , −1 ,1 −1 −11⎦⎥⎥⎤=[H4 ,H4 ,−H4H4]=⎣⎢⎢⎡H2,H2,H2,H2,H2 , −H2 ,H2 , −H2 ,H2 ,H2 , −H2 , −H2 ,H2 −H2 −H2H2⎦⎥⎥⎤ 从上面的推导过程可知,采用哈达玛变换,同样能够将频域中的高低频信息,进行分区汇集。理论上 WHT 也可以代替 IDST/IDCT 来做频域压缩(降低信息熵)前的归类处理。 哈达玛变换的常见应用 考虑到 WHT 是 DST/DCT 的特殊拟合,而基底函数有限。其本身在选取较大的窗口尺寸,且被使用在取值范围差异较大的原信号时,会导致一定程度的误差。工程中除非量化到门电路的粒度,其余大多时间还是用它来求解指定窗口范围,残差信号(Residual Singnal) 经哈达玛变换后 绝对误差和(SATD [Sum of Absolute Transformed Difference])。 而哈达玛变换后绝对误差和(SATD)取值,即是变换求得 的所有元素绝对值之和,有: SATD=∑i∑j∣Xk(i,j)∣ {\\displaystyle \\begin{aligned} SATD = \\sum_i \\sum_j |X_k(i,j)| \\\\ \\end{aligned} } SATD=i∑j∑∣Xk(i,j)∣ 以 SATD 来代替传统绝对误差和(SAD [Sum of Absolute Difference])。利用 WHT 的加和快速运算特征计算残差趋势,协助时空域运动估计和数据量化的压缩处理。 哈达玛变换的常见应用 除此之外,如果我们换一种视角,将经过 IDST/IDCT 处理后的一系列子块所得结果,整合各子块得到的直流系数(DC)为一次输入给哈达玛变换。那么根据傅立叶变换特性,WHT 将对已经分离的低频权重信息,再次进行一次基于基底函数的分离。 而哈达玛变换仍属于傅立叶变换,这样的处理会使参与运算的 直流系数(DC) 所处子块,再进行一次变化程度的筛选,从而完成进一步细分并降低区域内的取值量级,更便于随后配合其它量化手段,减少信息熵。而小于 4×44 \\times 44×4 大小的哈达玛变换算子,并不会造成太大损失。 这个做法在 H.264 中得到了较为充分的体现。 H.264 中,对 YUV420 传输格式的亮度值 YkY_kYk 数据,取用了 16×1616 \\times 1616×16 点区域构成包含 4×44 \\times 44×4 个子块的范围,进行了两次特殊的哈达玛变换。得到 二次直流系数矩阵 Y^k\\hat{Y}_kY^k 作为传输值 : HY1=[1,1 ,1 ,12, −1 ,1 , −21,1 , −1 , −11, −2 , −2 ,1]HY2=[1,1 ,1 ,11, −1 ,1 , −11,1 , −1 , −11, −1 , −1 ,1]Y^k=HY2⋅(HY1⋅Yk∣DC⋅HY1)⋅HY2 {\\displaystyle \\begin{aligned} H_{Y_1} = \\begin{bmatrix} & 1 , & \\quad 1 \\ , & \\quad 1 \\ , & \\quad 1 \\\\ & 2 , & \\ -1 \\ , & \\quad 1 \\ , & \\ -2 \\\\ & 1 , & \\quad 1 \\ , & \\ -1 \\ , & \\ -1 \\\\ & 1 , & \\ -2 \\ , & \\ -2 \\ , & \\quad 1 \\end{bmatrix} \\quad &H_{Y_2} = \\begin{bmatrix} & 1 , & \\quad 1 \\ , & \\quad 1 \\ , & \\quad 1 \\\\ & 1 , & \\ -1 \\ , & \\quad 1 \\ , & \\ -1 \\\\ & 1 , & \\quad 1 \\ , & \\ -1 \\ , & \\ -1 \\\\ & 1 , & \\ -1 \\ , & \\ -1 \\ , & \\quad 1 \\end{bmatrix} \\\\ \\hat{Y}_k = H_{Y_2}\\cdot (H_{Y_1} &\\cdot Y_k|_{DC} \\cdot H_{Y_1}) \\cdot H_{Y_2} \\end{aligned} } HY1=⎣⎢⎢⎡1,2,1,1,1 , −1 ,1 , −2 ,1 ,1 , −1 , −2 ,1 −2 −11⎦⎥⎥⎤Y^k=HY2⋅(HY1HY2=⎣⎢⎢⎡1,1,1,1,1 , −1 ,1 , −1 ,1 ,1 , −1 , −1 ,1 −1 −11⎦⎥⎥⎤⋅Yk∣DC⋅HY1)⋅HY2 而对色度分量 CbCrC_bC_rCbCr 数据,则根据格式的数据组成和排布,取用了 8×88 \\times 88×8 点区域构成包含 2×22 \\times 22×2 个子块的范围,进行了单次标准哈达玛变换。得到 二次直流系数矩阵 C^bC^r\\hat{C}_b\\hat{C}_rC^bC^r 作为传输值 : HCbCr=[1 ,11 ,−1]C^b=HCbCr⋅Cb∣DC⋅HCbCrC^r=HCbCr⋅Cr∣DC⋅HCbCr {\\displaystyle \\begin{aligned} &H_{C_bC_r} = \\begin{bmatrix} & 1 \\ , & 1 \\\\ & 1 \\ , 
-& 1 \\end{bmatrix} \\\\ \\hat{C}_b &= H_{C_bC_r} \\cdot C_b|_{DC} \\cdot H_{C_bC_r} \\\\ \\hat{C}_r &= H_{C_bC_r} \\cdot C_r|_{DC} \\cdot H_{C_bC_r} \\\\ \\end{aligned} } C^bC^rHCbCr=[1 ,1 ,−11]=HCbCr⋅Cb∣DC⋅HCbCr=HCbCr⋅Cr∣DC⋅HCbCr 不过,随着小模型介入了二次变换压缩直流系数矩阵的过程,这套基于哈达玛变换(WHT)的压缩手段,最终还是被压缩比和还原程度更高的,以 低频不可分变换(LFNST)为代表的高频凋零技术,替代了原有的作用。 因为如上的缘故,在现行最新的规格中,以压缩冗余为目的频域数据分离,大都仍然采用整数离散正余弦变换(IDST/IDCT) 为主要入口技术。哈达玛变换(WHT)则相对局限的,被使用在 SATD 上。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Docs_3_5_3.html":{"url":"Chapter_3/Language/cn/Docs_3_5_3.html","title":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform])","keywords":"","body":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform]) 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform]) 是高频凋零技术的代表,通过一个 经过离线训练 所得的不可分变换矩阵,来进一步压缩指定范围内的前一次变换结果 [32] 。我们通常将首次变换称为 一次变换(First Transform) 或 主要变换(Primary Transform)。将以一次变换为输入的第二次变换,称为 二次变换(Secondary Transform)。 低频不可分变换(LFNST),与 H.264 中的哈达玛变换(WHT),都属于作用于二次变换的一种处理技术。其本身复杂的部分,在于如何得到 不可分变换矩阵组(NSMG [Non-Separable Matrix Group])。矩阵组通过特殊的离线小模型计算所得,是一个常量矩阵组。 那么,什么是不可分变换(Non-Separable Transform)? 不可分变换与 LFNST 原理 不可分变换,被用来指代一类,无法分解为独立的行变换与列变换的组合形式表示, 只能 用单一矩阵作统一处理的变换类型。 与之相对的就是可分变换(Separable Transform)。 前文中的离散正弦变换和哈达玛变换,就属于可分变换类型。 可见,对于 可分变换,如果记分离后的行列变化矩阵分别为 M∣rowM|_{row}M∣row 、 M∣colM|_{col}M∣col ,那么始终有: Out=M∣row⋅In⋅M∣col {\\displaystyle \\begin{aligned} Out &= M|_{row} \\cdot In \\cdot M|_{col} \\\\ \\end{aligned} } Out=M∣row⋅In⋅M∣col 而对于 不可分变换,则有变换矩阵 M∣uniM|_{uni}M∣uni 使得: Out=M∣uni⋅In {\\displaystyle \\begin{aligned} Out &= M|_{uni} \\cdot In \\\\ \\end{aligned} } Out=M∣uni⋅In 低频不可分所使用的矩阵组,就属于后一种。 如果记采用一次变换所得 M×NM \\times NM×N 大小的输出 作为 LFNST 输入。则 LFNST 需要根据技术执行前置环节中的一些求得信息(如角度预测模式、帧内预测模式)的参数值,来从矩阵组中选取满足条件的常量矩阵 TTT 。以 TTT 作为当前 LFNST 的算子参与再次变换。 算法要求,目标算子 TTT 矩阵的大小为 输入大小的平方,即有 TTT 取 MN×MNMN \\times MNMN×MN 尺寸。而输入 XkX_kXk 则需要以一维向量形式展开,有: Xk=[X11 ,X12 ,⋯, X1NX21 ,X22 ,⋯, X2N⋮,⋮ ,⋯, ⋮XM1 ,XM2 ,⋯, XMN]M×NXk′=[X11 ,X12 ,⋯, XM1 ,⋯, XMN]MN×1T {\\displaystyle \\begin{aligned} &X_k = \\begin{bmatrix} & X_{11} \\ , & X_{12} \\ , \\cdots,\\ & X_{1N} \\\\ & X_{21} \\ , & X_{22} \\ , \\cdots,\\ & X_{2N} \\\\ & \\vdots , & \\vdots \\ , \\cdots,\\ & \\vdots \\\\ & X_{M1} \\ , & X_{M2} \\ , \\cdots,\\ & X_{MN} \\end{bmatrix}_{M \\times N} \\\\ &X_k\\prime = \\begin{bmatrix} & X_{11} \\ , & X_{12} \\ , \\cdots,\\ & X_{M1} \\ , \\cdots,\\ & X_{MN} \\end{bmatrix}_{MN \\times 1}^{T} \\\\ \\end{aligned} } Xk=⎣⎢⎢⎡X11 ,X21 ,⋮,XM1 ,X12 ,⋯, X22 ,⋯, ⋮ ,⋯, XM2 ,⋯, X1NX2N⋮XMN⎦⎥⎥⎤M×NXk′=[X11 ,X12 ,⋯, XM1 ,⋯, XMN]MN×1T 将 Xk′X_k\\primeXk′ 代入标准公式,得到输出结果 X^k′\\hat{X}_k\\primeX^k′ 为: X^k′=T⋅Xk′ {\\displaystyle \\begin{aligned} \\hat{X}_k\\prime &= T \\cdot X_k\\prime \\\\ \\end{aligned} } X^k′=T⋅Xk′ 上式即是 低频不可分变换,被应用在二次变换时的基本公式 了。所得 X^k′\\hat{X}_k\\primeX^k′ 是长度为 MN×1MN \\times 1MN×1 的一维向量,我们需要按照 Xk→Xk′X_k \\rightarrow X_k\\primeXk→Xk′ 的逆过程,反向将其展开到 M×NM \\times NM×N 的二维区域,得到变换后的 X^k\\hat{X}_kX^k 结果矩阵。 LFNST 变换集矩阵组的获取 在说明原理中我们提到,低频不可分变换中,根据前置参数的差异,会从持有矩阵组里选择合适的算子 TTT 来代入运算,称之为 变换集(Transform Set)。但因为 H.266 的专利权问题,这一传统机器学习(注意,并未使用深度学习手段)聚类分析模型的训练,所采用的基本数据集和部分过程细节,并没有完全开源。 从 LFNST 原论文中可获知的信息是,针对 不可分二次变换(NSST [Non-Separable Secondary Transform])的变换集,采用的是基于 K均值(K-Means)聚类分析法 的变体算法。因此,基本训练过程属于标准的双步(2-Stages)无监督学习(Unsupervised Learning),存在两个阶段,分别是:初始阶段(Initialization)和 迭代阶段(Iteration) [33] 。当然,数据也需要分为 准备数据(Preparing Data) 和 训练数据(Training Data),两者皆来自未开放的黑盒数据集。 初始阶段中,主要处理准备数据,进行了两个工作: 首先,进行 特征(Feature)的选择(Selection)和提取(Extraction)。为每一个从 
编码过程(Encoding Process) 获取的 变换系数块(Transform Coefficient Block) 随机的分配一个取值范围为 label∈Z[0, 3]label \\in \\mathbb{Z} [0, \\ 3]label∈Z[0, 3] 的标签。并将分配好标签的变换系数块的 M×NM \\times NM×N 个低频系数,加入到 对应标签聚类(Cluster)的训练用数据集中。 M×NM \\times NM×N 为目标输入输出的大小,例如前文采用核心为 N×NN \\times NN×N ,那么这里企图训练所得核心的参数 M=NM = NM=N 相等。而选出对应标签的 M×NM \\times NM×N 个输入,每个都被认为是独立的一个数据,即每个聚类有 MNMNMN 个准备数据,共 4 组 4MN4MN4MN 个准备数据。 其次, 选择聚类算法(Clustering Algorithm Selection) 和 约束条件设计(Constraint Design)。这里采用 K均值算法,通过利用前一步中,标签范围在 label∈Z[1, 3]label \\in \\mathbb{Z} [1, \\ 3]label∈Z[1, 3] 的聚类(Cluster)分配好的训练用数据集,以 奇异值分解(SVD [Singular Value Decom-position]) 等解释性较弱但快速的方法,来求解各自聚类的协方差矩阵特征向量的最佳不可分离变换矩阵(采用 SVD 所得即是奇异值矩阵)。进而获得 label∈Z[1, 3]label \\in \\mathbb{Z} [1, \\ 3]label∈Z[1, 3] 聚类的 质心(Centroid),与各数据集一起构造了算法 启动输入。而 label=0label = 0label=0 的聚类,则被选做为 对照组(Validation Group),因此该矩阵的质心被设置为单位矩阵 E=[1]E = [1]E=[1] ,以输入输出恒等的形式,不再参与迭代阶段的更新。 那么约束条件是怎么设置的呢?这里采用的是,在单次训练过程后,从 3 个聚类的质心与 label=0label = 0label=0 聚类的质心中,选取最小(或下降最明显)的 率失真优化指数(RDO [Rate-Distortion Optimization]) 作为评判标准。筛选出 4 个聚类中 RDO 最优的一个聚类,用新参与训练的对应聚类数据集,替换原有被选最优聚类的数据集,参与下一次迭代。以此,作为 K-均值聚类分析的标准约束。 随后进入迭代阶段。 迭代阶段中,主要处理训练数据,训练同样也分为两步: 首先,进行 聚类验证(Cluster Validation)。聚类验证的过程就和一般的 K均值算法一致,将分批的训练数据交付到 4 个聚类,分别计率失真优化指数(RDO)。之后,用计算结果,按照设置的约束条件进行处理, 更新聚类标定的数据集。 其次,完成 结果解析(Results Interpretation)。当更新聚类当前数据集后,需要重新计算聚类的质心,方法同初始阶段一致。通过求解协方差矩阵,获取最佳不可分离变换矩阵,替代原聚类的质心。显然, label=0label = 0label=0 聚类的质心 E=[1]E = [1]E=[1] 并不需要更新。 而下一次迭代是否继续,则根据是否到达模型的 最大迭代次数(该参数未提供经验值,也可根据自身训练情况自行设定),或 RDO 没有进一步降低 来决定。两者命中其一,则停止迭代训练,获取 label∈Z[1, 3]label \\in \\mathbb{Z} [1, \\ 3]label∈Z[1, 3] 聚类此时的质心,作为结果构成 MN×MNMN \\times MNMN×MN 尺寸的变化集: TMN×MN∈[Cluster1 ,Cluster2 ,Cluster3 ] {\\displaystyle \\begin{aligned} &T_{MN \\times MN} \\in \\begin{bmatrix} & Cluster_{1} \\ , & Clust&er_{2} \\ , & Cluster_{3} \\ \\end{bmatrix}\\\\ \\end{aligned} } TMN×MN∈[Cluster1 ,Cluster2 ,Cluster3 ] 一般 TMN×MNT_{MN \\times MN}TMN×MN 会比较难记,通常简化为根据输入标记,写做 TM×NT_{M \\times N}TM×N 简记。变换集简写为: TM×N∈[T1∣M×N ,T2∣M×N ,T3∣M×N ] {\\displaystyle \\begin{aligned} &T_{M \\times N} \\in \\begin{bmatrix} & {T_1}|_{M \\times N} \\ , & T_2|_{M \\times N} \\ , & T_3|_{M \\times N} \\ \\end{bmatrix}\\\\ \\end{aligned} } TM×N∈[T1∣M×N ,T2∣M×N ,T3∣M×N ] 此时的 TTT 即是 M×NM \\times NM×N 输入尺寸的 低频不可分变换算子(LFNST Opt)。理论上,矩阵 TTT 会 保留输入源的分布形式,并将之密度梯度化。 若输入前置主变换采用 DCT-2 型,那么二次变换的输入 Xk′X_k\\primeXk′ ,在经过 LFNST 变换后,算子会将低频波密度参数富集到自身靠上方的行信息中,将高频波密度参数富集到靠下方的行信息中。从而实现,变换后输出的相对训练结束时质心位置的相对均匀分布。即维持输出的高低频权重二次变换结果矩阵 X^k\\hat{X}_kX^k ,在一维展开式 X^k′\\hat{X}_k\\primeX^k′ 情况下的类算子高低频分离布局,左侧低频右侧高频。 因此,当还原输出权重矩阵 X^k′\\hat{X}_k\\primeX^k′ 到 M×NM \\times NM×N 大小后,前置 DCT-2 型的低频权重仍然会位于矩阵的左上角。相应,高频则会接近右下角。这样的因素,让主变换采用 DCT-2 型,经过 LFNST 变换后的左上角首个参数值,仍可被当作直流系数(DC)。而结合 H.266/VVC 规格下的包括平面(Planar)模式、直流(DC)模式、65 种角度(Angle)预测模式在内,共计 67 种帧内预测模式本身就需要多组变化集的情况下,对于不同的主变换类型,又要单独再训练一系列变换集。处理代价会高到无法接受。 所以,目前 只将 LFNST 运用在 DCT-2 输入的情况。 至此,在经过多次不同尺寸和模式输入下的模型训练过程后,得到了数个 M×NM \\times NM×N 取值不等的矩阵算子 [T1,⋯, Tq][T_1, \\cdots ,\\ T_q][T1,⋯, Tq] 。共同组成了 LFNST 的基础变换集组 T=[T1,⋯, Tq]T = [T_1, \\cdots ,\\ T_q]T=[T1,⋯, Tq] ,亦被称为 基础多变换集(MTS [Multiple Transform Set]),应对目标主变换。 LFNST 有关不可分二次变换(NSST)的化简 经过上述的推理,我们可以察觉到即便是取一个较小的尺寸,整个 LFNST 的运算也会呈指数的增加算力消耗。例如输入的 M=N=8M = N = 8M=N=8 时,就需要一个尺寸为 64×6464 \\times 6464×64 大小的 LFNST 运算核。但如此大小对于计算机本身的硬件来说,会是一个 巨大的负担。 于是,在 VTM5 有关 LFNST 工程实践的 JVET-K0099 提案中,对 LFNST 的主要应用场景,即二次不可分变换(NSST),做了算法上的调整 [34] 。利用复合基,降低计算成本。 假设当前输入尺寸为 M×NM \\times NM×N 大小,有与输入预测模式对应的尺寸为 MN×MNMN \\times MNMN×MN 的低频不可分变换算子 TTT 。 NSST 规定,对于 min(M, N)=4min(M ,\\ N) = 4min(M, N)=4 的输入,统一取用 4×44 \\times 44×4 输入的算子 TTT 。对于 min(M, N)=8min(M ,\\ 
N) = 8min(M, N)=8 的输入,统一取用 8×88 \\times 88×8 输入的算子 TTT 。那么需要保存的算子就只分为 16×1616 \\times 1616×16 和 64×6464 \\times 6464×64 大小的共计 6 个变换核,即有 T=[T4×4, T8×8]T = [T_{4 \\times 4},\\ T_{8 \\times 8}]T=[T4×4, T8×8] 变换集。 对于小于输入尺寸的块,补 0 到可以进行计算的大小。 而对于两类变换集,NSST 只需要分离所得的低频权重部分。因此反推算子情况,亦只需要保留所有 MTS 中的算子 TTT 上方一定行即可。提案中,NSST 在经过多次大批量数据的模拟实验后,确定了最终方案。 取尺寸为 RN×RNRN \\times RNRN×RN 的 NSST 低频不可分变换算子 T′T\\primeT′ ,代替原有 MN×MNMN \\times MNMN×MN 大小算子 TTT 。 对于 T4×4′T_{4 \\times 4}\\primeT4×4′ 时的 4×44 \\times 44×4 输入,由于已经被划分的不可再分的量级,因而对于算子没有办法进行压缩。 4×44 \\times 44×4 相当于对输入的 再排列,只有右下角的最高频权重有去掉的可能。此类强制过滤的处理都是有损的,不需要做不必要的工作。 而如果强行构造 2×22 \\times 22×2 输入的算子 T2×2′T_{2 \\times 2}\\primeT2×2′ ,则会因为算子训练特性没有分离的空间,使结果反倒太过平均。因此,对于 2×22 \\times 22×2 大小的输入,无法采用 LFNST 处理。这也阻断了我们通过选用 T2×2′T_{2 \\times 2}\\primeT2×2′ 的局部解构建复合基,等效替代更大尺寸基底,来降低变化成本的途径。 所以,此处仍选择取用原 4×44 \\times 44×4 输入对应的算子 T4×4T_{4 \\times 4}T4×4 ,有 R=4R = 4R=4 即: T4×4′=T4×4 {\\displaystyle \\begin{aligned} T_{4 \\times 4}\\prime = T_{4 \\times 4} \\\\ \\end{aligned} } T4×4′=T4×4 对于 T8×8′T_{8 \\times 8}\\primeT8×8′ 时的 8×88 \\times 88×8 输入,因为存在 T4×4′T_{4 \\times 4}\\primeT4×4′ 作为基础,就能够使用分离复合基的方式了。我们可以将输出的 X^k\\hat{X}_kX^k 分割为 4 个等大的 4×44 \\times 44×4 区域。以 4×44 \\times 44×4 区域为一组 复合解基。采用经过训练的 T4×4′T_{4 \\times 4}\\primeT4×4′ 作为基底函数族,来求得 8×88 \\times 88×8 输入情况下,针对 T4×4′T_{4 \\times 4}\\primeT4×4′ 的解集,构成输出 X^k\\hat{X}_kX^k 。即期望有: W^k=T4×4′⋅WkX^k′′=∑i=14(T4×4′⋅W4×4⋅Xk′)i {\\displaystyle \\begin{aligned} \\hat{W}_k &= T_{4 \\times 4}\\prime \\cdot W_k \\\\ \\hat{X}_k\\prime\\prime &= \\sum_{i=1}^4 ( T_{4 \\times 4}\\prime \\cdot W_{4 \\times 4} \\cdot X_k \\prime )_i\\\\ \\end{aligned} } W^kX^k′′=T4×4′⋅Wk=i=1∑4(T4×4′⋅W4×4⋅Xk′)i 其中, WkW_kWk 是基于 T4×4′T_{4 \\times 4}\\primeT4×4′ 训练的 LFNST 核,它和输出 W^k\\hat{W}_kW^k 都为 4×44 \\times 44×4 大小训练 8×88 \\times 88×8 的 LFNST 基础分解基,训练完毕后是个 固定值。 而 X^k′′\\hat{X}_{k}\\prime\\primeX^k′′ 则是输入 Xk′X_k\\primeXk′ 关于 T4×4′⋅W4×4T_{4 \\times 4}\\prime \\cdot W_{4 \\times 4}T4×4′⋅W4×4 的变换结果。但一组选定尺寸的 LFNST 变换集,只有 3 个矩阵可作为基底。因此,变换的覆盖范围也是有限的。若将输入 8×88 \\times 88×8 大小的 XkX_kXk 也分为 4 个等大的 4×44 \\times 44×4 区域,写作如下形式: Xk=[Xk∣4×4 ,Xk∣4×4Xk∣4×4 ,Xk∣4×4]=[Xk1 ,Xk2Xk3 ,Xk4] {\\displaystyle \\begin{aligned} &X_k = \\begin{bmatrix} & X_k|_{4 \\times 4} \\ , & X_k|_{4 \\times 4} \\\\ & X_k|_{4 \\times 4} \\ , & X_k|_{4 \\times 4} \\end{bmatrix} = \\begin{bmatrix} & X_{k1} \\ , & X_{k2} \\\\ & X_{k3} \\ , & X_{k4} \\end{bmatrix} \\end{aligned} } Xk=[Xk∣4×4 ,Xk∣4×4 ,Xk∣4×4Xk∣4×4]=[Xk1 ,Xk3 ,Xk2Xk4] 那么原 X^k′′\\hat{X}_{k}\\prime\\primeX^k′′ 分离式即变为: X^k′′=[T1∣4×4′ ,T2∣4×4′T1∣4×4′ , [0]4×4]⋅[Wk1 ,Wk2Wk3 ,Wk4]⋅[Xk1 ,Xk2Xk3 ,Xk4]=∑T4×4′⋅[Wk1 ,Wk2Wk3 ,[0]4×4]⋅[Xk1 ,Xk2Xk3 ,[0]4×4] {\\displaystyle \\begin{aligned} \\hat{X}_k\\prime\\prime &= \\begin{bmatrix} & T_1|_{4 \\times 4}\\prime \\ , & T_2|_{4 \\times 4}\\prime \\\\ & T_1|_{4 \\times 4}\\prime \\ , & \\ [0]_{4 \\times 4} \\end{bmatrix} \\cdot \\begin{bmatrix} & W_{k1} \\ , & W_{k2} \\\\ & W_{k3} \\ , & W_{k4} \\end{bmatrix} \\cdot \\begin{bmatrix} & X_{k1} \\ , & X_{k2} \\\\ & X_{k3} \\ , & X_{k4} \\end{bmatrix} \\\\ &= \\sum T_{4 \\times 4}\\prime \\cdot \\begin{bmatrix} & W_{k1} \\ , & W_{k2} \\\\ & W_{k3} \\ , & [0]_{4 \\times 4} \\end{bmatrix} \\cdot \\begin{bmatrix} & X_{k1} \\ , & X_{k2} \\\\ & X_{k3} \\ , & [0]_{4 \\times 4} \\end{bmatrix} \\\\ \\end{aligned} } X^k′′=[T1∣4×4′ ,T1∣4×4′ ,T2∣4×4′ [0]4×4]⋅[Wk1 ,Wk3 ,Wk2Wk4]⋅[Xk1 ,Xk3 ,Xk2Xk4]=∑T4×4′⋅[Wk1 ,Wk3 ,Wk2[0]4×4]⋅[Xk1 ,Xk3 ,Xk2[0]4×4] 存在 Xk4X_{k4}Xk4 区域,乘 000 丢解的问题,因此 X^k′′\\hat{X}_k\\prime\\primeX^k′′ 与 X^k′\\hat{X}_k\\primeX^k′ 的关系,还需要补充 Xk4X_{k4}Xk4 
的 LFNST 独立解,记为 X^k4′\\hat{X}_{k4}\\primeX^k4′ ,有: T8×8−1⋅X^k′=[T1∣4×4′−1 ,T2∣4×4′−1T3∣4×4′−1 , [0]4×4]⋅W4×4−1⋅X^k′′+T4×4−1⋅X^k4′=[W^k1−1 ,W^k2−1W^k3−1 ,T4×4−1]⋅[X^k′′ ,[0]4×4[0]4×4 ,X^k4′]=Xk {\\displaystyle \\begin{aligned} {T_{8 \\times 8}}^{-1} \\cdot \\hat{X}_k\\prime &= \\begin{bmatrix} & {T_1|_{4 \\times 4}\\prime}^{-1} \\ , & {T_2|_{4 \\times 4}\\prime}^{-1} \\\\ & {T_3|_{4 \\times 4}\\prime}^{-1} \\ , & \\ [0]_{4 \\times 4} \\end{bmatrix} \\cdot {W_{4 \\times 4}}^{-1} \\cdot \\hat{X}_k\\prime\\prime + {T_{4 \\times 4}}^{-1} \\cdot \\hat{X}_{k4}\\prime \\\\ &= \\begin{bmatrix} & {\\hat{W}_{k1}}^{-1} \\ , & {\\hat{W}_{k2}}^{-1} \\\\ & {\\hat{W}_{k3}}^{-1} \\ , & {T_{4 \\times 4}}^{-1} \\end{bmatrix} \\cdot \\begin{bmatrix} & \\hat{X}_k\\prime\\prime \\ , & [0]_{4 \\times 4} \\\\ & [0]_{4 \\times 4} \\ , & \\quad \\hat{X}_{k4}\\prime \\end{bmatrix} \\\\ &= X_k \\end{aligned} } T8×8−1⋅X^k′=[T1∣4×4′−1 ,T3∣4×4′−1 ,T2∣4×4′−1 [0]4×4]⋅W4×4−1⋅X^k′′+T4×4−1⋅X^k4′=[W^k1−1 ,W^k3−1 ,W^k2−1T4×4−1]⋅[X^k′′ ,[0]4×4 ,[0]4×4X^k4′]=Xk 即: X^k′=[X^k′′ ,[0]4×4[0]4×4 ,X^k4′]T8×8=[W^k1 ,W^k2W^k3 ,T4×4]=[T4×4′⋅Wk ,T4×4′⋅WkT4×4′⋅Wk ,T4×4] {\\displaystyle \\begin{aligned} \\hat{X}_k\\prime &= \\begin{bmatrix} & \\hat{X}_k\\prime\\prime \\ , & [0]_{4 \\times 4} \\\\ & [0]_{4 \\times 4} \\ , & \\quad \\hat{X}_{k4}\\prime \\end{bmatrix} \\\\ T_{8 \\times 8} &= \\begin{bmatrix} & \\hat{W}_{k1} \\ , & \\hat{W}_{k2} \\\\ & \\hat{W}_{k3} \\ , & T_{4 \\times 4} \\end{bmatrix} = \\begin{bmatrix} & T_{4 \\times 4}\\prime \\cdot W_k \\ , & T_{4 \\times 4}\\prime \\cdot W_k \\\\ & T_{4 \\times 4}\\prime \\cdot W_k \\ , & T_{4 \\times 4} \\end{bmatrix} \\end{aligned} } X^k′T8×8=[X^k′′ ,[0]4×4 ,[0]4×4X^k4′]=[W^k1 ,W^k3 ,W^k2T4×4]=[T4×4′⋅Wk ,T4×4′⋅Wk ,T4×4′⋅WkT4×4] 取用: T8×8′=[T4×4′⋅Wk ,T4×4′⋅WkT4×4′⋅Wk ,0] {\\displaystyle \\begin{aligned} T_{8 \\times 8}\\prime &= \\begin{bmatrix} & T_{4 \\times 4}\\prime \\cdot W_k \\ , & T_{4 \\times 4}\\prime &\\cdot W_k \\\\ & T_{4 \\times 4}\\prime \\cdot W_k \\ , & &0 \\end{bmatrix} \\end{aligned} } T8×8′=[T4×4′⋅Wk ,T4×4′⋅Wk ,T4×4′⋅Wk0] 那么原 8×88 \\times 88×8 输入 XkX_kXk 经过 LFNST 变换的输出 X^k′\\hat{X}_k\\primeX^k′ 就有: X^k′=(T8×8′+T4×4′)⋅Xk {\\displaystyle \\begin{aligned} \\hat{X}_k\\prime &= (T_{8 \\times 8}\\prime + T_{4 \\times 4}\\prime )\\cdot X_k \\\\ \\end{aligned} } X^k′=(T8×8′+T4×4′)⋅Xk 而 X^k′\\hat{X}_k\\primeX^k′ 的右上和左下角,皆为 [0]4×4[0]_{4 \\times 4}[0]4×4 值。 T8×8′T_{8 \\times 8}\\primeT8×8′ 算子展开去零后,只有 16×4816 \\times 4816×48 的运算大小 因为固定了基底 T4×4′T_{4 \\times 4}\\primeT4×4′ 的位置,同样也只有 3 个聚类,即 3 个矩阵算子。 最终: NSST:{T=[ T4×4′, T8×8′ ]X^k′=(T8×8′+T4×4′)⋅Xk,min(M, N)=8X^k′=T4×4′⋅Xk,min(M, N)=4 {\\displaystyle \\begin{aligned} NSST:& \\begin{cases} { \\begin{aligned} T &= [\\ T_{4 \\times 4}\\prime,\\ T_{8 \\times 8}\\prime \\ ] \\\\ \\hat{X}_k\\prime &= (T_{8 \\times 8}\\prime + T_{4 \\times 4}\\prime )\\cdot X_k &, min(M ,\\ N) = 8 \\\\ \\hat{X}_k\\prime &= T_{4 \\times 4}\\prime \\cdot X_k &, min(M ,\\ N) = 4 \\end{aligned} } \\end{cases} \\\\ \\end{aligned} } NSST:⎩⎨⎧TX^k′X^k′=[ T4×4′, T8×8′ ]=(T8×8′+T4×4′)⋅Xk=T4×4′⋅Xk,min(M, N)=8,min(M, N)=4 由 T4×4′T_{4 \\times 4}\\primeT4×4′ 和 T8×8′T_{8 \\times 8}\\primeT8×8′ 构造新的基础多变换集(MTS)。结合上述变换过程,构成了 NSST 的完整理论基础。 不过,即使 NSST 已经极大的缩减了 LFNST 变换集的大小,并能在参与熵编码后,能更为有效的降低信息熵。但在以 H.265/HEVC 为目标应用时,就需要 35 组 2 类 3 算子的变换集 [34] 。延伸到 H.266/VVC 规格,则会至少需要 67 组 2 类 3 算子变换集。不论是 H.265 还是 H.266 ,都不可能采纳,属于无法工程化的技术。 那么,如何精简基础多变换集呢? 
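在回答这个问题之前,先用一段 Python(NumPy)示意代码,小结前文二次不可分变换 X̂_k′ = T · X_k′ 的基本计算形式:把一次变换所得的 4×4 低频系数块展平为 16×1 向量,整体乘以 16×16 的变换核,再逆展开回 4×4。需要强调:真实的 LFNST 核矩阵由离线训练获得并固化于规格之中,此处仅以随机正交矩阵占位;函数名与按行展开的扫描顺序同样是演示用的假设。

```python
import numpy as np

rng = np.random.default_rng(0)

# 用随机正交矩阵充当 16x16 的变换核 T(真实核来自离线训练并写死在规格中,这里仅作占位)
q, _ = np.linalg.qr(rng.standard_normal((16, 16)))
T_4x4 = q

def nsst_forward(x_k: np.ndarray, T: np.ndarray) -> np.ndarray:
    """二次不可分变换:X_k -> X_k'(展平)-> X_hat_k' = T @ X_k' -> 逆展开为 X_hat_k。"""
    x_vec = x_k.reshape(-1, 1)        # 按行展开为一维列向量
    y_vec = T @ x_vec                 # 单一矩阵统一处理,无法拆成独立的行、列变换
    return y_vec.reshape(x_k.shape)

def nsst_inverse(y_k: np.ndarray, T: np.ndarray) -> np.ndarray:
    """解码侧逆过程:正交核时 T 的逆即其转置。"""
    return (T.T @ y_k.reshape(-1, 1)).reshape(y_k.shape)

if __name__ == "__main__":
    x = rng.integers(-64, 64, size=(4, 4)).astype(float)   # 假设为一次变换(如 DCT-2)的低频系数块
    y = nsst_forward(x, T_4x4)
    print(np.allclose(x, nsst_inverse(y, T_4x4)))           # True:正交占位核下可精确还原
```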
LFNST 在 H.266 应用的工程 RST 与常值 MTS 在 VTM5 的有关 JVET-N0193 提案的提交中,H.266/VVC 采用了 缩减低频不可分变换(R-LFNST [Reduced LFNST]),处理此问题 [33] 。因为是针对 LFNST 的 二次不可分变换(NSST)的逼近算法,R-LFNST 也被称为 缩减二次变换(RST [Reduced Secondary Transform]) [35] 。 缩减二次变换对 LFNST 的 NSST 应用所得 基础多变换集(MTS),进行 整体变换集算子数量 和 算子生效范围,两方面的裁剪。其理论根基仍来源自 NSST 。 RST 在生效范围的调整,主要集中于控制 NSST 在工程实现中的有效计算区域。根据 NSST 的基本公式可以发现,实际上对于尺寸大于 8×88 \\times 88×8 的 M×NM \\times NM×N 大小主变换 XkX_kXk 输入,NSST 能起作用的部分仅局限于左上角和与其相邻的,共计 3 个 4×44 \\times 44×4 大小的范围。 如此一来,介于参与 NSST 的输入已不可再分,对于这三个区域外的的其余 XkX_kXk 值, 根本不需要再次进行二次变换处理。而在 T4×4′T_{4 \\times 4}\\primeT4×4′ 时和 NSST 一致。 所以,原 NSST 公式可调整为: RST:{T=[ T4×4′, T8×8′ ]X^k′=T8×8′⋅Xk′∣48×1,min(M, N)=8X^k′=T4×4′⋅Xk,min(M, N)=4 {\\displaystyle \\begin{aligned} RST:& \\begin{cases} { \\begin{aligned} T &= [\\ T_{4 \\times 4}\\prime,\\ T_{8 \\times 8}\\prime \\ ] \\\\ \\hat{X}_k\\prime &= T_{8 \\times 8}\\prime \\cdot X_k\\prime|_{48 \\times 1} &, min(M ,\\ N) = 8 \\\\ \\hat{X}_k\\prime &= T_{4 \\times 4}\\prime \\cdot X_k &, min(M ,\\ N) = 4 \\end{aligned} } \\end{cases} \\\\ \\end{aligned} } RST:⎩⎨⎧TX^k′X^k′=[ T4×4′, T8×8′ ]=T8×8′⋅Xk′∣48×1=T4×4′⋅Xk,min(M, N)=8,min(M, N)=4 即,对于 M×N≥8×8M \\times N \\ge 8 \\times 8M×N≥8×8 的情况,就如下图所示: 图 3-24 RST 的 8x8 输入理示意图[32] 有 T8×8′T_{8 \\times 8}\\primeT8×8′ 时,只需处理图中蓝色部分的 XkX_kXk 数据。 经 T8×8′T_{8 \\times 8}\\primeT8×8′ 计算后的原输出结果 X^k′\\hat{X}_k\\primeX^k′ ,安全起见会需要对非左上角部分扫描归零: 图 3-25 RST 的 8x8 输入 NSST 处理结果示意图(蓝线扫描顺序归零)[35] 之后,叠加至原主变化 XkX_kXk 位于计算范围外的部分,构成最终输出 X^k\\hat{X}_kX^k 。 经过此番调整后,单次算子计算所需要的算力消耗,较 NSST 相比就非常之小了。 而在 MTS 的算子数量方面,通过整合 K均值聚类机器学习 label∈Z[1, 3]label \\in \\mathbb{Z} [1, \\ 3]label∈Z[1, 3] 中,所得 率失真优化指数(RDO)较大的两个聚类的变换矩阵,将原有输入固定预测模式和尺寸时的 NSST 变换集,从 3 个矩阵精简到了 2 个,成为双算子形式: TM×N∈[T1∣M×N ,T2∣M×N ] {\\displaystyle \\begin{aligned} &T_{M \\times N} \\in \\begin{bmatrix} & {T_1}|_{M \\times N} \\ , & T_2|_{M \\times N} \\ \\end{bmatrix}\\\\ \\end{aligned} } TM×N∈[T1∣M×N ,T2∣M×N ] 同时,RST 对需要处理的 H.266 规格下的各类帧内预测模式进行了分类。将原本需要单独生成变换集的平面(Planar)模式、直流(DC)模式、角度(Angle)预测模式进行了拆解。把临近相似方向的角度预测模式进行了分类。之后归类于 4 个主流变换集到如下索引 [32] : 凭借这样的处理,使得原本大于 67×2×367 \\times 2 \\times 367×2×3 个 MTS 矩阵,缩减到了 4×2×24 \\times 2 \\times 24×2×2 共计 8 个(详见【附表一】)的可接受范围。 至此,根据输入尺寸大小、预测模式所处归类、输入率失真优化指数(RDO)这 3 个参数,就能够选定具体的算子进行相关处理了。完成 RST 的工程化。 到这里,信息频域分离和部分冗余处理,就已经完成了。随后再配合传统音视频的量化和熵编码,即可完成对信息剩余存储空间冗余的压缩。此处不再赘言。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/Playground_3.html":{"url":"Chapter_3/Language/cn/Playground_3.html","title":"【在线展示】","keywords":"","body":" 在线演示 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_3/Language/cn/References_3.html":{"url":"Chapter_3/Language/cn/References_3.html","title":"【参考文献】","keywords":"","body":"三、【参考文献】 [1] Fourier, J.B. Joseph (1878) [1822], The Analytical Theory of Heat, translated by Alexander Freeman, The University Press (translated from French). [2] Champeney, D.C. (1987), A Handbook of Fourier Theorems, Cambridge University Press. [3] Clozel, Laurent; Delorme, Patrice (1985), \"Sur le théorème de Paley-Wiener invariant pour les groupes de Lie réductifs réels\", Comptes Rendus de l'Académie des Sciences, Série I, 300: 331–333. [4] Rahman, Matiur (2011), Applications of Fourier Transforms to Generalized Functions, WIT Press, ISBN 978-1-84564-564-9. 
[5] Stein, Elias; Weiss, Guido (1971), Introduction to Fourier Analysis on Euclidean Spaces, Princeton, N.J.: Princeton University Press, ISBN 978-0-691-08078-9. [6] Wolf, Kurt B. (1979), Integral Transforms in Science and Engineering, Springer, doi:10.1007/978-1-4757-0872-1, ISBN 978-1-4757-0874-5. [7] Grafakos, Loukas (2004), Classical and Modern Fourier Analysis, Prentice-Hall, ISBN 978-0-13-035399-3. [8] Gauss, Carl Friedrich (1876). Theoria Interpolationis Methodo Nova Tractata. Band 3. Göttingen: Königliche Gesellschaft der Wissenschaften. pp. 265–327. [9] Heideman, M. T., D. H. Johnson, and C. S. Burrus, \"Gauss and the history of the fast Fourier transform,\" IEEE ASSP Magazine, 1, (4), 14–21 (1984). [10] James W. Cooley, John W. Tukey, (1965). \"An algorithm for the machine calculation of complex Fourier series\". Math. Comput. 19 (90): 297–301. doi:10.2307/2003354. [11] James W. Cooley, Peter A. W. Lewis, and Peter W. Welch, \"Historical notes on the fast Fourier transform,\" Proc. IEEE, vol. 55 (no. 10), p. 1675–1677 (1967). [12] Ghissoni, S. , Costa, E. , Lazzari, C. , Monteiro, J. , & Reis, R. . (2011). Radix-2 Decimation in Time (DIT) FFT implementation based on a Matrix-Multiple Constant multiplication approach. IEEE International Conference on Electronics. IEEE. [13] C. Tomasi and R. Manduchi, \"Bilateral filtering for gray and color images,\" Sixth International Conference on Computer Vision (IEEE Cat. No.98CH36271), Bombay, India, 1998, pp. 839-846, doi: 10.1109/ICCV.1998.710815. [14] R. Haralick and L. Shapiro Computer and Robot Vision, Vol. 1, Addison-Wesley Publishing Company, 1992, pp 346 - 351. [15] Irwin Sobel, 2014, History and Definition of the Sobel Operator [16] William T. Freeman, Michal Roth, \"Orientation Histograms for Hand Gesture Recognition\", Tech. Rep. TR94-03, Mitsubishi Electric Research Laboratories, Cambridge, MA, December 1994. [17] Dalal, N. , and B. Triggs . \"Histograms of Oriented Gradients for Human Detection.\" IEEE Computer Society Conference on Computer Vision & Pattern Recognition IEEE, 2005. [18] J. F. Henriques, R. Caseiro, P. Martins and J. Batista, \"High-Speed Tracking with Kernelized Correlation Filters,\" in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 37, no. 3, pp. 583-596, 1 March 2015, doi: 10.1109/TPAMI.2014.2345390. [19] Yu J, Jiang Y, Wang Z, et al. Unitbox: An advanced object detection network[C]//Proceedings of the 24th ACM international conference on Multimedia. 2016: 516-520. [20] Rezatofighi, Hamid , et al. \"Generalized Intersection Over Union: A Metric and a Loss for Bounding Box Regression.\" 2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) IEEE, 2019. [21] Zheng Z, Wang P, Liu W, et al. Distance-IoU loss: Faster and better learning for bounding box regression[C]//Proceedings of the AAAI conference on artificial intelligence. 2020, 34(07): 12993-13000. [22] Zhang Y F, Ren W, Zhang Z, et al. Focal and efficient IOU loss for accurate bounding box regression[J]. Neurocomputing, 2022, 506: 146-157. [23] Li G, Xu D, Cheng X, et al. Simvit: Exploring a simple vision transformer with sliding windows[C]//2022 IEEE International Conference on Multimedia and Expo (ICME). IEEE, 2022: 1-6. [24] Huston SJ, Krapp HG (2008) Visuomotor Transformation in the Fly Gaze Stabilization System. PLoS Biol 6(7): e173. https://doi.org/10.1371/journal.pbio.0060173. [25] Fleet, David J.; Weiss, Yair (2006). \"Optical Flow Estimation\" (PDF). In Paragios, Nikos; Chen, Yunmei; Faugeras, Olivier D. 
(eds.). Handbook of Mathematical Models in Computer Vision. Springer. pp. 237–257. ISBN 978-0-387-26371-7. [26] Barron, John L.; Fleet, David J. & Beauchemin, Steven (1994). \"Performance of optical flow techniques\" (PDF). International Journal of Computer Vision. 12: 43–77. CiteSeerX 10.1.1.173.481. doi:10.1007/bf01420984. S2CID 1290100. [27] Berthold.K.P. Horn and Brian.G. Schunck, \"Determining optical flow.\" Artificial Intelligence, vol 17, pp 185–203, 1981. [28] Lucas B D and T. Kanade, An iterative image registration technique with an application to stereo vision[C]//Proc. of the 7th International Conference on Artificial Intelligence, pp 121-130, 1981. [29] A. Alshin, E. Alshina and T. Lee, \"Bi-directional optical flow for improving motion compensation,\" 28th Picture Coding Symposium, Nagoya, Japan, 2010, pp. 422-425, doi: 10.1109/PCS.2010.5702525. [30] J. Luo, Y. He and W. Chen, \"Prediction Refinement with Optical Flow for Affine Motion Compensation,\" 2019 IEEE Visual Communications and Image Processing (VCIP), Sydney, NSW, Australia, 2019, pp. 1-4, doi: 10.1109/VCIP47243.2019.8965942. [31] T. Lu et al., \"Luma Mapping with Chroma Scaling in Versatile Video Coding,\" 2020 Data Compression Conference (DCC), Snowbird, UT, USA, 2020, pp. 193-202, doi: 10.1109/DCC47342.2020.00027. [32] M. Koo, M. Salehifar, J. Lim and S. -H. Kim, \"Low Frequency Non-Separable Transform (LFNST),\" 2019 Picture Coding Symposium (PCS), Ningbo, China, 2019, pp. 1-5, doi: 10.1109/PCS48520.2019.8954507. [33] X. Zhao, J. Chen, M. Karczewicz, A. Said and V. Seregin, \"Joint Separable and Non-Separable Transforms for Next-Generation Video Coding,\" in IEEE Transactions on Image Processing, vol. 27, no. 5, pp. 2514-2525, May 2018, doi: 10.1109/TIP.2018.2802202. [34] Salehifar M, Koo M, Lim J, et al. CE 6.2. 6: Reduced Secondary Transform (RST)[J]. Joint Video Experts Team (JVET) of ITU-T SG, 2018, 16: 10-18. [35] Koo M, Salehifar M, Lim J, et al. CE6: reduced secondary transform (RST)(CE6-3.1)[J]. Joint Video Experts Team (JVET) of ITU-T SG, 2019, 16: 19-27. 
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Apex_4_Introduce.html":{"url":"Chapter_4/Language/cn/Apex_4_Introduce.html","title":"四、音视频机器学习基础","keywords":"","body":"四、音视频机器学习基础 引言 在前一章中,我们对基础音视频的关键技术工具,进行了详细介绍。其中,不少地方需要用到机器学习相关的处理手段。可见结合机器学习尤其是深度学习模型的优秀能力,来强化现有音视频工程的各方面,已逐步成为主流趋势。 因此,需要我们对机器学习这个大类技术族,有初步的认知。 整个机器学习(ML)的发展历程中,总有不一样的想法和更先进(或特色)的方法论被各路探索者们提出来。而深度学习(DL [Deep Learning])作为机器学习(ML [Machine Learning])的实现手段之一,最初的概念早在上个世纪就已经被 Hinton、Bengio、LeCun 等学者提出。受到近年来快速增长的计算机算力和大数据云建设,而得以真正落地。 如果回顾机器学习的发展会发现,过程中通常是多条路线方法论并行的。在历史上(现认为 2019 至今属于第三次高峰),前两次小高峰都是伴随着计算机硬件技术的突破,而带来的飞跃性变革。从单层感知器模型(Single-Perception)到多层感知器模型(Multi-Perception)再到深度信念网络(Deep Belief Network),直至今天百花齐放的 DL。整个历史中的每一次迭代,更像是多次多维度的技术积累准备齐全后,才应运而生的。 本章节主要整理说明了,当下机器学习至 2019 年前的发展简史,并阐明了部分算法的必要基础概念。只给出核心原理,不包含理论证明和推导过程。 关键字:机器学习分类、深度学习、激活函数、损失函数、最优化算法、模型结构速览 目录 4.1 发展概览 4.2 模型工程基础 4.2.1 算子(Operator)& 层(Layer) 4.2.2 神经元(Neuron) 4.2.3 神经网络(NN [Neural Network]) 4.2.4 特征选择(Feature Selection) 4.3 经典激活函数(Classic Activation Function) 4.3.1 Sigmoid 4.3.2 Tanh 4.3.3 Softplus 4.3.4 ReLU 族 4.3.5 ELU & SELU 4.3.6 Mish 4.3.7 Swish 族 4.4 连接函数/衰减函数(Connection/Attenuation Function) 4.4.1 Dropout 4.4.2 Maxout 4.4.3 SoftMax 4.5 损失函数(Loss Function) 4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error]) 4.5.2 回归项-均方误差(MSE [Mean Squared Error]) 4.5.3 回归项-休伯损失(Huber Loss) 4.5.4 回归项-分位数损失(Quantile Loss) 4.5.5 分类项-对数损失(Log Loss) 4.5.6 分类项-交叉熵损失(Cross Entropy Loss) 4.5.7 分类项-合页损失(Hinge Loss) 4.5.8 分类项-对比损失(Contrastive Loss) 4.5.9 分类项-三元损失(Triplet Loss) 4.5.10 分类项-对组排异损失(N-Pair Loss) 4.5.11 正则项-L1 惩罚 4.5.12 正则项-L2 惩罚 4.6 优化算法/优化器(Optimizer) 4.6.1 经典优化算法(Classic Optimize Function) 4.6.2 优化算法的优化-应对震荡 4.6.3 优化算法的优化-应对重点强(弱)化更新 4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation]) 4.6.5 优化算法对比与使用建议 4.7 模型结构速览 4.7.1 卷积神经网络(CNN [Convolutional Neural Network]) 4.7.2 循环神经网络(RNN [Recurrent Neural Network]) 4.7.3 自注意力网络(Transformer) 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_1.html":{"url":"Chapter_4/Language/cn/Docs_4_1.html","title":"4.1 发展概览","keywords":"","body":"4.1 发展概览 机器学习(ML) 传统意义上的方法,可以大致分为两类:有监督学习(Supervised Learning) 和 无监督学习(Unsupervised learning)。 在 1946~2006 年期间,两种类型因为各自侧重领域存在区分,基本是同步并行发展的。有监督学习(Supervised Learning)经常被用来做一些拥有较为充足数据和标注的样本集的分类、预测工作。而无监督学习(Unsupervised learning)则更多的被用于数据重建,或简单二元分类应用上。直到 2006 年 杰弗里·辛顿(Geoffrey Hinton,1947~Present) 和 拉斯·萨拉胡迪诺夫(Russ Salakhutdinov) 提出了RBM 的快速学习算法 [1],并在 2008 年由 雨果·拉罗谢尔(Hugo Larochelle) 和 约书亚·本吉奥(Yoshua Bengio,1964~Present) 实现多层 RBM 节点组成的深度信念网络(DBN)半监督分类 [2] 后,无监督学习才逐渐被更多人所知。 有监督学习(Supervised Learning) 有监督学习(Supervised Learning) 指的是,在迭代中需要人为调参裁剪的 机器学习(ML)过程。即,从标签化训练数据集中推断出函数的机器学习任务。每一个样本集中的训练数据,都是由输入参数和预期结果组合而成。这其中,预期结果也被称为监督信号。常见的有监督学习如:支持向量机(SVM)、线性回归(Linear Regression)、逻辑回归(Logistic Regression)、朴素贝叶斯(NBM)、决策树(DT)、K-临近(K-Nearest)、深度信念网络(DBN)。 图 4-1 经典有监督学习关联图谱 无监督学习(Unsupervised Learning) 无监督学习(Unsupervised Learning) 指的是,在迭代中不需要人为干预的 机器学习(ML)过程。即,根据未标签的训练数据进行函数推断的机器学习任务。每一个样本集中的训练数据,仅由 输入的样本参数组成。无人工标注和对应输入的预期结果标记。 无监督学习主要分为两大类:确定型 和 概率型,不过也有 将概率型做为无监督学习,而 确定型归类为半监督(SSL [Semi-Supervised Learning])的分类方式。这种分类方式的原因主要是因为,确定型的代表主要是 自编码器(Auto Encoder),而自编码器在实际运行过程中,并不是完全不需要人为调参的。自编码器虽然不需要启动时进行样本标记,但是需要对解码后的结果进行对比筛选,因此也被学术界认为并不是完全的无监督。 确定型无监督学习,主要进行数据的复原、重现、重构等操作,无法进行数据分类。因此这种类型目前主要指代自编码器(Auto Encoder)及其改进算法,其目标主要是能够从抽象后的数据中尽量无损地恢复原有数据。自编码器(Auto Encoder)类型最大的特点就是: 
数据相关(Data-Specific),只能用于训练集类似数据的编解码。 数据有损(Data-Degradation),解码后的输出与编码前的输入相比是退化的。 定制自动化(Customized Automation),根据指定类型输入,训练特定的自编码器,而不需要完成任何新工作。 概率型无监督学习,主要根据概率情况推算分类、结果状态。这种类型代表就是受限波尔兹曼机(RBM)及其改进算法(rRBM等)或延伸(DBN等),其目标主要是使受限玻尔兹曼机达到稳定状态时原数据出现的概率最大。从基础上来讲,属于贝叶斯学派的观点。 图 4-2 经典无监督学习关联图谱 深度学习(Deep Learning)的崛起 前文中我们提到了 2008 年基于 DBN 的半监督分类带给了业界极大的启发。从这一刻开始,深度学习的前置科技已经准备就绪。而传统的分类方式,显然已经 不足以描述 未来可能的发展趋势了。 2011 年 吴恩达(Andrew Ng,1976~Present) 等学者发表了《有关单层神经网络的无监督特征学习》[3] ,首次将受限波尔兹曼机(RBM)应用于无监督特征学习(Unsupervised Feature Learning)。论文通过简单的算法,实现了当时在 CIFAR-10 数据集(Acc: 79.6%) 和 NORB 数据集(Acc: 97.2%) 上最高的准确度,引起了剧烈反响。大家开始思考,是否能够通过更深层的网络结构,配合强大算力与过去积累的算法,来构造一种能够自主特征提取(Features self-extracting)的人工神经网络(ANNs [Artificial Neural Networks])模型。从而实现从单一模式识别到高层抽象的过渡 [4] ( 2013 年前,此类多数还停留在,用于做复杂多模式识别的应用领域),进一步推进人工智能(AI [Artificial Intelligence])发展。受此启发,大家开始尝试与其他领域概念结合,从而解决过去备受困扰的难题。 2012 年由 Hinton 学生 埃里克斯·克里热夫斯基(Alex Krizhevsky) 发表的 AlexNet [5] 无疑为人们的信心打上了有力的兴奋剂。AlexNet 在 ImageNet LSVRC-2012 训练集上以 top-5 error 15.3% 和高达 78.1% 的识别率展示了深度学习在目标分类领域强大的潜力。要知道当年第二名的 top-5 error 仅为 26.2%,差距高达 10.9%。 AlexNet 的关键之处,就在于它将 LeNet [6] 逐步完善的 卷积神经网络(CNN [Convolutional Neural Network]) 的基本框架和操作单元概念,与深度信念网络(DBF)中由RBM单元构成的计算单元设计理念进行了结合,并引入了由生物学侧抑制概念衍生的 局部响应归一化(LRN [Local Response Normalization]) 来构建了整个网络模型。证明了深度学习的正确性,和手段的多样性。这为随后深度学习概念的分类及发展,有着 承上启下 的作用。 AlexNet 的出现,将深度学习送上了高速发展的快车道。深度学习开始做为一种有效的训练方法而逐渐登上历史舞台,而与之相关的各种其他领域方向也被送上了副驾驶。 综合以往技术与深度学习近年来的发展过程,我们有了如下的脉络: 图 4-3 深度学习与传统及相关进展关联图谱 从图不难看出。时至今日,在深度学习方向上的工业化,逐渐形成以已由 神经网络框架(backbone),配合 逐层预训练(layer-wise pre-training) 与 裁剪(fine-tunning),来构筑一类问题的 批处理解决方案。当前模型发展也呈现了多元化的态势,在不同领域分支里也出现了更多的针对于领域内问题处理的细分。我们将由此发散而出的一系列模式分析方法统一归类为深度学习的手段,就具体研究内容而言,目前主要涉及如下处理理念: 多层自编码神经网络,包括:自编码(Auto Encoder,注意其在实现上区别于 Transformer 的自编码器类型)、稀疏编码(Sparse Coding)、降噪编码(Stacked Denoising Autoencoders)等单元处理手段; 深度信念网络(DBN),由单层或多层RBM构成的神经网络系统; 卷积神经网络(CNN),卷积运算的神经网络系统; 循环神经网络(RNN),共参循环单元链式递归的神经网络系统; 生成对抗网络(GAN),生成 & 判别模型互相博弈的神经网络系统; 自注意力网络(Transformer),一种基于自注意力(Self-Attention)和多头注意力(Multi-head Attention)机制的序列到序列(Sequence to Sequence)深度神经网络模型; 深度神经网络(DNN [Deep Neural Network]) 可以认为是这一系列方法所包含的神经网络类型的统称。这几种处理方式经常 交叉混用,或 多级组合互相协作。例如:通过 CNN+GAN 来完成视觉风格迁移,通过多层Transformer 自编码器(Auto Encoder) 实现的用于 NLP 的 BERT 模型。 而随着近年来的进一步发展,传统机器学习几大领域和新兴深度学习之间逐步交叉覆盖,出现了类似于 深度强化学习(DRL [Deep Reforcement Learning]) 这样的概念。例如:AlphaGo 和 DQN(Deep Q Networks)就是这一交叉方向的产物。同时,由于研究中发现,日常人所处理的信息是有限的,如果想要达到更贴近日常情况的 ML,那么必须考虑样本量不足的情况。为了解决这部分日益增长的问题,结合现有的DL手段,人们从 2019 年开始逐渐重视小样本学习(Few-Shot Learning)、大语言模型(LLM [Large Language Model])等领域的发展和探索。未来与之相关领域(如:元学习 Meta-Learning)可以预见将会有更多的注意力倾注。 图 4-4 深度学习与传统及相关领域关系图(图小圆有重叠部分) 在这一过程中,一些传统的机器学习技术展现出了新的活力。在部分问题的处理上,通过新老技术结合构建的新型网络,如:ArcFace、DQN 等。相对来说,诸如聚类分析、降维、模式分析、自编码器等,在当下往往都以单元、组件、方法论的方式在新网络中发挥传统的作用。而新一代技术的发展,更多的是在原有的研究基础上演变而来的。这就是我们总能够在新发布的 SOTA 中,看到过去的理念和前人的影子的原因。 即 事物的发展,总是螺旋上升的。深度学习是机器学习的手段,最终实现人工智能,或有限度的人工智能,才是目的。 传统机器学习对音视频方面的帮助,并不算太大。但是深度神经网络却极大的契合了音视频工程特征。 由于传统音视频,尤其是图像处理,和深度神经网络的技术栈关联性。音视频工程不可避免会大量使用到深度学习技术。想要简单了解深度学习模型是怎么起到作用的,就需要对一些基本概念有清晰的认知。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_2.html":{"url":"Chapter_4/Language/cn/Docs_4_2.html","title":"4.2 模型工程基础","keywords":"","body":"4.2 模型工程基础 在深度学习(DL)中,我们通过计算损失函数(Loss Function),来衡量当次迭代结果对应各个关键参数权重,在实际描述问题上的有效程度。通过损失函数的变化方向,来获取对应关键参数权重,更趋近于实际结果的梯度方向。从而被我们使用来更新当前参数权重配置,以降低损失函数值,逼近最优解。这一过程被称为一次迭代(Iteration)过程。而通过多次迭代来获取最优解的过程,被称为一次训练(Training)。 在一次迭代(Iteration)中,一般需要对参与训练的所有样本进行分组,我们将这些数据子集称为 批(Batch)。每一批所包含的数据量是有可能有差异的,所以,对不同批次的样本量,我们采用 批大小(Batch 
Size) 进行衡量。 而训练中,基本不可能通过单次迭代就能达到想要的结果。所以,在工程中,我们把一次迭代所包含的相关数据和处理的周期过程,称为一个 时期(Epoch)。用以区分深度学习学术概念的迭代,和工程执行层面的差异。因此,时期(Epoch)也可以代表数量级,即指代当前一次迭代过程中的所有批的输入样本个数。 两者本质是一个概念的不同角度称呼。 简单来说: sampleinput≤sampletotal1 epochsize=sampleinput≥batchsize⋅batchnum1 batchsize=sampleinputbatchnum {\\displaystyle \\begin{aligned} {sample}_{input} &\\le {sample}_{total} \\\\ 1\\ epoch_{size} &= {sample}_{input} \\\\ &\\ge batch_{size} \\cdot batch_{num} \\\\ 1\\ batch_{size} &= \\frac{ {sample}_{input} } {batch_{num}} \\\\ \\end{aligned} } sampleinput1 epochsize1 batchsize≤sampletotal=sampleinput≥batchsize⋅batchnum=batchnumsampleinput 皆为训练过程中的 样本量级参数。 那么,除去这部分变量,实际进行运算的基本单元是什么呢? Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_2_1.html":{"url":"Chapter_4/Language/cn/Docs_4_2_1.html","title":"4.2.1 算子(Operator)& 层(Layer)","keywords":"","body":"4.2.1 算子(Operator)& 层(Layer) 算子(Operator) 和 层(Layer) 是当前各种通用神经网络模型中,最基础的组成元件。一般来说,算子用于表示模型中的数学运算,而层用于组织模型中的算子。我们通常将单一的数学函数,抽象为一个算子。 需要注意的是,两者皆为 工程概念。 算子(Operator) 算子(Operator) 本身仅代表基础运算。因此,既可以是一对一的输入输出,也可以是多对多的输入输出,可以是有状态的或无状态的,也可以是可微的或不可微的。而在使用中,类似 ReLU 等激活函数,或 Dropout 之类的损失函数,都可以被定义为算子,以方便过程中直接使用。 有状态算子在计算输出时,会对前次计算的结果进行 一定程度的 抽象记录,从而 保存以前的状态。循环神经网络 (RNN) 中的循环单元(Recurrent Unit)就属于有状态算子。无状态算子在计算输出时不需要记住以前的状态,卷积神经网络 (CNN) 中的卷积算子就属于无状态算子。 可微算子的导数可以计算,这使得它们可以用于训练神经网络。例如,线性算子和非线性算子都是可微的。不可微算子的导数不能计算,这使得它们不能用于训练神经网络,但能够做最终汇总所用。例如,Maxout 算子就是一个不可微算子。 可见,算子本身是灵活的,基本作用等同于一次单一的数学运算,而不在意具体类型。由它构成了 整个神经网络中最基础的 “加减乘除” 功能。 层(Layer) 层(Layer) 是由一组算子组成的,神经网络基本组成部分。这些算子共同执行一个特定的任务。例如,卷积层(Convolution Layer) 由一组卷积算子组成,这些算子共同执行卷积操作。池化层(Pooling Layer) 由一组池化算子组成,这些算子共同执行池化操作。 根据不同的出发点,层可以进行 非单一化 的分类。 按照 功能特性,可以分为 卷积层(Convolutional Layer) 、 全连接层(Fully Connected Layer) 、 池化/下采样层(Pooling Layer/Subsampling Layer) 、 上采样层(Upsampling Layer)。 顾名思义,卷积层即卷积算子参与运算的层级,全链接层即采用连接函数精简参数的层级。同理,池化/下采样层即采用 传统/非传统 的下采样算法(Subsampling Function),进行输入数据精简的层级,而上采样即是采用 传统/非传统 的上采样算法(Upsampling Function)对数据进行扩充的层级。这种命名法的好处是 直指功能,缺点是不太好区分流程中位置。需要根据对模型的熟悉程度和经验,来确定实际生效的阶段。 按照 数学特性,可以分为 线性层(Linear Layer) 或 非线性层(Nonlinear Layer),两种类型。线性层由一组线性算子组成,这些算子共同执行线性变换。例如,全连接层就是一个线性层。非线性层由一组非线性算子组成,这些算子共同执行非线性变换。例如,卷积层就是一个非线性层。 按照 网络特性,可以分为 前馈层(Feed Forward Layer) 或 循环层(Recurrent Layer)。前馈层中的信息只从输入流向输出。循环层中的信息可以从输入流向输出,也可以从输出流向输入。这种分类方式常被使用在 自注意力网络(Transformer) 的层单元中,也可以适当的用来描述其他类型深度神经网络中的层划分。不过,由于如 CNN、RNN 相较于 Transformer 的层级特点相对单一,所以一般不会这么使用。例如,卷积神经网络 (CNN) 中的卷积层就是一个前馈层,循环神经网络 (RNN) 中的循环单元就是一个循环层,不如直接以数学特性表述的准确。 不过,最常见的分类方式,还是直接以层所处神经网络(Neural Network)位置进行划分,称为 经典基础层类型(Classic Base Layer Type)。 经典层分类(Classic Base Layer Type) 经典基础层类型,将层分为三类,分别是:输入层(Input Layer) 、 隐藏层(Hidden Layer) 、 输出层(Output Layer)。这种分类非常直观: 图 4-5 经典层分类在简单神经网络中位置示意图(切片) 输入层(Input Layer) 是一个神经网络的 输入节点集合(Input Nodes Set),负责接收外部传入的数据。显然输入数据的维度,决定了输入层节点的数量。如图,假设我们传入的训练用样本中,每一个样本数据皆为 4×14 \\times 14×1 向量的话,那么输入层的节点就同样有 4×14 \\times 14×1 个。 隐藏层(Hidden Layer) 是一个神经网络的 特征提取节点集合(Feature Extract Nodes Set),负责将输入层经过激活函数处理后的数据,交付权重运算,得到抽象后的 特征向量(Feature Vector) 输出。如图,这里我们指定抽取的特征为 3×13 \\times 13×1 向量,因此需要 3×13 \\times 13×1 个隐藏层节点。由于本身处于神经网络内部,所以被称为隐藏层。 该层也是反向传播(BP)算法,起到主要作用的层级。 输出层(Output Layer) 则是神经网络的预测结果 输出节点集合(Prediction Output Nodes Set),负责将临近的隐藏层输入,通过连接函数(Connection Function)转换为最终的预测结果输出。也就是将抽象的特征向量,转化为实际当次时期(epoch)预测结果的关键层。 通常情况下,一个神经网络只会有一个经过专门设计的输出层。输出层的结果将会与样本集中该样本的标注结果,一同作为损失函数的输入做损失计算,并以此迭代权重变化。 图中,我们期望的预测输出是个 2×12 \\times 12×1 的结果向量,向量的维度依赖于对比集的标注。此时,输出层就需要采用 2×12 \\times 12×1 个节点,来接收前一级隐藏层的输入(例子只有一层隐藏层)。 所以综合而言,在工程上,算子常常是以最小的 方法单元(Method Unit) 
而存在,层中节点相当于最小 执行单元(Operation Unit)。层则相当于由一系列算子按照一定的处理顺序,组成的 任务单元(Task Unit)。而模型(Model)则是由一系列层按照既定目标排列组合,形成的 作业流水线(Process Pipeline)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_2_2.html":{"url":"Chapter_4/Language/cn/Docs_4_2_2.html","title":"4.2.2 神经元(Neuron)","keywords":"","body":"4.2.2 神经元(Neuron) 神经元(Neuron) 是对神经网络中的 节点(Node) 的一种,来自于仿生学的概念。上一节中我们提到的 输入/特征/输出节点集合 中的节点,都可以被称为神经元。 图 4-6 生物神经元示意图 生物学神经元之间的信号传递,是通过突触进行的。突触是神经元之间连接的部位。当一个神经元接收到信号时,它会释放神经递质,神经递质会穿过突触间隙,并与另一个神经元的受体结合。这种结合会引发一系列化学反应,最终导致另一个神经元产生动作电位。 这即是神经网络雏形,多层感知器(MLP [Multi-Layer Perceptron]) 的灵感来源。 在深度学习中,我们继续沿用了这一称谓,将所有设计相关层内数据计算的节点,统称为神经元。 神经元的组成 作为神经网络中最小的执行单位,神经元的成分可以统一用一个函数来说明: zi=wi⋅δ(xi)+bi z_i = w_i \\cdot \\delta(x_i) +b_i zi=wi⋅δ(xi)+bi 上式中, 角标 [i][_i][i] 表示当前神经元在层内的标号为 iii ,是一种 固定的表示 ; 以 xxx 表示一个输入的数值信号; 以 www 表示当前输入的 附加权重(wight),既可作为 参与训练 的层级特征权重,也可为常数 ; 以 bbb 表示当前输入的 附加偏移(bias),既可作为 参与训练 的层级特征偏移,也可为常数 ; 以 zzz 表示当前神经元的输出数值; 以 δ(x)\\delta(x)δ(x) 为当前神经元的激活函数; 可见,激活函数(Activation Function)是直接作用于神经元输入上的。 一般情况下,不论是 输入层、隐藏层,还是输出层的神经元,它们的 权重 www 和 偏移 bbb ,理论上都可以参与到反向传播(BP)的参数迭代中。 但是,输出层(Output Layer) 由于本身主要作用,是 接收连接函数(Connection Function)计算预测值,不会使用到 权重 www 和 偏移 bbb 。我们一般为了方便起见,会将作用于输出层与前一级隐藏层之间的链接函数,整合到输出层神经元中来便于代码实现。取链接函数为 f(x)f(x)f(x) 表示,有: z=f(x) z = f(x) z=f(x) 而 输入层(Input Layer) 一般为了工程和说明方便,会单独 只做激活,或 只传递(pass)数据并入下一级隐藏层。这使得对于输入层,神经元函数就变为了简单的: z=x z = x z=x 所以,真正 完整使用 到公式的,只有 隐藏层(Hidden Layer) 中的神经元。公式: zi=wi⋅δ(xi)+bi z_i = w_i \\cdot \\delta(x_i) +b_i zi=wi⋅δ(xi)+bi 也由此,可以被称为 隐藏层函数(Hidden Layer Function)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_2_3.html":{"url":"Chapter_4/Language/cn/Docs_4_2_3.html","title":"4.2.3 神经网络(NN [Neural Network])","keywords":"","body":"4.2.3 神经网络(NN [Neural Network]) 本书中的 神经网络(NN [Neural Network]) 主要是对深度神经网络(DNN,这是个大类别,见第一节)中,采用 反向传播(Back Propagation) 技术的一类深度神经网络的统称。也被称为 反向传播神经网络(Back Propagation Neural Network),是个广义分类。 所谓反向传播(BP)算法,是一种 有监督学习(Supervised Learning)算法。它需要一个标记好判定结果的数据集,来进行隐藏层特征权重和偏移的迭代。BP 在当前神经网络的损失函数,计算输出预测值与正确值误差之后,以导数驱动调整神经元的权重和偏移(依赖是否参与运算),以期望下次迭代跟贴近预测结果,减少误差 [1] 。 而这涵盖了,包括 CNN、RNN、GAN、Transformer 在内的这些经典 DNN 模式。 图 4-7 完整的 Alexnet 示意图(工程版) 如上图所示,我们以经典 CNN 图像分类模型 AlexNet 为例。 由图可以看出,一个神经网络(NN)的构成,通常由一个输入层、多个隐藏层、一个输出层组成。而隐藏层中,根据具体作用的不同,按照之前提到的层级功能性划分,又可以分为 卷积层、池化层等多种子类型。 不同类型的网络,差异体现在层级的设计上。而层级的排列和执行方式,共同组成了工程流水线(Pipeline)。这一整体,被称为神经网络结构(Nerual Network Structure)。我们在实际工作中,常以 神经网络(NN)、模型(Model)来等价指代 神经网络结构。 当然,我们这里展示的只是最简单的深度神经网络。除了单独使用一个模型外,NN 之间也可以根据各种情况进行组合串联 或 联合训练,共同构成更大的神经网络,这种方式被称为 神经网络聚合(NNE [Neural Network Ensemble])。 除此之外,当下包括 大模型(Large Model) 在内的多种模型融合技术,简称 多模态(Multi Model),皆开始采用多模型混合的实现。 例如,由 杨立昆(Yann LeCun) 提出的,基于 短期预测(Short Term Prediction) 和 长期预测交叉融合(Joint Embedding) 实现完整连续时效预测,的 自监督大模型(Self-Supervised Large Model) 理论中,通过将传统深度学习(指带单一功能深度学习模型)的各个功能层或层组合,拆分为包含:损失模型(Cost Module,类似于一个复杂的,非单一点生效的损失函数替代模型)、感知模型(Perception Module)、规则模型(Policy Module)、动作模型(Action Model)、世界模型(World Model)在内的多种特定任务模型(Specific Model),组合为复杂的连续网络,以期实现模型自学习处理体系。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_2_4.html":{"url":"Chapter_4/Language/cn/Docs_4_2_4.html","title":"4.2.4 特征选择(Feature Selection)","keywords":"","body":"4.2.4 特征选择(Feature Selection) 特征选择(Feature Selection) 是每个模型启动前最为重要的一环,也是 特征工程(Feature Engineering) 
方法论所靶向的关键问题之一。 在传统机器学习(ML)中,特征选择是对影响结果较不明显的 因变量(Independent Variables),以一系列处理手段,转换为较为明显的 关系参数(Related Parameters)表示,从而发掘出潜藏影响因素。这一过程所产生的单次影响因子参数,构成的一组标量数组,就是 特征向量(Feature Vector)。而对全体样本进行相同流程的抽象,得到的特征向量集合,即是 训练特征集(Training Feature Set)。 工程上,训练特征集 通常以 一批次(1 Batch) 样本计算后,由神经网络输出的当前权重下,输入样本的抽象非零解集构成。这个输出的抽象特征向量数据集,才是正真被我们用来衡量当前迭代结果情况的决定数据。即,损失函数(Loss Function)作用的部分。 而特征选择,正是对如何获取满足模型目标的特征和训练特征集的方法论。 常用的特征选择方式,可以分为三大类别: 过滤法(Filtered),以相关性和扩散性对特征评分,采用阈限法或策略来筛选; 包裹法(Wrapped),以评分函数或预测效果校验评分,筛选满足条件特征; 嵌入法(Embedded),以影响权重加强/衰减影响过程,用权重变换决定特征; 采用不同方法获取的训练集,也根据方法的选择情况,被分别称为 过滤集(Filterings) 、 包裹集(Wrappings) 、 嵌入集(Embeddings)。 显然,在深度学习中,被批量使用的特征选择方法,就是嵌入法。 嵌入集(Embeddings) 经由神经网络抽象高维特征的输出向量数据集,被我们称为嵌入特征向量组(Embeddings of Low-Dimesional Features),简称嵌入集(Embeddings)。与特征工程的相关称谓同名,并不矛盾。 它既可以是一组由 n×mn \\times mn×m 的向量构成的数组,如下 n×m=8×1n \\times m = 8 \\times 1n×m=8×1 有: double embeddings[BATCH_SIZE][VECTOR_SIZE] = { {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, {4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0}, /* ... */ }; 也可以是单纯的评估数据,相当于 n×m=1n \\times m = 1n×m=1 的向量组成: double predictions[BATCH_SIZE] = { 0.1, 0.8, 0.2, 0.3, 0.5, 0.7, 1.0, 0.9, /* ... */ }; 即,组成嵌入集的特征向量形式,并没有特殊的要求。但往往需要根据采用的损失函数来决定最终的格式。这一点在实践中非常重要。由于评估数据常用于线性回归,区别起见被称为 预测集(Predictions)。 现在,我们基本掌握了深度学习的入门概念。让我们分步来看,一个神经网络的具体细节。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_3.html":{"url":"Chapter_4/Language/cn/Docs_4_3.html","title":"4.3 经典激活函数(Classic Activation Function)","keywords":"","body":"4.3 经典激活函数(Classic Activation Function) 激活函数(Activation Function) 是一种被设计用来,在模型训练的每个单元数据输入位置,为输入引入非对称性特征 的特殊辅助函数。 图 4-8 激活函数作用阶段(图中蓝色线条)示意图 从图上可以看出,激活函数主要作用于隐藏层的输入。示例中只有一层隐藏层,因此激活函数作用位置在输入层接收输入数据后,交付到隐藏层的过程中。而对于多个隐藏层情况,前一级的输入也会经激活后才交付给后一级。 如果不采用激活函数,那么我们经过每层神经网络计算后,得到的最终输出都将为线性结果。线性输出实际就是最原始的感知器(Perceptron)。而单纯使用线性函数计算,在实际的处理过程中,对于大多是场景将不能很好的描述其特征。常见的算法问题常常需要引入非线性特性,才能更好的拟合样本。通常,我们通过引入激活函数来给我们设计、使用的神经网络,提供逼近任何非线性场景的能力。 激活函数,基本满足:单一输入输出、单一层处理、可参与训练参数 ,的一类激活函数。其中常用的几类,被称为 经典激活函数(Classic Activation Function)。 一般的: 当一个激活函数 f(x)f(x)f(x) 满足 x→+∞f′(x)=0x \\rightarrow +\\infty \\quad f\\prime(x)=0x→+∞f′(x)=0 时,我们称之为 右饱和。 当一个激活函数 f(x)f(x)f(x) 满足 x→−∞f′(x)=0x \\rightarrow -\\infty \\quad f\\prime(x)=0x→−∞f′(x)=0 时,我们称之为 左饱和。 当一个激活函数,既满足左饱和又满足又饱和时,我们称之为 饱和。 对任意的 xxx ,如果存在常数 ccc ,当 x>cx > cx>c 时恒有 f′(x)=0f\\prime(x)=0f′(x)=0 取值,则称其为 右硬饱和。 对任意的 xxx ,如果存在常数 ccc ,当 xcx xc 时恒有 f′(x)=0f\\prime(x)=0f′(x)=0 取值,则称其为 左硬饱和。 若既满足左硬饱和,又满足右硬饱和,则称这种激活函数为 硬饱和。 如果只有在 极限 状态下偏导数 f′(x)=0f\\prime(x)=0f′(x)=0 的函数,称之为 软饱和。 由于激活函数的作用,大多基于同向对比实验的统计结果来进行说明(目前,部分有相关的数理研究佐证,如 ReLU,但仍有争议)。因此,这里仅列出算子的公认已证明特性,和 C 语言实现。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_3_1.html":{"url":"Chapter_4/Language/cn/Docs_4_3_1.html","title":"4.3.1 Sigmoid","keywords":"","body":"4.3.1 Sigmoid 迭代公式: δ(x)=11+e−x {\\displaystyle \\begin{aligned} \\delta(x) = \\frac{1}{1+e^{-x}} \\\\ \\end{aligned} } δ(x)=1+e−x1 图像: 图 4-9 Sigmoid 函数图 特性: 非 0 为中心(non-zero-centered) 输出范围在 [0, 1][0,\\ 1][0, 1] 之间,导数 0.250.25 输出 >0> 0>0 ,反向传播(BP)权值正向堆积(梯度始终 >0> 0>0) 输入 (−∞, −5](-\\infty,\\ -5](−∞, −5] 或 [+5, +∞)[+5,\\ +\\infty)[+5, +∞) 时,输出近乎无变化,逐层梯度趋 ,更易导致梯度消失 指数计算,较为消耗资源 Sigmoid 激活函数梯度趋近于 0,即软饱和。这会导致BP在该区域部分的导数,无法有效的传递误差至上层(趋 0 失效),导致前层权值无更新,从而无法收敛。且因为 非 0 为中心,使得我们在使用它做激活函数时,需要考虑数据对称(zero-mean data)。 Sigmoid 也可以根据情况,使用其他算法代替,例如(swish、h-swish)。通常在二分问题上采用 Sigmoid 是不错的选择,诸如:是否是某一类、问题对错,即古典逻辑回归(Classical Logical 
Regression)。 Sigmoid 算子化 利用 C 语言实现对算子的封装,有: #include #include double sigmoid(double x) { return 1 / (1 + exp(-x)); } int main() { double x = 0.5; double y = sigmoid(x); printf(\"The sigmoid of %f is %f\\n\", x, y); return 0; } 运行验证可得到结果: The sigmoid of 0.500000 is 0.622459 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_3_2.html":{"url":"Chapter_4/Language/cn/Docs_4_3_2.html","title":"4.3.2 Tanh","keywords":"","body":"4.3.2 Tanh 迭代公式: sinh(x)=ex−e−x2cosh(x)=ex+e−x2δ(x)=tanh(x)=sinh(x)cosh(x)=ex−e−xex+e−x {\\displaystyle \\begin{aligned} sinh(x) &= \\frac{e^x-e^{-x}}{2} \\\\ cosh(x) &= \\frac{e^x+e^{-x}}{2} \\\\ \\delta(x) = tanh(x) &= \\frac{sinh(x)} {cosh(x)} = \\frac{e^x-e^{-x}}{e^x+e^{-x}} \\\\ \\end{aligned} } sinh(x)cosh(x)δ(x)=tanh(x)=2ex−e−x=2ex+e−x=cosh(x)sinh(x)=ex+e−xex−e−x 图像: 图 4-10 Tanh 函数图 特性: 0 为中心(zero-centered) 输出范围在 [−1, +1][-1,\\ +1][−1, +1] 之间,输出值域对称 当输入在 (−∞, −2.5](-\\infty,\\ -2.5](−∞, −2.5] 或 (−∞, −2.5](-\\infty,\\ -2.5](−∞, −2.5] 时,Tanh也会面临梯度趋 000 问题(过饱和问题) 指数计算,较为消耗资源 不难看出 Tanh(x)=2⋅Sigmoid(2x)−1Tanh( x ) = 2 \\cdot Sigmoid( 2x ) - 1Tanh(x)=2⋅Sigmoid(2x)−1 。本质上来讲 Tanh 属于Sigmoid 的一种变体,尝试通过平移拉伸变换,来解决 Sigmoid 的非原点对称问题。虽然能够处理梯度堆积带来的影响,但是 tanh 同样不能处理相较于堆积更为严重的梯度消失问题。这也是饱和类激活函数的通病。 Tanh 算子化 利用 C 语言实现对算子的封装,有: #include #include double tanh(double x) { return (exp(x) - exp(-x)) / (exp(x) + exp(-x)); } int main() { double x = 0.5; double y = tanh(x); printf(\"The tanh of %f is %f\\n\", x, y); return 0; } 运行验证可得到结果: The tanh of 0.500000 is 0.462117 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_3_3.html":{"url":"Chapter_4/Language/cn/Docs_4_3_3.html","title":"4.3.3 Softplus","keywords":"","body":"4.3.3 Softplus 迭代公式: δ(x)=log(1+ex) {\\displaystyle \\begin{aligned} \\delta(x) = log(1+e^x) \\\\ \\end{aligned} } δ(x)=log(1+ex) 图像: 图 4-11 Softplus 函数图 特性: 非 0 为中心(non-zero-centered) 输出范围在 [0, +∞)[0,\\ +\\infty)[0, +∞) 之间,导数正好为 Sigmoid 输出 ≥0\\ge 0≥0 ,反向传播(BP)权值正向堆积(梯度始终 ≥0\\ge 0≥0 ) 当输入在 [+5, +∞)[+5,\\ +\\infty)[+5, +∞) 时,梯度趋近常量 111 ,极大避免梯度消失问题 及 梯度爆炸问题 当输入在 (−∞, −5](-\\infty,\\ -5](−∞, −5] 时,输出近乎无变化,逐层梯度趋 000 ,更易导致梯度消失 指数计算,较为消耗资源 Softplus 可以看作是 ReLU 的平滑版,即无穷阶连续可导。但是因为采用了指数运算,且特性在计算机处理可近似相同。因此,常常使用 ReLU 而不是 Softplus。并且实验验证,Softplus 也并不优于 ReLU。 Softplus 算子化 利用 C 语言实现对算子的封装,有: #include #include double softplus(double x) { return log(1 + exp(x)); } int main() { double x = 0.5; double y = softplus(x); printf(\"The softplus of %f is %f\\n\", x, y); return 0; } 运行验证可得到结果: The softplus of 0.500000 is 0.648721 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_3_4.html":{"url":"Chapter_4/Language/cn/Docs_4_3_4.html","title":"4.3.4 ReLU 族 ","keywords":"","body":"4.3.4 ReLU 族 矫正线性单元(ReLU [Rectified Linear Unit]) 是整个经典激活函数中,被使用最广泛的经典中的经典。经过多年探索,已经形成了一系列以 ReLU 为基础的多种变体,用于各种突出场景。 ReLU(Rectified Linear Unit) 迭代公式: δ(x)=Max(0, x) {\\displaystyle \\begin{aligned} \\delta(x) = Max(0,\\ x) \\\\ \\end{aligned} } δ(x)=Max(0, x) 图像: 图 4-12 ReLU 函数图 特性: 非 0 为中心(non-zero-centered) 输出范围在 [0, +∞)[0,\\ +\\infty)[0, +∞) 之间 输出 ≥0\\ge 0≥0 ,反向传播(BP)权值正向堆积(梯度始终 ≥0\\ge 0≥0 ) 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度为常量 111 ,完美解决梯度消失问题 及 梯度爆炸问题 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,梯度为 000 ,面临梯度归零问题 线性处理便于计算 ReLU(2013)被称为 线性整流函数,又称为线性修正单元。ReLU 
因其简洁的特性,极低的运算量,成为了当前最常用的激活函数。业界各位炼丹师在不清楚或不确定具体使用什么激活函数时,常常选择 ReLU 或 其变体 来作为默认激活函数。 不过,纯粹的 ReLU 因为对于 神经元死亡(Dead Neuron)。已经不是梯度消失,而是直接没有了哪怕细微的迭代变化可能,即完全失活梯度归零。 但即便如此,ReLU 仍是目前最好用的激活函数。 PReLU & LReLU & RReLU 迭代公式: PReLU: δ(x)=Max(0, x)+α⋅Min(0, x)(α=0.1)LReLU: δ(x)=Max(τx, x)(τ=0.1)RReLU: δ(x)=Max(αx,x)withα=Random(lower, upper) ) {\\displaystyle \\begin{aligned} PReLU: \\ \\delta(x) &= Max(0,\\ x) + \\alpha \\cdot Min(0,\\ x) \\quad (\\alpha=0.1) \\\\ LReLU: \\ \\delta(x) &= Max(\\tau x,\\ x) \\quad (\\tau=0.1) \\\\ RReLU: \\ \\delta(x) &= Max(\\alpha x,x) \\quad with \\\\ \\alpha &= Random(lower,\\ upper) \\ ) \\\\ \\end{aligned} } PReLU: δ(x)LReLU: δ(x)RReLU: δ(x)α=Max(0, x)+α⋅Min(0, x)(α=0.1)=Max(τx, x)(τ=0.1)=Max(αx,x)with=Random(lower, upper) ) 图像: 图 4-13 PReLU & LReLU & RReLU 函数图 特性: 0 为中心(zero-centered) 输出范围在 (−∞, +∞)(-\\infty,\\ +\\infty)(−∞, +∞) 之间 输出值域对称,降低在正向堆积风险 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度为常量 111 ,完美解决梯度消失问题 及 梯度爆炸问题 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,PReLU 梯度为训练参数 τ\\tauτ (参与训练,启动值为 τ=0.1\\tau=0.1τ=0.1 ) 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,LReLU 梯度为 α=0.1\\alpha=0.1α=0.1 (常量) 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,RReLU 梯度为范围内参数(随机值) 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,三类梯度度 0.50.5 (大部分情况),还是存在 梯度消失问题 线性处理便于计算 PReLU(2016) 、 LReLU(2015 Russakovsky ImageNet 分类)、RReLU(2017 Kaggle 全美数据科学大赛 即 NDSB) 三者间的差别主要就在于 ( 0, +∞) 时的梯度是常数、参与训练、随机限定范围内取值。三者的目的都是试图通过引入 ReLU 灵活方案:NReLU(Noisy ReLU)& ReLU-N 除了上述的 ReLU 变体外,我们还可以根据实际需要选择在使用上述变体的时候,引入辅助处理,常见的辅助处理有两种: 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,引入噪音偏置常量(梯度非 111 ),或参与训练参数(类 PReLU) 当输入在 (c, +∞)(c,\\ +\\infty)(c, +∞) 时,限定最大上限常量(右饱和),或类比 LReLU 处理 这样的操作常用在一些需要限定约束激活层输出的地方使用,属于 小技巧(Tricks)。是需要 谨慎使用 的一种处理手段。 ReLU 族算子化 利用 C 语言实现对算子的封装,有: #include #include #include #include double relu(double x) { return fmax(0, x); } double prelu(double x, double tau) { return fmax(0, x) + tau * fmin(0, x); } double lrelu(double x, double alpha) { return fmax(alpha * x, x); } double rrelu(double x, double alpha, double lower, double upper) { double r = (double)rand() / (double)RAND_MAX; double alpha_rand = lower + r * (upper - lower); return fmax(alpha_rand * x, x); } int main() { // ReLU { double x = -0.5; double y = relu(x); printf(\"The ReLU of %f is %f\\n\", x, y); } { double x = +0.5; double y = relu(x); printf(\"The ReLU of %f is %f\\n\", x, y); } // PReLU { double x = -0.5; double tau = 0.1; double y = prelu(x, tau); printf(\"The PReLU of %f with alpha=%f is %f\\n\", x, tau, y); } // LReLU { double x = -0.5; double alpha = 0.1; double y = lrelu(x, alpha); printf(\"The LReLU of %f with alpha=%f is %f\\n\", x, alpha, y); } // RReLU { // Set the random seed srand(time(NULL)); double x = -0.5; double alpha = 0.1; double lower = 0.0; double upper = 1.0; double y = rrelu(x, alpha, lower, upper); printf(\"The RReLU of %f with alpha=%f, lower=%f, and upper=%f is %f\\n\", x, alpha, lower, upper, y); } return 0; } 运行验证可得到结果: The ReLU of -0.500000 is 0.000000 The ReLU of +0.500000 is 0.500000 The PReLU of -0.500000 with alpha=0.100000 is -0.050000 The LReLU of -0.500000 with alpha=0.100000 is -0.050000 The RReLU of -0.500000 with alpha=0.100000, lower=0.000000, and upper=1.000000 is -0.019595 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_3_5.html":{"url":"Chapter_4/Language/cn/Docs_4_3_5.html","title":"4.3.5 ELU & SELU","keywords":"","body":"4.3.5 ELU & SELU 迭代公式: ELU: δ(x)={xx≥0α(ex−1)x0SELU: δ(x)=λ⋅ELU(x, α) 
{\\displaystyle \\begin{aligned} ELU: \\ \\delta(x) &= \\begin{cases} x & x \\geq 0 \\\\ \\alpha (e^x-1) & xELU: δ(x)SELU: δ(x)={xα(ex−1)x≥0x0=λ⋅ELU(x, α) 图像: 图 4-14 ELU & SELU 函数图 特性: 0 为中心(zero-centered) 输出范围在 (−c, +∞)(-c,\\ +\\infty)(−c, +∞) 之间,称 ccc 为常量乘数 输出值域对称,降低在正向堆积风险 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度为常量 111 ,完美解决梯度消失问题 及 梯度爆炸问题 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,梯度以 f(x)+cf(x)+cf(x)+c 形式变化,仍然存在梯度消失风险 公式中的 α\\alphaα 可取经验值,也可参与迭代 指数计算,较为消耗资源 ELU(2016)被称为 指数线性单元。也是一种为了处理 ReLU 梯度消失问题而提出的激活函数。 ELU 比之 ReLU 其他几种变体,最大的特点就是曲线平滑。而 SELU 则是在原有 ELU 激活函数的基础上,再乘以一个系数(通常取固定常量),即 SELU(x)=λ⋅ELU(x)SELU( x ) = \\lambda \\cdot ELU( x )SELU(x)=λ⋅ELU(x) 。根据原作者 京特·克兰鲍尔(Günter Klambauer) 在论文《Self-Normalizing Neural Networks》中的描述 [8] ,推荐取 λ=1.0507009873554804934193349650124\\lambda = 1.0507009873554804934193349650124λ=1.0507009873554804934193349650124 的经验值。 SELU 可使输入经过一定层数处理后,变为固定分布。 ELU & SELU 算子化 利用 C 语言实现对算子的封装,有: #include #include double elu(double x, double alpha) { return x >= 0 ? x : alpha * (exp(x) - 1); } double selu(double x, double alpha, double lambda) { return lambda * (x >= 0 ? x : alpha * (exp(x) - 1)); } int main() { // ELU { double x = -0.5; double alpha = 1.0; double y = elu(x, alpha); printf(\"The ELU of %f with alpha=%f is %f\\n\", x, alpha, y); } // SELU { double x = -0.5; double alpha = 1.6732632423543772848170429916717; double lambda = 1.0507009873554804934193349650124; double y = selu(x, alpha, lambda); printf(\"The SELU of %f with alpha=%f and lambda=%f is %f\\n\", x, alpha, lambda, y); } return 0; } 运行验证可得到结果: The ELU of -0.500000 with alpha=1.000000 is -0.393469 The SELU of -0.500000 with alpha=1.673263 and lambda=1.050701 is -0.428348 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_3_6.html":{"url":"Chapter_4/Language/cn/Docs_4_3_6.html","title":"4.3.6 Mish","keywords":"","body":"4.3.6 Mish 迭代公式: δ(x)i=x⋅tanh(softplus(x)) {\\displaystyle \\begin{aligned} \\delta(x)_i &= x \\cdot tanh(softplus(x)) \\\\ \\end{aligned} } δ(x)i=x⋅tanh(softplus(x)) 即: δ(x)i=x⋅eln(1+ex)−e−ln(1+ex)eln(1+ex)+e−ln(1+ex)=x⋅(1+ex)2−1(1+ex)2+1=x⋅2ex+e2x2+2ex+e2x=x1+12ex+e2x {\\displaystyle \\begin{aligned} \\delta(x)_i &= x \\cdot \\frac{e^{ln(1+e^x)}-e^{-ln(1+e^x)}}{e^{ln(1+e^x)}+e^{-ln(1+e^x)}} \\\\ &=x \\cdot \\frac{(1+e^x)^2-1}{(1+e^x)^2+1} \\quad \\\\ &=x \\cdot \\frac{2e^x+e^{2x}}{2+2e^x+e^{2x}} \\\\ &= \\frac{x}{1+\\frac{1}{2e^x+e^{2x}}} \\qquad \\quad \\\\ \\end{aligned} } δ(x)i=x⋅eln(1+ex)+e−ln(1+ex)eln(1+ex)−e−ln(1+ex)=x⋅(1+ex)2+1(1+ex)2−1=x⋅2+2ex+e2x2ex+e2x=1+2ex+e2x1x 图像: 图 4-15 Mish 函数图 特性: 0 为中心(zero-centered) 输出范围在 [≈0.278, +∞)[\\approx 0.278,\\ +\\infty)[≈0.278, +∞) 之间,导数近似 Switch(x)Switch(x)Switch(x) 但过于复杂 输出值域对称,降低在正向堆积风险,但负向变化慢 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度 ≥0.5\\ge 0.5≥0.5 当输入趋近 +∞+\\infty+∞ 时,近似于 ReLU,梯度趋近 111 当输入趋近 −∞-\\infty−∞ 时,近似于 ReLU,梯度趋近 000 ,负向过输入大存在梯度消失风险 Mish 当 β→+∞\\beta \\rightarrow +\\inftyβ→+∞ 时,趋近 ReLU 平滑不单调 Mish 是由 迪甘塔·米斯拉(Diganta Misra) 在 2019 年提出的,其目的是为了在 Swish 基础上,提供一种更有效的激活函数。就目前而言,Mish 的有效性和性价比其实一直处于讨论中 [9] 。 不过,在实验检验下 Mish 并没有那么好用,其各方面特性都与 Swish 高度相似。而且采用 ImageNet 数据集 + MobileNetV2 + FPN 来做物体识别,从结果上反倒没有直接用 ReLU、或者 Swish 效果好,且 MAdds 激增。 因此,本书作者不建议使用。如果既想要利用函数平滑特性来提高优化函数效率,又不想要增加太多算力消耗的话,建议可以考虑 Swish,或 h-Swish(ReLU-N)。 Mish 算子化 利用 C 语言实现对算子的封装,有: #include #include double mish(double x) { return x * tanh(log(1 + exp(x))); } int main() { double x = 0.5; double y = mish(x); printf(\"The mish of %f is %f\\n\", x, y); return 0; } 
运行验证可得到结果: The mish of 0.500000 is 0.462117 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_3_7.html":{"url":"Chapter_4/Language/cn/Docs_4_3_7.html","title":"4.3.7 Swish 族 ","keywords":"","body":"4.3.7 Swish 在本节开始时,我们曾提到过可以用 Swish 来代替 Sigmoid 在模型中的作用,以获取平滑非线性特征。那么 Swish 具体是什么样的呢? Swish & Swish-β 迭代公式: δ(x)i=x⋅sigmoid(x)=x1+e−x {\\displaystyle \\begin{aligned} \\delta(x)_i = x \\cdot sigmoid(x)=\\frac{x}{1+e^{-x}} \\\\ \\end{aligned} } δ(x)i=x⋅sigmoid(x)=1+e−xx 迭代公式(参与训练动态参数版本,Swish-β ): δ(x)i=x⋅sigmoid(βx)=x1+e−βx {\\displaystyle \\begin{aligned} \\delta(x)_i = x \\cdot sigmoid(\\beta x)=\\frac{x}{1+e^{-\\beta x}} \\\\ \\end{aligned} } δ(x)i=x⋅sigmoid(βx)=1+e−βxx 图像: 图 4-16 Swish 函数图 特性: 0 为中心(zero-centered) 输出范围在 [≈0.278, +∞)[\\approx 0.278,\\ +\\infty)[≈0.278, +∞) 之间,导数为 swish(x)+sigmoid(x)⋅(1−swish(x))swish(x) + sigmoid(x) \\cdot ( 1-swish(x) )swish(x)+sigmoid(x)⋅(1−swish(x)) 输出值域对称,降低在正向堆积风险,但负向变化慢 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度 ≥0.5\\ge 0.5≥0.5 当输入趋近 +∞+\\infty+∞ 时,近似于 ReLU,梯度趋近 111 当输入趋近 −∞-\\infty−∞ 时,近似于 ReLU,梯度趋近 000 ,负向过输入大存在梯度消失风险 Swish-β 当 β→+∞\\beta \\rightarrow +\\inftyβ→+∞ 时,趋近 ReLU 平滑不单调 Swish 是由谷歌实验室在 2017 年提出的,提出以后其实一直争议不断。Swish 被谷歌认为是一种可以完美替代 ReLU 的简单激活函数,在论文中的演示里,其使用同模型在 Mobile NASNet-A (Zoph et al., 2017) 和 Inception-ResNet-v2 (Szegedy et al., 2017) 数据集上分别带来了0.9% 和 0.6% 的准确度提升 [10] 。不过业界普遍认为这个是因为数据集完善带来的。 Swish 作为一种平滑函数,它的特性和 SoftPlus 类似,优势都体现在优化函数的连续处理上。另外不单调,也能够提供更灵活的特性变化。兼容算力消耗,Swish-β 也不失为一种良好的选择。否则还是建议使用 ReLU 处理。 h-Swish 迭代公式: δ(x)i=x⋅h-sigmoid(x)=x⋅ReLU6(x+3)6 {\\displaystyle \\begin{aligned} \\delta(x)_i = x \\cdot h\\text{-}sigmoid(x)= x \\cdot \\frac{ReLU6(x+3)}{6} \\\\ \\end{aligned} } δ(x)i=x⋅h-sigmoid(x)=x⋅6ReLU6(x+3) 图像: 图 4-17 h-Swish 函数图 特性: 非 0 为中心(non-zero-centered) 输出范围在 [−0.375, +∞)[ -0.375,\\ +\\infty)[−0.375, +∞) 之间 输出 ≥0\\ge 0≥0 ,反向传播(BP)权值正向堆积(梯度始终 ≥0\\ge 0≥0 ) 当输入在 [+3, +∞)[ +3,\\ +\\infty)[+3, +∞) 时 梯度为 111 ,完美解决梯度消失问题 及 梯度爆炸问题,等效 ReLU 当输入在 (−∞, −3](-\\infty,\\ -3](−∞, −3] 时 梯度为 000 ,面临神经元死亡问题,等效ReLU 当输入在 (−3, +3)(-3,\\ +3)(−3, +3) 时 梯度为 cx+bcx+bcx+b ,c=16c = \\tfrac{1}{6}c=61 ,b=0.5b = 0.5b=0.5 ,梯度 ≥0.5\\ge 0.5≥0.5 非指数处理便于计算 非平滑不单调 h-Swish 是由谷歌实验室在 2019 年的 MobileNetV3 中提出的,用于作为两种 MobileNet 关键优化手段中的一种 [11] 。h 表示 hard。h-Swish 与 Swish 最大的不同就在于,用近似 sigmoid 的 ReLU-6(x+3) / 6 代替了 Sigmoid,也被称为 h-Sigmoid。 h-Swish 保留了 Swish不单调的特性,能够更好的进行非线性特性的引入。但是 h-Swish 也保留了 Swish 的有效范围特性。且因为采用 h-Sigmoid 处理,在样本输入小于 -3,将会导致神经元死亡问题。但是 h-Swish 的优势也同样明显,因为单激活函数最高只用到二次幂,实际运算当中较 Swish 节约了相当的算力。因此,建议根据情况,选择特征缩放处理(或单边限定偏移)后使用。其本身还是很有潜力的新兴激活函数。 需要注意的是,考虑到计算便利性和 Tanh 与 Sigmoid 的函数趋势近似。在工程中,我们 采用 Tanh 代替原论文的 Sigmoid 进行 Swish 族的算子化。同理也适用于,采用 log 代替 h-Sigmoid。从而简化了计算过程。 Swish 族算子化 利用 C 语言实现对算子的封装,有: #include #include double swish(double x) { return x * tanh(x); } double swish_beta(double x, double beta) { return x * tanh(beta * x); } double h_swish(double x) { return x * tanh(log(1 + exp(x))); } int main() { // Swish { double x = 0.5; double y = swish(x); printf(\"The swish of %f is %f\\n\", x, y); } // Swish-β { double x = 0.5; double beta = 1.0; double y = swish_beta(x, beta); printf(\"The swish-beta of %f with beta=%f is %f\\n\", x, beta, y); } // h-Swish { double x = 0.5; double y = h_swish(x); printf(\"The h-swish of %f is %f\\n\", x, y); } return 0; } 运行验证可得到结果: The swish of 0.500000 is 0.462117 The swish-beta of 0.500000 with beta=1.000000 is 0.462117 The h-swish of 0.500000 is 0.462117 至此,常用激活函数基本梳理完毕。 
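作为补充:上文 Swish 族的算子化为了简化计算,以 Tanh 近似替代了原论文中的 Sigmoid。若希望严格按照本节迭代公式,即 Swish(x) = x·sigmoid(βx) 与 h-Swish(x) = x·ReLU6(x+3)/6 来验证数值,可参考下面这份仅作示意的 C 语言草案。其中 swish_sigmoid、h_swish_relu6 等函数命名为本示例自拟,并非正文算子化所用名称:

#include <stdio.h>
#include <math.h>

/* 按 4.3.7 节迭代公式原始形式实现的示意草案(非正文算子化代码):
   Swish(x)   = x * sigmoid(beta * x)
   h-Swish(x) = x * ReLU6(x + 3) / 6 */
static double sigmoid(double x) {
    return 1.0 / (1.0 + exp(-x));
}

static double relu6(double x) {
    if (x < 0.0) return 0.0;
    if (x > 6.0) return 6.0;
    return x;
}

double swish_sigmoid(double x, double beta) {
    return x * sigmoid(beta * x);
}

double h_swish_relu6(double x) {
    return x * relu6(x + 3.0) / 6.0;
}

int main() {
    double x = 0.5;
    printf("Swish(beta=1) of %f is %f\n", x, swish_sigmoid(x, 1.0));
    printf("h-Swish of %f is %f\n", x, h_swish_relu6(x));
    return 0;
}

运行验证可得到结果:

Swish(beta=1) of 0.500000 is 0.311230
h-Swish of 0.500000 is 0.291667

可见,按原公式计算的数值与 Tanh 近似版本并不相同。实际工程中应按所选公式统一取用,避免训练与推理两侧实现不一致。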
其实,常用于中间层的激活函数,往往都是简单激活函数,这样能够在引入非线性特征的同时,相对较小或者几乎不怎么消耗算力资源。而在最终阶段,常常使用复杂的激活函数来做分类器,或结果控制的操作。相对来说,复杂激活函数的使用往往都放在最终阶段的原因,就是因为其过于复杂的特性,能够适应更严格的情况,但也相对更耗费算力资源,无法频繁使用。 至于如何更好的使用激活函数,建议结合激活函数的特性,将输入值进行适当的放缩,例如:如果使用Sigmoid,那么我们可以先行放缩上层输入到 ( -5, 5 ) 的范围内,这样一定程度的避免梯度消失问题。所以,如何根据选用的激活函数,适当的调整上层输入,将会对结果大有裨益。 另外,个人理解 光滑(smooth) 函数类型的激活函数,其优势在于 能够更好的配合优化方法,而且能够 解离 不同分类之间的差异性(连续非离散,差异细化),使得模型具有更好的鲁棒性。但是因为算力上不占优势,建议用在 last stage 部分。 除此之外,非单调性也是近期激活函数新的关注点,业界的研究显示,适当的引入非单调性,能够很好的增强激活函数将源数据,输出为非线性数据的信息保存水平。 综合而言,建议现阶段使用激活函数,优先考虑:ReLU、LReLU、ReLU-N、h-Swish,根据是否需要配合优化算法(利用 smooth 特性),进一步选择是否采用:Softplus、Swish。结合现有硬件水平,适度的考虑含有指数计算的激活函数。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_4.html":{"url":"Chapter_4/Language/cn/Docs_4_4.html","title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","keywords":"","body":"4.4 连接函数/衰减函数(Connection/Attenuation Function) 连接函数(Connection Function) 是一种被设计用来,在模型训练的每个单元数据输入位置,为输入进行筛选收减的特殊辅助函数。常被用在诸如:全链接层(Full Connected Layer) 、 自注意力层(Self-Attention Layer) 等 输出层(Output Layer) 或 部分特殊隐藏层(Hidden Layer) 单元设计中。用来对经过激活(或纯输入)的当前层输入进行 特征提炼(Feature Extraction)。由于在当下占据主流的 Transformer 模型中,以自注意力层的注意力衰减机制存在,因而也被称为 衰减函数(Attenuation Function)。 图 4-18 连接函数作用阶段(图中蓝色线条)示意图 连接函数,基本满足:单一输入输出、多层参数处理、可参与训练参数 > 1,的特点。其中,较为经典的主要有 3 个,分别是 Dropout 、Maxout 、Softmax 。 由于链接函数作用于层中各节点的串联,因此,为了便于说明。 统一的: 多个前置常为向量形式输入,这里我们统一记输入为 x⃗\\vec{x}x⃗ ,各分量值都以 xxx 代替。 以 iii 代表对应层输入通道,输入层输入设置为 nnn ,则 iii 顺序取值 [1, n][ 1,\\ n][1, n] 。 以 jjj 代表对应层激活节点,激活层节点设置为 kkk ,则 jjj 顺序取值 [1, k][ 1,\\ k][1, k] 。 以 WWW 代表对应层激活节点权重,对应输入 iii 的激活节点 jjj 的权重就为 WijW_{ij}Wij 。 以 bbb 代表对应层激活节点偏移,对应输入 iii 的激活节点 jjj 的偏移就为 bijb_{ij}bij 。 以 zzz 代表对应层计算后值,对应激活节点 jjj 的算后值就为 zjz_jzj 。 以 hj(x)h_j(x)hj(x) 代表对应层,各节点计算值 zjz_jzj 经过函数 f(zj)f(z_j)f(zj) 处理后输出,对应下一层的输入。 则,未经过链接函数处理前后的网络情况,可以用 公式表示 为: zj=wijT⋅x+bijhj(x)=f(z) {\\displaystyle \\begin{aligned} z_j &= {w_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= f(z) \\end{aligned} } zjhj(x)=wijT⋅x+bij=f(z) 其中,函数 f(x)f(x)f(x) 的选择非常广泛,既可以是一些激活函数,如 ReLU,也可以是链接函数。而链接函数的作用位置往往有一些差异,部分作用于 zjz_jzj 的计算过程,另一些则直接作用于结果的筛选。介于作用在多个节点范围,我们用 Σ(x⃗)\\Sigma(\\vec{x})Σ(x⃗) 来代指整个 链接函数生效过程,它的输出即是下一层(即后一级,如果有)的输入向量。 则,经过链接函数处理前后的网络情况,就可以用公式表示,有: Σ(x⃗)=∑hj(x) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\end{aligned} } Σ(x⃗)=∑hj(x) 在这些前提下,我们来看这三个经典链接函数。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_4_1.html":{"url":"Chapter_4/Language/cn/Docs_4_4_1.html","title":"4.4.1 Dropout","keywords":"","body":"4.4.1 Dropout 迭代公式: Σ(x⃗)=∑hj(x)∈{f(z)∈Activation FunctionRj=0 or 1∈Bernoulli(p)zj=Rj⋅WijT⋅x+bijhj(x)=f (zij) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} f(z) &\\in Activation \\ Function\\\\ R_j &= 0 \\ \\text{or} \\ 1 \\in Bernoulli(p) \\\\ z_j &= R_j \\cdot {W_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= f\\ (z_{ij}) \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎪⎪⎪⎨⎪⎪⎪⎧f(z)Rjzjhj(x)∈Activation Function=0 or 1∈Bernoulli(p)=Rj⋅WijT⋅x+bij=f (zij) 图像: 图 4-19 Dropout 输入输出作用示意图 特性: Dropout 采用了根据采用者需要的任意设定激活函数,来作为 f(zj)f(z_j)f(zj) 功效 Dropout 对每一个激活节点输出 zjz_jzj 都赋予了根据伯努利分布的随机 000 或 111 附加筛选值 伯努利分布(Bernoulli Distribution)参数 ppp 的值,越大越容易取 111 ,越小则易取 000 被证明,当 p=0.5p=0.5p=0.5 时,能够带来最好的 类正则效果 每次触发层计算,伯努利结果 RijR_{ij}Rij 都会根据 ppp 重新获取 变相取平均,能够减少同层内,神经元间的公适性 辅助链接层处理,作用于节点选择,0 丢弃,1 通过 Dropout 是由 Hinton 于 2012 年提出的一种,针对容易过拟合小数据集训练的,过拟合防治手段 [11] 
。其本身通过阻塞当前层计算中的生效节点,来实现对当次参与计算权重的随机过滤,从而降低各个训练参数间的关联性。 这个方法随后就被用在了于同年发表的 AlexNet 上,并随着 AlexNet 飞跃式的高准确度(在发表时间点),一起被人们熟知。而随着后续多篇相关 Dropout 数学特征和统计研究的文献中,证明了 Dropout 不止可以被运用于小样本情况,更是相当有效的正则化和模型鲁棒性处理方式。 直到今日,仍然被运用于大量模型的训练中。 Dropout 算子化 利用 C 语言实现对算子的封装,有: #include #include #include double dropout(double x, double p) { if (drand48() 运行验证可得到结果: The dropout of 0.500000 with p=0.500000 is 0.000000 The dropout of 0.500000 with p=0.500000 is 1.000000 和理论表现一致。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_4_2.html":{"url":"Chapter_4/Language/cn/Docs_4_4_2.html","title":"4.4.2 Maxout","keywords":"","body":"4.4.2 Maxout 迭代公式: Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijhj(x)=maxj∈[1,k] (zj) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= \\max_{j \\in [1, k]}\\ (z_j) \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎨⎧zjhj(x)=WijT⋅x+bij=j∈[1,k]max (zj) 图像: 图 4-20 Maxout 输入输出作用示意图 特性: Maxout 对输入,走激活层进行线性处理,一个节点即一次线性拟合,参数激增 ≥ k 倍 最终是由经由激活层映射后的数据,计算 Max 取极大值 本身不面对梯度消失导致和梯度爆炸问题 适合配套 Dropout ,作为后级或位于同层一起使用 无法用于求导,Maxout 不可微 整体处理线性,且非饱和 在 Goodfellow 提出的 Maxout Networks 中指出 Dropout 在层数更多的框架中,能有更好的性能 [13] 。因此,应该有与之匹配的激活函数,来以一种通用的手段将原有模型抽象非线性特征过程,进行层化处理。 Maxout 的设计目的,就是为了更好的使用 Hinton 提出的 Dropout 的性能,提供此类操作。其需要学习的参数就是k个神经元中的权值和偏置,这就相当于常规的激活函数一层,而 Maxout 是两层,而且参数个数增加了 K 倍。 Maxout 能够有效的原理是,任何 ReLU 及其变体等激活函数都可以看成分段的线性函数,而 Maxout 加入的一层神经元正是一个可以学习参数的分段线性函数。所以,理论是可以拟合(无限分割)所有凸函数的。 如下图展示的 k 为 1、2、4 时的情况: 图 4-21 Maxout 凸函数拟合示意图[13] 但是,由于 Maxout 会导致参数激增,从而造成运算量增加,因此不常使用。且由于本身的 不可微 特性,大部分情况下 Maxout 仅能 被用于末尾层中,来对此时已经经过提炼,参数相对较少的特征,进行连接拟合。 Maxout 算子化 利用 C 语言实现对算子的封装,有: #include #include double maxout(double *x, int size) { double max_value = x[0]; for (int i = 1; i max_value) { max_value = x[i]; } } return max_value; } int main() { int size = 3; double vecx[] = {0.5, 0.75, 1.0}; double w = maxout(vecx, size); printf(\"The maxout of the input vector is %f\\n\", w); return 0; } 运行验证可得到结果: The maxout of the input vector is 1.000000 和理论表现一致。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_4_3.html":{"url":"Chapter_4/Language/cn/Docs_4_4_3.html","title":"4.4.3 SoftMax","keywords":"","body":"4.4.3 Softmax 迭代公式: Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijhj(x)=ezj∑ezj {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= \\frac{e^{z_j}}{\\sum{e^{z_j}}} \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎨⎧zjhj(x)=WijT⋅x+bij=∑ezjezj 迭代公式( log 版本,log-Softmax): Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijhj(x)=log(ezj∑ezj)=zj−log(∑j=1kezj) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= log(\\frac{e^{z_j}}{\\sum{e^{z_j}}})=z_j-log(\\sum_{j=1}^k{e^{z_j}}) \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎪⎪⎨⎪⎪⎧zjhj(x)=WijT⋅x+bij=log(∑ezjezj)=zj−log(j=1∑kezj) 迭代公式( stable 版本,stable-Softmax): Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijD=log(C)=−max(z1,z2,...,zk)hj(x)=C⋅ezjC⋅∑ezj=ezj+log(C)∑ezj+log(C)=ezj+D∑ezj+D {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ D &= log(C)=-max(z_1, z_2,...,z_k) \\\\ h_j(x) &= \\frac{C\\cdot e^{z_j}}{C\\cdot \\sum{e^{z_j}}} = \\frac{e^{z_j+log(C)}}{\\sum {e^{z_j+log(C)}}}=\\frac{e^{z_j+D}}{\\sum {e^{z_j+D}}} \\end{cases} 
\\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎪⎪⎪⎨⎪⎪⎪⎧zjDhj(x)=WijT⋅x+bij=log(C)=−max(z1,z2,...,zk)=C⋅∑ezjC⋅ezj=∑ezj+log(C)ezj+log(C)=∑ezj+Dezj+D 迭代公式( stable-log 结合版本,stable-log-Softmax): Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijD=log(C)=−max(z1,z2,...,zk)hj(x)=log(C⋅ezjC⋅∑ezj)=(zj−D)−log(∑j=1ke(zj−D)) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ D &= log(C)=-max(z_1, z_2,...,z_k) \\\\ h_j(x) &= log(\\frac{C\\cdot e^{z_j}}{C\\cdot \\sum{e^{z_j}}}) = (z_j-D)-log(\\sum_{j=1}^k{e^{(z_j-D)}}) \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧zjDhj(x)=WijT⋅x+bij=log(C)=−max(z1,z2,...,zk)=log(C⋅∑ezjC⋅ezj)=(zj−D)−log(j=1∑ke(zj−D)) 图像: 图 4-22 Softmax 输入输出作用示意图 特性: Softmax 能够起到归一化作用,将输入变换到输出范围在 [ 0, 1 ] 之间 输出满足概率累和为 1 求最大值过程非稀疏化 只增加了一层用于概率映射的隐藏层,增加了 input 个参数 Softmax 存在大指数输入导致的数值稳定性问题 log-Softmax 少了除法,相对数值更加稳定 stable-Softmax 对指数做了差值限定,但因为除法,可能会导致除零问题 stable-log-Softmax 有 stable 和 log 两者的优点,且无除零问题,但略微增加消耗 Softmax 常用于多目标分类、目标预测、NLP领域。能够将数字特征映射到概率范围内。常用在全联接层后,并配合 Cross-Entropy 损失函数使用。 目前 Softmax 的多种变体中,被使用最多的还是 stable-log-Softmax ,且涵盖了 log-Softmax 的相关情况。因此,一般将 stable-log-Softmax 和 log-Softmax ,统一称为 log-Softmax。 Softmax 被广泛使用的原因,还是在于它自带归一化,且能够稳定神经元的功能。这使得用 Softmax 做链接层算子,能够在分类场景上,更快的达到期望结果。是提升训练速率的有效手段。 Softmax 算子化 利用 C 语言实现对算子的封装,有: #include #include double ori_softmax(double *x, int size) { double sum = 0; for (int i = 0; i max_value) { max_value = x[i]; } } double sum = 0; for (int i = 0; i max_value) { max_value = x[i]; } } double sum = 0; for (int i = 0; i 运行验证可得到结果: The softmax of the input vector is 0.244728 The log-softmax of the input vector is -1.401880 The stable-softmax of the input vector is 0.244728 The log-stable-softmax of the input vector is -1.401880 和理论表现一致。 当然,连接函数并不只有列出的这三种类型。每年都有大量有关此方面的研究,给出新的样式。但从上我们也能够发现,若非足够泛化,连接函数鲜有脱离模型而存在的独立类型。这在上文中列出的 Maxout 与 Dropout、Softmax 的对比中有明显体现。因此,需要在训练中注意这一点。 目前,我们已经掌握了基本的样本提炼手段, 接下来就需要考虑权重迭代了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5.html":{"url":"Chapter_4/Language/cn/Docs_4_5.html","title":"4.5 损失函数(Loss Function)","keywords":"","body":"4.5 损失函数(Loss Function) 损失函数(Loss Function) 是用来,评估当前通过模型得到的预测值和实际样本真实值之间差异的大小。通过损失函数的导数,可以得到当前迭代预测值趋近实际值的变化情况,即梯度。因此常被我们用来作为下一次迭代的依据。损失函数的计算涉及所有引入的参数,计算的尺度是从整个模型层面进行的。 图 4-23 损失函数作用阶段(图中蓝色线条)示意图 如图,一次有效的损失函数计算,通常都是发生在一次全样本遍历(epoch)之后。我们通常使用的损失函数(Loss Function),严格意义上应该被称为 成本函数(Cost Function),即一种针对 整个训练集误差进行衡量 的目标函数。 损失函数的组成 损失函数的风险来源主要有两个:来自数据的风险 和 来自结构结构。这两种风险都会导致训练模型容易过拟合,而使得泛化能力受到影响。我们可以通过降低模型的复杂度来防止过拟合,这种方法被称为 正则化(Regularization)。 以最小化损失为目标,称为 经验风险最小化 : minimize( Loss( Data∣Model ) ) minimize(\\ Loss(\\ Data|Model\\ )\\ ) minimize( Loss( Data∣Model ) ) 以最小化损失和复杂度为目标,称为 结构风险最小化 : minimize( Loss( Data∣Model ) +complexity( Model ) ) minimize(\\ Loss(\\ Data|Model\\ )\\ + complexity(\\ Model\\ )\\ ) minimize( Loss( Data∣Model ) +complexity( Model ) ) 我们通常用结构风险最小化的目标函数,作为实际损失函数。其成分广义上分为两个部分,损失项 和 正则项(在线上学习的角度上,还会引入第三项中心值项,用来约束新的迭代结果与历史记录差异性)。 损失项(Losses),用于衡量模型与数据的 拟合度(fitness) 的损失函数组成部分,也是实际需要进行选择性设计和采用的模型关键成分。这种针对性的处理,在聚类分析和人脸识别领域非常常见。根据功能的不同,又可以细分为 回归项(Regression) 和 分类项(Classification)。 正则项(Regularities),用于衡量模型 复杂度(complexity) 的损失函数组成部分。衡量模型复杂度的方法有很多。大部分是从权重对整个模型影响的层面来判断的,即从权重的大小,来衡量某个参数对整体模型的影响。 接下来,我们就分别从 回归项(Regression)、分类项(Classification)、正则项(Regularities)三种类型,来了解损失函数的使用。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 
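结合本节给出的结构风险最小化目标 minimize( Loss( Data|Model ) + complexity( Model ) ),这里给出一份仅作示意的 C 语言草案:损失项取后文 4.5.2 的均方误差(MSE),正则项取 4.5.12 的 L-2 惩罚,两者以系数 lambda 加权求和。其中 structural_risk 等函数命名、lambda 与权重取值,均为本示例假设,并非正文约定:

#include <stdio.h>

/* 结构风险最小化的示意草案(非正文代码):
   total_cost = MSE(数据损失项) + lambda * L2(正则项)
   lambda 为正则强度系数,取值仅为示例假设 */
double mse_loss(const double *y_true, const double *y_pred, int size) {
    double sum = 0.0;
    for (int i = 0; i < size; i++) {
        double err = y_true[i] - y_pred[i];
        sum += err * err;
    }
    return sum / size;
}

double l2_penalty(const double *weights, int num_weights) {
    double sum = 0.0;
    for (int i = 0; i < num_weights; i++) {
        sum += weights[i] * weights[i];
    }
    return sum;
}

double structural_risk(const double *y_true, const double *y_pred, int size,
                       const double *weights, int num_weights, double lambda) {
    return mse_loss(y_true, y_pred, size) + lambda * l2_penalty(weights, num_weights);
}

int main() {
    double y_true[]  = {0.5, 0.75, 1.0};
    double y_pred[]  = {0.6, 0.80, 0.9};
    double weights[] = {0.2, -0.1, 0.4};
    double lambda    = 0.01;
    double cost = structural_risk(y_true, y_pred, 3, weights, 3, lambda);
    printf("The structural risk (MSE + lambda * L2) is %f\n", cost);
    return 0;
}

运行验证可得到结果:

The structural risk (MSE + lambda * L2) is 0.009600

调大 lambda 会加重对大权重的惩罚,对应更强调降低模型复杂度;调小则更接近单纯的经验风险最小化。后续各小节的回归项、分类项与正则项,都可以按同样方式组合进该目标函数。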
"},"Chapter_4/Language/cn/Docs_4_5_1.html":{"url":"Chapter_4/Language/cn/Docs_4_5_1.html","title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","keywords":"","body":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error]) 迭代公式: Loss=1N∑i=1N∣yi−predictioni∣ {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i = 1}^{N}|y_i-prediction_i| \\\\ \\end{aligned} } Loss=N1i=1∑N∣yi−predictioni∣ 图像: 图 4-24 MAE 函数图 特性: 契合拉普拉斯分布(Laplace distribution)样本 通过样本投影平面的距离向量绝对值,来衡量预测结果 导数为常数,梯度迭代线形 非光滑(non-smooth) 线性处理便于计算 MAE 也被称为 L-1 损失(L1L_1L1 Loss)。虽然 MAE 常用于机器学习,但它既不是唯一实用的损失函数,也不是适用于所有情形的最佳损失函数。MAE 以样本分布满足拉普拉斯分布的情况为假设,因此对于样本分布满足拉普拉斯分布的样本集,会有更好的效果。MAE 的梯度变换是刚性的,但也因此不容易受到离群值的影响。相应的,MAE 的收敛速度也会更慢一些。 MAE 算子化 利用 C 语言实现对算子的封装,有: #include #include double mae(double *y_true, double *y_pred, int size) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The MAE is 0.100000 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_2.html":{"url":"Chapter_4/Language/cn/Docs_4_5_2.html","title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","keywords":"","body":"4.5.2 回归项-均方误差(MSE [Mean Squared Error]) 迭代公式: Loss=1N∑i=1N(yi−predictioni)2 {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i = 1}^{N}(y_i-prediction_i)^2 \\\\ \\end{aligned} } Loss=N1i=1∑N(yi−predictioni)2 图像: 图 4-25 MSE 函数图 特性: 契合正态分布(Normal distribution)样本 通过投影平面上的欧式距离,来衡量预测结果 导数非常数,梯度迭代非线形 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 MSE 也被称为 L-2 损失(L2L_2L2 Loss),它相当于 MAE 的光滑版。虽然 MSE 常用于机器学习,但它既不是唯一实用的损失函数,也不是适用于所有情形的最佳损失函数。 MSE 从本质上是以极大似然估计,拟合正态分布。对于满足正态分布特性的样本数据,MSE 能相对得到满意的结果。但是对于非正态分布的问题,如:二分类,或更进一步的聚类分析,MSE 不能满足需求。MSE 常被用来做多对一正态分布样本集结果预测的损失函数使用。 MSE 和 MAE 对应差异主要是在于 鲁棒性 和 收敛速度 的权衡上,在使用条件上是类似的,根据情况选择使用。 MSE 算子化 利用 C 语言实现对算子的封装,有: #include #include double mse(double *y_true, double *y_pred, int size) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The MSE is 0.033333 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_3.html":{"url":"Chapter_4/Language/cn/Docs_4_5_3.html","title":"4.5.3 回归项-休伯损失(Huber Loss)","keywords":"","body":"4.5.3 回归项-休伯损失(Huber Loss) 迭代公式: Loss={1N∑i=1N[12⋅(yi−predictioni)2]∣yi−predictioni∣≤δ1N∑i=1N[δ⋅(∣yi−predictioni∣−12δ)]∣yi−predictioni∣>δ {\\displaystyle \\begin{aligned} Loss = \\begin{cases} \\frac{1}{N} \\sum_{i = 1}^{N} [\\frac{1}{2} \\cdot (y_i-prediction_i)^2] \\quad & |y_i-prediction_i| \\leq \\delta \\\\ \\frac{1}{N} \\sum_{i = 1}^{N}[\\delta \\cdot (|y_i-prediction_i| -\\frac{1}{2}\\delta) ] \\quad & |y_i-prediction_i| > \\delta \\end{cases} \\\\ \\end{aligned} } Loss=⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧N1i=1∑N[21⋅(yi−predictioni)2]N1i=1∑N[δ⋅(∣yi−predictioni∣−21δ)]∣yi−predictioni∣≤δ∣yi−predictioni∣>δ 图像: 图 4-26 Huber Loss 函数图 特性: 当绝对误差在 [0, δ][ 0,\\ \\delta][0, δ] 时,契合正态分布(Normal distribution) 当绝对误差在 (δ, +∞)( \\delta,\\ +\\infty)(δ, +∞) 时,契合拉普拉斯分布(Laplace distribution) 当绝对误差小于 δ\\deltaδ 时,它采用平方误差,导数非常数 当绝对误差大于 δ\\deltaδ 时,采用的线性误差,导数常数 δ2\\tfrac{\\delta}{2}2δ 。 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 休伯损失(Huber Loss) 实际上是基于 MAE 和 MSE 基础上,提出的一种兼容 MAE 与 MSE 各自优点的损失函数设计。 相比于 MSE 和 MAE,Huber Loss 的算力消耗没有太多的提升。相比于 MSE,Huber Loss 降低了 δ\\deltaδ 半径外对离群值的惩罚;相比于 MAE,Huber Loss 提高了 δ\\deltaδ 半径内回归的收敛速度。可以看出,Huber Loss 的效果首 δ\\deltaδ 的选择影响较大。因此,使用它的时候,需要注意 δ\\deltaδ 调参问题。 Huber Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double huber_loss(double *y_true, double *y_pred, int size, double delta) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The Huber loss 
is 0.033333 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_4.html":{"url":"Chapter_4/Language/cn/Docs_4_5_4.html","title":"4.5.4 回归项-分位数损失(Quantile Loss)","keywords":"","body":"4.5.4 回归项-分位数损失(Quantile Loss) 迭代公式: Loss={1N∑i=1N(1−γ)⋅∣yi−predictioni∣yipredictioni1N∑i=1Nγ⋅∣yi−predictioni∣yi≥predictioni {\\displaystyle \\begin{aligned} Loss = \\begin{cases} \\frac{1}{N} \\sum_{i = 1}^{N} (1-\\gamma) \\cdot |y_i-prediction_i| \\quad & y_i Loss=⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧N1i=1∑N(1−γ)⋅∣yi−predictioni∣N1i=1∑Nγ⋅∣yi−predictioni∣yipredictioniyi≥predictioni 图像: 图 4-27 Quantile Loss 函数图 图 4-28 Quantile Loss 样本拟合示意图 特性: 当预测值残差在 [0, +∞)[ 0,\\ +\\infty)[0, +∞) 时,梯度为设定值 γ\\gammaγ 当预测值残差在 (−∞, 0)(-\\infty ,\\ 0)(−∞, 0) 时,梯度为设定值 1−γ1- \\gamma1−γ 可通过 γ\\gammaγ 的设定,来有指向的调整模型结果,γ\\gammaγ 的可范围在 [0, 1][ 0,\\ 1][0, 1] 适用于区间预测,通过调整 γ\\gammaγ 范围覆盖预测区间 非光滑(non-smooth) 非指数计算,算力消耗相对较低 分位数损失(Quantile Loss) 是一种用于区间预测的损失函数。MAE、MSE、Huber 等损失函数,基于的是最小二乘法,默认预测实际值残差方差保持不变且相对独立。而以分位数损失作为损失函数的回归模型,对于具有变化方差或非正态分布的残差,也能给出合理的预测区间。 分位损失函数中,γ\\gammaγ 值代表对预测结果的预判程度:γ\\gammaγ 值 越大,对结果被低估的惩罚程度越高,即越容易被 高估 ;γ\\gammaγ 值 越小,对结果被高估的惩罚程度越高,即越容易被 低估。在区间预测过程中,通过调整 γ\\gammaγ 取值范围,来实现对样本的覆盖,得到预测区间。 因为 Quantile Loss 的这种特性,常被用来做商业评估类型的回归模型。 Quantile Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double quantile_loss(double *y_true, double *y_pred, int size, double q) { double sum = 0; for (int i = 0; i 0) { sum += q * error; } else { sum += (1 - q) * error; } } return sum / size; } int main() { int size = 3; double y_true[] = {0.5, 0.75, 1.0}; double y_pred[] = {0.6, 0.8, 0.9}; double q = 0.5; double quantile_loss_value = quantile_loss(y_true, y_pred, size, q); printf(\"The quantile loss is %f\\n\", quantile_loss_value); return 0; } 运行验证可得到结果: The quantile loss is 0.083333 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_5.html":{"url":"Chapter_4/Language/cn/Docs_4_5_5.html","title":"4.5.5 分类项-对数损失(Log Loss)","keywords":"","body":"4.5.5 分类项-对数损失(Log Loss) 迭代公式: Loss=1N∑i=1N−yi⋅log(predictioni)−(1−yi)⋅log(1−predictioni) {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i=1}^N -y_i \\cdot log(prediction_i)-(1-y_i) \\cdot log(1-prediction_i) \\\\ \\end{aligned} } Loss=N1i=1∑N−yi⋅log(predictioni)−(1−yi)⋅log(1−predictioni) 图像: 图 4-29 Log Loss 函数图 特性: 契合逻辑分布(Logistic distribution)样本,拟合 Sigmoid 模型 二分类下的交叉熵损失表现,二者本质等价 越接近目标,损失越小 越趋近两极,结果越准确 基于贝叶斯统计(Bayesian statistics),采用交叉熵估计 光滑(smooth),适合优化算法 对数计算,算力消耗相对较高 对数损失(Log Loss) 是一种利用最小化负对数似然,即交叉熵最小化,来进行逻辑回归的损失函数。实际上,Log Loss 相当于 只包含两种分类 情况下的交叉熵损失函数。其所适应逻辑分布样本集,我们认为只存在 “是/否”两种情况 的 独热向量(one-hot vector) 集合。对于此类样本集,我们一般采用 Sigmoid 将输出压缩到 [0, 1][ 0,\\ 1][0, 1] 范围内,以便于输出百分比估计结果,作为预测结果的置信水平。而从 Log Loss,我们不难看出,最小化交叉熵函数本质就是对数似然函数的最大化。 注意,对数损失只能用来区分 “是/否” 为某个物体。 这一点在初学者首次接触时,容易与交叉熵损失搞混,从而选错分类项(比如目标是多分类检测)需要小心。 Log Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double log_loss(double *y_true, double *y_pred, int size) { double sum =0; for (int i =0; i 运行验证可得到结果: The log loss is -0.056644, for object class 'apple' Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_6.html":{"url":"Chapter_4/Language/cn/Docs_4_5_6.html","title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","keywords":"","body":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss) 迭代公式: Loss=1N∑i=1N[∑j=1k−yj⋅log(predictionj)]i {\\displaystyle 
\\begin{aligned} Loss = \\frac{1}{N} \\sum_{i=1}^N [\\sum_{j=1}^k -y_j \\cdot log(prediction_j)]_i \\\\ \\end{aligned} } Loss=N1i=1∑N[j=1∑k−yj⋅log(predictionj)]i 图像: 图 4-30 Cross Entropy Loss 函数图 特性: 契合逻辑分布(Logistic distribution)样本,拟合 Softmax 模型 二分类下的交叉熵损失表现,二者本质等价 越接近目标,损失越小 越趋近两极,结果越准确 基于贝叶斯统计(Bayesian statistics),采用交叉熵估计 光滑(smooth),适合优化算法 对数计算,算力消耗相对较高 交叉熵损失(CEL [Cross Entropy Loss]) 是一种处理分布于高维同平面(K-Space)下独热向量(one-hot vector)样本集的聚类分析手段。交叉熵损失函数是一种为了 配合 Softmax 激活函数 的损失函数设计,输出满足概率累和为 1。这是因为交叉熵的本质,是试图用预测值来表示某个事件发生所需要的平均概率,从概念上,将事物可能发生的几率,和事物不可能发生的几率做了二元分割,即 Log Loss 实际上是 CEL 的最简表示形式。 所以,在使用交叉熵损失前,最好 先对参与交叉熵运算的所有同样本,进行一次 Softmax 处理,以求尽可能保证估计值之和为 1。 但是需要注意的是,交叉熵损失本身,并不依赖于是否对输入概率进行了归一化。也就是说,虽然可以进行估值之和大于 1 的输入处理,但本身会相对失去意义。因为,CEL 的结果越小,越说明分类估值准确性。非归一化输入只会干扰结果,从而影响模型准确。 Cross Entropy Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double cross_entropy_loss(double *y_true, double *y_pred, int size, int num_classes) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The cross entropy loss is 0.1982671, for 'cat' 'puppy' 'dog' 上面的代码中,展示了存在三类分类情况下,样本的输入分类和预测特征向量,皆未归一化会产生的结果。交叉熵损失仍然能使用,但不精确。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_7.html":{"url":"Chapter_4/Language/cn/Docs_4_5_7.html","title":"4.5.7 分类项-合页损失(Hinge Loss)","keywords":"","body":"4.5.7 分类项-合页损失(Hinge Loss) 迭代公式: Loss=1N∑i=1Nmax(0,1−yi⋅predictioni) {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i=1}^N \\max(0, 1-y_i \\cdot prediction_i) \\\\ \\end{aligned} } Loss=N1i=1∑Nmax(0,1−yi⋅predictioni) 图像: 图 4-31 Hinge Loss 函数图 特性: 缺乏统计学理论支持,没有太好的概率解释 样本值为 -1 或 1,预测值区间范围限定 [−1, +1][ -1,\\ +1][−1, +1] 之间 一般情况下,会限定排除 ∣prediction∣>1|prediction| > 1∣prediction∣>1 的取值,因此对离群值有较好的健壮性 越趋近于 0,结果越准确 依赖决策边界驱动,通过决策边界移动分割样本集 非光滑(non-smooth) 线性处理便于计算 合页损失(Hinge Loss) 通常与 L-2 正则项一起使用,这正是 SVM 支持向量机模型采用的损失函数。Hinge Loss 对非超出驱动边界的满足条件预测给予偏离度惩罚,而对于离散值则直接进行忽略。因此,Hinge Loss 的健壮性比较强。 然而 Hinge Loss 所依赖的决策边界的处理方式更类似于经验划分。对于样本量不足的情况,Hinge Loss(实际上是对应的 SVM)常常会过拟合(Over-fitting)。所以,这种边界限定的方式,在深度学习中常被衍生为一种样本集的裁剪方式的小技巧(trick)来使用。 此外,在概率,尤其是贝叶斯学派看来,Hinge Loss 并不足够合理。贝叶斯学派认为,概率应该用来量化不确定性,而 Hinge Loss 则是一种确定性的损失函数。因此,Hinge Loss 并不完全符合贝叶斯学派的观点。 不过,因果推断方面的领军人物 朱迪亚·珀尔(Judea Pearl) 在其著作《Causality》中阐述了他早期作为贝叶斯学派支持者对于 SVM 的看法。他认为,SVM 是一种经验风险最小化(ERM)方法,它并不依赖于概率模型。因此,Hinge Loss 虽然不完全符合概率,但也并不违背贝叶斯学派的基本原则。 Hinge Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double hinge_loss(double *y_true, double *y_pred, int size) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The hinge loss is 0.250000 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_8.html":{"url":"Chapter_4/Language/cn/Docs_4_5_8.html","title":"4.5.8 分类项-对比损失(Contrastive Loss)","keywords":"","body":"4.5.8 分类项-对比损失(Contrastive Loss) 迭代公式: Di=∣predictioni∣Loss=1N∑i=1N(yi⋅Di2+(1−yi)⋅max(0, m−Di)2) {\\displaystyle \\begin{aligned} D_i &= | prediction_i| \\\\ Loss &= \\frac{1}{N} \\sum_{i=1}^N(y_i \\cdot D_i^2 + (1 - y_i) \\cdot max(0,\\ m - D_i)^2) \\\\ \\end{aligned} } DiLoss=∣predictioni∣=N1i=1∑N(yi⋅Di2+(1−yi)⋅max(0, m−Di)2) 图像(蓝线 Pred,红线 True): 图 4-32 Contrastive Loss 函数图[14] 特性: 基于投影平面角度,降维分离样本类型 mmm 项代表被认为相似的确认半径 样本相似则 yi=1y_i = 1yi=1 ,样本不相似则 yi=0y_i = 0yi=0 增大类间差异并且减小类内差异,损失函数值最小时,两者达到均衡点 当样本不相似时,预测距离在 DwmD_w Dwm 的范围内,模型会试图增大不相似样本点之间的距离 越接近样本情况,损失越小 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 对比损失(Contrastive Loss) 函数是在 2006 年,由 R.Hadsell、S.Chopra、Y.LeCun 在论文《通过学习不变映射进行降维运算》[14] 中 
,提出的一种用来解决样本集中数据聚集过于密集,而导致的 退化解(Degenerate Solutions) 问题。 这种通过降维来寻找合适投影角度,来得到比较优秀的分离聚类的分类损失函数的想法,首次经过合理的论证,并进入广泛大众的视野。为后续 Triplet Loss、N-pair Loss 等,类似的通过分离特性来进行处理的损失函数,打下了基础。 对比损失中,输入的 yiy_iyi 指的是选取样本点 SiS_iSi 和某个类型标签的接近程度。 同理, predictioniprediction_ipredictioni 则是模型预测的该样本 SiS_iSi 距离指定类型标签的结果。 为什么将之前通用的样本的类型概率数据,转为距离描述呢?这是因为,对比损失是通过 确认半径(Margin) 来得到优化的。对比损失函数结果越小,越认为当前权重所对应训练结果越接近实际情况。而方法对于预测距离小于确认半径的数据,取用 max(0, m−Di)2max(0,\\ m - D_i)^2max(0, m−Di)2 拉高了损失函数的结果,达到淘汰分类的效果。 Contrastive Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double contrastive_loss(double *y_true, double *y_pred, int size, double margin) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The contrastive loss is 0.1250000 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_9.html":{"url":"Chapter_4/Language/cn/Docs_4_5_9.html","title":"4.5.9 分类项-三元损失(Triplet Loss)","keywords":"","body":"4.5.9 分类项-三元损失(Triplet Loss) 迭代公式: Dni=∣∣negativei−yi∣∣2Dpi=∣∣positivei −yi∣∣2Loss=1N∑i=1Nmax( 0, Dpi−Dni+m ) {\\displaystyle \\begin{aligned} Dn_i &= \\sqrt{|| negative_i - y_i||^2} \\\\ Dp_i &= \\sqrt{|| positive_i \\ - y_i||^2} \\\\ Loss &= \\frac{1}{N} \\sum_{i=1}^N \\max( \\ 0, \\ {Dp_i} - {Dn_i} + m \\ ) \\\\ \\end{aligned} } DniDpiLoss=√∣∣negativei−yi∣∣2=√∣∣positivei −yi∣∣2=N1i=1∑Nmax( 0, Dpi−Dni+m ) 图像: 图 4-33 Triplet Loss 函数图[15] 特性: 使具有相同标签的样本(positive)之间的距离,尽量接近 使具有不同标签的样本(negative)之间的距离,尽量远离 要求输入 3 个分类样本子集:相似正样本集、相反负样本集、原样本对照集,并行训练 以 mmm 项代表被认为相似的确认半径,Loss 最小则理论上 Dn->m, Dp->0 越接近样本情况,损失越小 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 三元损失(Triplet Loss) 函数来自于论文《FaceNet: A Unified Embedding for Face Recognition and Clustering》中 [15] ,提出的通过拆分 三元组(Triplet),选取正负样本与原样本进行差异化处理,来让预测值趋近于原样本而远离负样本的一种损失函数。 三元组(Triplet) 来自于输入分批的卷积神经网络(CNN)结果,我们需要将输入样本分为三类,在每一时代(Epoch)中都进行相同神经网络隐藏层权重(Wights)影响下的结果计算。累计 单次样本 的损失计算(Loss),以求得分批的损失函数(Cost Function)输出评分。 Triplet Loss 的使用 介于三元组损失提出最初目的,是为了进行人脸识别(FD [Face Detection]),我们因此取用人脸样本集举例。类似于人脸样本集,一般由 PPP 位不同人的 DDD 张该人不同脸的图片样本组成的,样本总量 P⋅DP \\cdot DP⋅D 大小的数据集。 以此为基础,三元组损失要求的三种样本分类子集分别是: 相似正样本集(Positives),由同人不同脸组成的 D−1D -1D−1 大小子集 相反负样本集(Negatives),由不同人不同脸组成的 (P−1)⋅(D−1)(P-1) \\cdot (D -1)(P−1)⋅(D−1) 大小子集 原样本对照集(Anchors),由不同人同脸(选一校订)组成的 PPP 大小子集 这三类子集,在数据分批后,会被分为相同批数并组合为一批数据,作为单次迭代输入数据,参与训练。我们仍然采用角标 [i][_i][i] 来表示分批,那么有: batch_size=(Di−1)+(Pi−1)(Di−1)+Pi=DiPi {batch\\_size} = (D_i-1) + (P_i-1)(D_i-1)+P_i = D_iP_i batch_size=(Di−1)+(Pi−1)(Di−1)+Pi=DiPi 则,在分批数据参与一次批计算后,最终会构成 batch_size{batch\\_size} batch_size 大小的一组 嵌入集(Embeddings),被我们用来计算损失函数(Loss)的实际处理对象。 最终,计算损失后的三元组,按照质量 来划分,可以分为三个类别: 易辨三元组(easy triplets),可以使得 loss 基本趋近于 0 的类型 难辩三元组(hard triplets),有 Dn 模糊三元组(semi-hard triplets),有 Dp 可见,如果构成的三元组一上来就是易辨三元组,那只能证明模型训练参数的启动配置,使模型陷入了过拟合。通常,我们希望每一时代(Epoch)被计算的三元组都具有一定的模糊特性,而方便权重更新。因此,模糊三元组(semi-hard triplets)才是迭代的选择。 那么怎么评估当前的三元组,是否是模糊三元组呢? 
其实很简单,通过当前正样本集所占有效样本的百分比,就能大致估算是否属于模糊类型。记正样本集百分比为 fraction_positive{fraction\\_positive}fraction_positive ,则有: fraction_positive=num_positivenum_available=count(loss>0)count(vector) {\\displaystyle \\begin{aligned} {fraction\\_positive} &= \\frac{num\\_positive}{num\\_available} \\\\ &= \\frac{count( loss > 0)}{count(vector)} \\\\ \\end{aligned} } fraction_positive=num_availablenum_positive=count(vector)count(loss>0) 我们一般取 fraction_positive>0.2{fraction\\_positive} > 0.2fraction_positive>0.2 认为是一次有效训练中的模糊三元组数据。 三元损失在对比损失的基础上更近一步,引入了正负样本概念,来使得分类预测结果更加聚集,且使分类间能够更加远离。本身计算并不算非常复杂,因此可以用在如人脸识别、车辆识别等模型的移动端迁移上。但是,三元损失只是在对比损失上引入正负概念,实际处理过程中,每次只能对比一个负样本而忽略了其他的非关联性。这样就很容易造成迭代结果陷入不稳定(在多个距离相近但实际不同的负样本间抖动),或者局部最优解。 Triplet Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include #include #include #define BATCH_SIZE 10 // Batch_size = Samples_of_Person x Data/Person #define VECTOR_SIZE 128 // Extract output layer Feature vector's dimissions #define DEVIDE_SAFE 1e-12 // protect when gridant at 0 will be to lage // Pairwise Distance Calculation void pairwise_distance(double embeddings[BATCH_SIZE][VECTOR_SIZE], double distances[BATCH_SIZE][BATCH_SIZE], bool squared) { for (int i = 0; i 0) { num_positive++; } if (current_mask > 0) { num_validate++; } } } } } // Calculate fraction of positive triplets *fraction_positives = (double)num_positive / ((double)num_validate + DEVIDE_SAFE); return triplet_cost / (double)(num_positive + DEVIDE_SAFE); } int main() { // Example input (fulfill to BATCH_SIZE x VECTOR_SIZE) // Use Random labels and embeddings for testing // Use three classes as different type, to generate labels int type = 3; int labels[BATCH_SIZE]; double embeddings[BATCH_SIZE][VECTOR_SIZE]; for (int i = 0; i 运行验证可得到结果: The triplet loss is 0.270146 with positives 0.668605 虽然看上去比较复杂,然而在实际执行过程中, 一个时代(Epoch)只会执行一次三元组损失的计算,而空间复杂度上,仅额外增加了距离矩阵和遮罩的共 O(batch_size2+batch_size3)O({batch\\_size}^2 + {batch\\_size}^3)O(batch_size2+batch_size3) 的空间大小。是完全可以接受的。 代码中,我们所使用的 遮罩(Mask)矩阵,实际上相当于将原论文中对三元组的三分类计算,用遮罩来代替了有效处理流程。这样做可行的基本原因,在于距离矩阵本身,在以整体分批不做区别输入的情况下,仍旧可以用全体分批包含样本的欧式距离,构成 batch_size×batch_size{batch\\_size} \\times {batch\\_size}batch_size×batch_size 大小的差异矩阵,记为 MdistM_{dist}Mdist 。以人脸检测为例,同人物同一张样本脸的情况,就相当于 MdistM_{dist}Mdist 的对角线位置。而对角线两侧的数据,则涵盖了同人不同脸、不同人的两种类型。 如此,计算所得 MdistM_{dist}Mdist 实际就包含和三元组的三分类计算中,不同分类的 所有距离类型。与此同时,最终损失函数的计算,是要叠加所有分类独立计算的单次损失的。进而,让我们有机会通过遮罩矩阵就能直接规划不同分类情况,应该取用哪一个距离值,来直接获取当次损失值叠加。如果记遮罩矩阵为 MmaskM_{mask}Mmask ,那么三元损失有工程公式: Loss=Mdist⋅Mmask {\\displaystyle \\begin{aligned} Loss &= M_{dist} \\cdot M_{mask} \\\\ \\end{aligned} } Loss=Mdist⋅Mmask 而既然是矩阵乘法,除了本书例子中采用的纯 C 语言实现外,也可以通过 GPU 算子来实现进一步加速。类似于 CUDA 算子,或部分成熟的推理引擎(如 Keras、py-Touch 等)就是这样处理的。 从这个例子就能看出, 有效的工程化能够极大提升算法的训练效率,减小耗时。 这即是工程师在此处的关键作用。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_10.html":{"url":"Chapter_4/Language/cn/Docs_4_5_10.html","title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","keywords":"","body":"4.5.10 分类项-对组排异损失(N-Pair Loss) 迭代公式: Dni=∣∣negativei−yi∣∣2Dpi=∣∣positivei −yi∣∣2Loss=1N∑i=1Nlog[m+∑j≠iexp(Dpi−Dni)]=1N∑i=1N∑j≠ilog[m+exp(Dpi−Dni)] {\\displaystyle \\begin{aligned} Dn_i &= \\sqrt{|| negative_i - y_i||^2} \\\\ Dp_i &= \\sqrt{|| positive_i \\ - y_i||^2} \\\\ Loss &= \\frac{1}{N} \\sum_{i=1}^{N} log[m+ \\sum_{j\\neq i} exp( Dp_i - Dn_i)] \\\\ &= \\frac{1}{N} \\sum_{i=1}^{N} \\sum_{j\\neq i} log[m+ exp(Dp_i - Dn_i)] \\\\ \\end{aligned} } DniDpiLoss=√∣∣negativei−yi∣∣2=√∣∣positivei 
−yi∣∣2=N1i=1∑Nlog[m+j≠i∑exp(Dpi−Dni)]=N1i=1∑Nj≠i∑log[m+exp(Dpi−Dni)] 图像: 图 4-34 N-Pair Loss 函数图[16] 特性: 使具有相同标签的样本(positive)之间的距离,尽量接近 使具有不同标签的样本(negative)之间的距离,尽量远离 输入 N+1 个子集:1 个相似正样本集、N-1 个相反负样本集、1 个原样本对照集 同三元组损失一样,输入样本集,都在同权重下并行训练 以 mmm 项代表二维平面上多角度排斥最小力矩,一般 m=1m = 1m=1 防止样本过近重合 越接近目标,损失越小 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 对组排异损失(N-Pair Loss) 的提出,旨在解决 对比损失(Contrastive Loss) 和 三元组损失(Triplet Loss) 在分类上的局限性问题 [16] 。这两者,从物理学受力角度分析权重促成的样本聚集,会发现都是一维运动过程。 N-Pair Loss 的使用 N-Pair Loss 在每一次计算中,采用了 同样本集(Positive Set) 和 负样本类集(Negative Classes Set) 的概念。类集中的每一个负样本,都会对预测结果产生排斥,而单一的正样本则会对预测结果产生吸引。这样就能够更好地实现同类型聚集效果。一个比较适当的例子,就像一滴油散到了水中,最终会被水排斥而聚成一个集合的油滴。 实际上,基克·索恩(Kihyuk Sohn) 在对组排异损失的推导过程中,详细描述了从 Triplet Loss -> (N+1)-Tuplet Loss -> N-Pair Loss 的完整过程。其中,(N+1)-Tuplet Loss 可认为是 N-Pair Loss 的过渡。 文中指出,当 N = 2 时,(N+1)-Tuplet Loss 可认为近似于 Triplet Loss。以此为起点,我们很快便会发现,对组排异损失 相当于将 三元组损失中 一组 相似正样本集(Positives) 、 一组 相反负样本集(Negatives) 、 一组 原样本对照集(Anchors) 总共三组之间,两两样本集间样本的距离均值计算,改换成了 一组 相似正样本集(Positives) 、 多组 相反负样本集(Negatives) 、 一组 原样本对照集(Anchors) 总共 N+1 组之间的距离计算。 即,相较于三元组损失,进一步细化了 相反负样本集(Negatives)内,不同标签的对正样本集的驱动作用。 同样以人脸识别(FD [Face Detection])为例,由 PPP 位不同人的 DDD 张该人不同脸的图片样本组成的,样本总量 P⋅DP \\cdot DP⋅D 大小的数据集。 对组排异损失要求,也是三种样本分类子集分类: 相似正样本集(Positives),由同人不同脸组成的 D−1D -1D−1 大小子集 相反负样本集(Negatives),由不同人不同脸组成的 P−1P - 1P−1 组各 D−1D - 1D−1 大小子集 原样本对照集(Anchors),由不同人同脸(选一校订)组成的 PPP 大小子集 这三类子集,在数据分批后,会被分为相同批数并组合为一批数据,作为单次迭代输入数据,参与训练。我们仍然采用角标 [i][_i][i] 来表示分批,那么有: batch_size=(Di−1)+∑Pi−1(Di−1)+Pi=DiPi {batch\\_size} = (D_i-1) + \\sum^{P_i-1}(D_i-1)+P_i = D_iP_i batch_size=(Di−1)+∑Pi−1(Di−1)+Pi=DiPi 则,在分批数据参与一次批计算后,最终还是会构成同三元组损失类似的 batch_size{batch\\_size} batch_size 大小的一组嵌入集(Embeddings),被我们用来计算损失函数(Loss)的实际处理对象。 因此,在工程上,我们 只需要更换单次损失的计算公式,就能从三元组损失的迁移至对组排异损失的计算过程。 N-Pair Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include #include #include #define BATCH_SIZE 10 // Batch_size = Samples_of_Person x Data/Person #define VECTOR_SIZE 128 // Extract output layer Feature vector's dimissions #define DEVIDE_SAFE 1e-12 // protect when gridant at 0 will be to lage // Pairwise Distance Calculation void pairwise_distance(double embeddings[BATCH_SIZE][VECTOR_SIZE], double distances[BATCH_SIZE][BATCH_SIZE], bool squared) { for (int i = 0; i margin) { num_positive++; } if (current_mask > 0) { num_validate++; } } } n_pair_cost += log(margin + n_pair_loss); } } // Calculate fraction of positive n_pairs *fraction_positives = (double)num_positive / ((double)num_validate + DEVIDE_SAFE); return n_pair_cost / (double)(num_positive + DEVIDE_SAFE); } int main() { // Example input (fulfill to BATCH_SIZE x VECTOR_SIZE) // Use Random labels and embeddings for testing // Use three classes as different type, to generate labels int type = 3; int labels[BATCH_SIZE]; double embeddings[BATCH_SIZE][VECTOR_SIZE]; for (int i = 0; i 运行验证可得到结果: The n_pair loss is 0.408567 with positives 0.377907 对组排异损失从样本宏观角度,统一了正负样本概念。指明了,非当前指向类的负样本,可以被认为是指向负样本类型情况的正样本。因此,对于 N 分类处理过程,整个运算损失计算时间复杂度被化简为仅有 2N。相当的高效。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_11.html":{"url":"Chapter_4/Language/cn/Docs_4_5_11.html","title":"4.5.11 正则项-L1 惩罚","keywords":"","body":"4.5.11 正则项-L1 惩罚 迭代公式: L1=∣w1∣+∣w2∣+∣w3∣+⋯+∣wn∣ {\\displaystyle \\begin{aligned} L_1 = |w_1|+|w_2|+|w_3|+ \\cdots +|w_n| \\\\ \\end{aligned} } L1=∣w1∣+∣w2∣+∣w3∣+⋯+∣wn∣ 特性: 根据参数权重绝对值之和,来惩罚权重 当权重 > 0 时,指定权重偏导数为 1,所有权重变化线性统一,因此无法区分主次 当权重 ≤ 0 时,使用 L-1 的参数迭代在 0 处不具备连续性,即 ≤ 0 的值都会为 0 可以使不相关或几乎不相关权重归 0,从模型中移除不相关特征 线性方便计算 L-1 
惩罚项(L1L_1L1 Regularity) 由于其特性,常被用于裁剪参数数量,缩减模型宽度。从另一种角度来理解,可以认为 L-1 的思想其实和 Maxout 激活函数的思想有些类似。都是通过线性关系,来整合实际特征曲线。只不过 L-1 是从模型复杂度的角度,Maxout 是从非线性特征的角度。 L-1 惩罚项被证明,对于稀疏性模型优化非常有效。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_5_12.html":{"url":"Chapter_4/Language/cn/Docs_4_5_12.html","title":"4.5.12 正则项-L2 惩罚","keywords":"","body":"4.5.12 正则项-L2 惩罚 迭代公式: L2=w12+w22+w32+⋯+wn2 {\\displaystyle \\begin{aligned} L_2 = {w_1}^2+{w_2}^2+{w_3}^2+ \\cdots +{w_n}^2 \\\\ \\end{aligned} } L2=w12+w22+w32+⋯+wn2 特性: 根据参数权重平方和,来惩罚权重 L-2 导数为 2x,所有权重变化非线性,可以以此区分参数主次(模型层面) 无法使不相关或几乎不相关权重归 0,无法从模型中移除不相关特征 平滑连续,权重变化自然 平方计算,非指数,可接受 L-2 惩罚项(L2L_2L2 Regularity) 最大的特点就是平滑(smooth)。这决定了在实际运算过程中,L-2 惩罚项只有办法让权重趋近于 0 ,而无法彻底移出对应参数。但是这种特点也使得,L-2 惩罚项可以通过非线性权重,调整模型相关参数在模型中的重要程度。 因此,L-2 惩罚项也被称为 权重衰减(Weight Decay)。并不能消除不相关特征,但能较好的保证特征和结果的因果关系。 至此,损失函数的三类组成部分认识完毕。其实我们只做了粗浅的介绍,真正实用中,还有大量的细分和类型设计。除了少数我们介绍的经典如 MAE、MSE 等,每一个新的损失函数,都可能意味着有自己独特的配套神经网络结构。 究其原因,还是在于损失函数作用的范围,在于衡量整个网络的迭代,这决定了它不太可能会脱离而存在。使用中,需要小心。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_6.html":{"url":"Chapter_4/Language/cn/Docs_4_6.html","title":"4.6 常用最优化算法(Optimizer Operator)","keywords":"","body":"4.6 优化算法/优化器(Optimizer) 优化算法(Optimize Function) 是一种更快实现权重更新的处理办法。常用优化算法能够根据梯度方向和一些其他的关键信息,对当前权重作出更有效率的更新,降低训练所需要的迭代次数,同时提高模型鲁棒性。根据其相对固定的作用位置,在工程中,部署优化算法的 关键单一组件,被称为 优化器(Optimizer)。 图 4-35 优化器作用阶段(图中蓝色线条)示意图 优化器起作用于 隐藏层(Hidden Layer) 的训练中特征权重的加速迭代,所以生效阶段在每一 时代(Epoch) 后,完成 损失函数(Cost) 计算的 结算位置。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_6_1.html":{"url":"Chapter_4/Language/cn/Docs_4_6_1.html","title":"4.6.1 基础优化算法","keywords":"","body":"4.6.1 经典优化算法(Classic Optimize Function) 常用的经典优化算法,主要有三种,分别是:随即梯度下降法(SGD)、批量梯度下降法(BGD)、小批梯度下降法(MBGD)。 随机梯度下降法(SGD [Stochastic Gradient Descent]) 迭代公式: θt=θt−1−η˙∇θJ(θ;xi;yi) {\\displaystyle \\begin{aligned} \\theta_t = \\theta_{t-1} - \\eta \\dot{} \\nabla_\\theta J(\\theta ; x_i; y_i) \\\\ \\end{aligned} } θt=θt−1−η˙∇θJ(θ;xi;yi) 每次更新时,只对当前对应批次的被选用样本数据,进行 损失函数(Loss) 计算,一次计算一次更新。因为计算少,所以速度快,并且可以实现实时计算,支持动态的样本添加。 批量梯度下降法(BGD [Batch Gradient Descent]) 迭代公式: θt=θt−1−η˙∇θJ(θ) {\\displaystyle \\begin{aligned} \\theta_t = \\theta_{t-1} - \\eta \\dot{} \\nabla_\\theta J(\\theta) \\\\ \\end{aligned} } θt=θt−1−η˙∇θJ(θ) 每次迭代需要计算当前批次整个数据集 损失函数(Loss) ,更新梯度。所以每次计算的耗时比较高。对于大批量数据来说,比较难以处理,更新不实时。简单的说,就是粒度太大。 小批梯度下降法(MBGD [Mini-Batch Gradient Descent]) 迭代公式: θt=θt−1−η˙∇θJ(θ;x(i:i+n);y(i:i+n)) {\\displaystyle \\begin{aligned} \\theta_t = \\theta_{t-1} - \\eta \\dot{} \\nabla_\\theta J(\\theta ; x_{(i:i+n)}; y_{(i:i+n)}) \\\\ \\end{aligned} } θt=θt−1−η˙∇θJ(θ;x(i:i+n);y(i:i+n)) 针对 BGD 每次都需要对当前批次数据集的问题,MBGD 进行了改良,每一次更新,取当前批次中的一组子样本集合来进行 损失函数(Loss)计算,降低参数更新方差计算,收敛更稳定,并且因为采用子批次构成矩阵运算,更加有优势。 基础优化算法比较 三个经典算法各有优劣,基本可以以下表来判断。 粒度:小 SGDMBGDBGD大速度:慢 BGDMBGDSGD快收敛:低 SGDMBGDBGD高过拟合:低 SGDMBGDBGD高 {\\displaystyle \\begin{aligned} \\text{粒度:} &\\text{小} \\ &SGD 粒度:速度:收敛:过拟合:小 慢 低 低 SGDMBGDBGDBGDMBGDSGDSGDMBGDBGDSGDMBGDBGD大快高高 因为 SGD 每次处理数据单取一个样本点,相比于 MBGD 的当前批次全数据取子集,和 BGD 当前批次扫描全部数据,SGD 更新权重每次计算出的梯度变化幅度相对都会比较大一些,所以不容易在梯度更新上陷入局部最优解。这也是 SGD 较其余两种基本算法的最大优势。建议没有特殊要求,而需要在这三种算法中做选择的话,优先使用 SGD。 当然,他们都有同样的缺点,那就是: 仍存在易陷入局部最小值或鞍点震荡问题,以 BGD 为最 仍存在无法根据不同参数重要程度进行变速权重更新问题,即全权重更新速度统一问题 不过,既然有了疑问,那自然有解决办法。 Copyright © Since 2021 李述博 
(Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_6_2.html":{"url":"Chapter_4/Language/cn/Docs_4_6_2.html","title":"4.6.2 优化算法的优化-应对震荡","keywords":"","body":"4.6.2 优化算法的优化-应对震荡 常用的减震算法,比较容易想到的就是利用 阻尼运动特性 和 加速度,即 动量(Momentum),来减小离散瞬时值的影响。因此,先贤们首先想到的就是梯度迭代动量化。 标准动量(Standard Momentum) 迭代公式: vt=γvt−1+η∇θJ(θ)θt=θt−1−vt {\\displaystyle \\begin{aligned} v_t &= \\gamma v_{t-1} + \\eta \\nabla_\\theta J(\\theta) \\\\ \\theta_t &= \\theta_{t-1} - v_t \\\\ \\end{aligned} } vtθt=γvt−1+η∇θJ(θ)=θt−1−vt 标准动量(Standard Momentum) 是在原有计算权重迭代基础上,通过引入上一次变化值情况,来强化梯度延方向变化趋势。即 SGD/BGD/MBGD + Momentum。 这样做可以使得梯度方向不变的维度,权重迭代速率加快,而方向发生改变的维度,更新速度变慢。并且由于速度此时变化是和 之前状态 有关系的,就不会发生“指向突变”的情况,有助于减小震荡和跃出鞍点。 超参数 γ\\gammaγ 被称为 阻尼系数,或遗忘因子。一般取 γ=0.9\\gamma = 0.9γ=0.9 ,表示经验重要程度。 然而,单纯的动量处理却也存在其他问题。最明显的就是,因为动量叠加,没有修正逻辑的纯动量叠加,会导致每一次的轻微误差也随着时间一起叠加,导致当前时刻 ttt 时,实际梯度变化速率要远大于实际值,阻尼因子设定过小和初速度过大都可能会久久不能收敛。所以,在动量化的基础上,我们更希望能够有修正方法来减小误差的累积。 幸运的是 Nesterov Y. 在1983年提出的 NAG 很好的解决了这个问题。 涅斯捷罗夫梯度加速(NAG [Nesterov Accelerated Gradient]) 迭代公式: vt=γvt−1+η∇θJ(θ−γvt−1)θt=θt−1−vt {\\displaystyle \\begin{aligned} v_t &= \\gamma v_{t-1} + \\eta \\nabla_\\theta J(\\theta-\\gamma v_{t-1}) \\\\ \\theta_t &= \\theta_{t-1} - v_t \\\\ \\end{aligned} } vtθt=γvt−1+η∇θJ(θ−γvt−1)=θt−1−vt 涅斯捷罗夫梯度加速(NAG [Nesterov Accelerated Gradient]) 较标准动量化处理来说,用来计算当前梯度方向的时候,计算 损失函数(Loss) 采用的是基于当前上一次梯度变化值预测的,当前状态下,下一次可能的维度权重。以这个预测的维度权重来计算当前位置的方向梯度变化,来修正动量化算法。这样,当我们计算当前 ttt 时梯度变化速度的时候,就可以从一定程度上避免掉误差堆积导致的问题。 这里借用一下 Hinton 课程 [17] 中的图来说明效果: 图 4-36 NAG 加速作用过程示意图[17] 可以看出,蓝色(blue vector)是 标准动量 的过程,会先计算当前的梯度,然后在更新后的累积梯度后会有一个大的跳跃。绿色是 NAG 会先在前一步 棕色(brown vector) 的累积梯度上有一个大的跳跃,然后衡量梯度做 红色(red vector) 修正偏移。 这种预期的更新可以避免我们走的太快。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_6_3.html":{"url":"Chapter_4/Language/cn/Docs_4_6_3.html","title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","keywords":"","body":"4.6.3 优化算法的优化-应对重点强(弱)化更新 另一个问题,就是针对性处理对结果影响更大/更小的权重,让重要的迭代的迭代更谨慎,而不重要的获得更快衰减。以保证优势权重,剔除不必要影响。 自适应梯度算法(AdaGrad/AGA [Adaptive Gradient Algorithm]) 迭代公式: gt,i=∇θJ(θi)Gt,i=∑τ=1tgτ,i2θt+1,i=θt,i−ηGt,i+ϵ˙gt,i {\\displaystyle \\begin{aligned} g_{t,i} &= \\nabla_\\theta J(\\theta_i) \\\\ G_{t,i} &= \\sum _{\\tau=1} ^{t} g_{\\tau, i}^2 \\\\ \\theta_{t+1,i} &= \\theta_{t,i} - \\frac{\\eta}{\\sqrt{G_{t,i}+\\epsilon}} \\dot{} g_{t,i} \\\\ \\end{aligned} } gt,iGt,iθt+1,i=∇θJ(θi)=τ=1∑tgτ,i2=θt,i−√Gt,i+ϵη˙gt,i 以 Gt,iG_{t,i}Gt,i 为当前索引为 [i][_i][i] 的参数,所对应从 111 到时刻 ttt 的 所有梯度平方和。 自适应梯度算法(AdaGrad/AGA [Adaptive Gradient Algorithm]) 是将 SGD 的统一学习速率修改为,有一定预测成分在内的,参数对应独立更新的处理方式。这样处理的好处是,每一个不同参数都会根据当前自身变化和总模型结果关系的差异,独立的进行变更,变化大的会更快,变化小的会更慢。减少了手动调节学习速率的次数。 缺点也比较明显: 前期需手工设置一个全局的初始学习率,过大值会导致惩罚变化不明显,过小会提前停止 中后期自适应分母会不断累积导致学习速率不断收缩,趋于非常小从而可能提前结束训练 因此,我们有了改进版 RMSprop 法。 均方根传播法(RMSprop) 迭代公式: gt,i=∇θJ(θi),E[g2]t,i=γE[g2]t−1,i+(1−γ)gt,i2Δθt,i=−ηE[g2]t,i+ϵgt,i=−ηRMS[g]t,igt,iθt+1,i=θt,i+Δθt,i=θt,i−ηE[g2]t,i+ϵgt,i {\\displaystyle \\begin{aligned} g_{t,i} &= \\nabla_\\theta J(\\theta_i) \\quad , \\quad E[g^2]_{t,i} = \\gamma E[g^2]_{t-1,i} + (1-\\gamma)g_{t,i}^2 \\\\ \\Delta \\theta_{t,i} &= - \\frac{\\eta}{\\sqrt{E[g^2]_{t,i}+\\epsilon}}g_{t,i} = - \\frac{\\eta}{RMS[g]_{t,i}}g_{t,i} \\\\ \\theta_{t+1,i} &= \\theta_{t,i} + \\Delta \\theta_{t,i} =\\theta_{t,i} - \\frac{\\eta}{\\sqrt{E[g^2]_{t,i}+\\epsilon}}g_{t,i} \\\\ \\end{aligned} } 
gt,iΔθt,iθt+1,i=∇θJ(θi),E[g2]t,i=γE[g2]t−1,i+(1−γ)gt,i2=−√E[g2]t,i+ϵηgt,i=−RMS[g]t,iηgt,i=θt,i+Δθt,i=θt,i−√E[g2]t,i+ϵηgt,i 以 E[g2]t,iE[g^2]_{t,i}E[g2]t,i 为当前索引为 [i][_i][i] 的参数,所对应从 111 到时刻 ttt 的所有梯度均方和,有: RMS[g]t,i=E[g2]t,i+ϵ RMS[g]_{t,i}=\\sqrt{E[g^2]_{t,i}+\\epsilon} RMS[g]t,i=√E[g2]t,i+ϵ 因为学习速率变化采用的是 梯度均方和(RMS)。所以,某一维度变化较大时,RMS 较大;变化较小时,RMS 较小。这样就保证了各个维度的变化速率是基于同一个变化量级的,同时也避免了 AdaGrad 中后期的学习速率极速下降,过早停止的问题。而且,因为 RMS 采用近似算法,极大降低了内存消耗(毕竟不需要记录每一次的迭代值了) 不过,RMSprop 可以看出,仍然依赖于全局学习速率 的设定,那么是否能够继续改进不依赖呢? 如果对比两个方法的过程中单位差异,或许能找到答案。 AdaGrad 和 RMSprop 单位问题 我们知道,很多单位是有实际价值的。比如是米(meter),天(day)等,就有具体物理含义。所以,对于迭代使用的加速度 Δθt\\Delta\\theta_tΔθt ,一个很自然的期望是,的单位和是保持一致的。 但是: Δx∝g∝∂f∂x∝1x \\Delta x \\propto g \\propto \\frac{\\partial f}{\\partial x} \\propto \\frac{1}{x} Δx∝g∝∂x∂f∝x1 有 Δx\\Delta xΔx 和 ggg 为同单位,而与 xxx 的单位互逆。即 x−1x^{-1}x−1 表示的瞬时变化才与 Δx\\Delta xΔx 和 ggg 为同单位。 也就是说,对于 AdaGrad 和 RMSprop 来说,Δθt\\Delta\\theta_tΔθt 权重变化量最终得到的结果,其单位和 θt\\theta_tθt 单位并不一致,而是对应时间单位的倒数。而我们要的 权重 θt\\theta_tθt 是时间单位的。 如果我们用牛顿法使 Δx=Ht−1gt\\Delta x =H_t^{-1 }g_tΔx=Ht−1gt , HtH_tHt 为 Hessian 矩阵,即所有参数指定 ttt 时刻二阶偏导数方阵,有: Δx∝H−1g∝∂f∂x∂2f∂2x∝1x \\Delta x \\propto H^{-1 }g \\propto \\frac{\\tfrac{\\partial f}{\\partial x}}{\\tfrac{\\partial^2 f}{\\partial^2 x}} \\propto \\frac{1}{x} Δx∝H−1g∝∂2x∂2f∂x∂f∝x1 上述变化后,便能将 xxx 、 Δx\\Delta xΔx 和 ggg 单位一致化。但是 Hessian 矩阵计算量太大,我们没办法直接使用。所以,我们还需要模拟退火牛顿法,有: Δx=∂f∂x∂2f∂2x⇒Δxt=−∑τ=1t−1ΔxτE[g2]t+ϵ \\Delta x = \\frac{\\frac{\\partial f}{\\partial x}}{\\frac{\\partial ^2 f}{\\partial ^2 x}} \\Rightarrow \\Delta x_t = -\\frac{\\sqrt{\\sum{ _{\\tau=1} ^{t-1}} \\Delta x_\\tau} }{\\sqrt{E[g^2]_t+\\epsilon}} Δx=∂2x∂2f∂x∂f⇒Δxt=−√E[g2]t+ϵ√∑τ=1t−1Δxτ 上式在 ∞\\infty∞ 位置近似等价。 如此,既可以保证单位,又能简化运算。同时我们发现,Δθt\\Delta\\theta_tΔθt 的更新在这种拟合下,后续迭代不再依赖于全局学习速率 η\\etaη 。 于是,便有了 AdaDelta 算法。 自适应梯度算法改进版(AdaDelta/ADGA [Adaptive Delta Gradient Algorithm]) 迭代公式: gt,i=∇θJ(θi),E[g2]t,i=γE[g2]t−1,i+(1−γ)gt,i2Δθt,i=−RMS[Δθ]t−1,iRMS[g]t,igt,iθt+1,i=θt,i+Δθt,i=θt,i−RMS[Δθ]t−1,iRMS[g]t,igt,i {\\displaystyle \\begin{aligned} g_{t,i} &= \\nabla_\\theta J(\\theta_i) \\quad , \\quad E[g^2]_{t,i} = \\gamma E[g^2]_{t-1,i} + (1-\\gamma)g_{t,i}^2 \\\\ \\Delta \\theta_{t,i} &= - \\frac{RMS[\\Delta \\theta]_{t-1,i}}{RMS[g]_{t,i}}g_{t,i} \\\\ \\theta_{t+1,i} &= \\theta_{t,i} + \\Delta \\theta_{t,i} =\\theta_{t,i} - \\frac{RMS[\\Delta \\theta]_{t-1,i}}{RMS[g]_{t,i}}g_{t,i} \\\\ \\end{aligned} } gt,iΔθt,iθt+1,i=∇θJ(θi),E[g2]t,i=γE[g2]t−1,i+(1−γ)gt,i2=−RMS[g]t,iRMS[Δθ]t−1,igt,i=θt,i+Δθt,i=θt,i−RMS[g]t,iRMS[Δθ]t−1,igt,i 以 E[g2]t,iE[g^2]_{t,i}E[g2]t,i 为当前索引为 [i][_i][i] 的参数,所对应从 111 到时刻 ttt 的所有梯度均方和,有: RMS[g]t,i=E[g2]t,i+ϵ RMS[g]_{t,i}=\\sqrt{E[g^2]_{t,i}+\\epsilon} RMS[g]t,i=√E[g2]t,i+ϵ 相较于前两种,AdaDelta 具有优势: 结合了 AdaGrad 善于处理稀疏梯度和 RMSprop 善于处理非平稳目标的优点 不需要依赖于 全局学习速率的设置 是一种相对理想的,针对强弱重点的梯度优化算法了。 目前,我们所有的处理方式都是秩针对性的解决单一问题。那么有没有什么方法,可以结合两类的优点呢?既解决鞍点,又能自适应学习速率呢? 
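在回答这个问题之前,这里先把 RMSprop 与 AdaDelta 对单个参数的更新过程,整理成一段可运行的 C 语言示意(仅为按上文公式的骨架还原,变量命名与超参数取值均为假设),便于对照两者在是否依赖全局学习速率上的差异:

```c
#include <math.h>
#include <stdio.h>

#define GAMMA 0.9     // decay factor for the running averages
#define ETA   0.01    // global learning rate (used by RMSprop only)
#define EPS   1e-8    // numerical stability term

// One RMSprop step for a single parameter; eg2 keeps E[g^2].
double rmsprop_step(double theta, double grad, double *eg2) {
    *eg2 = GAMMA * (*eg2) + (1.0 - GAMMA) * grad * grad;
    return theta - ETA / sqrt(*eg2 + EPS) * grad;
}

// One AdaDelta step for a single parameter;
// eg2 keeps E[g^2], edx2 keeps E[dx^2]; no global learning rate is needed.
double adadelta_step(double theta, double grad, double *eg2, double *edx2) {
    *eg2 = GAMMA * (*eg2) + (1.0 - GAMMA) * grad * grad;
    double dx = -sqrt(*edx2 + EPS) / sqrt(*eg2 + EPS) * grad;
    *edx2 = GAMMA * (*edx2) + (1.0 - GAMMA) * dx * dx;
    return theta + dx;
}

int main(void) {
    // Minimize f(theta) = theta^2, whose gradient is 2 * theta
    double t1 = 1.0, t2 = 1.0;
    double eg2_r = 0.0, eg2_a = 0.0, edx2_a = 0.0;
    for (int step = 0; step < 100; ++step) {
        t1 = rmsprop_step(t1, 2.0 * t1, &eg2_r);
        t2 = adadelta_step(t2, 2.0 * t2, &eg2_a, &edx2_a);
    }
    printf("RMSprop : theta = %f\n", t1);
    printf("AdaDelta: theta = %f\n", t2);
    return 0;
}
```

可以看到,AdaDelta 以历史的 RMS[Δθ] 代替了 RMSprop 中的全局学习速率 η,这正是上文推导所要达成的目标。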
当然有,那就是 Adam 自适应实时评估算法。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_6_4.html":{"url":"Chapter_4/Language/cn/Docs_4_6_4.html","title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","keywords":"","body":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation]) 自适应实时评估算法(Adam [Adaptive Moment Estimation]),相当于RMSprop 和 Momentum 结合的一种算法,标准Adam 可以认为是 一阶AdaDelta 的动量改进版。 迭代公式: gt,i=∇θJ(θi)mt,i=β1mt−1,i+(1−β1)gt,ivt,i=β2 vt−1,i+(1−β2)gt,i2Δθt,i=−ηv^t,i+ϵm^t,iθt+1,i=θt,i+Δθt,i=θt,i−ηv^t,i+ϵm^t,i {\\displaystyle \\begin{aligned} g_{t,i} &= \\nabla_\\theta J(\\theta_i) \\\\ m_{t,i} &= \\beta_1 m_{t-1,i} + (1-\\beta_1) g_{t,i} \\\\ v_{t,i} &= \\beta_2 \\ v_{t-1,i} + (1-\\beta_2) g_{t,i}^2 \\\\ \\Delta \\theta_{t,i} &= - \\frac{\\eta}{\\sqrt{\\hat{v}_{t,i}}+\\epsilon}\\hat{m}_{t,i} \\\\ \\theta_{t+1, i} &= \\theta_{t,i} +\\Delta \\theta_{t,i} =\\theta_{t,i} - \\frac{\\eta}{\\sqrt{\\hat{v}_{t,i}}+\\epsilon}\\hat{m}_{t,i} \\\\ \\end{aligned} } gt,imt,ivt,iΔθt,iθt+1,i=∇θJ(θi)=β1mt−1,i+(1−β1)gt,i=β2 vt−1,i+(1−β2)gt,i2=−√v^t,i+ϵηm^t,i=θt,i+Δθt,i=θt,i−√v^t,i+ϵηm^t,i 其中 m^t\\hat{m}_tm^t 、 v^t\\hat{v}_tv^t 是我们为了防止 mmm 、 vvv 被初始化时为 000 导致向 000 偏移而做的 偏差校正值,有: m^t=mt1−β1v^t=vt1−β2 {\\displaystyle \\begin{aligned} \\hat{m}_t &= \\frac{m_t}{1-\\beta_1} \\\\ \\hat{v}_t &= \\frac{v_t}{1-\\beta_2} \\\\ \\end{aligned} } m^tv^t=1−β1mt=1−β2vt 取 经验系数 β1\\beta_1β1 、 β1\\beta_1β1 ,Hinton建议 β1=0.9\\beta_1 = 0.9β1=0.9 ,β2=0.999\\beta_2 = 0.999β2=0.999 取 η\\etaη 防爆因子,建议 ϵ=10e-8\\epsilon = \\text{10e-8}ϵ=10e-8 避免干扰运算 Adam 很好的结合了前辈们的各种优化处理手段,成为了集大成之优化函数。因此,Adam是被经常使用的,现代主流优化函数之一。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_6_5.html":{"url":"Chapter_4/Language/cn/Docs_4_6_5.html","title":"4.6.5 优化算法对比与使用建议","keywords":"","body":"4.6.5 优化算法对比与使用建议 这里引用一下特斯拉人工智能主管 安德烈·卡尔帕蒂(Andrej Karpathy) 的 在线 Demo(使用的是 pytouch) ,来做一下演示。 我们需要将脚本改成如下(增加 Adam): // lets use an example fully-connected 2-layer ReLU net var layer_defs = []; layer_defs.push({type:'input', out_sx:24, out_sy:24, out_depth:1}); layer_defs.push({type:'fc', num_neurons:20, activation:'relu'}); layer_defs.push({type:'fc', num_neurons:20, activation:'relu'}); layer_defs.push({type:'softmax', num_classes:10}); // below fill out the trainer specs you wish to evaluate, and give them names for legend var LR = 0.01; // learning rate var BS = 8; // batch size var L2 = 0.001; // L2 weight decay nets = []; trainer_defs = []; trainer_defs.push({learning_rate:10*LR, method: 'sgd', momentum: 0.0, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:LR, method: 'sgd', momentum: 0.9, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:LR, method: 'nesterov', momentum: 0.9, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:LR, method: 'adagrad', eps: 1e-6, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:1.0, method: 'adadelta', eps: 1e-6, ro:0.95, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:LR, method: 'adam', eps: 1e-6, betas:[0.9, 0.999], batch_size:BS, l2_decay:L2}); // names for all trainers above legend = ['sgd', 'sgd+momentum', 'Nesterov', 'AdaGrad', 'AdaDelta', 'Adam']; 在运行一小段时间后(大概 11 k 经处理样本左右),有如下的结果: 感兴趣的读者,可以自行前往地址: https://cs.stanford.edu/people/karpathy/convnetjs/demo/trainers.html 观看更为直观的展示。 通过对比,我们也发现了问题。针对震荡优化的几个算法,在速度上不太有优势;而针对强弱重点的算法,又不是太稳定。 但 Adam 综合表现始终良好,证明了其优秀的可用性。 
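在给出结论之前,这里再补充一段单参数 Adam 更新的 C 语言示意(并非任何框架的官方实现,变量命名为假设;偏差校正按 Adam 原论文取 1−β^t 的时间相关形式展开),便于把 4.6.4 节的迭代公式与工程实现对应起来:

```c
#include <math.h>
#include <stdio.h>

#define BETA1 0.9     // first-moment decay, as suggested in the text
#define BETA2 0.999   // second-moment decay
#define ETA   0.01    // learning rate (demo value)
#define EPS   1e-8    // stability term to avoid division by zero

// One Adam step for a single parameter.
// m / v keep the running first / second moment estimates,
// t is the 1-based step index used in the bias-correction terms.
double adam_step(double theta, double grad, double *m, double *v, int t) {
    *m = BETA1 * (*m) + (1.0 - BETA1) * grad;
    *v = BETA2 * (*v) + (1.0 - BETA2) * grad * grad;
    double m_hat = *m / (1.0 - pow(BETA1, t));
    double v_hat = *v / (1.0 - pow(BETA2, t));
    return theta - ETA / (sqrt(v_hat) + EPS) * m_hat;
}

int main(void) {
    // Minimize f(theta) = (theta - 3)^2, whose gradient is 2 * (theta - 3)
    double theta = 0.0, m = 0.0, v = 0.0;
    for (int t = 1; t <= 1000; ++t) {
        theta = adam_step(theta, 2.0 * (theta - 3.0), &m, &v, t);
    }
    printf("theta approaches the minimum at 3: %f\n", theta);
    return 0;
}
```

示例以简单的二次函数为目标,可以观察到参数逐步向极小值逼近;实际模型中,上述更新会对每一个权重分量独立执行。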
至此,我们可以得出大致结论: 如果数据稠密,实际上简单的算法就能得到鲁棒性很好的结果。参考使用 标准动量 的 SGD/BGD/MBGD + Momentum 。加动量既可以保证相对快的训练速度,也可以一定程度避免局部最小值。 如果数据稀疏,因为需要对关键特征点进行提取,所以需要用一些自适应算法。对于简单凸性和相对不复杂的数据,可以采用 L1、L2正则化 + 组合分桶。而复杂一些的,就需要采用Adagrad, Adadelta, RMSprop, Adam 等方式了。 如果关键参数更多的出现在运算后期,即梯度稀疏一定程度后,则Adam 比 RMSprop 效果会好。这时 Adam 明显是最好的选择。 按照这样的策略,灵活且合理的选取优化器。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_7.html":{"url":"Chapter_4/Language/cn/Docs_4_7.html","title":"4.7 模型结构速览","keywords":"","body":"4.7 模型结构速览 现在,基本了解了神经网络的主要工程元件后,即能设计简单模型结构,做一些训练了。 不过,在起步阶段,我们还需要决定具体使用哪一种模型类型,来构建面向目标的神经网络。可供选择的类型,其实在本章的开篇就已介绍,即深度神经网络(DNN [Deep Neural Network])的分类(见 4.1)。 这里我们主要对 当下主流的 CNN、RNN、GAN、Transformer 类别,进行说明。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_7_1.html":{"url":"Chapter_4/Language/cn/Docs_4_7_1.html","title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","keywords":"","body":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network]) 卷积神经网络(CNN [Convolutional Neural Network]),是对采用 卷积核(Convolutional Kernel),配合 层叠网格结构 构成的流水线,来进行特征提取的一类神经网络的统称。该类型最为擅长抽象图片或更复杂信息的高维特征。 仍以 AlexNet 为例,原工程示意图之前已有展示: 图 4-37 完整的 Alexnet 示意图(工程版) 我们用它来做一个,基于 MINST 手写字母图像集的,简单字母分类识别模型。 如下所示: 图 4-38 以 AlexNet 部署的 MINST 字母识别模型 可以发现,该模型在层级设计上,前半部分使用到了一系列由多个 更偏重功能性 的特殊隐藏层,即卷积层(Conv)、池化层(Pool),以连接构成复杂结构。之后,在经过一个独立的平化层(Flatten Layer)处理多通道数据完毕,才到达我们熟知类似之前介绍的,三层简易结构组成 多层感知器(MLP)朴素神经网络(Simple Neural Network) 的部分。 CNN 层级分类 显然,新引入的 卷积层(Conv) 、 池化层(Pool) 、 平化层(Flatten Layer),从神经网络结构上划分,仍然属于隐藏层。但所偏重的处理却更为细分。单纯使用结构体系的称谓,已经不能体现其具体作用了。 介于此,我们不得不采用 功能分类 方式,细化隐藏层类,来扩展对 CNN 的结构描述能力。 于是,结合原本输入层、输出层的相对准确描述,通常情况,我们能够将 CNN 的层类型分为 6 类。分别是: 输入层(Input Layer),处理接收样本的前处理准备工作; 卷积层(Conv [Convolutional Layer]),处理卷积核的移动 和 相关数据过滤工作; 池化层(Pool [Pooling Layer]),处理压缩/提升参数量的工作; 平化层(Flat [Flatten Layer]),处理接收样本的前处理准备工作; 全连接层(FC [Fully Connected Layer]),完成神经网络提取后特征的权重迭代部分; 输出层(Output Layer),完成最终特征向量结果输出,交由损失函数处理 不过,因为是按照针对处理任务进行的划分,因此,不同的 CNN 模型,在分类上会有或多或少的不同。例如,有些没有平化层,而有些则会多出独立的激活层(专门用于激活函数生效的隐藏层)。所以需要根据实际情况,做些许调整。但分类原则仍然依照上述标准。 为了区别于基础 输入层、输出层、隐藏层 概念,我们在介绍时统一采用 CNN 前缀(如,CNN-Input),表明特殊的分类形式。 CNN 输入层(CNN-Input) CNN 输入层(CNN-Input) 所做的工作和传统神经网络的输入层工作有一定的区分,除了要完成初步激活外,还需要对输入样本数据做一定的 预处理(Pre-Process) 工作。此类主要为 特征工程(Feature Engineering) 的相关工作,包括但不限于: 数据过滤(Data Filtering),初步筛选部分可以直接从样本本身判断出来的无效数据; 标准化(Standardization),将数据转为均值 0 且标准差为 1 的分布,无关原分布特征; 归一化(Normalization),将样本数据映射到一定范围区间内,放大或缩小范围; 中心化(Zero-Centered),将样本数据转为均值 0 但保留原有分布特征; 这里的 归一化(Normalization),指的是将参与训练的数据,归一到 统一尺度 下处理。虽然 [0, 1][0,\\ 1][0, 1] 范围因契合概率特征,常作为考虑范围之一,但并不一定都会选在 [0, 1][0,\\ 1][0, 1] 范围。例如我们在音视频中,以 RGB 作为输入数据时,更希望保留 [0, 255][0,\\ 255][0, 255] 的离散范围作为样本 。 除此之外,还有对原有数据的去关联性、离散化、量化转移。因此,输入层的工作,也被经常称为 数据预处理(Data Preprocessing)。这一部分存在相当多的工作,本书在章节末尾的书目推荐中,已列出相关参考推荐,感兴趣可以自行前往了解。 CNN 卷积层(CNN-Conv) CNN 卷积层(CNN-Conv) 主要采用一些滤波器算法,来针对性的提取特征信息。这一过程中使用的滤波器(Filter)即是我们在本书第三章第二节中介绍的一类类型,涵盖了之前所提到的常用滤波手段在内的一系列滤波处理方式。而这也是 CNN 的核心概念之一。 在 CNN 中,一般将 采用滤波器称为卷积核(Kernel)。 图 4-39 CNN 卷积层的计算过程示例 通过卷积操作,不断的从输入样本(CNN 一般是多维数据)中,提取出滤波后的特征。这一过程实际上是对最终被用来作为训练的输出特征向量,所在高维投影信息的一种过滤和逼近。通过对多层 CNN 卷积层的加权训练,来实现简洁观察到样本集代表数据的本质特征。 由于卷积操作的特点,CNN 最适合被 GPU 加速的运算,即是卷积核运算。 CNN 池化层(CNN-Pool) CNN 池化层(CNN-Pool) 是除了 CNN 卷积层外的另一种特征提取方式。它本身其实也可归类为一种算子简单的 CNN 卷积层。为了区别,我们把池化层的卷积核,称为池化核。 因此,CNN 池化层具有 CNN 卷积层的所有特点,并一样利于 GPU 化加速。 池化算子根据前一级输入,一般为 2×22 \\times 22×2 或 3×33 \\times 33×3 大小,移动步长为了避免范围覆盖,会取用等核大小步长。 常见的池化算子(Pooling Operator)主要有两种,分别是: 
最大值池化(Max-Pooling),以池化核内最大值为输出; 核均值池化(Avg-Pooling),以池化核内所有值的均值作为输出; 这两类都是 向下采样(Subsampling) 过程,效果如下: 图 4-40 CNN 池化层的计算过程示例 [18] 除此外,还有各种类型的其它池化算法,例如:混合池化(Mixed Pooling)、线性探测池化(Linear Probing Pooling)[19] 、向上采样(Upsampling)的全局池化(Global Pooling)等。 方法不一而足。 但池化层的目的,始终是对前级输入的一种,引入相对对抗性的校准方式。使得特征的小范围内值得到突出,或用以磨平部分核内数据干扰的手段。 CNN 平化层(CNN-Flat) CNN 平化层(CNN-Flat),从字面意义理解,即把输入变换到指定维度大小数据当特殊处理层。它的意义在于,为传统 MLP 的神经网络部分,提供可供其学习的输入特征。 因此,常见的平化层操作,即将前一级输入直接按照顺序延展到指定维度即可。 图 4-41 CNN 平化层的计算过程示例 通常情况,我们会选择输出扁平化到 一维张量(1-dim Tensor,即 的有 n×1n \\times 1n×1 个元素的向量)。这个过程如上图展示。 CNN 全连接层(CNN-FC)& CNN 输出层(CNN-Output) CNN 全连接层(CNN-FC) & CNN 输出层(CNN-Output),相比之前几类,和它俩在 MLP 时期的作用基本无变化。 CNN 全连接层(CNN-FC) 相当于前文中三层朴素神经网络里的隐藏层; CNN 输出层(CNN-Output) 相当于前文中三层朴素神经网络里的输出层; 此处就不再赘述。 需要注意的是,CNN 输出层(CNN-Output)的输出结果,才是我们 在训练阶段 中,用来 交付损失函数计算,并参与优化器迭代权重的部分。为区别于其它,有时会被称为模型的 关键特征向量(Key Vector)。 CNN 网络结构 卷积神经网络存在远超 MLP 的层级,带来的变换远非只停留于对层级功能的细化上。在网络结构层面,也逐渐由处理针对任务性质的差异,产生了在 CNN 整体结构内,更为明确的区分。我们一般将位于神经网络内,专项执行单一主要任务的内部子模块,称为 子网结构(Sub-Network Structure),或简称为 子网(Subnet)。 同样于层级分类情况,对于主要目的不同的 CNN ,其子网结构也不完全相同。 但一般而言,大体可以分为 3 个子网,分别是: 特征提取(FE [Feature Extraction])子网,用于提炼原始信息至高级特征; 特征选择(FS [Feature Selection])子网,用于将高级特征抽象至最终输出特征向量; 结果输出(RO [Result Output])子网,用于输出最终的处理结果; 从分类可见,特征选择子网和特征提取子网,在卷积神经网络中的作用,基本等同于传统机器学习过程中,特征的选择和提取在传统逻辑回归和聚类分析中,所起到的作用一致。但其作用范围是整张网络内,所有的过程中特征。这一点还是有较大维度上的差异的。 在具体实践中,是什么样的情况呢? 我们以 AlexNet 部署物体识别的 CNN 分类模型为例,有: 图 4.7.1-6 以 AlexNet 部署的 ImageNet 物体识别模型 在例子中,3 个子网结构各包含了多个 AlexNet 的不同层级: 特征提取子网(FE),包含 输入层、平化层,以及从输入层至平化层间的多个池化层、卷积层,共同组成; 特征选择子网(FS),在本例中根据功能也被称为 分类子网(Classification Subnet),包含接收平化层输出的相邻隐藏层至输出层前一级隐藏层。这些隐藏层都是全链接层,以此完成特征向量提炼。 结果输出子网(RO),则是在接收 FS 输出后,最终生成特征向量的传统 MLP 组成。例子中采用了 SoftMax 连接函数,完成了对样本的 概率分布(Probabilistic Distribution) 归一化向量输出。 需要注意的是,例子中由于是训练好的模型,并没有画出当模型还在训练时,损失函数生效的阶段。不过,在我们经过前几节的讲解后,还是可以判断得到,其生效位置正是在 RO 之后。 训练阶段的 CNN ,正是接收了结果输出子网的特征向量,以此作为迭代的损失函数输入。 那么 CNN 有哪些适合的优势场景呢? 
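在进入场景讨论之前,这里用一小段 C 代码,把前文卷积层与最大值池化层的基本计算过程串起来(单通道、无填充、卷积步长 1、池化步长取等核大小;输入尺寸与卷积核取值均为演示用假设):

```c
#include <stdio.h>

#define IN   6                 // input feature map size (assumed for the demo)
#define K    3                 // convolution kernel size
#define CONV (IN - K + 1)      // valid convolution output: 4 x 4
#define POOL (CONV / 2)        // 2 x 2 max pooling, stride 2: 2 x 2

// Single-channel valid convolution, stride 1, no padding.
void conv2d(double in[IN][IN], double k[K][K], double out[CONV][CONV]) {
    for (int y = 0; y < CONV; ++y)
        for (int x = 0; x < CONV; ++x) {
            double acc = 0.0;
            for (int i = 0; i < K; ++i)
                for (int j = 0; j < K; ++j)
                    acc += in[y + i][x + j] * k[i][j];
            out[y][x] = acc;
        }
}

// 2 x 2 max pooling with a kernel-sized stride, as described in the text.
void max_pool2x2(double in[CONV][CONV], double out[POOL][POOL]) {
    for (int y = 0; y < POOL; ++y)
        for (int x = 0; x < POOL; ++x) {
            double best = in[2 * y][2 * x];
            for (int i = 0; i < 2; ++i)
                for (int j = 0; j < 2; ++j)
                    if (in[2 * y + i][2 * x + j] > best)
                        best = in[2 * y + i][2 * x + j];
            out[y][x] = best;
        }
}

int main(void) {
    double input[IN][IN];
    double kernel[K][K] = {                       // simple vertical-edge filter
        { 1, 0, -1 }, { 1, 0, -1 }, { 1, 0, -1 }
    };
    for (int y = 0; y < IN; ++y)                  // gradient-like test image
        for (int x = 0; x < IN; ++x)
            input[y][x] = (double)x;

    double conv[CONV][CONV], pool[POOL][POOL];
    conv2d(input, kernel, conv);
    max_pool2x2(conv, pool);

    for (int y = 0; y < POOL; ++y, printf("\n"))
        for (int x = 0; x < POOL; ++x)
            printf("%6.1f ", pool[y][x]);
    return 0;
}
```

其中池化步长取与池化核等大,正对应上文提到的避免覆盖范围重叠的做法;实际的 CNN 框架会在多通道与多卷积核上做同样的操作,并交由 GPU 并行加速。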
CNN 的常见场景 考虑到 CNN 的特点,其实只要是超过一维的样本数据,都能够以 CNN 来进行相关作业。这也决定了 CNN 具有极强的普适性。 目前上,工业界对 CNN 的运用已经涵盖了: 图像分类,如:手势识别、动作识别、人脸识别等; 图像分割,如:背景分离、智能抠图、轮廓融合等; 语义分割,如:物体分类、车辆检测等; 语音识别,如:文本转译、同声传录、情感分析等; 除此外,包括 2016 年名声大噪的 AlphaGo ,也是采用的 CNN 多模型混合架构。足以见得其巨大的发挥空间。虽然 2022 年因 OpenAI 的 ChatGPT 引起 LLM Transformer 浪潮,让 CNN 的热度有所减退,但并不能阻碍它成为目前最好用的模型结构选择之一。 相信未来,我们仍然能够一睹 CNN 回归 LLM 多模态语言大模型的风采。 至此,CNN 的初级概念和网络结构,基本介绍完毕。有了这些知识背景,在了解 CNN 的各种类型网络的设计时,亦能窥得大概。其余就需要仔细钻研论文,以了解全貌了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_7_2.html":{"url":"Chapter_4/Language/cn/Docs_4_7_2.html","title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","keywords":"","body":"4.7.2 循环神经网络(RNN [Recurrent Neural Network]) 循环神经网络(RNN [Recurrent Neural Network]),是指为了应对序列数据类型而专门设计的一种,具有一定程度 长期记忆(Long Term Memory) 和 短期记忆(Short Term Memory) 能力的神经网络模型类型。即然被称为“循环”神经网络,则循环在整个 RNN 流水线过程中,起到了至关重要的作用。 那么,具体时如何“循环”的呢?这一点需要从网络的权重更新细节说起。 RNN 自 MLP 的改进 在前文中,我们认识了朴素神经网络的代表多层感知器(MLP)。并通过分析 输入层、隐藏层、输出层,了解了具体的推理过程。一个典型的三层 MLP ,有如下展示(见 4.2): 图 4-43 经典层分类在简单神经网络中位置示意图(切片) 取图例所示,记时代 ttt 时,有输入 i⃗=[i1, i2, i3, i4]\\vec{i} = [i_1,\\ i_2,\\ i_3,\\ i_4]i⃗=[i1, i2, i3, i4] , 隐藏层存在变化 z⃗=W⋅i⃗+b⃗\\vec{z} = W \\cdot \\vec{i} + \\vec{b}z⃗=W⋅i⃗+b⃗ 使输出为 z⃗=[z1, z2, z3]\\vec{z} = [z_1,\\ z_2,\\ z_3]z⃗=[z1, z2, z3] ,而输出层在接收处理 z⃗\\vec{z}z⃗ 后得当次迭代结果为 o⃗=[o1, o2]\\vec{o} = [o_1,\\ o_2]o⃗=[o1, o2] 。则上图就有时代 ttt 的简化表示: 图 4-44 经典层分类在简单神经网络中位置简化图(切片) 在 MLP 中,每一个时代(Epoch)的权重更新过程,都不会受到前一个时代的影响,并且也未影响即将参与训练的后一个时代。这种现象,被称为 “无记忆”(No-Memory),即每一次权重更新只和当前时代的训练有关。 为了解决这个问题,1982 年,约翰·霍普菲尔德 (John Hopfield) 基于 威廉·伦茨(Wilhelm Lenz, 1888 - 1957) 和 恩斯特·伊辛(Ernst Ising, 1990 - 1998) 等人提出的 伦茨-伊辛模型(Lanz-Ising Model) 思想 ,改善了基础 MLP ,创建了首个被称为 “霍普菲尔德网络模型(HN [Hopfield Network])” 的类现代 RNN 神经网络,用于处理时序优化。该模型试图在神经元间引入趋势关联性,来实现一定程度的短期记忆。虽然是相当有开创性的工作,但由于需要保存较多变元受控影响参数过多,且由于核内只采用了 Tanh 简单引入非线性,却无其他处理,从而存在 梯度消失(Vanishing Gradient),导致不太具有工程优势。 受到启发,塞普·霍克雷特(Sepp Hochreiter) 和 尤尔根·施密特胡伯 (Jürgen Schmidhuber) 于 1991 年提出了 长短期记忆(LSTM [Long Short-Term Memory]) 模型 [20] ,通过简化迭代参数到隐藏层神经元结构内,引入了具有一定程度时间性的权重 W(t)W(t)W(t) 和偏移 b(t)b(t)b(t) ,代替了原有单步权重 WWW 和偏移 bbb 。相比 HN 的独立时序权重影响参数, LSTM 相当于用相对复杂结构,替换原 z⃗=W⋅i⃗+b⃗\\vec{z} = W \\cdot \\vec{i} + \\vec{b}z⃗=W⋅i⃗+b⃗ 在时序上的单步计算的方式,而非在式子本身上施加额外干预。 假设我们三个时代,分别是 t−1t - 1t−1 、 ttt 、 t+1t + 1t+1 ,那么改进后的 LSTM 有: 图 4-45 长短期记忆(LSTM)网络结构类比 MLP 的时序关联性示意图 如此,使得权重和偏移也参与到了迭代中,进而通过两者在时序上的传递,实现了过往训练结果的历史传递。并同时,减小了非样本量的人为干扰。 而 LSTM 也不出意料,成为了 RNN 模型的 经典代表。当下我们所谈论的 RNN 结构,基本都可归类为 LSTM 的变体。 RNN 元胞(Cell) 显然,原有通过分层(Layer)来对神经网络过程区分的方式,是不足以描述 RNN 在时间上的复杂过程的。而从时序角度来看,每个时代(Epoch)的一次迭代过程,都可以被抽象为重复且工程独立的计算模块。于是, RNN 中,我们将单个传统层级神经网络(NN)在时刻 ttt 的一次 完整计算,称为位于 ttt 的 RNN 元胞(Cell)。如图 4.7.2-3 中绿色框体内的部分。 同时,LSTM 也对单元内的传统网络部分进行了进一步精简,取消了层中神经元(Neuron)的固定状态,并打破了层的阈限,采用 计算节点(Node) 的泛化称谓,分化解构了层和层内的结构性与功能性。这种更接近现代电子电路设计的处理方式,也是为何 RNN 类型网络会更容易被 硬件化(Hardwareization) 的主要原因。 对于 RNN 元胞(Cell),有些文献中将其译为神经元,这从仿生学角度来说无可厚非。但以神经网络命名歧义上讲,会发现并不合适。因此,本书参照早期机器学习(ML)的 元胞自动机 和 受限玻尔兹曼机(RBM) 中,对元胞(Cell)的定义,来代称 RNN 原文献对 Cell 的表述。两者在概念和作用上都更为接近,是以为更贴切的意译。 另外,RNN 计算节点(Node),其实就是 MPL 意义上的节点(见 4.2),只是在功能上存在从 RNN 角度出发的独特分类。为做区别,我们称之为 RNN 节点(RNN-Node)。 RNN 节点(RNN-Node) 回到正题,怎么理解 RNN 节点(RNN-Node)呢? 
RNN 节点(RNN-Node) 即为 RNN 中,对算子(Operator)进行最小粒度能力整合的 基本组成 成分。相较于层本身的双重特性来说,单元更加强调自身的功能性,并将自身的结构性完全交由元胞结构设计来表示。而从单元功能来说,主要分为两种: 门(Gate)节点,用来控制元胞内外特征,对元胞影响的单核运算,根据功能有多种子类; 层(Layer)节点,遵循 MLP 标准层内神经元特性的类层功能组件; 因此,层(Layer)节点,其实就是朴素神经网络中的层(Layer)。而各类 门(Gate)节点 和 各个节点间的数据流组合方式,即元胞驱动公式(Cell Formula),才是 RNN 元胞结构上的独特之处。 图 4-46 长短期记忆(LSTM)的三连元胞结构图(即绿框内实际情况)[20] 上图中,紫色部分即为 门(Gate)节点,而黄色部分则为 层(Layer)节点。箭头代表数据流向。工程上,通常将门节点和门的前一级输入,共同作为门来表示。这在工程导向的数据驱动流程图上就有所体现。 想要明确三者间的关系,就需要结合实际模型来理解。这里我们仍基于 LSTM 来说明。 RNN 长短期记忆(LSTM)模型 在图 4.7.2-4 中,我们仅从宏观的角度,用类 MLP 的便于承接的方式说明 RNN 的时序性。而 LSTM 真正元胞内的流程,是如下所示(数据驱动流程图): 图 4-47 长短期记忆(LSTM)的元胞结构详情 [21] 其中, 以 ttt 代表迭代时代; 以 ccc 代表 元胞状态(Cell State) 向量,代表元胞长期记忆内的高维特征; 以 hhh 代表 隐藏状态(Hidden State) 向量,代表元胞短期记忆内的高维特征; 以 fff 代表 遗忘门(Forget Gate) 输出,遗忘门用于随机或策略(训练)的丢弃前一级输入; 以 iii 代表 输入门(Input Gate) 输出,输入门控制(训练)需被长期记忆的高维特征; 以 ooo 代表 输出门(Output Gate) 输出,输出门控制(训练)需被短期记忆的高维特征; 以 XXX 代表元胞的输入特征,即对 RNN 而言经样本预处理后的输入; 以 OOO 代表元胞的输出特征,可见 ttt 时有 Ot=htO_t = h_tOt=ht ,即 LSTM 输出为当前元胞隐藏状态向量 ; 由此,我们引申出了 LSTM 的 元胞驱动公式(Cell Formula) : Input:XtGate:{it=Sigmod(Xt⋅Bi + ht−1⋅Wi)ft=Sigmod(Xt⋅Bf + ht−1⋅Wf)ot=Sigmod(Xt⋅Bo + ht−1⋅Wo)Cell State:{ct=Sigmod(ft⊗ct−1 + it⊗c^t)c^t=Tanh(Xt⋅Bg + ht−1⋅Wg)Hidden State:ht=Tanh(ct)⊗otOutput:Ot=ht {\\displaystyle \\begin{aligned} Input: &\\quad X_t \\\\ Gate: &\\begin{cases} i_t &= Sigmod(X_t \\cdot B^i \\ +\\ h_{t-1} \\cdot W^i ) \\\\ f_t &= Sigmod(X_t \\cdot B^f \\ +\\ h_{t-1} \\cdot W^f ) \\\\ o_t &= Sigmod(X_t \\cdot B^o \\ +\\ h_{t-1} \\cdot W^o ) \\end{cases} \\\\ Cell\\ State: &\\begin{cases} c_t &= Sigmod(f_t \\otimes c_{t-1} \\ +\\ i_t \\otimes \\hat{c}_t ) \\\\ \\hat{c}_t &= Tanh(X_t \\cdot B^g \\ +\\ h_{t-1} \\cdot W^g ) \\end{cases} \\\\ Hidden\\ State: &\\quad h_t = Tanh(c_t) \\otimes o_t \\\\ Output: &\\quad O_t = h_t \\\\ \\end{aligned} } Input:Gate:Cell State:Hidden State:Output:Xt⎩⎪⎨⎪⎧itftot=Sigmod(Xt⋅Bi + ht−1⋅Wi)=Sigmod(Xt⋅Bf + ht−1⋅Wf)=Sigmod(Xt⋅Bo + ht−1⋅Wo){ctc^t=Sigmod(ft⊗ct−1 + it⊗c^t)=Tanh(Xt⋅Bg + ht−1⋅Wg)ht=Tanh(ct)⊗otOt=ht 式子中, 权重(Weight) 和 偏移(Bias) 分别为 WWW 和 BBB 采用 矩阵形式表示。 而 {Wi, Wf, Wo, Wg}\\{W^i,\\ W^f,\\ W^o,\\ W^g \\}{Wi, Wf, Wo, Wg} 和 {Bi, Bf, Bo, Bg}\\{B^i,\\ B^f,\\ B^o,\\ B^g \\}{Bi, Bf, Bo, Bg} ,则分别代表着 遗忘门(Forget Gate) 、 输入门(Input Gate) 、 输出门(Output Gate) 和 基本元(Standard Neuron) 的权重和偏移。它们都将参与 RNN 训练中的迭代,即我们要训练的对象。 我们将这种权重 WWW 和偏移 BBB 参与训练,并在时序上以相反于时间流向传递(指从后时间节点的观察角度)历史锚点的方式,称为 随时间反向传播(BPTT [Back Propagation Through Time])。而我们在前文中所介绍的 反向传播(BP [Back Propagation]),在这种意义下,则为 当期反向传播(BPIE [Back Propagation In Epoch])。BPTT 考虑了时间的影响,相当于引入时序概念的升级版 BP(即 BPIE)。也正是这样,BPTT 仍然具有 BP 的一切相关特性,同时却额外的具有了历史因素 ,谨慎二者差异。 这就是一个 LSTM 的 RNN 元胞的基本构成了。除了 LSTM 外,还有各种改进类型,如:引入了 “窥视孔连接(Peephole)”的 Peephole-LSTM,采用了门循环控制单元(GRU [Gated Recurrent Unit])的 GRU-LSTM 等。这些变体所改进的皆是 LSTM 的内部流结构,有了现在的基础,读者亦可独立了解了。此处给出对比图例,以简单供参考: 图 4-48 LSTM 与 GRU-LSTM 的元胞结构对比 那么 RNN 有哪些适合的优势场景呢? 
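在进入场景讨论之前,这里把上文 LSTM 的元胞驱动公式落到一段最小的 C 语言前向计算示意上(参数为随手设定的演示值;元胞状态更新按常见的标准写法取直接相加,与上式外层多出的 Sigmod 略有出入):

```c
#include <math.h>
#include <stdio.h>

#define NX 3   // input feature size  (assumed for the demo)
#define NH 2   // hidden / cell state size

static double sigmoidf(double v) { return 1.0 / (1.0 + exp(-v)); }

// Pre-activation of gate unit j: x . B[j] + h_prev . W[j]
static double gate_in(double x[NX], double h[NH],
                      double B[NH][NX], double W[NH][NH], int j) {
    double acc = 0.0;
    for (int k = 0; k < NX; ++k) acc += x[k] * B[j][k];
    for (int k = 0; k < NH; ++k) acc += h[k] * W[j][k];
    return acc;
}

// One LSTM cell step: i / f / o gates, candidate state, then c and h update.
void lstm_step(double x[NX], double h[NH], double c[NH],
               double Bi[NH][NX], double Wi[NH][NH],
               double Bf[NH][NX], double Wf[NH][NH],
               double Bo[NH][NX], double Wo[NH][NH],
               double Bg[NH][NX], double Wg[NH][NH]) {
    double h_prev[NH];
    for (int j = 0; j < NH; ++j) h_prev[j] = h[j];
    for (int j = 0; j < NH; ++j) {
        double i_t = sigmoidf(gate_in(x, h_prev, Bi, Wi, j));  // input gate
        double f_t = sigmoidf(gate_in(x, h_prev, Bf, Wf, j));  // forget gate
        double o_t = sigmoidf(gate_in(x, h_prev, Bo, Wo, j));  // output gate
        double c_h = tanh(gate_in(x, h_prev, Bg, Wg, j));      // candidate state
        c[j] = f_t * c[j] + i_t * c_h;                         // long-term memory
        h[j] = tanh(c[j]) * o_t;                               // short-term memory = output
    }
}

int main(void) {
    double x[NX] = { 0.5, -1.0, 0.3 }, h[NH] = { 0 }, c[NH] = { 0 };
    double B[NH][NX] = { { 0.1, 0.2, -0.1 }, { 0.05, -0.3, 0.2 } };
    double W[NH][NH] = { { 0.4, -0.2 }, { 0.1, 0.3 } };
    // For brevity the demo reuses the same parameters for all four gates.
    for (int t = 0; t < 3; ++t) {
        lstm_step(x, h, c, B, W, B, W, B, W, B, W);
        printf("t=%d  h = [%f, %f]  c = [%f, %f]\n", t, h[0], h[1], c[0], c[1]);
    }
    return 0;
}
```

示例中四组权重与偏移,即为实际训练时经 BPTT 迭代的对象;元胞状态 c 与隐藏状态 h 则在时序上逐步向后传递。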
RNN 的常见场景 考虑到 RNN 的特点,RNN 类模型最为擅长的基本在于需要考虑时间关联性的场景。 目前上,工业界对 RNN 的运用已经涵盖了: 自然语言处理(NLP),如:文本分析(智能输入法)、机器翻译、语音助手等; 音视频生成,如:音乐生成、视频生成、合成编辑、自动裁剪等; 时序预测,如:股票分析、气象预测、医疗诊断等; 不过随着 Transformer 的兴起,RNN 在 NLP 领域的地位早已面临着极大挑战(自 2018 年 BERT 达成 SOTA 以来)。2023 年中的 Google Bard(BERT,GPT-3,Transformer) 大战 OpenAI ChatGPT-4(ChatGPT-4,Transformer) ,以 Bard 的糟糕(相对)表现失败告终。最终又进一步促成了 Google 加速推进了另一个用 Transformer 做为主体的 Gemini 大语言模型(LLM)发布,来扳回颜面。 而这精彩纷呈的大语言模型大战中,采用 RNN 作为骨架的 OpenAI Jukebox(12B 参数)和 EleutherAI GPT-NeoX(20B 参数),甚至没有激起水花。可见一斑。 如果 RNN 在短期内没有进一步突破,可见 Transformer 会逐步取而代之。但这,并不意味着 RNN 会退出历史舞台。技术永远都是博弈的过程,在人工智能的终极命题被解决前,无人能够断言。 需要注意的是,RNN 从始至终意图解决的都是“记忆”问题,而非 CNN 所解决的“提取”问题。两者 并不冲突,甚至还可以适度融合,即组合形成 CNN+RNN 融合模型(Hybrid Model)。由 CNN 的特征提取(FE)子网得倒高级特征,再经过 RNN 代替原 CNN 的特征选择(FS)子网和结果输出(RO)子网,实现对高级特征的时间敏感训练。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/Docs_4_7_3.html":{"url":"Chapter_4/Language/cn/Docs_4_7_3.html","title":"4.7.3 自注意力网络(Transformer)","keywords":"","body":"4.7.3 自注意力网络(Transformer) 自注意力网络(Transformer) 是一种基于自注意力机制(Self-Attention)的深度神经网络结构分类。最初的设计原型来自于 2015 年由 约书亚·本吉奥(Yoshua Bengio,1964~Present) 有关神经网络翻译模型优化而提出的设想 [22] 。 本吉奥 通过引入一种模拟生物在感知、学习、思考过程中自然而然产生的,主被动关注关键点的生理机制,来解决传统 机器学习(ML)编码器-解码器(Encoder-Decoder) 翻译模型(Translation Models)的 长句压缩固定长度(Fixed-Length) 特征,导致潜层语意缺失的问题。这种模拟方式,能够将长句(Long Sequence)中的信息,抽象为固定长度(Fixed-Length)向量的集合,而非 以单一向量的形式进行后续相关语意分解工作。 基于该研究,Google AI 实验室终于在 2017 年以 《Attention Is All You Need》[23] 一文,确定了基本的网络结构设计。 从此,开启了 Transformer 类型网络的快速发展之路。 Transformer 核心成分 & 核心机制 自注意力机制(Self-Attention) 是 Transformer 的 核心机制。而 Transformer 网络结构,则是与自注意力机制提取特征 配套,用于达成转译目的(不一定非要是语言)的整个编解码器 系统作业流水线。 图 4-49 Transformer 网络结构 [23] Transformer 在结构中,参考了 RNN 和早期 自编解码网络系统的特点,采用 时序上以串行元胞(Cell),而单次迭代中编解码(Encoder-Decoder)并行(Parallelization) 的方式,结合两者各自所长。 如此设计的原因,一方面来自于 Transformer 意图改善 RNN 对 序列到序列(Seq2Seq) 任务的处理能力。这便要求,其本身对输入样本的高级特征提取,能够拆解直接关联性,但却又可以保留更高维度潜藏的逻辑信息。另一方面,注意力机制只能提炼解构关联性特征,但我们需要的结果却是对原有输入的另一种未改变原意的表述,决定了必须有编解码类似的结构(至少目前为止)对高级特征进行非对称的压缩和还原。 自注意力机制,保证了前一点。整体结构,保证了后一点。而 注意力单元(Attention Unit),则是用于完成自注意力处理的组成框架的一部分(图中橙色)。 根据最精简实现, 从 注意力单元(Attention Unit) 上,Transformer 包括了两个分步主成分: 放缩点积注意力(SDPA),用于完成对于一次 Transformer 输入的单一注意力筛选; 多头注意力(MHA),通过对放缩点积的平行组合,完成对同一输入的多点注意力检测; 从 网络结构(Network Structure) 上,Transformer 包括了两个主体模块: 编码器(Encoder)模块,用于完成对样本输入的高级特征提取; 解码器(Decoder)模块,用于完成对样本输入的转译结果输出; 通俗来说,以人做类比,两者一个属于 Transformer 的“灵魂”,一个属于 Transformer 的“身躯”。从宏观角度,是互为一体的存在。这与前两节 CNN 、RNN 里,结构占相对主导地位(即框架为改进的关键)的情况有所不同。而是皆有突破。 正是这样,Transformer 才称为深度学习领域,在模型结构基本方法论上,近年来相对具有较大革新突破的工具。 刚刚我们提到的注意力单元,是对自注意力机制的实现。从上可知,Transformer 基本的一切都是围绕其运转的。那么,什么是自注意力机制(Self-Attention)呢? 
自注意力机制(Self-Attention) 自注意力机制(Self-Attention) 是 Transformer 中,一种目的用于模拟生物注意力的数学量化范式。是 一类方法的方法论。 想要能够区分轻重,最容易想到的方式便是加权了。因此,在 Transformer 中,对注意力的数值定义便是: 对输入序列元素,根据输入查询元素键值对,以求动态计算加权平均值的权重。 所以,我们计算注意力,需要的输入数据序列就必须包含三个重要的信息维度,分别是: 查询信息,即查询(Query),代表当前输入具有可能被关注(查询)的意义表征; 关键信息,即键值(Keys),代表当前输入在查询(Query)下可能提供的信息特征; 标值信息,即取值(Values),代表与键值(Key)关联的量化查询积分; 最终,由三者共同借由算法组成 输出特征(Output Fratures),即 加权平均值的权重,作为 Transformer 的神经网络内高级特征。 图 4-50 Transformer 的输入转换过程 [23] 显然,虽然键值和取值可以按照 传统键值对(Key-Value Pair) 形式直接配对(当然也可以有复杂映射实现)。但从查询到键值还需要有一定转换方式,通过它来实现将查询可能的关注点,分散到对应的键值上,从而构成可以计算的结果。这个转换方式,就是 评分函数(Score Function)。 一般而言,评分函数可分为两类: 简单评分函数(Simple Scorer),此类以单步函数来完成粗糙的映射,如直接 Sigmod 复杂评分函数(Complex Scorer),此类用朴素神经网络或模型来完成映射,如 MLP 评分 而现有大部分 Transformer 中,采用的都是第二个类型的实现。包括基础 Transformer 的 SDPA 和 MHA 在内,皆属于此类。 当评分函数确定后,每个查询都能拆解到键值上,并获取对应积分了。此时,就需要由 输出函数(Output Function) 来将各个部分组合成最终的高级特征向量进行输出。 假设, 以 QQQ 代表查询, 以 SeqSeqSeq 表示输入序列, 以角标 [i][_i][i] 代表所处输入序列 SeqSeqSeq 的位置,有 iii 处键值 KKK 取积分 VVV , 以 OutOutOut 代表经过注意力筛选后,的高级特征向量, 记评分函数为 fscoref_{score}fscore ,输出函数为 foutf_{out}fout 则有: wi=fscore(Ki, Q)∑fscore(Ki, Q)Out=∑fout(wi⋅Vi) {\\displaystyle \\begin{aligned} w_i &= \\frac{f_{score}(K_i,\\ Q)}{\\sum{f_{score}(K_i,\\ Q)}} \\\\ Out &= \\sum{f_{out}(w_i \\cdot V_i)} \\\\ \\end{aligned} } wiOut=∑fscore(Ki, Q)fscore(Ki, Q)=∑fout(wi⋅Vi) 如此,我们便得到了 注意力量化公式。实际上,这种按序列计算的过程,常被直接 以矩阵运算代替。公式的意义更多在于原理描述。即,自注意力机制工程化的指导过程。依照上式,便可以通过构建评分函数和输出函数,来设计 Transformer 里的注意力单元了。 经典 Transformer 中,将这一步分为了两个组成。 放缩点积注意力(SDPA [Scaled Dot-Product Attention]) 放缩点积注意力(SDPA [Scaled Dot-Product Attention]) 被用于计算单点注意力,即只生成一个注意力高级特征输出。 图 4-51 Transformer 的 SDPA 单元 [23] 如图,红框部分便是 SDPA 的评分函数,而蓝框部分则为 SDPA 的输出函数。 我们记一个输入序列,其序列长度为 TTT 而查询维度为 ddd 。这里需要解释一下,什么是序列长度和查询维度。我们举个例子,如果有一条查询为 Q⃗=[[0.12, 3.57], [0.71, 1.14], [0.95, 0.63]]\\vec{Q} = [[0.12,\\ 3.57],\\ [0.71,\\ 1.14],\\ [0.95,\\ 0.63]]Q⃗=[[0.12, 3.57], [0.71, 1.14], [0.95, 0.63]] ,那么我们就称 单条查询的维度 为 d=2d=2d=2 ,而 总共有长度 为 T=3T=3T=3 条查询。即 查询维度就是一条查询所包含的参数数目,而 序列长度就是单次输入样本包含查询的数目。 当 ddd 确定,对于长度 TTT 的输入数据序列,就有 查询 Q∈RT×dQ \\in \\mathbb{R}^{T \\times d}Q∈RT×d 、 键值 K∈RT×dK \\in \\mathbb{R}^{T \\times d}K∈RT×d 、 取值 V∈RT×dV \\in \\mathbb{R}^{T \\times d}V∈RT×d ,即三者都是 T×dT \\times dT×d 大小的矩阵。则 SDPA 的 评分函数(Score Function) 有如下表示: fscore(K, Q)=softmax(Q⋅KTd) {\\displaystyle \\begin{aligned} f_{score}(K,\\ Q) = softmax \\left( \\frac{Q \\cdot K^T}{\\sqrt{d}} \\right) \\\\ \\end{aligned} } fscore(K, Q)=softmax(√dQ⋅KT) 而输出时采用的 输出函数(Output Function),就是一个取值与评分结果的矩阵点积(Dot-Product),这也是 SDPA 名称的原因。即: fout(fscore, V)=fscore⋅VOutput=softmax(Q⋅KTd)⋅V {\\displaystyle \\begin{aligned} f_{out} &(f_{score},\\ V) = f_{score} \\cdot V \\\\ Output &= softmax \\left( \\frac{Q \\cdot K^T}{\\sqrt{d}} \\right) \\cdot V \\\\ \\end{aligned} } foutOutput(fscore, V)=fscore⋅V=softmax(√dQ⋅KT)⋅V 过程中 1d\\tfrac{1}{\\sqrt{d}}√d1 即 缩放因子(Scale Factor)。而 Mask 操作是可选的,一般过程中作用于 fscoref_{score}fscore 的 SoftMax 操作之前,已经完成点积和缩放的 (Q⋅KTd)\\left( \\tfrac{Q \\cdot K^T}{\\sqrt{d}} \\right)(√dQ⋅KT) 这个 T×TT \\times TT×T 大小的矩阵。通过屏蔽部分数据或进行简单过滤,来进一步加工交给 Softmax 的输入。 实际操作时,可以在 编码器(Encoder) 阶段引入 Mask 层来做 部分参数优化,加速训练。而 解码器(Decoder) 需要用 Mask 来做 零值处理。即,将 (Q⋅KTd)\\left( \\tfrac{Q \\cdot K^T}{\\sqrt{d}} \\right)(√dQ⋅KT) 结果中的部分数据标记为 0 或极小值(如 1e-12 ,避免权重消失),组成不完整数据。 在经过一系列运算后,根据矩阵点乘的特性,最终输出为具有 Output∈RT×dOutput \\in \\mathbb{R}^{T \\times d}Output∈RT×d 的大小的 单次注意力张量(Tensor)。 不过,我们想要的是有多个关注点的高维特征,单个注意力无法满足要求。 这就需要 MHA 了。 多头注意力(MHA [Multi-Head Attention]) 多头注意力(MHA [Multi-Head Attention]) 是对多个单头注意力,即放缩点积注意力(SDPA),处理的加权复合。 千万需要小心的是,正是在 MHA 中,我们引入了真正用于 持久训练 的迭代用权重参数,构成参数矩阵参与模型训练。 图 4-52 
Transformer 的 MHA 单元与 SDPA 单元的关系 如图,蓝色气泡内便是 SDPA 单元。在图例中,由 hhh 个 SDPA 单元,经过链接层(Concat 为简写),和线性归一化(目的是为了保证输入输出等大),构成了最终 MHA 的输出。 所以,从另一个角度来看,链接层函数就相当于 MHA 的评分函数,线性归一化则是输出函数。而 MHA 真正意义上的输入,即每个 SDPA 输入的集合。有: 图 4-53 Transformer 的 MHA 单元 [23] 上方即为 MHA 在 Transformer 中的基本算子表示。红框部分便是 MHA 的评分函数,而蓝框部分则为 MHA 的输出函数。可见,评分函数和输出函数的概念,也是相对于被选择作为参考的单元本身而言的。 我们仍然取一个输入序列(MHA 和 SDPA 都是对同一序列的操作,仅目标输出不同),其序列长度为 TTT 而查询维度为 ddd 。 记当前一个 MHA 总共有 hhh 个 SDPA 单元,每个单元按照顺序,由角标 [i][_i][i] 表示序号。则,对于顺序 iii 的 SDPA 单元输入,有查询 Qi∈RT×dQ_i \\in \\mathbb{R}^{T \\times d}Qi∈RT×d 、 键值 Ki∈RT×dK_i \\in \\mathbb{R}^{T \\times d}Ki∈RT×d 、 取值 Vi∈RT×dV_i \\in \\mathbb{R}^{T \\times d}Vi∈RT×d ,即三者都是 T×dT \\times dT×d 大小的矩阵。并有经过 SDPA 处理后的输出 Outputi∈RT×dOutput_i \\in \\mathbb{R}^{T \\times d}Outputi∈RT×d ,简记为 Oi∈RT×dO_i \\in \\mathbb{R}^{T \\times d}Oi∈RT×d 交付链接。 由于采用了多组 SDPA 组合,我们不再能以固定形式,确定每个 SDPA 输入的重要程度。因此,需要对每个构成 MHA 的 SDPA 算子的输入 [Qi, Ki, Vi][Q_i,\\ K_i,\\ V_i][Qi, Ki, Vi] 进行确权,来通过训练得到实际 MHA 的输入的初始关注点。 介于这一点,我们对每一组顺序 的 SDPA 单元输入进行加权。引入 输入权重(Input Wights),根据加权对象,分为 iii 组查询权重 WiQ∈Rd×TW^Q_i \\in \\mathbb{R}^{d \\times T}WiQ∈Rd×T 、 iii 组键值权重 WiK∈Rd×TW^K_i \\in \\mathbb{R}^{d \\times T}WiK∈Rd×T 、 iii 组取值权重 WiV∈Rd×TW^V_i \\in \\mathbb{R}^{d \\times T}WiV∈Rd×T 。 注意,加权需要用和加权对象维度转置(Transpose)的矩阵。 加权后,顺序 iii 的 SDPA 算子的输入就变为了 [Qi⋅WiQ, Ki⋅WiK, Vi⋅WiV][Q_i \\cdot W^Q_i,\\ K_i \\cdot W^K_i,\\ V_i \\cdot W^V_i][Qi⋅WiQ, Ki⋅WiK, Vi⋅WiV] 。同时,这也是为什么 MHA 中,Q、K、V 需要经过一次线性归一化。即目的是为了保证每一组的输入在样本值上的价值等同。 调整后,MHA 的 SDPA 计算公式 化为: Oi=softmax(QiWiQ⋅(KiWiK)Td)⋅ViWiV=SDPA(QiWiQ, KiWiK, ViWiV) {\\displaystyle \\begin{aligned} O_i &= softmax \\left( \\frac{Q_iW^Q_i \\cdot (K_iW^K_i)^T}{\\sqrt{d}} \\right) \\cdot V_iW^V_i \\\\ &= SDPA(Q_i W^Q_i,\\ K_i W^K_i,\\ V_i W^V_i) \\\\ \\end{aligned} } Oi=softmax(√dQiWiQ⋅(KiWiK)T)⋅ViWiV=SDPA(QiWiQ, KiWiK, ViWiV) 使得 MHA 的评分函数(Score Function)有如下表示: fscore(K, Q, V)=Concat(O1, O2, ⋯ , Oi) {\\displaystyle \\begin{aligned} f_{score}(K,\\ Q, \\ V) = Concat \\left( O_1,\\ O_2,\\ \\cdots \\ ,\\ O_i \\right) \\\\ \\end{aligned} } fscore(K, Q, V)=Concat(O1, O2, ⋯ , Oi) 其中,连接函数(Concat [Connection Function])是简单全链接。即,将每一个 SDPA 的输出 OiO_iOi 顺序拼接,构成 (FC=∑Oi)∈RT×dh(FC =\\sum O_i )\\in \\mathbb{R}^{T \\times dh}(FC=∑Oi)∈RT×dh 的输出。 而输出时采用的输出函数(Output Function),存在迭代的 目的权重(Target Wight) 矩阵 WO∈Rhd×TW^O \\in \\mathbb{R}^{hd \\times T}WO∈Rhd×T ,以权重代表注意力积分并参与训练(即动态的积分)。有: fout(fscore, WO)=linear(fscore⋅WO)Output=linear(Concat(O1, O2, ⋯ , Oi)⋅WO) {\\displaystyle \\begin{aligned} f_{out} &(f_{score},\\ W^O) = linear(f_{score} \\cdot W^O) \\\\ Output &= linear(Concat \\left( O_1,\\ O_2,\\ \\cdots \\ ,\\ O_i \\right) \\cdot W^O) \\\\ \\end{aligned} } foutOutput(fscore, WO)=linear(fscore⋅WO)=linear(Concat(O1, O2, ⋯ , Oi)⋅WO) 其中,线性归一化算子(Linear) 其实同 MHA 的 SDPA 输入线性归一化一样,目的在于归一化 MHA 的输出以取得我们想要的多关注点高维特征,并同时让输出保持与输入相同的维度大小。即,通过 linear(fscore⋅WO)linear(f_{score} \\cdot W^O)linear(fscore⋅WO) ,让原本 (fscore⋅WO)∈RT×dh(f_{score} \\cdot W^O) \\in \\mathbb{R}^{T \\times dh}(fscore⋅WO)∈RT×dh 大小的数据,通过以 T×dT \\times dT×d 大小分块,分为 hhh 块叠加求均值,来使最终输出的 Output∈RT×dOutput \\in \\mathbb{R}^{T \\times d}Output∈RT×d 大小。 所以,MHA 的完整处理公式为: fout(fscore, WO)=linear(fscore⋅WO)linear(fscore⋅WO)=∑h(fscore⋅WO)i∑(fscore⋅WO)iOutput=linear(Concat(O1, O2, ⋯ , Oi)⋅WO) {\\displaystyle \\begin{aligned} f_{out} &(f_{score},\\ W^O) = linear(f_{score} \\cdot W^O) \\\\ linear &(f_{score} \\cdot W^O) = \\sum^h \\frac{(f_{score} \\cdot W^O)_i}{\\sum (f_{score} \\cdot W^O)_i} \\\\ Output &= linear(Concat \\left( O_1,\\ O_2,\\ \\cdots \\ ,\\ O_i \\right) \\cdot W^O) \\\\ \\end{aligned} } 
foutlinearOutput(fscore, WO)=linear(fscore⋅WO)(fscore⋅WO)=∑h∑(fscore⋅WO)i(fscore⋅WO)i=linear(Concat(O1, O2, ⋯ , Oi)⋅WO) 至此,特征提取完毕。 由 MHA 的输出 Output∈RT×dOutput \\in \\mathbb{R}^{T \\times d}Output∈RT×d 和权重矩阵 [WO, ∑[WiQ, WiK, WiV]][W^O,\\ \\sum [W^Q_i,\\ W^K_i,\\ W^V_i] ][WO, ∑[WiQ, WiK, WiV]] ,参与到 Transformer 训练的内部过程。 Transformer 的辅助处理单元 在正式开始 Transformer 的网络结构讲解前。我们还需要了解一下,自注意力网络(Transformer)中的 其它辅助机制。 在经典结构中,Transformer 除了使用自注意力来完成特征提取外,还使用了由 ResNet 提出在当时已经相对成熟的 残差连接(Residual Connection) 技术,并使用简单 前馈控制(Feed Forward) 来修正 MHA 特征,提供非线性和引入深层次的 隐藏权重(Hidden Wight) 参与训练。 图 4-54 Transformer 辅助机制作用位置 图中红框的部分,即为这两个机制起作用的位置。一般,在 Transformer 中,将其分别称为 前馈控制单元(FFU [Feed Forward Unit]) 和 加和标准化单元(ANU [Add & Norm Unit])。 记两者的输入为 XXX ,输出为 X^\\hat{X}X^ 。 大部分情况下前馈控制单元的输入 XXX 都为 MHA 的输出,即 X=MHAOutput∈RT×dX = MHA_{Output} \\in \\mathbb{R}^{T \\times d}X=MHAOutput∈RT×d 但也有例外。加和标准化单元则需要两个输入。不过,在这两个单元的处理中,我们为了保证输入前后特征张量(Tensor)的一致性,要求不论 FFU 还是 ANU,都必须实现输入输出大小相等。 所以,在整个 Transformer 中,FFU 和 ANU 都有 X,X^∈RT×dX,\\hat{X} \\in \\mathbb{R}^{T \\times d}X,X^∈RT×d 。 而两者的 驱动公式(Core Formula),则为: FFU:{Input: XOutput: X^=ReLU(X⋅W1+B1)⋅W2+B2ANU:{Input: X1, X2Output: X^=Norm(X1+X2) {\\displaystyle \\begin{aligned} FFU: &\\begin{cases} Input &: \\ X \\\\ Output &: \\ \\hat{X} = ReLU(X \\cdot W_1 + B_1) \\cdot W_2 + B_2 \\end{cases} \\\\ ANU: &\\begin{cases} Input &: \\ X_1,\\ X_2 \\\\ Output &: \\ \\hat{X} = Norm(X_1 + X_2) \\end{cases} \\\\ \\end{aligned} } FFU:ANU:{InputOutput: X: X^=ReLU(X⋅W1+B1)⋅W2+B2{InputOutput: X1, X2: X^=Norm(X1+X2) 每一个 FFU 都能为我们引入一套权重 W=[W1T×d, W2T×d]W = [{W_1}^{T \\times d},\\ {W_2}^{T \\times d}]W=[W1T×d, W2T×d] 和偏移 B=[B1T×d, B2T×d]B= [{B_1}^{T \\times d},\\ {B_2}^{T \\times d}]B=[B1T×d, B2T×d] 参与训练。而 ANU 则负责通过 归一化(Normalization) 将样本数据映射到一定范围区间内,保证前级输出的统一尺度衡量,加速模型收敛。 所有原件准备就绪,Transformer 网络结构就非常容易理解了。 Transformer 网络结构 在本节开始时提到,自注意力网络(Transformer)从结构角度分为编码器(Encoder)和 解码器(Decoder)。两者在整体上分别对同一个序列(Sequence)进行不同处理。 图 4-55 Transformer 编解码器示意图 如图,蓝框内部分即编码器(Encoder)的构成,红框内部分则是解码器(Decoder)。 编码器(Encoder) 接收正常顺序的序列,如:“I am eating an apple” 经过 位子编码(Positional Encoding),再以特征工程提炼出的 [Q, K, V][Q,\\ K,\\ V][Q, K, V] 。 之后,交由 MHA 提取高级特征,并将提取的高级特征经过一次 ANU 归一化。最终,归一化的高级特征通过 FFU 加隐藏的核心权重和偏移,再次经由一次 ANU 归一化,完成当前时代的编码部分处理。记编码器的输出为 OencO_{enc}Oenc ,显然 OencO_{enc}Oenc 有 T×dT \\times dT×d 大小。 解码器(Decoder) 接收被标记过的序列,如:“I am eating an apple” 经过标记(Shifted Right)变为 “\\ I am eating an apple” ,再由特征工程提炼出的 [Q, K, V][Q,\\ K,\\ V][Q, K, V] 输入。 标记(Shifted Right) 的作用是为了区分每一个序列的起点,例子里我们采用的是 “\\” ,当然也可以用其他标志。 之后,交由 加遮罩(Mask)的 MHA 提取高级特征,并 ANU 归一化。这里的 遮罩,就是前文中提到的 SDPA 的可选 Mask 操作,即解码器对 (Q⋅KTd)\\left( \\tfrac{Q \\cdot K^T}{\\sqrt{d}} \\right)(√dQ⋅KT) 的零值处理。简单的 Mask 有: Mask=[0 ,1 ,1 ,⋯, 10 ,0 ,1 ,⋯, 10 ,0 ,0 ,⋯, 1⋮,⋮ ,⋮ ,⋯, ⋮0 ,0 ,0 ,⋯, 0]T×d {\\displaystyle \\begin{aligned} &Mask = \\begin{bmatrix} & 0 \\ , & 1 \\ , & 1 \\ , \\cdots,\\ & 1 \\\\ & 0 \\ , & 0 \\ , & 1 \\ , \\cdots,\\ & 1 \\\\ & 0 \\ , & 0 \\ , & 0 \\ , \\cdots,\\ & 1 \\\\ & \\vdots, & \\vdots \\ , & \\vdots \\ , \\cdots,\\ & \\vdots \\\\ & 0 \\ , & 0 \\ , & 0 \\ , \\cdots,\\ & 0 \\\\ \\end{bmatrix}_{T \\times d} \\\\ \\end{aligned} } Mask=⎣⎢⎢⎢⎢⎢⎢⎡0 ,0 ,0 ,⋮,0 ,1 ,0 ,0 ,⋮ ,0 ,1 ,⋯, 1 ,⋯, 0 ,⋯, ⋮ ,⋯, 0 ,⋯, 111⋮0⎦⎥⎥⎥⎥⎥⎥⎤T×d 即 mask(Q⋅KTd)mask \\left( \\tfrac{Q \\cdot K^T}{\\sqrt{d}} \\right)mask(√dQ⋅KT) 只保留右上角数据,完成解码器对输入的第一次注意力操作。 接下来,解码器会接受编码器的同序列输出 OencO_{enc}Oenc ,作为一组键值 [K=Oenc, V=Oenc][K = O_{enc},\\ V = O_{enc}][K=Oenc, V=Oenc] 组合,并用前一级 MHA 的 ANU 归一化结果作为查询 QQQ ,合并为一组 [Q, K=Oenc, V=Oenc][Q,\\ K = O_{enc},\\ V = O_{enc}][Q, K=Oenc, V=Oenc] 作为第二个 MHA 的输入。 第二个 MHA 进行常规的 无 Mask 
注意力过程。将第二个 MHA 的输出交由 FFU 加隐藏的核心权重和偏移。在 ANU 归一化后,作为解码器的最终输出。 记解码器的输出为 OdecO_{dec}Odec ,同样有 T×dT \\times dT×d 大小。 或许有心的读者已经注意到,在图例中,编解码器的核心流水线旁边都有一个数量标记 NNN 。这意味着每个编解码都是由 NNN 个这样的流水线构成的。目的是为了将 长序列(Long Sequence),拆分为顺序的单个 单词(Word),即 短序列(Short Sequence),顺序的输入处理。我们将编解码各自的一条完整流水线,称为 编码层(Encoding Layer) 和 解码层(Decoding Layer)。 那么,以解码器输入 “\\ I am eating an apple” 为例。经过分割后,就变成了: 0 - \"\" 1 - \"I\" 2 - \"am\" 3 - \"eating\" 4 - \"an\" 5 - \"apple\" 总共 6 个短句。分别交由 6 个解码层处理。最终的输出也按照解码层的顺序,进行顺序拼接。相当于每一个解码层的 T=1T=1T=1 。而拼接后的结果仍然是 T×dT \\times dT×d 。 这样既保证了模型本身的一定范围实时感知,也解放了模型本身的训练处理机能。在 2017 经典 Transformer 中,建议取 N=6N=6N=6 ,平衡效率。 Transformer 的输出 & 训练迭代 其实,经过之上的一系列工作,最终编码器的输出 OdecO_{dec}Odec ,还需要经过一次 线性归一化(Linear Normalization),再通过 SoftMax 输出概率预测结果 PPP 。预测 PPP 的大小为 T×1T \\times 1T×1 是一组概率数组。 这个输出,才是最终参与模型迭代,用于损失函数的结果。 那么,Transformer 采用的损失函数是什么呢? 即然最终操作的对象是概率值,那么不难想到本质仍然属于分类(Classification)。 因此,Transformer 通常采用 交叉熵损失(Cross Entropy Loss)。即我们在损失函数一节中,提到过的: Loss=1N∑i=1N[∑j=1k−yj⋅log(predictionj)]i {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i=1}^N [\\sum_{j=1}^k -y_j \\cdot log(prediction_j)]_i \\\\ \\end{aligned} } Loss=N1i=1∑N[j=1∑k−yj⋅log(predictionj)]i 同理,也可以考虑改用其他的分类项损失。 随后的过程就是深度学习网络(DNN)的通用过程了,用优化算法加速权重迭代。并持续训练,直到模型达成收敛指标。 而部署后,预测结果 PPP 所关联的词汇,就是最终输出。 Transformer 的常见场景 自注意力网络(Transformer)在诞生之后,大部分都被运用在 NLP 由其是 LLM 领域。 目前上,工业界对 Transformer 的运用已经涵盖了: 自然语言处理(NLP),如:文本分析(智能输入法)、机器翻译、语音助手等; 音视频生成,如:音乐生成、视频生成、合成编辑、自动裁剪等; 而配合其他网络结构,如 CNN 的原样本特征提取能力,Transformer 在图形处理上也被大量运用,涵盖了: 图像分类,如:手势识别、动作识别、人脸识别等; 图像分割,如:背景分离、智能抠图、轮廓融合等; 语义分割,如:物体分类、车辆检测等; 语音识别,如:文本转译、同声传录、情感分析等; 时序预测,如:股票分析、气象预测、医疗诊断等; 可以说,Transformer 几乎体现在各种方面。 至此,随着经典模型结构 自注意力网络(Transformer)介绍完毕,基本理论知识也完成了初步的梳理。 从下一章开始,我们将正式步入音视频处理的实践工程领域。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_4/Language/cn/References_4.html":{"url":"Chapter_4/Language/cn/References_4.html","title":"【参考文献】","keywords":"","body":"四、【参考文献】 [1] Hinton, G. E.; Salakhutdinov, R. R. (2006). \"Reducing the Dimensionality of Data with Neural Networks.Science\". 313 (5786): 504–507. [2] Larochelle, H.; Bengio, Y. (2008). \"Classification using discriminative restricted Boltzmann machines\". Proceedings of the 25th international conference on Machine learning - ICML '08. p. 536. [3] Coates, A.; Lee, H.; Ng, A. Y. (2011). \"An analysis of single-layer networks in unsupervised feature learning\". International Conference on Artificial Intelligence and Statistics (AISTATS). [4] Yuxi Li. (2018). \"DEEP REINFORCEMENT LEARNING”. arXiv. [5] Krizhevsky, Alex , I. Sutskever , and G. Hinton. (2012). \"ImageNet Classification with Deep Convolutional Neural Networks.\" NIPS Curran Associates Inc. [6] Y. LeCun, “LeNet-5, convolutional neural networks”. History summary page. [7] Eugenio Culurciello. (2016). \"Navigating the unsupervised learning landscape\". [8] Klambauer G, Unterthiner T, Mayr A, et al. Self-normalizing neural networks[J]. Advances in neural information processing systems, 2017, 30. [9] Misra D. Mish: A self regularized non-monotonic activation function[J]. arXiv preprint arXiv:1908.08681, 2019. [10] Ramachandran P, Zoph B, Le Q V. Searching for activation functions[J]. arXiv preprint arXiv:1710.05941, 2017. [11] Howard A, Sandler M, Chu G, et al. Searching for mobilenetv3[C]//Proceedings of the IEEE/CVF international conference on computer vision. 2019: 1314-1324. [12] Srivastava N, Hinton G, Krizhevsky A, et al. 
Dropout: a simple way to prevent neural networks from overfitting[J]. The journal of machine learning research, 2014, 15(1): 1929-1958. [13] Goodfellow I, Warde-Farley D, Mirza M, et al. Maxout networks[C]//International conference on machine learning. PMLR, 2013: 1319-1327. [14] Hadsell R, Chopra S, LeCun Y. Dimensionality reduction by learning an invariant mapping[C]//2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06). IEEE, 2006, 2: 1735-1742. [15] Schroff F, Kalenichenko D, Philbin J. Facenet: A unified embedding for face recognition and clustering[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 815-823. [16] Sohn K. Improved deep metric learning with multi-class n-pair loss objective[J]. Advances in neural information processing systems, 2016, 29. [17] Hinton G, Srivastava N, Swersky K. Neural networks for machine learning lecture 6a overview of mini-batch gradient descent[J]. Cited on, 2012, 14(8): 2. [18] Teo Y S, Shin S, Jeong H, et al. Benchmarking quantum tomography completeness and fidelity with machine learning[J]. New Journal of Physics, 2021, 23(10): 103021. [19] Gao M, Wang Q, Lin Z, et al. Tuning Pre-trained Model via Moment Probing[C]//Proceedings of the IEEE/CVF International Conference on Computer Vision. 2023: 11803-11813. [20] Hochreiter, Sepp & Schmidhuber, Jürgen. (1997). Long Short-term Memory. Neural computation. 9. 1735-80. 10.1162/neco.1997.9.8.1735. [21] Lee, Daeil & Koo, Seoryong & Jang, Inseok & Kim, Jonghyun. (2022). Comparison of Deep Reinforcement Learning and PID Controllers for Automatic Cold Shutdown Operation. Energies. 15. 2834. 10.3390/en15082834. [22] Bahdanau D, Cho K, Bengio Y. Neural machine translation by jointly learning to align and translate[J]. arXiv preprint arXiv:1409.0473, 2014. [23] Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need[J]. Advances in neural information processing systems, 2017, 30. 
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:09:50 "},"Chapter_5/Language/cn/Apex_5_Introduce.html":{"url":"Chapter_5/Language/cn/Apex_5_Introduce.html","title":"五、音视频帧分析与数据处理","keywords":"","body":"五、音视频帧分析与实践 引言 历经四个章节,我们详细探讨了音频与色彩的相关知识,以及常用算法和机器学习在音视频中的工程方向和理论原型。通过整理并学习这些内容,我们已经对音视频处理的基本概念和技术工具有了初步的了解。而音视频处理的核心任务之一,便是对音视频帧的分析与处理。 音视频帧工程(Audio & Visual/Video Frame Engineering)是音视频工程中的关键环节。音频帧和视频帧分别代表了音频信号和视频信号在时间轴上的离散片段。对这些帧的分析与处理,不仅是实现音视频同步、特效添加、压缩编码等高级功能的基础,也是提升音视频质量和用户体验的关键。 本章节将主要整理说明音视频帧的基本概念、分析方法和简单处理技术。通过对音视频帧的深入理解和操作,我们可以更好地掌握音视频处理的核心技术,为后续的复杂应用与试验打下坚实的基础。 通过本章节的学习,读者将能够掌握音视频帧的基本分析方法和简单处理技术,为进一步深入研究和开发音视频应用提供必要的知识储备。真正进入音视频工程领域的大门。 关键字:音频帧、视频帧、帧分析、简单帧处理、工程实践 目录 5.1 音视频帧与环境准备 5.1.1 常用数学库(Numpy、Pandas、Mateplotlib) 5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio) 5.1.3 视频分析库(PyOpenCV、Color-Science) 5.1.4 分析环境准备 5.1.5 其他分析软件 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 "},"Chapter_5/Language/cn/Docs_5_1.html":{"url":"Chapter_5/Language/cn/Docs_5_1.html","title":"5.1 音视频帧与环境准备","keywords":"","body":"5.1 音视频帧 与 环境准备 什么是音视频帧呢?首先需要了解,什么是帧。 帧(Frame) 是 对某一时刻(视频) 或 某小段时间(音频)内 的数据代称。而 音视频帧,则是对音频帧 和 视频帧 的统称。通常而言,我们将 一段音频中一个有效数据块(如:WAV 的音频数据子块、FLAC 的音频数据块、MP3 的帧数据)称为 音频帧(Audio Frame),而将 一张图片所代表的某时刻静态图像信息(如:时刻 t 完整图像解压缩后的 YUV 数据)称为 视频帧(Visual/Video Frame)。 当然,音视频的离散采样,决定了其本身皆为 离散数据的时序排列数据集。但从相对角度来看,如果称视频帧为离散的,那么音频帧在这样的尺度下,就是连续的。因此,对音频的分析更多是从 音频整体角度,或 范围内的局部情况 分析。很少单一的局限于某个时间点。而对视频的分析则分为,是对 包含视频整体时空情况 的 动态分析,还是只对 某一固定时刻 的 静态分析。对比之下稍显隔离。不过从某种意义上讲,这也是因为视频数据本身所包含的维度更高,而更容易的被拆解以获取更多信息所致。 所以,在不考虑网络的情况下,我们通常将 视频分析(Video Analysis) 的两种类型,独立称为 视频流分析(Video Stream Analysis) 和 视频帧分析(Video Frame Analysis)。而 音频分析(Audio Analysis) 则不再细分。即 音频流分析(Audio Stream Analysis)同 音频分析(Audio Analysis)技术性一致。 需要注意的是,当引入网络条件时,音视频流分析在网络流传输的语义前提下另有所指。同时,在 流协议背景时,也同样是指协议层面的特征,切勿将三者混淆。 本章节我们讨论的音视频分析,特指对音视频的直观特征分析,即对其基础信息的分析。以此为目标,进行一些简单工程。 常用库准备 在开始搭建分析环境之前,还需要对常用的工具库进行简单的介绍。由于分析所采用的工程手段,多为以 Python 为脚本语言编写的简单处理流,因此,我们需要使用到的基本库,皆为 Python 工具库。 于是为方便后续索引、使用、总结,从库功能性上做简单归类,可以分为:常用数学库、视频分析库 和 音频分析库。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 "},"Chapter_5/Language/cn/Docs_5_1_1.html":{"url":"Chapter_5/Language/cn/Docs_5_1_1.html","title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","keywords":"","body":"5.1.1 常用数学库(NumPy、Pandas、Mateplotlib) 工程里对 数据分析和科学计算 的过程中,常用数学库是不可或缺的工具。这些库不仅提供了高效的数据处理能力,还为我们提供了 丰富的数学函数 和 可视化工具。其中,最为重要的库有三个,即 NumPy、Pandas、Mateplotlib,分别对应 [ 基础计算、数理统计、图表绘制 ] 的需求。 NumPy(Numerical Python) NumPy(Numerical Python) 是 用于科学计算的基础库,提供了针对 N 维数组/张量 及其 衍生类型 生命周期方法的结构化封装,和用于 协助处理这些数组/张量的丰富函数库 [1] 。这使得我们可以通过其进行快速的矩阵运算和其他数学操作,而不必再单独定义实现某些通用的算法。例如前文提到的傅立叶变换,其变体,或其逆变换(FFT、DFT、IDFT etc.)。除此之外,NumPy 还包含了线性代数、统计函数、随机数生成等功能模块,是数据分析和机器学习的基础工具之一。 主要功能: 提供基础数据结构(ndarray)和数据类型(dtype),作为 N 维数组/张量 数据载体 完善的基础数学函数库,包括 基础统计、线性代数、傅立叶变换、随机数生成 广泛的扩展数学函数库,包括 金融函数、伽马函数等 于特殊函数库中(numpy.special) 相对完善的内存管理和索引体系,并支持内存映射能力,即可处理超出内存大小数据集 提供完整的数据交互体系,在数据结构化、字符串操作、I/O 操作上与其他库有 较高兼容 基础库(np.)的常用函数(简,仅列出名称): 算术运算: add, subtract, multiply, divide, power, mod, remainder 比较运算: greater, greater_equal, less, less_equal, equal, not_equal 逻辑运算: logical_and, logical_or, logical_not, logical_xor 基本统计: mean, median, std, var, min, max, sum, cumsum, prod, cumprod 排序搜索: sort, argsort, argmax, argmin, searchsorted 三角函数: sin, cos, tan, arcsin, arccos, arctan, arctan2 双曲函数: sinh, cosh, tanh, arcsinh, arccosh, arctanh 指数对数: 
exp, expm1, log, log10, log2, log1p 矩阵运算: dot, vdot, inner, outer, matmul 直方图: histogram, histogram2d, histogramdd 多项式(需依托 np.poly1d 多项式类): poly, polyval, polyfit, roots, polyder, polyint 线性代数扩展(np.linalg.)的常用函数(简,仅列出名称): 矩阵分解: cholesky, qr, svd 求逆和解线性方程组: inv, pinv, solve 特征值和特征向量: eig, eigh, eigvals, eigvalsh 矩阵范数(L1/L2/inf): norm 矩阵行列式和秩: det, matrix_rank 傅立叶变换扩展(np.fft.)的常用函数(简,仅列出名称): 一维傅里叶变换: fft, ifft 二维傅里叶变换: fft2, ifft2 多维傅里叶变换: fftn, ifftn 一维快速傅立叶法: rfft, irfft 一维亥姆霍兹变换: hfft, ihfft 随机数生成扩展(np.random.)的常用函数(简,仅列出名称): 简单随机: rand, randn, randint, choice 概率分布: normal, uniform, binomial, poisson, exponential, beta, gamma, chisquare 乱序函数: shuffle, permutation 随机种子: seed 其他如 特殊函数扩展(np.special.) 等,在具体使用时,可自行前往 官网档案馆 查阅。 Pandas(Python Data Analysis Library) Pandas(Python Data Analysis Library) 是 用于数据操作和分析的强大工具库,提供了针对 数理统计服务 的 高效格式类型和相关统计分析工具,在处理 结构化数据 方面具有巨大优势 [2] 。尤其是对于 表格类数据 的处理。我们可以通过其 DataFrame 和 Series 这两个核心类型,轻松的获取 经数组化后能提供给 NumPy 处理的数据集。进而允许我们更方便地进行数据的清洗、修改和分析操作。此外,对于科学统计类的时间序列数据,Pandas 亦能完美解析到需要使用的格式。是辅助我们进行统计工作和数据预处理的利器。 主要功能: 高效的 数据结构(即,DataFrame 、Series 和 两者关联方法) 丰富的 时序结构(即,DatetimeIndex, Timedelta, Period 时刻/时间/时差) 丰富的 数据清洗、数据转换、数据标准化 能力 支持 多种格式 I/O 操作,如 CSV、Excel、SQL、JSON 等 通用格式类型 提供诸如时间序列数据的索引、切片、重采样、滚动窗口等,时间序列数据处理能力 提供对 缺失值、异常值、重复数据 等问题数据的,检测、填充、转换、过滤能力 基础库(pd.)的常用函数(简,仅列出名称): 数据结构: , , 时序结构: , , 数据创建: read_csv, read_excel, read_sql, read_json, read_html, read_clipboard, read_parquet, read_feather, read_orc, read_sas, read_spss, read_stata, read_hdf, read_pickle 数据导出: to_csv, to_excel, to_sql, to_json, to_html, to_clipboard, to_parquet, to_feather, to_orc, to_sas, to_spss, to_stata, to_hdf, to_pickle 数据变换: assign, drop, rename, pivot, pivot_table, melt, stack, unstack, get_dummies 数据聚合: groupby, agg, aggregate, transform, apply, rolling, expanding, resample 数据清洗: isnull, notnull, dropna, fillna, replace, interpolate, duplicated, drop_duplicates 数据合并: merge, concat, join, append 选择过滤: loc, iloc, at, iat, ix 基本统计: mean, median, std, var, min, max, sum, cumsum, prod, cumprod, describe 数据结构扩展(pd.Series, pd.DataFrame)的辅助方法(简,仅列出名称): 方法: append, drop, drop_duplicates, dropna, fillna, replace, interpolate, isnull, notnull, unique, value_counts, apply, map, astype, copy, shift, diff, pct_change, rank, sort_values, sort_index 方法: append, drop, drop_duplicates, dropna, fillna, replace, interpolate, isnull, notnull, pivot, pivot_table, melt, stack, unstack, get_dummies, merge, concat, join, groupby, agg, aggregate, transform, apply, rolling, expanding, resample, sort_values, sort_index, rank, describe, corr, cov, hist, boxplot, plot 时间序列扩展(pd.DatetimeIndex, pd.Timedelta, pd.Period)的辅助方法(简): 方法: to_pydatetime, to_period, to_series, to_frame, normalize, strftime, snap, shift, tz_convert, tz_localize, floor, ceil, round 方法: total_seconds, to_pytimedelta, components, is_leap_year 方法: asfreq, start_time, end_time, to_timestamp, strftime 这些方法和结构类型,涵盖了数据创建、选择、过滤、变换、聚合、清洗、合并、时间序列处理以及数据输入输出等多个方面,进而使得 Pandas 成为了数据科学和数据分析领域的基础工具,亦被广泛应用于数据清洗、数据变换、数据分析、数据可视化等任务。 不过,在 可视化方面,我们一般不会使用 Pandas 自身的绘制模块所提供的绘图功能,而是采用更为专业的 Matplotlib 库协助获取结果。实际上 Pandas 自身的绘制模块(pd.plotting.)在过程方面,也是采用的 Matplotlib 做为绘制执行器。调用绘图模块,仅仅是调用了封装好的绘制流而已,而这并不是 Pandas 所擅长的部分。 其他如 日期类型扩展(pd.DateOffset) 等,在具体使用时,可自行前往 官网档案馆 查阅。 Matplotlib Matplotlib(Mathematics Python Plotting Library)是基于 Python 语言开发,专用于数据图形化的高级图表绘制库。在数据科学、工程、金融、统计等领域有着广泛的应用 [3] 。通过库所包含的各种核心及辅助模块,我们能够轻松的 将经由 NumPy 和 Pandas 处理后的数据,以静态、动态 或 交互式图的方式展示出来。它提供了 丰富的绘图功能,可以被用于生成各种类型的图表,如折线图、柱状图、散点图、直方图等。而灵活的 API 设计,则允许我们在自定义图表的各个方面,进行相对自由的定制。因此,其成为了工程中 首选的数据可视化工具,帮助我们更为 
直观地展示数据分析 的结果。 主要功能: 支持包括 折线图、柱状图、热力图、3D 复合等,丰富的绘图类型 高可定制化 的展示细节,包括 图例、命名、注释、线条、样式等几乎所有图表元素 高可交互性 的图表操作,且与 大部分不同平台的 GUI 库(如 Qt、wxWidgets)兼容 多种输出格式支持,如 PNG、PDF、SVG 等 与主流科学计算库(如 NumPy、Pandas、SciPy 等)的 无缝集成 基础库(matplotlib.pyplot. as plt.)的常用函数(简,仅列出名称): 图形容器: , , 样式类型: 略(如 等,有关样式有较多扩展库,详见官方文档) 创建图形和子图: figure, subplot, subplots, add_subplot, subplots_adjust 图形导入: imread, imshow 绘图函数: plot, scatter, bar, barh, hist, pie, boxplot, errorbar, fill, fill_between, stackplot, stem, step 图形属性: title, xlabel, ylabel, xlim, ylim, xticks, yticks, grid, legend, text, annotate 图形样式: style.use, set_cmap, get_cmap, colormaps 线条样式: set_linestyle, set_linewidth, set_color, set_marker, set_markersize 文本样式: set_fontsize, set_fontweight, set_fontstyle, set_fontname 布局样式: tight_layout, subplots_adjust, get_current_fig_manager 交互工具: ginput, waitforbuttonpress, connect, disconnect 事件处理: mpl_connect, mpl_disconnect 图形保存: savefig 颜色映射(matplotlib.cm. as cm.)的常用函数(简,仅列出名称): 映射对象(颜色映射结构): 映射注册与获取: get_cmap, register_cmap 常用映射: viridis, plasma, inferno, magma 图形容器(plt.Figure, plt.Axes)的常用函数(简,仅列出名称): 方法: add_subplot, add_axes, subplots, subplots_adjust, savefig, clf, gca, tight_layout, subplots_adjust, get_current_fig_manager 方法: plot, scatter, bar, barh, hist, pie, boxplot, errorbar, fill, fill_between, stackplot, stem, step, set_title, set_xlabel, set_ylabel, set_xlim, set_ylim, set_xticks, set_yticks, grid, legend, text, annotate, cla, twinx, twiny, set_aspect, set_facecolor 3D 绘图(mpl_toolkits.mplot3d.)的常用函数(简,仅列出名称): 3D 图形容器: 3D 图形属性: set_xlabel, set_ylabel, set_zlabel, set_xlim, set_ylim, set_zlim, view_init 常用通用方法: text, annotate, grid, legend, set_aspect, set_facecolor 其他如 描绘效果扩展(matplotlib.patheffects) 等,在具体使用时,可自行前往 官网档案馆 查阅。 三个关键基础库介绍完毕,那么现在,让我们用它们做些简单的数据练习。 简单练习:用 常用数学库 完成 加州房地产信息统计 为了更贴近数据处理中所面临的真实情况,我们这里使用 Google 开源的 加利福尼亚州模拟房地产统计信息,作为数据源。 练习事例按照标准工程工作流进行。 第一步,确立已知信息: 数据来源:房地产统计 CSV 格式(.csv)表 [本地文件] 处理环境:依赖 ,Python 脚本执行 工程目标: 1) 根据数据获取 归一化后的房价,并以经纬度为横纵坐标,颜色表示处理结果 2) 根据数据获取 人均占有房间数,并以经纬度为横纵坐标,颜色表示处理结果 第二步,准备执行环境: 检测是否已经安装了 Python 和 pip(对应 Python 版本 2.x) 或 pip3(对应 Python 版本 3.x) 包管理器: python --version pip --version 若 Python 和 pip 不存在,则需要去 Python 官网(https://www.python.org/downloads/) 下载对应当前主机平台的安装文件。而 pip 的安装(如果未随安装包安装的话),需要先准备安装脚本。 # Windows curl -o %TEMP%\\get-pip.py https://bootstrap.pypa.io/get-pip.py # MacOS & Linux curl -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py 之后,执行如下命令安装: # Windows python -m ensurepip --upgrade python %TEMP%\\get-pip.py # MacOS & Linux python -m ensurepip --upgrade python /tmp/get-pip.py 但这样的分平台执行方式,不够简单。所以,我们考虑将 整个 pip 安装过程封装成一个面向全平台的 Python 脚本,如果需要安装时,直接运行该脚本即可。而脚本需要做的事,是检测 pip 未安装的情况下,执行对应当前 Python 版本的 pip 安装过程。有: import os import subprocess import sys import tempfile import urllib.request def is_pip_installed(): try: subprocess.run([sys.executable, \"-m\", \"pip\", \"--version\"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return True except subprocess.CalledProcessError: return False def download_get_pip(temp_dir): url = \"https://bootstrap.pypa.io/get-pip.py\" file_path = os.path.join(temp_dir, \"get-pip.py\") print(f\"Downloading {url} to {file_path}...\") urllib.request.urlretrieve(url, file_path) return file_path def run_get_pip(file_path): print(f\"Running {file_path}...\") subprocess.run([sys.executable, file_path], check=True) def main(): if is_pip_installed(): print(\"pip is already installed.\") else: # Create a temporary directory with tempfile.TemporaryDirectory() as temp_dir: # Download get-pip.py file_path = download_get_pip(temp_dir) # 
Run get-pip.py run_get_pip(file_path) if __name__ == \"__main__\": main() 将上方的脚本保存为 install_pip.py 文件。我们只需要 将该脚本拷贝到相应平台,并执行脚本 即可: python install_pip.py 同理,对于案例中需要使用到的 NumPy、Pandas、Matplotlib 三库。我们也采用自动化脚本进行检测和安装。创建脚本 install_math_libs.py 如下: import subprocess import sys def is_package_installed(package_name): try: subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return True except subprocess.CalledProcessError: return False def install_package(package_name): print(f\"Installing {package_name}...\") subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", package_name], check=True) subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True) def main(): packages = [\"numpy\", \"pandas\", \"matplotlib\"] for package in packages: if is_package_installed(package): print(f\"{package} is already installed.\") else: install_package(package) print(f\"{package} has been installed.\") if __name__ == \"__main__\": main() 随后,使用 Python 执行脚本: python install_math_libs.py 如果包已安装,则会输出 \"[基础数学库] is already installed.\"。如果包未安装,则会安装该包并输出 \"[基础数学库] has been installed.\",并显示包的详细信息。 到此,完成基础库的环境准备工作。 第三步,数据预处理: 现在,我们正式进入事例的工作流。 随后的步骤,我们建立 practice_1_mathetics_libs_using.py 脚本后,在其中处理。 首先,在新建脚本的头部添加: import math import numpy as np import pandas as pd import matplotlib.pyplot as plt import matplotlib.cm as cm import matplotlib.gridspec as gridspec from mpl_toolkits.mplot3d import Axes3D 导入工程使用的核心库。 根据 ,我们需要的目标可视化数据,来自于对 CSV 表中数据做简单处理所得。因此,首先应将表中有效数据提取出来,有: california_housing_dataframe = pd.read_csv( \"https://download.mlcc.google.cn/mledu-datasets/california_housing_train.csv\", sep=\",\") california_housing_dataframe = california_housing_dataframe.reindex( np.random.permutation(california_housing_dataframe.index) ) 其中,california_housing_dataframe.reindex 的目的是打乱 样本数据 的行顺序。用以确保数据在后续处理和分析过程中是 随机的,有助于避免因数据顺序带来的偏差。 我们的两个目标关键数据,分别为 “归一化后的房价” 和 “人均占有房间数”,而这两个量并不在原表中。需根据 california_housing_dataframe 已有数据,通过 计算获取 这两个值。而为了区别用处起见(例如,后续我们需要用 “人均占有房间数” 作为回归特征,来建立其与 “归一化后的房价” 的线性回归模型),我们定义两个方法,分别用于 生成补充 “人均占有房间数” 的新特征表,和 只有遴选特征计算得到 “归一化后的房价” 的靶向特征: def preprocess_features(data): \"\"\" Preprocess the input features from the data. Args: data (pd.DataFrame): The input data containing various features. Returns: pd.DataFrame: A DataFrame containing the selected and processed features. \"\"\" selected_features = data[ [\"latitude\", \"longitude\", \"housing_median_age\", \"total_rooms\", \"total_bedrooms\", \"population\", \"households\", \"median_income\"] ] processed_features = selected_features.copy() processed_features[\"rooms_per_person\"] = ( data[\"total_rooms\"] / data[\"population\"] ) return processed_features def preprocess_targets(data, need_normalize): \"\"\" Preprocess the target values from the data. Args: data (pd.DataFrame): The input data containing the target values. need_normalize: Whether to normalize the output median_house_value Returns: pd.DataFrame: A DataFrame containing the processed target values. 
\"\"\" output_targets = pd.DataFrame() output_targets['median_house_value_is_high'] = ( (data['median_house_value'] > 265000).astype(float) ) output_targets[\"median_house_value\"] = ( data[\"median_house_value\"] / 1000.0 ) if need_normalize: output_targets[\"median_house_value\"] /= output_targets[\"median_house_value\"].max() return output_targets 通过 preprocess_features 方法,建立包含 rooms_per_person 信息的新 pd.DataFrame 用于 和 补充替换 原 california_housing_dataframe 数据的作用,而作为基础信息使用。通过 preprocess_targets 方法,建立只有 median_house_value 信息的新 pd.DataFrame 用于处理 。 调用两个方法,并取 CSV 表的头部 17000 个数据作为有效数据,有: total_examples = preprocess_features(california_housing_dataframe.head(17000)) total_targets = preprocess_targets(california_housing_dataframe.head(17000), True) print(\"total::\\n\") print(total_examples.describe()) print(total_targets.describe()) 其中,total_examples 即新特征表,total_targets 即靶向特征。获得预处理完毕的数据,可以开始进行绘制了。 第四步,结果可视化: 当下我们已经取得了需要的数据内容,只用通过 Matplotlib 将数据展示即可。由于 中存在 两种图样类型。为了方便起见,我们依然采用封装的形式,将对应类型图表的绘制流程函数化使用。有: def ploting_2d_histogram(examples, targets): \"\"\" Plot a 2D histogram of the examples and targets. Args: examples (pd.DataFrame): The input features to plot. targets (pd.DataFrame): The target values to plot. Returns: None \"\"\" # Create a new figure with a specified size plt.figure(figsize=(13.00, 9.00)) # Add a 2D subplot to the figure plt.subplot(1, 1, 1) # Set the title and labels for the 2D plot plt.title(\"California Housing Validation Data\") plt.xlabel(\"Longitude\") plt.ylabel(\"Latitude\") plt.autoscale(False) plt.ylim([32, 43]) plt.xlim([-126, -112]) # Create a 2D scatter plot plt.scatter( examples[\"longitude\"], examples[\"latitude\"], cmap=\"coolwarm\", c=targets ) # Display the plot plt.show() def ploting_3d_histogram(examples, targets, z_label): \"\"\" Plot a 3D histogram of the examples and targets. Args: examples (pd.DataFrame): The input features to plot. targets (pd.DataFrame): The target values to plot. 
z_label (string): The Z-Label descriptions Returns: None \"\"\" # Create a new figure with a specified size fig = plt.figure(figsize=(13.00, 9.00)) # Add a 3D subplot to the figure ax = fig.add_subplot(111, projection='3d') # Set the title and labels for the 3D plot ax.set_title(\"California Housing 3D Data\") ax.set_xlabel(\"Longitude\") ax.set_ylabel(\"Latitude\") ax.set_zlabel(z_label) # Create a 3D scatter plot scatter = ax.scatter( examples[\"longitude\"], examples[\"latitude\"], targets, c=targets, cmap=\"coolwarm\" ) # Add a color bar which maps values to colors cbar = fig.colorbar(scatter, ax=ax, shrink=0.5, aspect=5) cbar.set_label('Color State') # : Set initial view angle ax.view_init(elev=30, azim=30) # Display the plot plt.show() 而在完成函数化后,绘制的过程就很简单了,直接调用方法即可: ploting_2d_histogram(total_examples, total_targets[\"median_house_value\"]) ploting_3d_histogram(total_examples, total_targets[\"median_house_value\"], \"Median House Value (in $1000's)\") ploting_3d_histogram(total_examples, total_examples[\"rooms_per_person\"], \"Rooms/Person\") 最终,通过 Python 执行 practice_1_mathetics_libs_using.py 脚本,就能得到想要的结果了。执行成功会获得 3 张图表: 图 5-1 模拟加利福利亚房价中位值 2D 热力图 图 5-2 模拟加利福利亚区域房价中位值 3D 热力图 图 5-3 模拟加利福利亚人均占有房间数 3D 热力图 至此,对基础库的练习完毕。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 "},"Chapter_5/Language/cn/Docs_5_1_2.html":{"url":"Chapter_5/Language/cn/Docs_5_1_2.html","title":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)","keywords":"","body":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio) 在完成对基础库的熟悉后,我们接下来需要做的就是对工程中,音视频分析的相关核心功能库的学习。以音频分析库为切入点。 如果期望对 一段音频(或音频流)进行解读,根据我们已有的认知,将当前的音频数据从封装的音频格式,还原为采样模拟信号对应的 PCM 数字信号载体 只是第一步。该操作是后续所有工作的起点。 而音频格式在前文已有介绍,分为 三大类别,即 无压缩编码格式、无损压缩编码格式、有损压缩编码格式。虽然能通过一些针对 某单个类型 或 类型族 的 音频编解码库 来做解码工作,但我们在分析过程中,更希望能够通过 简单而统一 的方式,排除掉格式本身细部的工程干扰。使我们能够更关注于对 音频所含有信息本身 的分析。 既然如此,为何不直接使用大名鼎鼎的 FFMpeg 来完成从 编解码到分析,甚至是 重排、编辑 等操作呢? 
其中的关键就在于,FFMpeg 虽然功能强大,但在以 实时处理、数据集成、特征提取 等为主要应用场景的音频分析情况下,FFMpeg 并不具备足够的优势。更不用提 Python 的使用环境 和 对断点调试临时插值,与 基础库的高度兼容 方面的要求了(尤其对 模型训练时,提取的数据能够 直接被训练过程使用 的这一点)。 所以,音频分析场景,除非只需要当前音视频数据的 元信息(Metadata),即 头部信息(Header),一般会采用以下这些库来进行。至于 FFMpeg ,在实际使用中会把其核心能力局限于 编解码 和 转码 的范围里,虽然 其核心库 和 辅助插件 是包含了包括滤镜在内的多种功能的,但通常我们只会以 最简形式接入。这一部分,伴随着网络推拉流协议和更贴近于规格的编解码协议库(如 x264 等),将在本系列书籍的进阶篇中细讲。此处暂不做更进一步的讨论。 现在,让焦点回到音频分析库上。常用的音频分析库主要有四个,为 SoundFile、PyAudio、Librosa、Aubio,分别对应 [ 音频文件读写、音频流数据的输入输出、工程乐理分析、实时音频处理 ] 的需求。 SoundFile(Python Sound File) SoundFile(PySoundFile [Python Sound File]) 是一个 用于读写音频文件的 Python 库,主要被用于解码(或者编码)常用的 音频格式文件 [4] 。例如前文介绍过的 WAV、AIFF、FLAC 等大多数常见音频格式,SoundFile 都已完整支持。并且,通过 SoundFile 取出的音频数据,可以和其他音频分析库(如 Librosa、Aubio 等)和科学计算库(如 NumPy、SciPy 等)配合使用。 实际上,SoundFile 核心能力来自于 C开源库 Libsndfile,正是 Libsndfile 为它 提供了多种音频文件格式的支撑。而 PySoundFile 则可以看做是 Libsndfile 这个 C语言库的 Python 套接访问入口。因此,如果我们在常规工程中存在对音频文件的读写需求,不妨考虑采用 Libsndfile 来处理,它的官网位于 http://www.mega-nerd.com/libsndfile/ ,含有该库的相关技术参数。 主要功能: 支持 WAV、AIFF、FLAC、OGG 等多种常见 音频文件格式,适用于 广泛的 音频读写需求 支持长音频处理,提供快速读写大文件的功能,并可用于临时性的(分块)流式处理 提供 高可定制化的 API,允许用户自定义音频处理流程和数据操作,适合快速分析 允许以不同的数据格式(如浮点型、整型)读取和写入音频数据,及 基本元数据访问 与主流科学计算库(如 NumPy、Pandas、SciPy 等)的 无缝集成 单一的文件操作专精库,不存在多个子模块,仅有有限但明确的 API 入口 基础库(sf.)的常用函数(简,仅列出名称): 数据结构: 关联文件: open 音频读写: read, write 基本信息: info 核心类(sf.SoundFile 即 )的常用函数(简,仅列出名称): 基础参数: samplerate, channels, format, subtype, endian, frames 帧位索引: seek, tell 数据访问: read, write, read_frames, write_frames 分块读写: buffer_read, buffer_write 由上可知,SoundFile 本身的调用极其简便,但已满足完整的音频文件读写需求。开源项目位于 Github:bastibe/python-soundfile。使用细节,可自行前往 官方档案馆查阅。 PyAudio(Python Audio) PyAudio(Python Audio) 是音频分析中 常用的音频输入输出操作库,即 音频 I/O 库 [5] 。换句话说,它提供了一组工具和函数,使得开发者可以在项目的 Python 程序中,利用 PyAudio 已有的函数接口,快速进行音频的流式(这里指本地流)录制和输出。同 SoundFile 一样,PyAudio 依赖于底层 C语言库 PortAudio 的帮助,而其内核 PortAudio 库实则为一个 专精于多种操作系统上运行(即跨 Windows、MacOS、Linux 平台)的底层音频输入输出(I/O)库。 所以,与 SoundFile 注重于对音频文件(即本地音频流结果)的操作不同,PyAudio 或者说 PortAudio 的操作重点,在于 处理对 “实时” 音频流的捕获和析出。实时音频流,是能够被连续处理传输的音频数据,例如采样自麦克风输入模数转换后的持续不断的数字信号,或者取自播放音频的连续到来分块数据,即 过程中音频数据。 由此,音频分析中常用 PyAudio 来完成对被分析音频的 “启停转播”(Play/Stop/Seek/Pause),所谓 音频本地流控(LASC [Local Audio Stream Control])。 主要功能: 专业音频本地流控 Python 库,支持实时音频流的捕获和播放,适合 实时音频处理任务 稳定的 跨平台兼容性,完整覆盖主流操作系统,包括 Windows、macOS 和 Linux 灵活的 音频流配置,提供多种配置选项,如采样率、通道数、样本格式、缓冲区大小等 提供 接入式回调,支持使用回调函数处理音频数据,适合低延迟的实时音频分析 与主流科学计算库 和 其他音频库(如 SoundFile)的 无缝集成 单一的音频本地流读写专精库,不存在多个子模块,仅有有限但明确的 API 入口 基础库(pyaudio.)由于特殊的套接设计,仅用于创建 即 PortAudio 实例: 数据结构: 、 创建实例: PyAudio 核心类(pyaudio.PyAudio 即 设备实例)的常用函数(简,仅列出名称): 销毁实例: terminate 联音频流: open (返回 实例,通过 stream_callback 参数配置回调) 设备查询: get_device_count, get_device_info_by_index, get_host_api_count, get_default_input_device_info, get_default_output_device_info, get_host_api_info_by_index, get_device_info_by_host_api_device_index 参数查验: get_sample_size, is_format_supported 核心类的(pyaudio.Stream 即 音频流实例)的常用函数(简,仅列出名称): 音频流启停: start_stream, stop_stream 音频流关闭: close(注意, 的 open 状态来自于设备实例,亦是其初始状态) 流状态检测: is_active, is_stopped 流数据读写: read, write 余下使用细节,可自行前往 项目官网 ,或 官方档案馆查阅。 上述关键函数已包含 PyAudio 的 几乎全部调用,但并没有列出 PyAudio 回调格式。这是因为,这一部分正是 PyAudio 分析适用性的关键。在具体使用中,PyAudio 回调 的设定方式,和回调各参数意义与取值,是我们留意的重点。 参考 PyAudio 0.2.14 当前最新版,回调的设置方式和格式都是固定的,有: def callback(in_data, frame_count, time_info, in_status): # 在此处处理音频数据(例如,进行实时分析或处理) return (out_data, out_status) p = pyaudio.PyAudio() stream = p.open( format=p.get_format_from_width(2), channels=1 if sys.platform == 'darwin' else 2, rate=44100, input=True, output=True, stream_callback=callback ) 其中,callback(in_data, frame_count, time_info, status) 即 回调传入,包含四个关键参: in_data 为 
音频数据的输入流,通常配合 np.frombuffer(in_data, dtype=np.int16) 读取数据 frame_count 为 输入流当前数据对应音频帧数,即当前 in_data 数据覆盖的 帧数 time_info 是一个包含了 三个设备相关时间戳 的 数据字典,有参数(注意表述): input_buffer_adc_time 表示 输入音频数据被 ADC 处理时的时间戳(如果适用) output_buffer_dac_time 表示 输出音频数据被 DAC 处理时的时间戳(如果适用) current_time 表示 当前时间,即 当前调用触发时的系统时间戳 in_status 是 记录当前输入回调时,流状态的枚举类标识。可取三个状态常量,分别是: pyaudio.paContinue 表示 流继续,即恢复播放和正常播放时的状态,也是默认状态 pyaudio.paComplete 表示 流完成,即代指当前输入流数据为最末尾的一组 pyaudio.paAbort 表示 流中止,即立刻停止时触发,一般为紧急关流或异常情况 在 callback 处理完毕后,回调要求以 return (out_data, out_status) 的 格式返回。同样: out_data 为 音频数据的输出流,根据协定好的音频 PCM 位数对应的格式输出,一般同输入 out_status 是 记录当前输出的状态,同 in_status 的可取值一致,一般同 in_status 不变 配置好 callback 后,我们该如何使用呢?只需要于 实例调用 open 开启流 实体时,以 stream_callback=callback 将 函数句柄以参数传入 即可生效。而这里的 callback 也可 根据具体情况修改命名,比如 audio_analyze_callback 。 随之就可以在回调中,完成分析作业了。 Librosa Librosa 是一个功能强大且易于使用的 音频/乐理(工程)科学分析原生 Python 库,成体系的提供了用于 音频特征提取、节拍节奏分析、音高(工程)估计、音频效果器(滤波、特效接口) 等处理的算法实现。其设计理念来自于 SciPy 2015 年的第十四届 Python 科学大会中,有关音频处理、音频潜藏信息提取与分析快捷化的讨论 [6] 。因此,在设计之初就完全采用了,与其他科学计算库(如 NumPy、SciPy)和可视化库(主要指 Matplotlib)的 无缝集成。而极强的分析能力和可操作性(工程层面),使 Librosa 成为了我们做 音频分析与操作时的重要工具。 必须熟练掌握。 主要功能: 临时处理友好,提供简便的方法,在必要时做临时读取和写入音频文件,支持多种格式 快速时频转换,提供短时傅里叶变换(STFT)、常规Q变换(CQT)等,方便时频域分析 音频特征提取,支持对梅尔频率倒谱系数(MFCC)、色度特征、频谱对比度等特征提取 节拍节奏分析,具有节拍跟踪、起音检测等,音乐(工程)分析能力 分割与重采样,提供音频分割与重采样工具,便于快速分析对比 调音与音频特效,具有音高估计和调音功能,并支持音频时间伸缩和音高变换等音频效果 当然还有最重要的【无缝集成】特性 基础库(librosa.)的常用函数(简,仅列出名称): 音频加载: load, stream 音频生成: clicks, tone, chirp 简化分析: to_mono, resample, get_duration, get_samplerate 时频分析: stft, istft, reassigned_spectrogram, cqt, icqt, hybrid_cqt, pseudo_cqt, vqt, iirt, fmt, magphase 时域校准: autocorrelate, lpc, zero_crossings, mu_compress, mu_expand 谐波分析: interp_harmonics, salience, f0_harmonics, phase_vocoder 相位校准: griffinlim, griffinlim_cqt 响度单位换算: amplitude_to_db, db_to_amplitude, power_to_db, db_to_power, perceptual_weighting, frequency_weighting, multi_frequency_weighting, A_weighting, B_weighting, C_weighting, D_weighting, pcen 时轴单位换算: frames_to_samples, frames_to_time, samples_to_frames, samples_to_time, time_to_frames, time_to_samples, blocks_to_frames, blocks_to_samples, blocks_to_time 频率单位换算: hz_to_note, hz_to_midi, hz_to_svara_h, hz_to_svara_c, hz_to_fjs, midi_to_hz, midi_to_note, midi_to_svara_h, midi_to_svara_c, note_to_midi, note_to_svara_h, note_to_svara_c, hz_to_mel, hz_to_octs, mel_to_hz, octs_to_hz, A4_to_tuning, tuning_to_A4 基底频率生成: fft_frequencies, cqt_frequencies, mel_frequencies, tempo_frequencies, fourier_tempo_frequencies 乐理乐谱工具: key_to_notes, key_to_degrees, mela_to_svara, mela_to_degrees, thaat_to_degrees, list_mela, list_thaat, fifths_to_note, interval_to_fjs, interval_frequencies, pythagorean_intervals, plimit_intervals 乐理音高音调: pyin, yin, estimate_tuning, pitch_tuning, piptrack 适配杂项: samples_like, times_like, get_fftlib, set_fftlib 图表显示扩展(librosa.display.)的常用函数(简,仅列出名称,依赖于 Matplotlib): 数据可视化: specshow, waveshow 坐标轴设置: TimeFormatter, NoteFormatter, SvaraFormatter, FJSFormatter, LogHzFormatter, ChromaFormatter, ChromaSvaraFormatter, ChromaFJSFormatter, TonnetzFormatter 适配杂项: cmap, AdaptiveWaveplot 音频特征提取(librosa.feature.)的常用函数(简,仅列出名称): 工程频谱特征: chroma_stft, chroma_cqt, chroma_cens, chroma_vqt, melspectrogram, mfcc, rms, spectral_centroid, spectral_bandwidth, spectral_contrast, spectral_flatness, spectral_rolloff, poly_features, tonnetz, zero_crossing_rate 乐理节奏特征: tempo, tempogram, fourier_tempogram, tempogram_ratio 特征计算: delta, stack_memory 反向逆推: inverse.mel_to_stft, inverse.mel_to_audio, inverse.mfcc_to_mel, inverse.mfcc_to_audio 起音检测扩展(librosa.onset.)的常用函数(简,仅列出名称): 峰值检测: 
onset_detect 小值回溯: onset_backtrack 强度统计: onset_strength, onset_strength_multi 节拍节奏扩展(librosa.beat.)的常用函数(简,仅列出名称): 节拍追踪: beat_track 主位脉冲: plp 语谱分解扩展(librosa.decompose.)的常用函数(简,仅列出名称): 特征矩阵分解: decompose 源分离滤波: hpss, nn_filter 音频效果器扩展(librosa.effects.)的常用函数(简,仅列出名称): 谐波乐源分离: hpss, harmonic, percussive 时间伸缩: time_stretch 时序混音: remix 音高移动: pitch_shift 信号操控: trim, split, preemphasis, deemphasis 时域分割扩展(librosa.segment.)的常用函数(简,仅列出名称): 自相似性: cross_similarity, path_enhance 重复矩阵: recurrence_matrix, lag_to_recurrence 延迟矩阵: timelag_filter, recurrence_to_lag 时域聚类: agglomerative, subsegment 顺序模型扩展(librosa.sequence.)的常用函数(简,仅列出名称): 顺序对齐: dtw, rqa 维特比(Viterbi)解码: viterbi, viterbi_discriminative, viterbi_binary 状态转移矩阵: transition_uniform, transition_loop, transition_cycle, transition_local 跨库通用扩展(librosa.util.)的常用函数(简,仅列出名称): 数组转换: frame, pad_center, expand_to, fix_length, fix_frames, index_to_slice, softmask, stack, sync, axis_sort, normalize, shear, sparsify_rows, buf_to_float, tiny 条件匹配: match_intervals, match_events 统计运算: localmax, localmin, peak_pick, nils, cyclic_gradient, dtype_c2r, dtype_r2c, count_unique, is_unique, abs2, phasor 输入评估: valid_audio, valid_int, valid_intervals, is_positive_int 本库样例: example, example_info, list_examples, find_files, cite 具体使用细节,可自行前往项目 官方档案馆查阅 。 Librosa 在音频方面,涵盖了大多数基本的科学分析手段,足够一般工程使用。 但在 数据科学方面 和 集成性 的高度倾注,也让 Librosa 的 实时性相对有所降低(本质为复杂度和精度上升,所伴随算力消耗的升高)。可若此时我们对误差有相对较高的容忍度,且更希望音频处理足够实时和高效时,就得采用 Aubio 库来达成这一点了。Aubio 和 Librosa 的特性相反,是满足这种情况有效补充手段。 Aubio Aubio 是主要用于 音乐信息检索(MIR [Music Information Retrieval]) 的 跨平台轻量级分析库。设计之初就是期望实时进行 MIR 使 Aubio 采用了 C语言 作为库的核心语言。不过,因其已在自身的开源项目中,实现了 Python 的套接调用入口 [7] ,我们仍然可以在 Python 中使用。 功能性方面,Aubio 和 Librosa 在音频浅层信息处理上,如果排除效率因素,则几乎不相上下。但 Aubio 的处理效率,不论从整体架构还是本位支撑上,都着实比 Librosa 更加高效。 因此,在音频分析领域,对于类似 ‘音高检测’ 等以实时性作为主要求的分析点,我们常采用 Aubio 而不是 Librosa 处理。而对于 梅尔频率倒谱系数(MFCC)之类的科学分析,则多数用 Librosa 解决,虽然 Aubio 也有此功能。除此外,科学分析不以 Aubio 合并解决的另一原因,还在于 Aubio 对主流科学计算库的兼容程度,要略逊 Librosa 一筹,并向当局限。即有利有弊。 此外,相比 Librosa,Aubio 仅能提供相对基础的分析。 主要功能: 实时处理能力,面向低延迟的音频处理能力,专为快速高效设计 专精通用检测,提供 节拍检测、起音检测、音符分割等通用基础音频分析 简易实时效果,提供快速重采样、过滤、归一化能力,只能实现部分简易效果 跨平台支持,可以在主流操作系统(Windows、macOS、Linux)上运行 有限集成性,提供 Python 入口,虽不完美兼容计算库,但仍可有效利用实时特性 受局限的调用方式,但官方提供了很多样例,学习门槛较低 基础库(aubio.)对常用过程的类封装(简,仅列出名称): 数据读写: 、 乐理分析: 、 、 、 、 频谱分析: 、 、 、 、 、 简易滤波: 一些常用过程封装的常用操作简示(非所有,仅列出名称): 音高 相关:[entity]([source]), [entity].set_unit, [entity].set_tolerance 节奏 相关:[entity]([source]), [entity].get_bpm 起音 检测:[entity]([source]), [entity].set_threshold 音频写入 类: [entity].close 音频读取 类: [entity].seek, [entity].close 官方样例,可从 项目官网 获取,而各个封装结构内的 额外参数配置/获取方式,可查阅 官方档案馆查阅 。 由于是 C语言库,其 Python 套接后的使用形式,也 相对更接近 C 的使用习惯。所以,Aubio 的的过程类,在创建实体时就需要传入配置参数,如下例: # 创建音频源读取实例 source = aubio.source('example.wav', 44100, 512) # 创建音频写入实例 sink = aubio.sink('output.wav', 44100, 1) # 创建音高检测实例 pitch_o = aubio.pitch(\"yin\", 1024, 512, 44100) pitch_o.set_unit(\"Hz\") pitch_o.set_silence(-40) # 创建节拍检测实例 tempo_o = aubio.tempo(\"default\", 1024, 512, 44100) # 创建起音检测实例 onset_o = aubio.onset(\"default\", 1024, 512, 44100) # 创建音调检测实例 notes_o = aubio.notes(\"default\", 1024, 512, 44100) # 创建离散余弦变换实例 dct_o = aubio.cqt(16) # 创建快速傅里叶变换实例 fft_o = aubio.fft(1024) # 创建梅尔频率倒谱系数实例 mfcc_o = aubio.mfcc(40, 1024, 44100) # 创建滤波器组实例 filterbank_o = aubio.filterbank(40, 1024) # 创建频谱描述符实例 specdesc_o = aubio.specdesc(aubio.specdesc_type.centroid, 1024) # 创建相位声码器实例 pvoc_o = aubio.pvoc(1024, 512) 上述过程中,我们进行了一些配置,基本涵盖了 Aubio 在 Python 上的 大部分经常被使用到的实用功能 。以上例中的配置,对创建的实体意义进行说明,有: 音频读取():读取 example.wav,采样率 44100 Hz,每次读取 512 帧 音频写入():写入 output.wav,采样率 44100 Hz,单声道 音高检测():yin 
算法,窗口 1024/跳频 512/采样率 44100 Hz,静音阈 -40 dB 节拍检测():使用默认算法,窗口 1024,跳频 512,采样率 44100 Hz 起音检测():使用默认算法,窗口 1024,跳频 512,采样率 44100 Hz 音调检测():使用默认音集,窗口 1024,跳频 512,采样率 44100 Hz 离散余弦变换():离散余弦变换,以 16 个由短至长余弦周期构成解集(见前文) 快速傅里叶变换():快速傅里叶变换,窗口 1024 梅尔频率倒谱系数():提取 MFCC,梅尔带 40,窗口 1024,采样率 44100 Hz 滤波器组():分解为 40 个频率带,窗口 1024 频谱描述符():提取频谱描述符,计算频谱流,窗口 1024 相位声码器():配置相位声码器,窗口 1024/每次取 512 个样本 (即跳频 512) 而其使用时的方式,由于是以 __call__ 的 Python 调用实现的,有: # 读取音频数据并处理 while True: samples, read = source() # 音高检测 pitch = pitch_o(samples)[0] print(f\"Detected pitch: {pitch} Hz\") # 节拍检测 is_beat = tempo_o(samples) if is_beat: print(f\"Beat detected at {source.positions}\") # 起音检测 is_onset = onset_o(samples) if is_onset: print(f\"Onset detected at {source.positions}\") # 音调检测 notes = notes_o(samples) print(f\"Detected notes: {notes}\") # 离散余弦变换 dct_data = dct_o(samples) print(f\"DCT Data: {dct_data}\") # 快速傅里叶变换 fft_data = fft_o(samples) print(f\"FFT Data: {fft_data}\") # 提取梅尔频率倒谱系数 mfcc_data = mfcc_o(samples) print(f\"MFCC Data: {mfcc_data}\") # 滤波处理 filtered_data = filterbank_o(samples) print(f\"Filtered Data: {filtered_data}\") # 提取频谱描述符 specdesc_data = specdesc_o(samples) print(f\"Spectral Descriptor: {specdesc_data}\") # 使用 pvoc 对象处理样本 spec = pvoc_o(samples) spectrogram.append(spec) # 写入音频数据 sink(samples, read) if read 即,直接用创建并配置好的对应功能实体,循环取 获取的 采样片段 samples 传入,就可以得到检测处理结果了。可见,Aubio 的使用非常的 “面向过程”,创建出的实体,与其说是 “对象”,不如说是对 “过程的封装”。 从 Aubio 的设计体现出了,其作为库的有限调用方式,并没有为使用者提供基于调用侧的功能扩展入口。 所以,除实时处理外,Aubio 的能力有限。只适合作为 补充手段 应用于分析中。 四个关键音频库介绍完毕,那么现在,让我们用它们做些简单的实践。 简单练习:用 常用音频库 完成 带有实时频响图的音频播放器 为了相对可能的便利,我们需要让这个练习用播放器有一个 UI 界面,且能根据需要的自主选择音频文件。而 波形图(Waveform) 就是整个音频所有频段在 波形切面(TLS) 叠加后的投影。 对于界面,我们需要引入 Tkinter 库来协助进行绘制。Tkinter 是 Python 标准模块其中之一,专用于创建图形用户界面(GUI)的工具,提供了一系列简易的按钮、图表、交互组件和标准布局。这里只需了解即可。 练习事例按照标准工程工作流进行。 第一步,确立已知信息: 数据来源:用户自选的 \".wav .flac *.mp3\" 音频格式文件(如需可自行在源码中拓展) 处理环境:依赖 、,Python 脚本执行 工程目标: 1) 提供一个具有 GUI 的简易音频格式文件播放器,自选择播放音频文件,可控播放/暂停 2) 图形界面显示选定音频文件的波形图,并提供 Seekbar 可进行 Seek 操作 第二步,准备执行环境: 检测是否已经安装了 Python 和 pip(对应 Python 版本 2.x) 或 pip3(对应 Python 版本 3.x) 包管理器。此步骤同我们在 的练习 中的操作一致,执行脚本即可: python install_pip.py python install_math_libs.py 完成对 Python 环境 的准备和 的安装。具体脚本实现,可回顾上一节。 同理,对于 的准备工作,我们也按照脚本方式进行流程化的封装。创建自动化脚本 install_acoustic_libs.py 如下: import subprocess import sys import platform def is_package_installed(package_name): try: subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return True except subprocess.CalledProcessError: return False def install_package(package_name): print(f\"Installing {package_name}...\") subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", package_name], check=True) subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True) def is_portaudio_installed(): try: if platform.system() == \"Darwin\": # macOS result = subprocess.run([\"brew\", \"list\", \"portaudio\"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) elif platform.system() == \"Linux\": result = subprocess.run([\"dpkg\", \"-s\", \"portaudio19-dev\"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) else: return True # Assume portaudio is handled manually on other platforms return result.returncode == 0 except subprocess.CalledProcessError: return False def install_portaudio(): if platform.system() == \"Darwin\": # macOS print(\"Installing portaudio using Homebrew...\") subprocess.run([\"brew\", \"install\", \"portaudio\"], check=True) elif platform.system() == \"Linux\": print(\"Installing portaudio using 
APT...\") subprocess.run([\"sudo\", \"apt-get\", \"install\", \"-y\", \"portaudio19-dev\"], check=True) else: print(\"Please install portaudio manually for your platform.\") sys.exit(1) def main(): packages = [\"soundfile\", \"pyaudio\", \"librosa\"] for package in packages: if package == \"pyaudio\": if not is_portaudio_installed(): install_portaudio() if is_package_installed(package): print(f\"{package} is already installed.\") else: install_package(package) print(f\"{package} has been installed.\") else: if is_package_installed(package): print(f\"{package} is already installed.\") else: install_package(package) print(f\"{package} has been installed.\") if __name__ == \"__main__\": main() 此处有个流程上的关键,即 PyAudio 依赖于 PortAudio 库提供的 音频输入输出设备拨接。我们需要在安装 PyAudio 前,先行安装 PortAudio 以保证 PyAudio 的正常执行,否则会报如下的 IO访问错误: OSError: [Errno -9986] Internal PortAudio error PyAudio 的安装过程由于 未配置对 PortAudio 的强依赖标注,且 PortAudio 并未提供 pip 的可用包。因此,不会在 pip 包管理安装过程中,自行获取前置库。需要我们 手动在脚本中完成 检测 与 安装。 随后,使用 Python 执行脚本: python install_acoustic_libs.py 如果包已安装,则会输出 \"[基础音频库] is already installed.\"。如果包未安装,则会安装该包并输出 \"[基础音频库] has been installed.\",并显示包的详细信息。 到此,完成音频库的环境准备工作。 为什么建议 采用执行脚本的形式,对需要的库进行准备流水封装呢?因为这是一个非常好的习惯。而随着工作的积累,相关的 工具库快速部署脚本会逐步的累积,形成足够支撑大部分情况的 一键部署工具集。在这过程中,工程师 可以养成对环境准备以流水线方式处理的逻辑链,使之后再遇到新的情况时,也能快速的理清思维,便于减轻维护工作压力。 第三步,搭建音频播放器: 由于只是个简易播放器,我们选择在单一文件中实现所有基本功能。 首先,需要思考一下,必要包含于 GUI 的交互组件都有哪些。有: 停止(Stop):用于在音频开始播放后,停止播放并重置音频到起始位置; 播放/暂停(Play/Pause):用于控制音频的播放,与过程中暂停; 打开(Open):用于满足选择要播放的音频格式文件; 进度条(Seekbar):用于提供 Seek 功能,并实时显示播放进度 而纯粹的用于显示展示于 GUI 的组件,只有: 波形图(Waveform):在 “打开” 选择音频文件后,显示该音频波形图; 至此,我们获得了此播放器的基本交互逻辑。 图 5-4 简易音频播放器的交互逻辑关系示意图 根据上图交互关系,将每一个节点作为函数封装,就能轻松完成相关实现了。编写代码: import tkinter as tk from tkinter import filedialog import numpy as np import soundfile as sf import pyaudio import threading import queue import matplotlib.pyplot as plt from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg class AudioPlayer: def __init__(self, root): self.root = root self.root.title(\"Simple Audio Player\") # Initialize pyaudio self.pyaudio_instance = pyaudio.PyAudio() # Create control buttons frame self.control_frame = tk.Frame(self.root) self.control_frame.pack(side=tk.TOP, fill=tk.X) self.stop_button = tk.Button(self.control_frame, text=\"Stop\", command=self.stop_audio) self.stop_button.pack(side=tk.LEFT) self.play_pause_button = tk.Button(self.control_frame, text=\"Play\", command=self.toggle_play_pause) self.play_pause_button.pack(side=tk.LEFT) self.open_button = tk.Button(self.control_frame, text=\"Open\", command=self.open_file) self.open_button.pack(side=tk.LEFT) self.playing = False self.audio_data = None self.fs = None self.current_frame = 0 self.stream = None # Create matplotlib figure and axes for waveform display self.fig, self.ax_waveform = plt.subplots(figsize=(6, 3.6)) self.canvas = FigureCanvasTkAgg(self.fig, master=self.root) self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1) # Create progress bar self.progress_frame = tk.Frame(self.root) self.progress_frame.pack(side=tk.TOP, fill=tk.X) self.progress_bar = tk.Scale(self.progress_frame, from_=0, to=1000, orient=tk.HORIZONTAL, showvalue=0) self.progress_bar.pack(fill=tk.X, expand=True) # Timer to update waveform line self.update_interval = 1 # milliseconds # Create thread event to stop update thread self.update_thread_event = threading.Event() # Queue for inter-thread communication self.queue = queue.Queue() # Flag variable to detect if the progress bar is being dragged self.is_seeking = False self.was_playing = False # Mark the 
playback state when seeking # Bind events self.progress_bar.bind(\"\", self.on_seek_start) self.progress_bar.bind(\"\", self.on_seek_end) self.progress_bar.bind(\"\", self.on_seek) # Start thread to update progress bar self.root.after(self.update_interval, self.update_progress_bar) def open_file(self): file_path = filedialog.askopenfilename(filetypes=[(\"Audio Files\", \"*.wav *.flac *.mp3\")]) if file_path: self.audio_data, self.fs = sf.read(file_path, dtype='float32') self.current_frame = 0 duration = len(self.audio_data) / self.fs self.progress_bar.config(to=duration * 1000) # Set the maximum value of the progress bar to the audio duration in milliseconds self.play_pause_button.config(text=\"Play\") self.playing = False self.plot_waveform() def toggle_play_pause(self): if self.playing: self.play_pause_button.config(text=\"Play\") self.playing = False self.pause_audio() self.update_thread_event.set() # Stop update thread else: self.play_pause_button.config(text=\"Pause\") self.playing = True self.update_thread_event.clear() # Clear update thread event threading.Thread(target=self.play_audio).start() def audio_callback(self, in_data, frame_count, time_info, status): end_frame = self.current_frame + frame_count data = self.audio_data[self.current_frame:end_frame].tobytes() self.current_frame = end_frame self.queue.put(end_frame / self.fs * 1000) # Current time (milliseconds) if self.current_frame >= len(self.audio_data): return (data, pyaudio.paComplete) return (data, pyaudio.paContinue) def pause_audio(self): if self.stream is not None: self.stream.stop_stream() self.stream.close() self.stream = None def play_audio(self): self.stream = self.pyaudio_instance.open( format=pyaudio.paFloat32, channels=self.audio_data.shape[1], rate=self.fs, output=True, stream_callback=self.audio_callback ) self.stream.start_stream() def stop_audio(self): self.playing = False self.current_frame = 0 if self.stream is not None: self.stream.stop_stream() self.stream.close() self.stream = None self.play_pause_button.config(text=\"Play\") # Reset the red line to the beginning self.update_thread_event.set() # Stop update thread self.plot_waveform() # Reset waveform plot self.progress_bar.set(0) def plot_waveform(self): self.ax_waveform.clear() time_axis = np.linspace(0, len(self.audio_data) / self.fs, num=len(self.audio_data)) self.ax_waveform.plot(time_axis, self.audio_data) self.ax_waveform.set_title(\"Waveform\") self.ax_waveform.set_xlabel(\"Time (s)\") # Set x-axis label to seconds self.ax_waveform.set_ylabel(\"Amplitude\") self.canvas.draw() def update_progress_bar(self): try: while not self.queue.empty(): current_time = self.queue.get_nowait() if not self.is_seeking: # Only update when not dragging the progress bar self.progress_bar.set(current_time) except queue.Empty: pass self.root.after(self.update_interval, self.update_progress_bar) def on_seek_start(self, event): self.was_playing = self.playing # Record the playback state when seeking if self.playing: self.toggle_play_pause() # Pause playback self.is_seeking = True # Mark that the progress bar is being dragged def on_seek(self, event): # Update current_frame in real-time value = self.progress_bar.get() self.current_frame = int(float(value) / 1000 * self.fs) def on_seek_end(self, event): self.is_seeking = False # Mark that dragging has ended self.plot_waveform() # Update waveform plot if self.was_playing: # If it was playing before, resume playback self.toggle_play_pause() def seek(self, value): if self.audio_data is not None: self.current_frame = 
int(float(value) / 1000 * self.fs) if __name__ == \"__main__\": root = tk.Tk() app = AudioPlayer(root) root.mainloop() 有运行效果如下: 图 5-5 简易音频播放器的运行效果图 至此,对音频库的练习完毕。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 "},"Chapter_5/Language/cn/Docs_5_1_3.html":{"url":"Chapter_5/Language/cn/Docs_5_1_3.html","title":"5.1.3 视频分析库(PyOpenCV、Color-Science)","keywords":"","body":"5.1.3 视频分析库(PyOpenCV、Color-Science) 和音频一样,外部工程里,对视频的分析处理焦点多在于 帧分析,并非于流分析。或者说,有关音视频编解码与网络流的评估,是属于完整编解码工程内部范畴。其更多的是与网络子系统进行结合,并依托于诸如 ITU-T(或其他音视频组织,较少)提出的相关协议(如 H.264、H.265 等)约束之标准规格背景,来作为整体工程中的 子评估系统。所以,音视频流分析(Audio/Video Stream Analysis)和编解码协议是强耦合的,一般会将之归属于 编解码器内部监测 部分,平行于项目的正常作业流水线,来监控各个环节。 而 视频帧分析(Video Frame Analysis) 或 帧处理(Frame Processing) 的有效介入点,是在 编码前(Before encoding) 和 解码后(After decoding)。此时,我们用来处理的数据,已经是纯粹的 色彩格式(Color Format) 数据了。 以解码为例,在解码后的必要环节是什么样的呢? 图 5-6 简易音频播放器的运行效果图 首先,是 颜色空间转换,亦是大量使用第二章知识的地方。一般解码后的图像因为考虑到存储空间成本,会采用 传输格式(Transport Format),即 YUV 体系色彩格式。 不过,只凭借 YUV 是无法做为 唯一且足够泛化的 随后步骤起点的。这并不是指 YUV体系的色彩格式 无法直接交由如 OpenGL、DirectX、Vulkan 等驱动处理,相反这些驱动内部往往已经通过 模式编程方法,完成了一些 固定格式自硬件抽象层(HAL)的映射式转换工作(原理同第二章中,已讲解并推导过的色彩空间转换,部分算子的硬件化实现在驱动层面的组合)。同理于 RGB,在硬件支持的情况下,直接以 YUV 上屏在流程上会更简短。可当我们的目的是需要对每一帧的图片,做 基于传统图形学算法上的调整,或 为模型进行特征分析/提取的预处理 时,未经存储空间压缩并贴近人自然感受的 原色格式(Primaries Format),即 RGB 体系色彩格式,还是会更便于操作。 另外,并不一定是由 YUV 转 RGB,在某些场景,我们也会要求将 RGB 转 YUV,或完成两个体系内的其他细分类型互转。所以,具体如何转换是 由后续步骤所需的输入而定,相当灵活。 在色彩格式转换后,则是 帧分析与预处理步骤。这一步完成 对前者输出帧数据的特征提取与解析。将会使用到相关的分析方法,例如 二维傅立叶 或其他 基础图像算法、滤波核 或 模型接入。此处也是我们本节进行操作的重点。 最后一步是 GPU 上屏缓冲和通信,则需要由 选定的图形驱动(Vulkan 等)来建立相应的信道,提供指令通信和显存更新功能。本节中,这些相关的环境和上屏更新,是由 Python 的 Tinker 界面库走系统 UI 环境 或 常用视频分析库(如 OpenCV)在 库内自行维护。暂不需要我们介入。 而当需要项目自行处理驱动和 GPU 通信环境上下文维护时,整个渲染引擎的部分,都应当在 同一个主体环境下(也可以用代表其通信句柄名的,实时上下文/通信上下文,来代指),辅助其他(如果需要)用于 时间片复用 或 GPU 信令预封装 的 辅助环境(如 延迟上下文 或 类似的自定义指令组装结构)使用。从而方便各个 前后关联密切环节的处理结果,在 GPU 资源池中实现互通。 这一涉及驱动资源协同和池化设计的部分,就属于 图形引擎(Graphics Engine) 的关键处理技术之一了。让我们在未来的进阶一册中再单独讲解。 常用的视频分析库主要有两个,为 Colour-Science、PyOpenCV,分别对应 [ 颜色科学综合分析、图像处理与科学计算 ] 的需求。常被用于 工程原型验证(即设计思路的验证) 和 外部(指工程外)帧分析。 尤其是 PyOpenCV,该库是重中之重。不仅是视频分析的核心库,在业务中也会经常直接使用到它的 C++ 内核。 Colour-Science(Color-Science) Colour-Science(Color-Science) 是一个专注于 色彩科学计算、光谱分析、色彩转换 和 色彩管理 的 Python 计算库。其由 Colour Developers 开发和维护,旨在为色彩科学领域的研究和应用提供一个 全面而强大的工具集 [8] 。注意区别库名为 Colour-Science 。 主要功能: 色彩空间转换,支持 CIE 标准下的 RGB、XYZ、LAB、LUV 等各种 色彩空间 转换与互转 支持色彩科学如 黑体辐射、辐射亮度、色温 等的 物理量评估 提供感官量与科学量间的换算,支持 配色函数 和 CIE 统一化色彩差异对比计算 支持由设备制造商提供的 LUT、CSV、XRite 等 不同种色彩配置文件 校准、评估、转换 能够提供完备的色彩学分析图表可视化能力 Colour-Science 是一个 相当齐全的色彩科学库,其方法基本涵盖了现行大部分通用(或较广范围使用)的色彩规格,并实现了相互间的联结。通过它,我们能够轻易的将不同色彩系统内的自定义变量等内部概念,转换到统一 CIE 规格下衡量。当然,也可以反向提供相应的配置内容。 由于库的体量过于巨大,此处仅列出部分相对高频次使用的函数,仅供参考。 核心模块(colour.)的常用函数(简,仅列出名称): 色彩空间: RGB_COLOURSPACES, RGB_to_XYZ, XYZ_to_RGB, XYZ_to_Lab, Lab_to_XYZ, xyY_to_XYZ, XYZ_to_xyY, LMS_to_XYZ, XYZ_to_LMS, UCS_to_XYZ, XYZ_to_UCS 色彩比对: XYZ_to_xy, xy_to_XYZ, XYZ_to_uv, uv_to_XYZ 色温转换: xy_to_CCT, CCT_to_xy 色彩感知: chromatic_adaptation, contrast_sensitivity_function, corresponding_chromaticities_prediction 色差计算: delta_E (CIE 1976, CIE 1994, CIE 2000, CMC etc.), index_stress (Kruskal’s Standardized Residual Sum of Squares) 光度计算: lightness, whiteness, yellowness, luminance, luminous_flux, luminous_efficacy, luminous_efficiency, 光谱处理: 光谱分析的主体类, sd_to_XYZ, sd_blackbody, sd_ones, sd_zeros, sd_gaussian, sd_CIE_standard_illuminant_A sd_CIE_illuminant_D_series 颜色代数: table_interpolation, kernel_nearest_neighbour, kernel_linear, kernel_sinc, kernel_lanczos, kernel_cardinal_spline, 数据读写: read_image, write_image, 
read_LUT, write_LUT, read_sds_from_csv_file, write_sds_to_csv_file, read_spectral_data_from_csv_file, read_sds_from_xrite_file, 辅助模块(colour..)的常用函数(简,仅列出名称): 绘图可视化(plotting.): plot_single_colour_swatch, plot_multi_colour_swatches, plot_single_sd, plot_multi_sds, plot_single_illuminant_sd, plot_multi_illuminant_sds, plot_single_lightness_function, plot_multi_lightness_functions, plot_single_luminance_function, plot_multi_luminance_functions 读写扩展(io.): image_specification_OpenImageI LUT_to_LUT, 色彩模型(models.): RGB_COLOURSPACE_CIE_RGB, RGB_COLOURSPACE_BT709, RGB_COLOURSPACE_BT2020, RGB_COLOURSPACE_DCI_P3, RGB_COLOURSPACE_sRGB 色温扩展(temperature.): mired_to_CCT, CCT_to_mired, xy_to_CCT_CIE_D, CCT_to_xy_CIE_D 光谱恢复(recovery.): sd_Jakob2019, LUT3D_Jakob2019, XYZ_to_sd_Jakob2019, find_coefficients_Jakob2019 代数扩展(algebra.): euclidean_distance, manhattan_distance, eigen_decomposition, vecmul Colour 开源项目 位于:Github:colour-science/colour 。使用细节,可自行前往官方档案馆查阅:官方档案馆查阅 。 PyOpenCV(Python Entry of Open Source Computer Vision Library) PyOpenCV(Python OpenCV) 是 计算机视觉和图像机器学习 OpenCV 库 的 官方 Python 套接接口,项目自 Intel 奠基,现由 OpenCV 开源开发社区进行维护 [9] 。其核心 OpenCV 覆盖了数百个计算机视觉算法,并 官方预训练好了 大量用于 传统 CV 的 ML 功能线下模型(详见 Github:OpenCV-contrib/Modules ),囊括从 简单图像处理 到 复杂应用的视觉任务,如边缘检测、图像滤波、基础变换(旋转、缩放、错切、仿射变换)、对象检测等,都可通过调用其方法功能实现。并且,考虑到机器学习拓展性,本身提供了 对模型训练和推理的相关扩展接口,方便处理中使用。 此外,OpenCV 有着对图片、视频文件、视频流(本地流、网络流)等数据源的完整支持,使得基本大部分涉及视频的分析工作,都能够用该库一库解决。非常强大。但其是一个以计算机视觉和 2D 图像处理为核心的库,具有 有限 的 3D 功能,并不专注于全面的 3D 图形学处理。 另外需要注意的是 OpenCV 并不是 专门用于进行深度学习的框架,虽然能够进行推理,可 并不能 达到最好的资源利用效率和训练与推理性能。这点在应用或非分析工程中,当存在大量模型处理需求或模型流水线时,应该考虑。 主要功能: 图像处理,支持图像读取、写入、滤波、变换、边缘检测等基本操作 视频处理,支持视频文件的读取、写入、帧捕获和视频流处理 特征检测,提供关键点检测和特征匹配,如 SIFT、SURF、ORB 等 对象检测,支持 Haar 级联分类器、深度学习模型(如 YOLO、SSD)等 机器学习,支持多种机器学习算法,如 SVM、KNN、决策树等 三维重建,提供立体匹配、相机标定、三维重建功能(有限) 图像分割,支持阈值分割、轮廓检测、分水岭算法等 相机补益,支持镜头畸变校正和图像增强 运动分析,提供光流计算和运动跟踪功能 图像拼接,支持全景图像拼接和图像对齐 GPU 加速,部分算法支持 GPU 加速,提升计算性能 高级图像处理,支持图像金字塔、模板匹配、霍夫变换(Hough)等高级操作 丰富的库和模块,集成了大量的图像处理和分析工具 良好的库兼容性,可以与 NumPy、SciPy 等科学计算库结合使用 多模型格式支持,支持 Caffe、TensorFlow、ONNX(关键) 等多种框架的模型格式 跨平台支持,可以在主流操作系统(Windows、macOS、Linux)上运行 由于 OpenCV 对 API 入口进行了统一,以下模块调用前缀皆为 “cv2.”,比如 “cv2.add”,后续如无特殊说明,则按此依据。 因为 OpenCV 的复杂度,我们参考官方的 核心库(对应 opencv-python) 和 扩展库(opencv-contrib-python) 两大分类,将主要的常用函数和封装,也拆分为 两部分描述。 首先,是核心库(opencv-python)所包含的内部模块。 核心模块(cv2.core)的常用函数(简,仅列出名称): 基本数据结构: 、 、 、 、 基本算法和操作: add, subtract, multiply, divide, absdiff 线性代数: solve, invert, determinant, eigen 随机数生成: , randu, randn 类型转换: convertScaleAbs, normalize 数据操作: minMaxLoc, meanStdDev, reduce 输入输出: imread, imwrite, imdecode, imencode 时间操作: getTickCount, getTickFrequency, getCPUTickCount 图像克隆和复制: copyMakeBorder 数学函数: exp, log, sqrt, pow 图像处理模块(cv2.imgproc)的基础函数(简,仅列出名称): 基本图像变换: resize, warpAffine, warpPerspective 颜色空间转换: cvtColor, inRange 图像滤波: GaussianBlur, medianBlur, bilateralFilter, blur 阈值处理: threshold, adaptiveThreshold 直方图处理: calcHist, equalizeHist 几何变换: getRotationMatrix2D, getAffineTransform, getPerspectiveTransform 图像金字塔: pyrUp, pyrDown 图像插值: linearPolar, remap 直线与形状绘制: line, rectangle, circle, ellipse, putText 图像处理模块(cv2.imgproc)的结构分析与形态学(Morphology)函数(简,仅列出名称): 边缘检测: Canny, Sobel, Laplacian, Scharr 霍夫变换: HoughLines, HoughLinesP, HoughCircles 轮廓检测: findContours, drawContours 形态学操作: morphologyEx, erode, dilate 矩形拟合: boundingRect, minAreaRect 圆形拟合: minEnclosingCircle 椭圆拟合: fitEllipse 多边形拟合: approxPolyDP 凸闭包计算: convexHull, convexityDefects 形状匹配: matchShapes 视频读写模块(cv2.videoio)的常用函数(简,仅列出名称): 视频捕获: , isOpened, read, release 视频写入: , write, release 视频属性: get, set (归属 创建的流句柄所有) 视频编码: 
图形用户界面模块(cv2.highgui)的常用函数(简,仅列出名称): 创建窗口: namedWindow 显示图像: imshow 等待键盘事件: waitKey 销毁窗口: destroyWindow, destroyAllWindows 鼠标事件: setMouseCallback 滑动条(Trackbar): createTrackbar, getTrackbarPos, setTrackbarPos 传统机器学习对象检测模块(cv2.objdetect)的常用函数(简,仅列出名称): 分类器实例: 使用分类器检测对象: detectMultiScale 保存和加载 XML 分类器文件: save, load (为 加载分类器) 官方提供的 XML 分类器文件,位于 OpenCV 的安装目录,主要有两类,加载方式一致: data/haarcascades 为 Haar 分类器(矩形像素差)的指定目标训练所得分类特征 data/lbpcascades 为 LBP 分类器(纹理描述符)的指定目标训练所得分类特征 特征检测与匹配模块(cv2.features2d)的常用函数(简,仅列出名称): 特征检测对象: 、 、 、 、 特征匹配对象: 、 特征检测创建: SIFT_create, SURF_create, ORB_create, FastFeatureDetector_create, BRISK::create 特征描述获取: compute, detect(由 [xx]_create 创造的对应特征检测方法的对象调用) 特征匹配: match, knnMatch(由 等特征匹配对象调用) 关键点绘制: drawKeypoints, drawMatches 相机校正与三维影射模块(cv2.calib3d)的常用函数(简,仅列出名称): 相机校正: findChessboardCorners, cornerSubPix, calibrateCamera, initUndistortRectifyMap, undistort, undistortPoints, getOptimalNewCameraMatrix 立体校正: stereoCalibrate, stereoRectify, stereoBM_create, stereoSGBM_create 匹配校正: correctMatches 3D 重建: reprojectImageTo3D 基本矩阵与本质矩阵(重要): findFundamentalMat, findEssentialMat, recoverPose 三角化: triangulatePoints 图像分割模块(cv2.segmentation)的常用函数(简,仅列出名称): 阈值分割: threshold, adaptiveThreshold(同 [结构分析与形态学函数] 已并入基础库) 路径分割: findContours, drawContours(同 [结构分析与形态学函数] 已并入基础库) 形态学分割: morphologyEx(套接,基于图像形状 膨胀、腐蚀、开/闭运算,增减益) 分水岭算法: watershed 图割(Graph Cut)算法: grabCut 超像素分割(需引入 opencv-contrib-python 扩展的 cv2.ximgproc 模块): ximgproc.createSuperpixelLSC 为创建 线性光谱聚类(LSC) 超像素分割器 ximgproc.createSuperpixelSLIC 为创建 简单线性迭代聚类(SLIC) 超像素分割器 ximgproc.createSuperpixelSEEDS 为创建 能量驱动采样(SEEDS) 超像素分割器 图像拼接模块(cv2.stitching)的常用函数(简,仅列出名称): 图像拼接对象: 图像拼接创建: create, createStitcher 设置参数: setPanoConfidenceThresh, setWaveCorrection(由 对象调用) 图像拼接: stitch(由 对象调用) 特征检索: featuresFinder(由 对象调用) 图像修复与 HDR 模块(cv2.photo)的常用函数(简,仅列出名称): 图像修复: inpaint 去噪: fastNlMeansDenoising, fastNlMeansDenoisingColored HDR 合成: createMergeDebevec, createMergeMertens, createMergeRobertson 色调映射: createTonemap, createTonemapDrago, createTonemapMantiuk, createTonemapReinhard 辐射校正: createCalibrateDebevec, createCalibrateRobertson 图像质量评估模块(cv2.quality)的常用函数(简,仅列出名称): 图像质量评估对象(重要): 无参考图像空间质量评估(BRISQUE) 评估实例 梯度幅度相似性偏差(GMSD) 评估实例 通用像素点间均方误差(MSE) 评估实例 像素峰值信噪比(PSNR) 评估实例 结构相似性指数(SSIM) 评估实例 图像质量评估创建: create 图像质量评估计算: compute 预训练模型加载: load(继承自 的关键方法) 文本处理模块(cv2.text)的常用函数(简,仅列出名称): 文本检测对象: 文本识别对象: , 文本检测创建: createERFilterNM1, createERFilterNM2 文本识别创建: createOCRHMMDecoder, createOCRHMMTransitionsTable 文本检测: detectRegions 文本识别: run(由所创建 调用) 字符识别: loadOCRHMMClassifierNM, loadOCRHMMClassifierCNN 视频分析模块(cv2.video)的常用函数(简,仅列出名称): 背景建模: , 光流计算: calcOpticalFlowFarneback(HS 法), calcOpticalFlowPyrLK(LK 法) 运动检测: CamShift, meanShift 视频稳定化: estimateRigidTransform, findTransformECC 轨迹跟踪模块(cv2.tracking)的常用函数,用于物体跟踪(重要,节省算力),仅列出名称: 跟踪器对象: 、 单目标跟踪( 跟踪器): Tracker_create, TrackerKCF_create, TrackerMIL_create, TrackerBoosting_create, TrackerMedianFlow_create, TrackerTLD_create, TrackerGOTURN_create, TrackerMOSSE_create, TrackerCSRT_create 多目标跟踪( 跟踪器集): MultiTracker_create, add 跟踪初始化: init 跟踪当前帧: update 其次,是扩展库(opencv-contrib-python)所包含的额外模块。 扩展库涵盖了较多 传统计算机视觉(CV)高级算法,部分使用配参会较核心库更为复杂。同时,其中涉及 3D 匹配 的功能,大部分会用到 空间位姿计算(Spatial Posture Calculation) 来表示物体 在场景中的定位情况。而对于此类涉及具有实际意义 3D 场景或物体的算法,想要展示其处理结果,一般都需要用构建空间化的渲染管线完成,而无法再直接使用 Matplotlib 做快速绘制(除非引入外部位姿库,或自实现)。介于此,有关 3D 绘制的部分,我们于未来再行讨论。 现在,让我们来看都有哪些 功能扩展。 生物识别扩展模块(cv2.bioinspired)的常用函数(简,仅列出名称),用于感知模拟(重要): 视网膜模型(需 opencv-contrib-python 扩展的 cv2.bioinspired_Retina 模块),通过(cv2.)bioinspired_Retina.create 创建实例: 视网膜模拟类型实例 .clearBuffers 初始化清空模型历史缓冲 .run 
运行模型分析传入数据 .getParvo 获取视网膜小细胞(Parvo Cells)的感知模拟 .getMagno 获取视网膜大细胞(Magno Cells)的感知模拟 .write 配置视网膜模型参数,需要 .xml 格式的模型参数配置文件 .setupIPLMagnoChannel 设置视网膜大细胞通道数 .setupOPLandIPLParvoChannel 设置视网膜小细胞通道数 脉冲神经网络对象(需 opencv-contrib-python 扩展的 cv2.bioinspired 模块),通过(cv2.)bioinspired.TransientAreasSegmentationModule.create 创建实例: 脉冲神经网络进行瞬态区域检测实例 .run 运行模型分析传入数据 .getSegmentationPicture 获取检测结果 结构光扩展模块(cv2.structured_light)的常用函数(简,仅列出名称): 扫描蒙皮光栅生成器(需 opencv-contrib-python 扩展的 cv2.structured_light 模块),通过(cv2.)structured_light..create 创建实例: 、 .setWhiteThreshold 设置白色阈值 .setBlackThreshold 设置黑色阈值 .getImagesForShadowMasks 获取阴影校验图像(用于结构光解码) .generate 生成用于投影到被扫描物体上的光栅化蒙皮(锚点定位,必须) 扫描结果范式解码(需 opencv-contrib-python 扩展的 cv2.structured_light 模块),方法提供自 继承的 父类: 实物结构光光栅化投影解码器 .decode 解码捕获的光栅投影 三维重建,需要用到核心库三维影射模块(cv2.calib3d)能力: triangulatePoints, reprojectImageTo3D, convertPointsFromHomogeneous 表面检测点对特征匹配(PPF)扩展模块(cv2.ppf_match_3d)的常用函数(简,仅列出名称): 点云模型(需 opencv-contrib-python 扩展的 cv2.ppf_match_3d 模块),通过(cv2.) ppf_match_3d.loadPLYSimple 加载 多边形点云格式(PLY [Polygon File Format])文件(.ply),来创建点云模型实例: 模型被加载 PLY 文件的光栅化与法线等信息,以 OpenCV 的 Mat 格式储存 模型检测器(基于局部几何特征匹配),即粗配准(Coarse Global Registeration)。需要在使用(cv2.)ppf_match_3d. 创建时指定 关联采样步长(relativeSamplingStep)决定使用时的模型检测精度,值越小则越严格(精确匹配): 采用点对特征匹配(Point Pair Features)算法的场景模型检测 .trainModel 将点云模型传入检测器训练,制作指定模型的场景内检测器 .match 使用训练好的模型检测器实例,检测 3D 场景内模型/位姿匹配 位姿匹配器(基于初始位姿特征匹配),即精配准(Fine Local Registeration)。需要在使用(cv2.)surface_matching. 创建时,对使用的 临近点迭代(ICP [Iterative Closest Point]) 算法进行初始设定 [10] 。位姿匹配器是对 粗配准 结果的进一步优化,用于细化点位,需要注意, 有这些参数: iterations 为 ICP 算法的最大迭代次数 tolerence 为 ICP 算法的收敛容差,变换矩阵更新差值小于该值时,停止迭代 rejectionScale 为 ICP 剔除放缩因子,剔除点对距离大于该因子乘平均距离时的点对 numLevels 为 ICP 点云对齐时的分辨率像素金字塔层数,层数越多越耗时,越精确 sampleType 为 ICP 点云对齐 采样类型,一般为 0 默认值 numMaxCorr 为 ICP 算法的最大对应点对(Point Pairs)数,可调节模型结果精度 位姿匹配器执行后,可以取得 源模型(Model)在场景(Scene)中的具体点位的场景内位置情况。常被用于 SLAM、场景重建、3D 环境分析。以: .registerModelToScene 注册物体点云到场景,来获关键点场景内的位姿矩阵 得到经过 ICP 校准后的 PPF 结果(需要在调用 .registerModelToScene 方法时,传入 PPF 返回的各点位姿矩阵数组)。 二维条码定位校准 ArUco 标记模块(cv2.aruco)的常用函数(简,仅列出名称): 创建标记字典: aruco.Dictionary_create, aruco.getPredefinedDictionary 标记检测: aruco.detectMarkers 标记绘制: aruco.drawDetectedMarkers, aruco.drawDetectedCornersCharuco 标记校准: aruco.calibrateCameraAruco 姿态估计: aruco.estimatePoseSingleMarkers, aruco.estimatePoseBoard, aruco.estimatePoseCharucoBoard 标记板创建: aruco.GridBoard_create, aruco.CharucoBoard_create 坐标面绘制: aruco.drawPlanarBoard Charuco 标记: aruco.drawCharucoDiamond, aruco.detectCharucoDiamond, aruco.interpolateCornersCharuco 机器学习模块(cv2.ml)常用方法封装(简,仅列出名称),提供传统机器学习分类算法: 数据准备: ml.TrainData_create 支持向量机: ml.SVM_create, .trainAuto, .predict K 近邻: ml.KNearest_create, .train, .findNearest 决策树: ml.DTrees_create, .train, .predict 随机森林: ml.RTrees_create, .train, .predict 加速树分类: ml.Boost_create, .train, .predict 正态贝叶斯分类器: ml.NormalBayesClassifier_create, .train, .predict 神经网络: ml.ANN_MLP_create, .train, .predict EM 聚类: ml.EM_create, .trainEM, .trainM, .predict 深度学习模块(cv2.dnn)常用方法封装(简,仅列出名称),提供深度学习单一模型前向推理: 模型加载: , dnn.readNet, dnn.readNetFromCaffe, dnn.readNetFromTensorflow, dnn.readNetFromTorch, dnn.readNetFromONNX, dnn.readNetFromDarknet 输入处理: dnn.blobFromImage, dnn.blobFromImages 输入设置: .setInput 推理后端: .setPreferableBackend, .setPreferableTarget 模型推理: .forward GPU 加速扩展模块(cv2.cuda)的常用函数,是同名基础模块算法 CUDA 加速版,仅列出名称: GPU 信息: cuda.getCudaEnabledDeviceCount, cuda.printCudaDeviceInfo 内存管理: , cuda.registerPageLocked, cuda.unregisterPageLocked 图像处理: cuda.cvtColor, cuda.resize, cuda.threshold, cuda.warpAffine, cuda.warpPerspective 图像滤波: cuda.createBoxFilter, 
cuda.createGaussianFilter, cuda.createSobelFilter, cuda.createLaplacianFilter, cuda.createCannyEdgeDetector 特征检测: cuda.ORB_create, cuda.SURF_CUDA_create 立体匹配: cuda.createStereoBM, cuda.createStereoBeliefPropagation, cuda.createStereoConstantSpaceBP 视频处理: cuda.createBackgroundSubtractorMOG, cuda.createBackgroundSubtractorMOG2 光流计算: cuda.calcOpticalFlowFarneback, cuda.calcOpticalFlowPyrLK 空频变换: cuda.dft(1D/2D 离散傅立叶), cuda.mulSpectrums(频域乘) 图像金字塔: cuda.pyrUp, cuda.pyrDown 以上只列出了少部分常用的函数,仅覆盖了 OpenCV 的部分常用基础能力。 更多的使用细节,可自行前往项目 官方档案馆查阅 。 注意,上文中,并行计算扩展模块(cv2.parallel)并未例入其中。因为其主要为库内部加速,且对外的自定义函数自由度太高,使用时应对可能存在数据访问冲突进行自管理。考虑到必要程度不高(存在替代方案且库本身的 CUDA 加速就能满足性能要求),不太建议使用。 仍然如前,让我们用它们做些简单的实践。 简单练习:用 常用视频库 完成 带有均色分析的简易单人脸跟踪识别 这次,我们尝试完成,用 OpenCV 的 传统机器学习对象检测 和 视频分析对象跟踪算法 来实现对 单一人脸的识别与跟踪。且对人脸区域的 RGB、XYZ、LAB 三类色彩空间通道均值进行实时监测,绘制历史图表并显示在 UI 界面。 由于 OpenCV 提供了部分图形功能,能够做基础绘图(点、线、几何面等)。我们直接选用 OpenCV 来创建练习的图形用户界面(GUI)。而色彩分析则用在此领域更专业的 Colour-Science 完成。 练习事例按照标准工程工作流进行。 第一步,确立已知信息: 数据来源:使用电脑自带(或默认外接)摄像头的采样作为输入 处理环境:依赖 、,Python 脚本执行 工程目标: 1) 提供一个具有 GUI 的简易单人脸(Single Face)区域监测,并在监测到人脸后跟踪 2) 对人脸区域内的像素值进行关于 RGB、XYZ、LAB 色彩空间的区域内均值分析 第二步,准备执行环境: 检测是否已经安装了 Python 和 pip(对应 Python 版本 2.x) 或 pip3(对应 Python 版本 3.x) 包管理器。此步骤同我们在 的练习 中的操作一致,执行脚本即可: python install_pip.py python install_math_libs.py 完成对 Python 环境 的准备和 的安装。具体脚本实现,可回顾上一节。 同理,对于 的准备工作,我们也按照脚本方式进行流程化的封装。创建自动化脚本 install_grapic_libs.py 如下: import subprocess import sys def is_package_installed(package_name): try: subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return True except subprocess.CalledProcessError: return False def install_package(package_name): print(f\"Installing {package_name}...\") try: subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", package_name], check=True) print(f\"{package_name} has been installed.\") except subprocess.CalledProcessError: print(f\"Failed to install {package_name}. 
Please try installing it manually.\") def main(): packages = [\"colour-science\", \"opencv-python\", \"opencv-contrib-python\"] for package in packages: if is_package_installed(package): print(f\"{package} is already installed.\") else: install_package(package) if __name__ == \"__main__\": main() 这套脚本流程应该相当熟悉了。随后,使用 Python 执行脚本: python install_grapic_libs.py 如果包已安装,则会输出 \"[基础视频库] is already installed.\"。如果包未安装,则会安装该包并输出 \"[基础视频库] has been installed.\",并显示包的详细信息。 到此,完成音频库的环境准备工作。 第三步,搭建人脸检测分析 Demo: 际上,这一次的 Demo 较上节的 来说,在交互逻辑上会少很多内容(基本没有操作上的交互)。但其功能逻辑链路,会比 要深一些。所以,我们可以把 功能上的诉求按照同一条执行流水线,进行概念原型设计。 而细化的两个 就是执行流水线的 “必要目标节点”,有关键步骤图: 图 5-7 人脸检测分析 Demo 处理过程节点示意图 至此,我们获得了此播放器的基本运行逻辑。根据上图节点作函数封装,构建实时处理流水线。编写代码: import cv2 import numpy as np import colour from collections import deque # 加载 Haar 级联分类器用于人脸检测 face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml') # 打开摄像头 cap = cv2.VideoCapture(0) # 初始化跟踪器标志 init_tracker = False tracker = None # 定义一个队列来保存历史颜色数据 history_length = 100 # 只保留最近 100 帧的数据 history_rgb = [deque(maxlen=history_length) for _ in range(3)] history_xyz = [deque(maxlen=history_length) for _ in range(3)] history_lab = [deque(maxlen=history_length) for _ in range(3)] def calculate_colour_metrics(frame, bounding_box): x, y, w, h = bounding_box face_roi = frame[int(y):int(y + h), int(x):int(x + w)] # 计算 RGB 平均值 mean_rgb = np.mean(face_roi, axis=(0, 1)) / 255.0 # 归一化到 [0, 1] 范围 # 获取 D65 光源的色度坐标 illuminant = colour.CCS_ILLUMINANTS['CIE 1931 2 Degree Standard Observer']['D65'] # 转换到 XYZ 颜色空间 mean_xyz = colour.RGB_to_XYZ(mean_rgb, colour.RGB_COLOURSPACES['sRGB'], illuminant=illuminant) # 转换到 Lab 颜色空间 mean_lab = colour.XYZ_to_Lab(mean_xyz, illuminant) return mean_rgb, mean_xyz, mean_lab def draw_graph(frame, data, position, colors, title): \"\"\" 在 frame 上绘制图表 :param frame: 要绘制图表的帧 :param data: 要绘制的数据(deque) :param position: 图表的位置 :param colors: 图表的颜色列表 :param title: 图表的名称 \"\"\" graph_height = 100 graph_width = 200 x, y = position # 创建半透明背景 overlay = frame.copy() cv2.rectangle(overlay, (x, y - graph_height), (x + graph_width, y), (0, 0, 0), -1) alpha = 0.5 # 透明度 cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame) # 绘制坐标轴 cv2.line(frame, (x, y), (x + graph_width, y), (0, 0, 0), 1) cv2.line(frame, (x, y), (x, y - graph_height), (0, 0, 0), 1) # 绘制图表名称 cv2.putText( frame, title, (x, y - graph_height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1 ) # 绘制数据曲线 for channel, color in enumerate(colors): if len(data[channel]) > 1: for i in range(1, len(data[channel])): cv2.line( frame, (x + int((i - 1) * graph_width / (history_length - 1)), y - int(data[channel][i - 1] * graph_height)), (x + int(i * graph_width / (history_length - 1)), y - int(data[channel][i] * graph_height)), color, 1 ) while True: # 读取摄像头帧 ret, frame = cap.read() if not ret: break # 转换为灰度图像 gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if not init_tracker: # 检测人脸 faces = face_cascade.detectMultiScale( gray, scaleFactor=1.1s, minNeighbors=5, minSize=(120, 120), # 增大最小尺寸以减少局部特征检测 flags=cv2.CASCADE_SCALE_IMAGE ) # 如果检测到人脸,选择最大的矩形框初始化跟踪器 if len(faces) > 0: # 选择最大的矩形框 largest_face = max(faces, key=lambda rect: rect[2] * rect[3]) x, y, w, h = largest_face bounding_box = (x, y, w, h) # 确保检测到的是整张人脸而不是局部特征(例如通过宽高比) aspect_ratio = w / h if 0.75 有运行效果如下: 图 5-8 带有均色分析的简易单人脸跟踪识别 Demo 效果图 完成针对视频分析库的练习。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 
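作为本节练习的一个补充,下面给出一份只做“逐帧人脸检测 + 区域均色分析”的最小化示意脚本(不含跟踪器与历史曲线绘制;摄像头索引 0、窗口名、按 q 退出等均为演示假设,色彩转换调用沿用正文中 colour-science 的写法,不同版本的接口可能略有差异),便于在完整 Demo 之外单独验证这一条链路:

import cv2
import numpy as np
import colour

# 加载 OpenCV 自带的正面人脸 Haar 级联分类器(与正文一致)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)  # 打开默认摄像头,索引 0 仅为示例假设

# D65 光源色度坐标,用于 XYZ / Lab 转换
illuminant = colour.CCS_ILLUMINANTS['CIE 1931 2 Degree Standard Observer']['D65']

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # 逐帧检测人脸,注意 scaleFactor 为浮点数 1.1
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(120, 120))
    if len(faces) > 0:
        # 取面积最大的矩形框作为目标人脸区域
        x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])
        roi = frame[y:y + h, x:x + w]
        # OpenCV 帧为 BGR 排列,先转 RGB 再取区域均值并归一化到 [0, 1]
        mean_rgb = np.mean(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB), axis=(0, 1)) / 255.0
        mean_xyz = colour.RGB_to_XYZ(mean_rgb, colour.RGB_COLOURSPACES['sRGB'], illuminant=illuminant)
        mean_lab = colour.XYZ_to_Lab(mean_xyz, illuminant)
        print('RGB:', mean_rgb, 'XYZ:', mean_xyz, 'Lab:', mean_lab)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow('Face ROI Colour Check', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # 按 q 键退出,仅为演示约定
        break

cap.release()
cv2.destroyAllWindows()

在确认这条链路工作正常后,再按正文思路把逐帧检测替换为“首帧检测 + 后续跟踪”,并接入历史曲线绘制,即可得到完整 Demo;逐帧全图检测的算力开销明显更高,这也是正文引入跟踪器的原因。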
"},"Chapter_5/Language/cn/References_5.html":{"url":"Chapter_5/Language/cn/References_5.html","title":"【参考文献】","keywords":"","body":"五、【参考文献】 [1] Oliphant, Travis E. Guide to numpy. Vol. 1. USA: Trelgol Publishing, 2006. [2] Snider, L. A., and S. E. Swedo. \"PANDAS: current status and directions for research.\" Molecular psychiatry 9, no. 10 (2004): 900-907. [3] Tosi, Sandro. Matplotlib for Python developers. Packt Publishing Ltd, 2009. [4] SoundFile library (SoundFile: An audio library based on libsndfile, 2023. Available at: https://pysoundfile.readthedocs.io/) for audio file read/write operations, which is based on the libsndfile library (Erik de Castro Lopo. libsndfile: A C library for reading and writing sound files, 2023. Available at: http://www.mega-nerd.com/libsndfile/). [5] H. Pham, “Pyaudio website.” https://people.csail.mit.edu/hubert/pyaudio, 2006. [6] McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg, and Oriol Nieto. “librosa: Audio and music signal analysis in python.” In Proceedings of the 14th python in science conference, pp. 18-25. 2015. [7] Brossier, Paul M. \"The aubio library at mirex 2006.\" Synthesis (2006). [8] Colour Developers. 2023. Colour-Science. Version 0.3.16. Accessed October 5, 2023. https://www.colour-science.org/. [9] Bradski, Gary, and Adrian Kaehler. \"OpenCV.\" Dr. Dobb’s journal of software tools 3.2 (2000). [10] P. J. Besl and N. D. McKay, \"A method for registration of 3-D shapes,\" in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 14, no. 2, pp. 239-256, Feb. 1992, doi: 10.1109/34.121791. Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-11 14:10:00 "}}
\ No newline at end of file
+{"./":{"url":"./","title":"《音视频开发技术:原理与实践》©","keywords":"","body":"《音视频开发技术:原理与实践》© =[>> 关于作者© =[>> 赞助本作© =[>> 版权申明© 目标 对于音视频工程师/架构师来说,日常工作长中总会有大量的知识技术积累,亟待梳理以期望能够被快速检索查阅。但由于工程技术所处领域的复合特征,往往针对一个工程问题所需要的专业知识,不论深浅程度,都会横跨几门学科。而想要获取有效的处理问题所能使用的信息,都需要依次回顾、搜集和关联。这样必不可少会花费大量时间查阅各类大部头资料和文献。而这么做往往是因为,对于待解答问题非常重要的知识点,分布碎片化导致的。 音视频规格的跨度构成了本身技术的多个维度,使得我们并不能按照以往的工程思维,从单一角度来考虑涉及此类型的复合问题。 因此,本书的目的旨在以工程解决方案的实践思路过程,对相关联的各学科核心知识进行串联。以求用一套完整且关联的技术栈模板,来贯穿当下多媒体技术的所有核心技术模块。从而 为读者提供针对多媒体(音视频)分析/处理/整合/架构方面,有效技术指导与学习路线。 特色 本书结合作者工作实践,对架构师日常工作工程中涉及使用到的:数字信号处理、计算机图形学、色彩学、相关工程规格规范、驱动特征及软件框架设计等,领域的专业学科知识进行了梳理和提炼。从音视频工程师不同的技术阶段需要面临的问题为出发点,将 全书分为,音视频基础与音视分析、流媒体规格与简易编解码播放框架设计、通用统一化音视频编辑框架与渲染驱动设计,三大阶段。每一阶段,统一采用知识图谱串联工程规格与编码实践,全面讲解对应技术阶段下需要掌握的,多媒体(音视频)技术之简史、原理、算法、设计及相关推导、制定、架构与应用。 基于此,全书按照技术逐级递进的关系,构成了整体音视频从数据分析、编解码器开发、播放器开发到图形化与图像处理、特效与特效引擎的 完整技术栈。使得全书每个章节内部自成一体但确相互关联,从而便于做技术字典、工程手册和整体学习之用。 面向 书中原理与技术面向全平台,因此主要开发语言为 C/C++。部分平台化及数据分析场景,会一定程度的应用到 C#、Java、Python 等其他语言。本书适合: 初入音视频开发的新手: 本书为您提供了完整学习路径,对于打算初入本行业的开发者,本书能够帮您梳理完整的音视频开发技术路线。协助您成功入行。 有基础的音视频工程师: 本书为您提供了知识技术字典,对于日常开发工作中涉及到的相关问题分析,本书能够帮您快速定位到所需要的核心知识点,进而方便您进一步根据所给信息来做出判断,或根据提示方向来进行深度资料查阅。 多媒体编解开发者友好: 本书为您提供了ITU-T的编解码协议技术索引和讲解,您可以快速通过本书查阅常用 H.264、H.265、H.266 的关键资料和技术对比。 流媒体协议开发者友好: 本书为您提供了常用流协议的拆分解析,您可以快速通过本书查阅常用 RTP/RTCP、RTMP、HLS 的规格设定和消息类型。 学研成果转向生产部署: 本书为您提供了理论转实践的事例方案,对于将研究成果转换到实际工业生产活动的老师,本书能够为您介绍一些现已有成功实践的多媒体方面学转产探索。协助您梳理思路。 硬核的多媒体技术大咖: 若您是深耕此领域多年的老师,您不妨将本书当作一次有趣的思维之旅,从不同的视角感受音视频工程魅力,希望本书能为您提供一些帮助。当然,也更希望获得您的交流。 为方便您定位章节难度,此处提供 =[>> 难度向导 建议。 受限于作者,本书难免存在一些不足,您可以 Book-issues 进行反馈,感谢您的帮助! 目录 音视频工程基础 一、音频的保存与还原 1.1 音频基础 1.2 声波三要素(Three Elements of Acoustics) 1.3 声音三要素(Three Elements of Sounds) 1.3.1 音高(Pitch) 1.3.2 响度(Loudness) 1.3.3 音色(Timbre) 1.4 声音的解构 1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths) 1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz) 1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour]) 1.4.4 感观:频响曲线(FRC [Frequency Response Contour]) 1.4.5 工程:频谱图(Spectrum) 1.5 声音数字化 1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source) 1.5.2 模数转换(A/D [Analog-to-Digital]) 1.5.3 数模转换(D/A [Digital-to-Analog]) 1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM) 1.6 音频的存储 1.6.1 音频格式(Audio Format) 1.6.2 无压缩编码格式(Uncompressed Encode) 1.6.3 无损压缩编码格式(Lossless Encode) 1.6.4 有损压缩编码格式(Uncompressed Encode) 【参考文献】 二、色彩的运用与存储 2.1 色彩基础 2.2 颜色三要素(Three Elements of Color) 2.2.1 色调(Hue) 2.2.2 饱和度(Saturation) 2.2.3 光亮度(Luminance) 2.3 色彩的衡量 2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化 2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space) 2.3.3 经典三原色函数(Trichromatic Primaries Functions) 2.3.4 经典三刺激函数(Tristimulus Values Functions) 2.3.5 现代色彩体系(Modern Color System) 2.4 色彩的对比 2.4.1 色域(Color Gamut ) 2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram) 2.4.3 色差(Chromatic Aberration) 2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature) 2.4.5 标准光源(Standard Illuminants)& 白点(White Point) 2.4.6 显色指数(Color Rendering Index) 2.5 经典色彩空间(Classical Color Space) 2.5.1 光学三原色色彩空间(RGB) 2.5.2 颜料三原色色彩空间(CMY / CMYK ) 2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space) 2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space) 2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space) 2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* Color Space) 2.5.7 颜色三要素色彩空间(HSV / HSI / HSL) 2.6 色彩的存储 2.6.1 色彩格式(Color Format)与色彩存储 2.6.2 RGB 体系色彩格式 2.6.3 YUV 体系色彩格式 【参考文献】 三、音视频常用基础算法 3.1 信号分析的核心算法 - 傅立叶变换 3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT) 3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT) 3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT) 3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach) 3.2 频率信息提取 - 常用滤波算法 3.2.1 高斯滤波(Gauss Filter) 3.2.2 双边滤波(Bilateral Filter) 3.2.3 拉普拉斯滤波(Laplacian 
Filter) 3.2.4 马尔滤波(Marr Filter) 3.2.5 索贝尔滤波(Sobel Filter) 3.2.6 各向异性扩散(Anisotropic Diffusion) 3.3 时间冗余控制 - 常用特征提取与朴素阈值处理 3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 3.3.2 朴素目标检测结果度量 - IoU & GIoU 3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window) 3.4 空域冗余控制 - 基础光流算法与色度压缩 3.4.1 传统光流法(Classic Optical Flow Methods) 3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow]) 3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow]) 3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling]) 3.5 频域冗余控制 - 基础变换编码 3.5.1 整数离散正余弦变换(DST/DCT) 3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform]) 3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform]) 【参考文献】 四、音视频机器学习基础 4.1 发展概览 4.2 模型工程基础 4.2.1 算子(Operator)& 层(Layer) 4.2.2 神经元(Neuron) 4.2.3 神经网络(NN [Neural Network]) 4.2.4 特征选择(Feature Selection) 4.3 经典激活函数(Classic Activation Function) 4.3.1 Sigmoid 4.3.2 Tanh 4.3.3 Softplus 4.3.4 ReLU 族 4.3.5 ELU & SELU 4.3.6 Mish 4.3.7 Swish 族 4.4 连接函数/衰减函数(Connection/Attenuation Function) 4.4.1 Dropout 4.4.2 Maxout 4.4.3 SoftMax 4.5 损失函数(Loss Function) 4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error]) 4.5.2 回归项-均方误差(MSE [Mean Squared Error]) 4.5.3 回归项-休伯损失(Huber Loss) 4.5.4 回归项-分位数损失(Quantile Loss) 4.5.5 分类项-对数损失(Log Loss) 4.5.6 分类项-交叉熵损失(Cross Entropy Loss) 4.5.7 分类项-合页损失(Hinge Loss) 4.5.8 分类项-对比损失(Contrastive Loss) 4.5.9 分类项-三元损失(Triplet Loss) 4.5.10 分类项-对组排异损失(N-Pair Loss) 4.5.11 正则项-L1 惩罚 4.5.12 正则项-L2 惩罚 4.6 常用最优化算法(Optimizer Operator) 4.6.1 基础优化算法 4.6.2 优化算法的优化-应对震荡 4.6.3 优化算法的优化-应对重点强(弱)化更新 4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation]) 4.6.5 优化算法对比与使用建议 4.7 模型结构速览 4.7.1 卷积神经网络(CNN [Convolutional Neural Network]) 4.7.2 循环神经网络(RNN [Recurrent Neural Network]) 4.7.3 自注意力网络(Transformer) 【参考文献】 五、音视频帧分析与数据处理 5.1 音视频帧与环境准备 5.1.1 常用数学库(Numpy、Pandas、Mateplotlib) 5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio) 5.1.3 视频分析库(PyOpenCV、Color-Science) 5.1.4 其他分析软件 【参考文献】 本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。 This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-14 11:28:51 "},"AUTHOR.html":{"url":"AUTHOR.html","title":" =[>> 关于作者© <<]= ","keywords":"","body":"关于作者 本书由 李述博©(Arikan.Li©)独立完成 李述博(Arikan.Li)👇(This guy!)👇 CV工程师 & 架构师 & Baker 借用名言( 0_0): “名字仅是代号,知识才是真理。” 编写有感( )_T): 写作难度较大,各种资料查阅、相关知识点梳理以及辅助Demo和配套项目开发,带来了极大的压力。因此,您的您宝贵支持与意见,将是作者的重要的力量之源。 如何联系( -w-): 您可以通过 知乎 或 Github 联系到作者,感谢您的帮助。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"COPYRIGHT.html":{"url":"COPYRIGHT.html","title":" =[>> 版权申明© <<]= ","keywords":"","body":"版权申明© 本书由 李述博©(Arikan.Li©)独立完成 本人的所有作品,包括但不限于文字,图片等内容受《著作权》法的保护,凡未经权利人明确书面授权,转载上述内容,本人有权追究侵权行为。 本人关于图片作品版权的声明: 本人在此刊载的原创作品,其版权归属本人所有。 任何传统媒体、商业公司或其他网站未经本人的授权许可,不得擅自从本人转载、转贴或者以任何其他方式复制、使用上述作品。 传统媒体、商业公司或其他网站对上述作品的任何使用,均须事先与本人联系。 对于侵犯本人的合法权益的公司、媒体、网站和人员,本人聘请的律师受本人的委托,将采取必要的措施,通过包括法律诉讼在内的途径来维护本人的合法权益。 特此声明,敬请合作。 通常情况下,允许 个人及非商业使用转载,但是请标注 作者和链接。 本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。 This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"GUIDER.html":{"url":"GUIDER.html","title":" =[>> 难度向导© <<]= ","keywords":"","body":"难度向导 《入门基础》 阶段:音视频基础与音视分析,入门必需掌握之基础 入门的五章也是概念与基础理论最多的章节了。这几张的工程实践较少,但非常重要的原理、规格、定义及多。是后续更为复杂的工程实践中,被音视频工程师们做为根基般的存在。因此非常重要。 第一章 数字音频的保存与还原 本章从声学和心理声学角度对音频的相关工程量, 以及数模转换和分析对比的关键概念进行了阐述。 结合发展与规格演进,提供整体音频工程概念的梳理。 第二章 图像色彩的运用与存储 从色彩学发展史到工业体系对色彩的规格定义,章节大章以工程概念的递进关系进行介绍, 并在小章节中按照相关规格原理的发现提出时间顺序进行了由浅入深的推导说明。从而保证前后逻辑和发展上的连贯性。 第三章 音视频常用基础算法 属于纯数理基础,对音视频开发过程中常用的 图像/音频 的 分析/处理 算法,进行了梳理和讲解。 本章列出的部分,是作者在筛选掉大量非必需算法后的最小知识集合。 第四章 音视频机器学习基础 本章介绍了机器学习特别是深度学习在音视频处理中的基础知识背景。 通过对机器学习发展简史、部分关键算法和经典模型的阐述,帮助读者理解机器学习技术的一些基本运用。 第五章 音视频帧分析与简单处理 本章将音视频帧的基本概念、分析方法和简单处理技术进行了整理说明。 通过对音视频帧的深入理解和操作,读者可以掌握音视频处理的核心技术,为后续的复杂应用打下坚实的基础。 入门五章完成后,读者将有一定的音视频图像工程分析能力。并能够使用当前掌握的知识来处理音视频基本问题。 《编解传输》 阶段:流媒体规格与简易编解码播放框架设计,流的编解码与网络传输,音视频工程实践 第五章 音视频解码与流传输,是一个综合性较强的章节。这一章将对当前编解码规格进行详细的拆分与解析。通过对 H.264、H.265、H.266 的规格分析,详细的阐述当今音视频工程中,如何对视频保质保量的进行数据压缩和处理。并通过对 主流三协议:RTMP、RTP/RTCP、HLS 的分析,从协议的封装、信号设计、传输过程、规格规定上全面说明了音视频传输过程的各个方面细节。完成本章,将会使读者较为深度的理解编解码与传输,并使其能够有一定程度的规格定制与改进能力。 第六章 音视频的编解播与流分析,结合了第五章与入门四章的知识要领做工程实践。本章节将注重工程能力建设,从软件工程设计角度剖析音视频的编解播三大经典工程方案,并引导读者建立架构师思维与匹配的动手能力。 中级四章完成后,读者将能够胜任大部分业界的音视频项目工作需求,和一定程度的音视频架构师要求。 《渲染进阶》 阶段:通用统一化音视频编辑框架与渲染驱动设计,图像处理技术与特效引擎 第七章 图形驱动统一化的理论基础,是为后续章节开始进行的计算机图形处理,进行相关的理论基础铺垫与解析。中级/高级架构师,在工作内容上已不可避免会涉及到音视频2D、3D特效的处理与实践,并会较多的参与到 AI 技术工程化的框架设计工作中。因此,对于计算机图形学的了解是必要且必须的。 第八章 图形驱动与渲染引擎技术,则是一个较为复杂的复合章节。本章结合作者开源工程实践(UltraDriver),在前面几章铺垫的基础上,深入驱动底层逻辑,剖析了常见渲染引擎的核心元素,并完整的讲解了从GPU通信管线建立到实际场景渲染的完整过程。完成本章,将会使读者对整个渲染驱动有详尽的理解,并能够独立运用GPU驱动特性完成复杂的 3D 渲染工作。 第九章 音视频播放与特效编辑,结合作者开源工程实践(UltraTimeline),讲解了音视频编辑中的最为关键的技术系统:UTT 统一时间轴系统,通过此系统,读者将能够独立完成一系列复杂音视频的编辑过程。从而在音视频特效处理方面正式的进入工程大门。 高级三章完成后,后续的继续学习提升将脱离工程范畴。因此,更进一步的探索,就要求深入了解算法和硬件驱动,从而衔接到 AI-CV 等方面的相关研究工作,或游戏引擎物理引擎的开发架设。此两个方向的经典文献与著作较多,且已有成熟体系,因此本书既到此为止。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:11 "},"DONATE.html":{"url":"DONATE.html","title":" =[>> 赞助本作© <<]= ","keywords":"","body":"买杯咖啡 如果您愿意为本书爆肝的作者买一些精神食粮,来让他当一名 24H 狼灭的话... Buy Me Espresso 👇( ✨w✨)👇 WeChat Best Wish!💗 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:11 "},"Chapter_1/Language/cn/Apex_1_Introduce.html":{"url":"Chapter_1/Language/cn/Apex_1_Introduce.html","title":"一、音频的保存与还原","keywords":"","body":"一、音频的保存与还原 引言 声音是音视频的重要组成部分。当代计算机体系中对声音的一系列处理,被统称为数字音频技术(Digital Audio Tech)。什么是音频?音频是如何被数字化表示和重现的? 
本章节主要整理说明了,部分数字音频的构成、调制和保存。通过对当代计算机图像有关音频处理发展史的梳理,以期为工程上对音频进行操作和分析,提供必要知识图谱。 声波是音频的载体,因此对音频的讨论,也就是对声波特性的讨论。 关键字:音频基础、音频三要素、音频频谱图、音频调制、音频压缩、音频格式 目录 1.1 音频基础 1.2 声波三要素(Three Elements of Acoustics) 1.3 声音三要素(Three Elements of Sounds) 1.3.1 音高(Pitch) 1.3.2 响度(Loudness) 1.3.3 音色(Timbre) 1.4 声音的解构 1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths) 1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz) 1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour]) 1.4.4 感观:频响曲线(FRC [Frequency Response Contour]) 1.4.5 工程:频谱图(Spectrum) 1.5 声音数字化 1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source) 1.5.2 模数转换(A/D [Analog-to-Digital]) 1.5.3 数模转换(D/A [Digital-to-Analog]) 1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM) 1.6 音频的存储 1.6.1 音频格式(Audio Format) 1.6.2 无压缩编码格式(Uncompressed Encode) 1.6.3 无损压缩编码格式(Lossless Encode) 1.6.4 有损压缩编码格式(Uncompressed Encode) 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_1.html":{"url":"Chapter_1/Language/cn/Docs_1_1.html","title":"1.1 音频基础","keywords":"","body":"1.1 音频基础 数字音频技术(DAT [Digital Audio Tech]) 是当代计算机音像学的基础综合科学。其代指一系列,以 电-力-声类比(Electrical-Mechanical-Acoustic Analogy) 、 心理声学模型(Psychoacoustics Model) 等数学工具对声音进行记录、转换、存储、编解为可由挂载数字音频设备处理、播放、操作的数据,的方法论。它是一门包含了 心理声学(Psychoacoustics) 、 电声学(Electroacoustics) 、 数字信号处理(DSP [Digital Signal Processing]) 等领域知识的复合学科。 早期的探索与积累 人类对声音的探索从诞生伊始就伴随着文化和科技的发展,贯穿于历史长河之中。但以可持续化存储保存为目的,并系统性的总结为科学体系,还要从 音频录制(Audio Recording) 技术的出现开始,一直持续至今。由于历史跨度和分界相对明显,学界公认采用以音频存储介质的更替,来作为不同时代的划分。依此,当下总共经历了 4 个大的时期: 唱筒时代【物理介质】(The Phonograph era,1877 ~ 1925) 唱片时代【物理介质】(The Gramophone era,1925 ~ 1945) 磁带时代【磁力介质】(The Magnetic era,1945 ~ 1975) 数字处理时代(The Digital era,1975 ~ Present) 唱筒时代(物理介质) 1857 年,由法国科学家 斯科特· 德· 马丁维尔(Scott de Martinville) 发明声波记振仪,实现了将自然界中物理声音保存到留声媒介上,使人们首次完成了对声音信号的长时间保存。开启了物理媒介时代。当时声波记振仪的主要应用是用来辅助声学研究,通过仪器虽然能够绘制被录制声音的声波线,却无法再将声波线还原为音频回放。不过,这样的探索却为了后续提供了前置物理基础。 图 1-1 声波记振仪记录的首个音频振幅信号 在 1877年,托马斯·爱迪生基于声波记振仪原理,和与贝尔竞争发明电话过程中的启发,发明了第一台 留声机(Phonograph)。逆向思维解决了回放的问题。 留声机通过类似声波记振仪的运行方式,通过摆动式金属唱针,将录制的音频刻画在配套裹有锡箔纸的金属圆桶上。通过收音喇叭录制声音的同时,转动手摇把,推动锡纸移动,来记录保存声音到桶上对应螺纹声波线里。因此,回放时,只需要将录制用的金属探针更换为轻压弹簧探针后,从开始录制位置,手摇转动把推动锡箔纸,按录制方向旋转推移,就能够得到保存的音频回放了。不过第一代原型机,存在留声时间短,且音量不足,声音不清晰的问题。因此,在 1897 年发布的第二代留声机中,爱迪生使用了蜡筒代替了锡箔纸,让录制变为了可重复的过程。并通过增加了发条传动机制,剔除了人为摇动传动杆不匀速,引入的失真问题。二代在扩音器上使用了耳蜗结构大喇叭,物理提高了收扩音效果。 即便如此,二代留声机也因为无法复制拷贝留音,而最终以无法普及的失败告终。 这让人们逐渐意识到,单纯的录制,是无法满足人们对音频的需求的。人们开始寻找一种能够便于拷贝,且能高质量保存声音的手段。 唱片时代(物理介质) 其实早在初代留声机蓬勃发展的后几年,唱片就已经开始流传了。1887 年德裔美国工程师艾米利·伯林纳,发明了 圆盘式留声机(Gramophone) 和 唱片(Gramophone Record)。但是由于早期唱片先是面临了复刻问题,后虽然通过涂蜡锌板和镀金母版解决了量产问题,可却因为成本问题并不能被大众接受。直到 1891 年,伯林纳发现通过虫胶作为原材料,能达到和植物橡胶等同的保存水平,并具有高可塑性的特点,才最终解决了量产和成本问题。 图 1-2 首个手摇式圆盘唱片机(Gramophone) 于是,1893年,伯林纳和其合作伙伴共同于美国、英国、德国,先后成立了 留声机公司(Gramophone Company),制作销售圆盘式留声机并灌录唱片。这就是后来业界顶顶大名(中古黑胶铁烧)的 RCA、EMI(Electric&Musical Industries Ltd.)、DG 公司的前身。从此正式开启了与唱筒留声机的竞争。 图 1-3 早期黑胶唱片(Gramophone/Vinyl Record)示意图 直到 1929 年,随着爱迪生停止了最后一条唱筒生产线,唱片类型留声机因其高效、高保真(在当时看来)、高性价比和高量产的特性,彻底的击败了唱筒时代。 但看似已然立于不败之地的唱片,也仍然存在各种各样的问题。其中最致命的莫过于,唱片本身保存所需要占用的物理空间,有些过大了。换一种更为科学的表述就是,唱片本身的物理信息密度仍然不够小。这便是它的阿喀琉斯之踵。 磁带时代(磁力介质) 20世纪30年代,德国“法尔本”和“无线电信”两家公司的工程师们发明了一种有氧化铁涂层的塑料带,创造出了 磁带(Magnetic Type) 的雏形。但由于三氧化二铁本身化学特性,使得录制的声音因材质均匀程度偏差导致了部分失真问题,而无法与唱片抗衡。直到1947年,美国人马文·卡姆拉斯对原有的三氧化二铁磁带进行了改善。卡姆拉斯采用了一个完整的磁性线圈来代替录制磁头,使用一根钢丝(后来工程优化成为了磁针)嵌入到磁线圈中。利用空气作为缓冲,以磁场频震间接录制音频,从而保证了线圈磁力不会干扰到信号的录制。这使得磁带记录所得音频数据,较改进之前的响度有了4倍左右的提升。不过,随之而来的是复杂工艺带来的成本问题。 图 1-4 早期磁带机(Tape Drive)示意图(糟糕的大小) 这个邻人绝望的情况,直到1963年,才由来自飞利浦公司的荷兰工程师 劳德维克·卢·奥滕斯(Lou Ottens) 解决。奥滕斯受到了 RCA 的 “音频盒子”(Sound Tape 
Cartridge) 启发,通过缩减存储时长和采用多年迭代而来的更先进磁性塑料软带材质,推出了 卡式磁带(Cassette Tape)。极大的缩减了磁带保存音频的空间体积,使得一般客户有了更大的意愿来使用这种便携的音频存储媒介。 图 1-5 卡式磁带(Cassette Type)示意图 而随着 1964 年,察觉到卡式磁带革命性成果的飞利浦,乘势而为的推出了可自由录音的便携式磁带录音机后,由黑胶唱片所主导的最后一片大众音频阵地,也正式宣告被磁带所取代。 值得庆幸的是,因为黑胶唱片的物理录入和存储特性,使得它再音质还原上,能够较好的稳定保存录制时的音轨特征。从而让众多高质量唱片公司仍然愿意为新颖的歌手专辑,推出黑胶介质。让黑胶唱片在音乐发烧友等群体中,延续了较高的认同,从而避免了像唱筒一般,被彻底淘汰的命运。 之后,磁带登临主流,在 19世纪 70 年代至 80 年代间,得到了快速发展。以索尼(Sony)Walkman 系列为代表,造就了集磁带和便携随身听技术于一身的全世界范围的风靡。 但是,好景不长。数字时代随着 CD 进入大众视野,在短期内极大的抢占了原磁带的消费级音频市场。 数字处理时代 值得一提的是,磁带虽然淡出了主流视线,但是因为其本身的可靠存储特性,仍然被拥有大量数据的团体和公司,作为最为可靠的数据备份方式而使用着。这让磁带仍然在纯粹的数据持续化存储方面,保有了巨大活力。大公司往往也不吝惜对此投入,例如 2015 年 IBM 和富士影视(Fuji Film)就共同研发推出了高达 220TB 存储量的单盒存储磁带,其成本仅为同类型硬盘十五分之一。 近几年,磁带存储保有存储量发展,也仍然保持了每年越 32%~37% 的增速,成为了在摩尔定律逐步失效背景下,仍然满足摩尔定律,并久经考验的信息存储技术。 可见,未来随着大数据模型和各种存储骤增的前提下,长效存储的磁带,短时间内必然是不会退出历史舞台的。 其实,到这里我们能够发现,音频的数字时代似乎并不是一种标的于存储媒介的物理类型的一种分类。数字时代音频,更多的是对音频本身的保存方式抽象手段,进行的一种分代。即介质本身,已经不在被认为是区分时代差异的关键点,而各类压缩算法的突破,则成为了真正的关键。 在 CD 时代伊始,音频的格式就从传统的纯物理记录方式,演变成了调制解调(PCM)配合格式压缩存储的处理过程。这正是数字时代和以往传统时代相比,最为显著的特征。 因此,想要理解并处理音频,首先需要从如何衡量 声音(Sounds) 开始。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_2.html":{"url":"Chapter_1/Language/cn/Docs_1_2.html","title":"1.2 声波三要素(Three Elements of Acoustics)","keywords":"","body":"1.2 声波三要素(Three Elements of Acoustics) 声音(Sounds) 是对所有由振动产生,可以被人感知并理解的,由固体、液体、气体等介质而传播的一类 声波(Acoustic Wave) 的统称 [1] 。只有被人能够听到的声波,才属于声音。所以,声音也可以被称为狭义声波(Narrow Acoustic Wave)。由于其本质仍是声波。客观对声波的测定量,也是同作用于声音的。 什么是声波呢? 声波(Acoustic Wave) 是指在介质中传播的机械波。其本质是振动中的质点,在介质中以波的形式传播 [2] 。因此,声波既可以是 纵波(Longitudinal Wave),也可以是 横波(Transverse Wave)。 根据两种基本机械波的特性, 横声波(Transverse Acoustic Wave) 只能在固体介质中传播; 纵声波(Longitudinal Acoustic Wave) 则能在固/液/气或介于中间态的介质中传播; 理想声波方程式(Ideal Acoustic Wave Equation) 即然是机械波,那么从波的传播角度,就可以根据机械波的物理性,测量出衡量声波的一维传播方向关系: ∂2p∂t2=c2⋅∂2p∂x2 {\\displaystyle \\begin{aligned} \\frac{\\partial^2 p}{\\partial t^2} = c^2 \\cdot \\frac{\\partial^2 p}{\\partial x^2} \\\\ \\end{aligned} } ∂t2∂2p=c2⋅∂x2∂2p 其中, 以 ccc 代表 声速(Propagation Speed),即声波的传播速度,单位常用 米/秒(m/s) 以 ppp 代表 声压(Acoustic Pressure),即声波的压强,单位为 帕斯卡(Pa) 以 xxx 代表 声位(Spatial Position),即声波的当前空间位置,单位为常用 米(m) 以 ttt 代表 时刻,单位常用 秒(s) 这就是著名的 一维声波恒等式(1D Acoustic Wave Equation)。而以 x⃗\\vec{x}x⃗ 表示当前声位在空间中距离发出点(即原点,我们假设声波从原点产生)的位姿,将传播从一维扩展到 dim(x⃗)=ndim(\\vec{x}) = ndim(x⃗)=n 维空间,则有: ∂2p∂t2=c2⋅∇2p=c2⋅Δpx⃗=c2⋅∑i=0dim(x⃗)(∂2p∂xi2) {\\displaystyle \\begin{aligned} \\frac{\\partial^2 p}{\\partial t^2} &= c^2 \\cdot \\nabla^2 p = c^2 \\cdot \\Delta p_{\\vec{x}} \\\\ &= c^2 \\cdot \\sum_{i=0}^{\\dim(\\vec{x})} \\left( \\frac{\\partial^2 p}{\\partial x_i^2} \\right) \\\\ \\end{aligned} } ∂t2∂2p=c2⋅∇2p=c2⋅Δpx⃗=c2⋅i=0∑dim(x⃗)(∂xi2∂2p) 得到了通用的 理想声波方程(Ideal Acoustic Wave Equation),也称为 费曼三维声波恒等式(Feynman's 3D Acoustic Wave Equation) [3] 。 可见,时间 ttt 时的声压有关时间的二阶偏导数,和该时刻下,声波所处空间位置的二阶导数与声速的平方,成正相关。在介质均匀的理想条件下,已知 声速 ccc 、声压 ppp 、声位 xxx 中的任何两个量,都能推导出时刻 ttt 的另外一个定量取值。 所以,声速(Propagation Speed) 、声压(Acoustic Pressure) 、声位(Spatial Position) 被称为 广义声波三要素,简称 声波三要素(Three Elements of Acoustics)。 但是,人对声音的感知充满了主观因素,纯物理测量值虽然能够描述声音的客观特性,却无法度量声音的主观成分。我们还需要介于主客观之间的兼容标的,来协助对主观感受的量化表示。不过,根据声波三要素,我们却可由此来对新定主观体系下的度量衡,进行客观测定。并用于不同参考系下的转换表示。这点即是声音三要素的底层科学支撑,很重要。 什么是 声音三要素(Three Elements of Sounds) 呢? 
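在进入声音三要素之前,这里先给出一段一维理想声波方程的有限差分数值模拟草图,直观感受声压 p 如何随时间与位置按声速 c 传播。其中网格规模、步长与初始高斯脉冲等参数均为演示用的假设取值,仅为示意而非任何标准实现:

```python
import numpy as np

# 一维声波方程 d^2p/dt^2 = c^2 * d^2p/dx^2 的有限差分数值示意
# 网格大小、步长与初始脉冲均为演示用的假设取值
c = 343.0                      # 声速(空气,m/s)
nx, nt = 400, 600              # 空间、时间网格数
dx = 0.01                      # 空间步长(m)
dt = 0.5 * dx / c              # 时间步长,满足稳定条件 c*dt/dx <= 1

x = np.arange(nx) * dx
p_prev = np.exp(-((x - x.mean()) / 0.05) ** 2)    # 初始声压:中心处的高斯脉冲
p_curr = p_prev.copy()                            # 初始速度为零
coef = (c * dt / dx) ** 2

for _ in range(nt):
    p_next = np.zeros_like(p_curr)
    # 中心差分:p_next = 2*p_curr - p_prev + coef * 二阶空间差分
    p_next[1:-1] = (2 * p_curr[1:-1] - p_prev[1:-1]
                    + coef * (p_curr[2:] - 2 * p_curr[1:-1] + p_curr[:-2]))
    p_prev, p_curr = p_curr, p_next               # 两端保持为 0,相当于刚性边界

print("传播 %d 步后的最大声压: %.4f" % (nt, p_curr.max()))
```

可以看到,初始的高斯声压脉冲会分裂为向左右两侧以声速传播的两列波,这正是一维声波恒等式所描述的行为。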
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_3.html":{"url":"Chapter_1/Language/cn/Docs_1_3.html","title":"1.3 声音三要素(Three Elements of Sounds)","keywords":"","body":" Hex Data Display .hex-container { text-align: center; } .hex-data { display: inline-block; text-align: left; font-weight: bold; font-family: monospace; white-space: pre; } 1.3 声音三要素(Three Elements of Sounds) 声音三要素(Three Elements of Sounds) 是人们从 心理声学(Psychoacoustics) 角度,对最能影响人对声音本身感官感受的,三个最重要关键参数的归纳。分别是:音高(Pitch) 、 响度(Loudness) 、 音色(Timbre)。 预备乐理(声乐)知识 由于 声音(Sounds) 和 音乐(Musics) 密不可分。而很多知识,尤其是人的主观认知,总是会和先验艺术有关。为了接下来的工程方面理解,这里先非展开的,提前介绍一些关键 声乐(艺术)概念 : 纯音(Pure Tone),是指单一频率的正弦函数波形声波的声音; 音阶(Octave),即倍频程,指半频率增减的八度音阶。属于工程声乐学; 音程(Interval),指两个纯音之间所差的音阶体系下的距离,即度数; 音名(Names),指音阶不变的前提下,相隔八度的纯音的集合。属于声乐术语(艺术); 音级(Steps),指同音名下,从低到高的每个独立纯音的层级。属于声乐术语(艺术); 半音(Semitone),指音程为音阶一半的音名,即(八度下的)四度音; 音调(Notes),全体音名、半音的统称。在欧拉提出 调性网络 后,来自于网络拓扑; 音分(Cent),人为对两个相邻半音间的音程,以 波长比 为 12002^{1200}\\sqrt{2}1200√2 ,作 100 非等分; 音序(Sequence),指顺序排列下,同音级的相邻两个音调的距离; 除了八度音外,还有 七度音(Heptachord) 和 五度音(宫商角徵羽),本文从工程特点出发, 统一用八度音(Octave)代指音阶(音阶英文是 Gamut、Scale,易与其它概念造成混淆)。 理想的音阶是由纯音构成。 下文若无说明,音阶 均采用理想音阶(Ideal Octave)。 而 八度指的是在八度音下,同一个音名两个相临音级差异,即音程,为 八度。 八度音阶包含 7 个音名,5 个半音,8 个音级,即钢琴键位: 图 1-6 八度音钢琴键盘示意图 图中,黑色琴键为半音,白色琴键为纯音(理想)。 而 C4 则是 A4(440 Hz)对应通用 A440 标准下的基准键(Standard Key),有 C4 为 261.63Hz 标准。 Hz 是频率的单位,我们将在后续介绍。 A440 八度音阶又被称为 斯图加特音阶(Stuttgart Octave),属于 ISO 16 标准。根据标准,有音阶频率表如下: StepsNames 0 1 2 3 4 5 6 7 8 C 16.352(−48) 32.703(−36) 65.406(−24) 130.81(−12) 261.63(0) 523.25(+12) 1046.5(+24) 2093.0(+36) 4186.0(+48) C♯/D♭ 17.324(−47) 34.648(−35) 69.296(−23) 138.59(−11) 277.18(+1) 554.37(+13) 1108.7(+25) 2217.5(+37) 4434.9(+49) D 18.354(−46) 36.708(−34) 73.416(−22) 146.83(−10) 293.66(+2) 587.33(+14) 1174.7(+26) 2349.3(+38) 4698.6(+50) D♯/E♭ 19.445(−45) 38.891(−33) 77.782(−21) 155.56(−9) 311.13(+3) 622.25(+15) 1244.5(+27) 2489.0(+39) 4978.0(+51) E 20.602(−44) 41.203(−32) 82.407(−20) 164.81(−8) 329.63(+4) 659.26(+16) 1318.5(+28) 2637.0(+40) 5274.0(+52) F 21.827(−43) 43.654(−31) 87.307(−19) 174.61(−7) 349.23(+5) 698.46(+17) 1396.9(+29) 2793.8(+41) 5587.7(+53) F♯/G♭ 23.125(−42) 46.249(−30) 92.499(−18) 185.00(−6) 369.99(+6) 739.99(+18) 1480.0(+30) 2960.0(+42) 5919.9(+54) G 24.500(−41) 48.999(−29) 97.999(−17) 196.00(−5) 392.00(+7) 783.99(+19) 1568.0(+31) 3136.0(+43) 6271.9(+55) G♯/A♭ 25.957(−40) 51.913(−28) 103.83(−16) 207.65(−4) 415.30(+8) 830.61(+20) 1661.2(+32) 3322.4(+44) 6644.9(+56) A 27.500(−39) 55.000(−27) 110.00(−15) 220.00(−3) 440.00(+9) 880.00(+21) 1760.0(+33) 3520.0(+45) 7040.0(+57) A♯/B♭ 29.135(−38) 58.270(−26) 116.54(−14) 233.08(−2) 466.16(+10) 932.33(+22) 1864.7(+34) 3729.3(+46) 7458.6(+58) B 30.868(−37) 61.735(−25) 123.47(−13) 246.94(−1) 493.88(+11) 987.77(+23) 1975.5(+35) 3951.1(+47) 7902.1(+59) 表格横向为音级,纵向为音名(包含半音)。橙色为标准钢琴,所包含的八度音阶。表中数值格式为: 【对应频率(Hz)】(距离 C4 基准的(+/-)音序) 简单乐理知识准备就绪。现在,读者肯定存在大量思考,比如:什么或为什么是频率?这和声音三元素又有什么关系?乐理和工程又是怎么关联的? 
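在进入细节之前,可以先用一小段代码验证上表中的频率取值:十二平均律下,以 A4 = 440 Hz 为基准,音序每差 1,频率相差 2^(1/12) 倍。代码中的音名排列与打印格式为演示用的约定写法:

```python
import numpy as np

# 十二平均律:以 A4 = 440 Hz 为基准,音序每 +1,频率乘以 2^(1/12)
# NAMES 的排列顺序与距 C4 的音序(0~11)对应,为演示用的约定
NAMES = ["C", "C#/Db", "D", "D#/Eb", "E", "F",
         "F#/Gb", "G", "G#/Ab", "A", "A#/Bb", "B"]

def note_freq(seq_from_c4):
    """给定距 C4 的音序(A4 为 +9),返回对应基波频率(Hz)"""
    return 440.0 * 2.0 ** ((seq_from_c4 - 9) / 12.0)

# 打印音级 4(C4~B4)一整个八度的频率表,并附 C5
for i, name in enumerate(NAMES):
    print(f"{name:<6} {note_freq(i):8.2f} Hz  ({i:+d})")
print(f"{'C5':<6} {note_freq(12):8.2f} Hz  (+12)")

# 验证:C4 应约为 261.63 Hz,A4 应精确为 440.00 Hz
assert abs(note_freq(0) - 261.63) < 0.01
assert abs(note_freq(9) - 440.0) < 1e-9
```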
让我们带着这些知识和疑问,来进入细节。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_3_1.html":{"url":"Chapter_1/Language/cn/Docs_1_3_1.html","title":"1.3.1 音高(Pitch)","keywords":"","body":"1.3.1 音高(Pitch) 音高(Pitch) 是代表声音振动频率高低的 主观感知量(Subjective Perceptions),是映射自对应声波频率纯客观物理量的心里声学概念。有时,我们会用 音调/声调(Tone)代指音高的工程名称,这其实不够准确。若发生这种情况,我们就 不能 将代指音高的音调,与乐理中关联音阶(Octave)的音调(Tone)等同。两者存在换算但并不是一个概念。 即,音高(Pitch)不是音调/声调(Tone),更不是音阶(Octave)。 美(Mel [Melodies])& 美体系(Mel Scale) 音高的单位是 美(Mel [Melodies]),这是一个主观标定的单位。以 美(Mel)单位来衡量音高的系统,被称为 美体系(Mel Scale)。该体系来自于美国心理学家 史丹利·史密斯·史蒂文斯(Stanley Smith Stevens,1906-1973) 于 1963 年进行的有关心理声学定量的研究 [4] 。所以, 不属于 当前 国际通用的计量体系单位(SI Unit [International System of Units])。 不过,凭借 美体系(Mel Scale) 在人耳感知上相对生理准确的量化,和本身在出发点设定上存在和频率(Frequency)之间的 直接函数映射。所以,美(Mel)常被选定为统一单位,在声学工程上作基础标的。记美体系音高为 PmP_mPm ,频率为 FFF ,有 1963 的早期换算(现已废弃): Pm=2595⋅log10(F) {\\displaystyle \\begin{aligned} P_m &= 2595 \\cdot \\log_{10} \\left(F \\right) \\\\ \\end{aligned} } Pm=2595⋅log10(F) 这是以 1000 Hz 响度为 40 dB(声压级)的纯音(即只包含一个频率)为 1000 Mel ,来测算拟合得到的经验公式。受限于检测设备,会存在一定的误差。 因此,该公式对应拟合方式,在随后的 1968、1976、1990 年,分别经历了三次较大程度的重测。而现在我们采用的主要有 两套转换标准。 一个是由 道格拉斯·奥肖内西(Douglas O'Shaughnessy) 在 1976 年修订的 1000Hz 基准(1000 mel)按 700Hz 分割转换标准 [5] ,被称为 奥肖内西美体系(O'Shaughnessy's Mel Scale) : Pm=2595⋅log10(1+F700) {\\displaystyle \\begin{aligned} P_m &= 2595 \\cdot \\log_{10} \\left(1 + \\frac{F}{700} \\right) \\\\ \\end{aligned} } Pm=2595⋅log10(1+700F) 另一个则是 1999 年由 MATLAB 主导的修订结果 [6] ,被称为 斯莱尼美体系(Slaney's Mel Scale)。这也是 librosa 库采用的算法,有: Pm={3F200, F1000 Hz15+27⋅log6.4(F1000), F≥1000 Hz {\\displaystyle \\begin{aligned} P_m &= \\begin{cases} \\frac{3F}{200} &, \\ F Pm=⎩⎪⎨⎪⎧2003F15+27⋅log6.4(1000F), F1000 Hz, F≥1000 Hz 两者差异,如下图: 图 1-7 两种美体系(Mel Scale)差异对比(0-8000 Hz)示意图 相对来说,在不存在体系换算的条件下,会优先选择 奥肖内西 转换公式。而当存在系统换算,尤其是涉及 librosa 库时,建议优先以统一体系为要求,采用相同体系的转换公式。 需要注意的是,美体系都是对单一频率纯声的转换。而什么是频率呢? 
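在回答这个问题之前,先给出两套美体系换算的最小代码示意:奥肖内西式与斯莱尼分段式按上文规格实现;若环境中安装了 librosa,还可用其 hz_to_mel 的 htk 开关做对照。示例中的频率取值仅为演示假设:

```python
import numpy as np

def hz_to_mel_oshaughnessy(f):
    """奥肖内西(1976)换算:2595 * log10(1 + F/700)"""
    return 2595.0 * np.log10(1.0 + np.asarray(f, dtype=float) / 700.0)

def hz_to_mel_slaney(f):
    """斯莱尼换算:1000 Hz 以下取 3F/200,以上取 15 + 27 * log_{6.4}(F/1000)"""
    f = np.asarray(f, dtype=float)
    linear = 3.0 * f / 200.0
    log_part = 15.0 + 27.0 * np.log(f / 1000.0) / np.log(6.4)
    return np.where(f >= 1000.0, log_part, linear)

freqs = [220.0, 440.0, 1000.0, 4000.0, 8000.0]          # 演示用的假设频率点
print("O'Shaughnessy:", np.round(hz_to_mel_oshaughnessy(freqs), 2))
print("Slaney       :", np.round(hz_to_mel_slaney(freqs), 2))

# 若已安装 librosa,可用 htk 开关对照两套体系(htk=True 即奥肖内西式)
# import librosa
# print(librosa.hz_to_mel(freqs, htk=True))   # 奥肖内西换算
# print(librosa.hz_to_mel(freqs, htk=False))  # 斯莱尼换算(librosa 默认)
```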
频率(Frequency) 频率(Frequency) 是指声音对应机械波属性的源振动频率。是声音三要素中唯一的纯客观物理量。当然,一般我们所称的声音的频率,都是指可被感知的声音频率,即前文提到的 狭义声波(Narrow Acoustic Wave) 范围的 可听频率(AF [Audible Frequency])。 频率的单位是 赫兹(Hz [Hertz]),表示单位时间一秒内,振源发生完整周期性往复运动的次数,即 10Hz=10/s10 Hz = 10/s10Hz=10/s 。假设存在波长为 λ\\lambdaλ ,波速为 ccc 的波,有相应周期为 TTT ,频率为 FFF ,则: F=1T,c=λF {\\displaystyle \\begin{aligned} F &= \\frac{1}{T} \\quad ,\\quad c = \\lambda F \\\\ \\end{aligned} } F=T1,c=λF 在标准大气压的理想空气介质中,人类能够听见并识别大约 20Hz~20000Hz 频率范围的声波。有 AF 属于 20Hz~20000Hz。 以此为基准, 频率小于 20Hz 范围的声波,被我们称为 次声波(Infrasound)。而 频率大于 20000Hz 范围的声波,被我们称为 超声波(Ultrasound)。次声波和超声波都是相对于人而言的 单阈范围域。 图 1-8 三类声波范围示意图(蓝色指狭义声波) [2] 即然被归为声音三要素,就表示人对不同频率声音的听感有不少差异。在假设其它影响量不变的理想情况下,本书查阅了一些基于日常关联心理声学测量的结果,汇总如下表以供参考: Frequency(Median) Object Feelings Description(Subjective) 20Hz 发动机 汽车呼啸而过的轰鸣声 25Hz 大提琴的最低音调 类似低音炮发出的震撼 50Hz 洗衣机的运转时 洗衣机正常工作时的声响 100Hz 柜式冰箱运转时 柜式冰箱压缩机工作的声响 200Hz 剧院环境男低音 低沉浑厚的歌声 500Hz 轮播式电话铃声 是种清脆响亮的声音 1000Hz 钢琴中音C大调 更为清脆明亮的声音 2000Hz 剧院环境女高音 高亢嘹亮的歌声 4000Hz 蚊子飞行时 嗡嗡且恼人的脆鸣声 8000Hz 发光二极管示波器 实验室示波器工作的声响 12000Hz 成熟家犬 狗吠叫警示时的吼声 18000Hz 超声波清洗器 清洗器工作时的吱吱声 从上表可知,以听感角度考虑会十分的主观。但请不要忘记,频率本身是客观的。上述统计中采用的,是由选定样本声音中,所包含的所有频率声波的 复合频率中值(Median)。自然界中大部分声音 并非 由 单一 频率波构成。这也是产生不同音色(Timbre)的原因之一。 观察例举的统计结果,会发现直觉上非常吵闹的声音,如飞机发动机的声音,其频率并不一定高。而一些我们生活中感觉难以察觉的声音,如蚊子飞行声,却不一定低频。 显然,频率并不能代表声音的高低。我们还需要其它参数表示,那就是 响度(Loudness)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_3_2.html":{"url":"Chapter_1/Language/cn/Docs_1_3_2.html","title":"1.3.2 响度(Loudness)","keywords":"","body":"1.3.2 响度(Loudness) 响度(Loudness),有时虽不准确但也会被称为 音量(Volume),是指人对声音大小的 主观感知量(Subjective Perceptions),是对声波的 声压(Acoustic Pressure) 物理量的感观描述。响度是根据人对不同声压反应,而人为测量出的一种 非客观(Non-Objective) 的量化值。 响度的早期单位是 宋(Sone),这是一个 主观标定的单位。同 音高 一样,来自于 史丹利·史密斯·史蒂文斯(S. S. Stevens) 于 1963 年的实验结果 [4] 。 由于主观成分因素,宋 同样不属于当前 国际通用的计量体系单位(SI Unit [International System of Units]),而且因相对粗粒度而不太经常被采用。工程通用对响度进行衡量的单位,是声压级。 声压级(SPL [Sound Pressure Level]) 声压级(SPL [Sound Pressure Level]) 是由 美国国家标准学会(ANSI [American National Standards Institute]) 测定,同样为 主观标定的 响度单位。但由于相对精确的度量水平,在通常非实验误差情况下,可以作为稳定的工程单位使用。声压级单位为 分贝(dB),常用 NNN 表示代指。 我们有当前最新一次实验室精确测量的 ANSI/ASA S1.1-2013 规格为基准 [7] 。修正锚定 以 1000Hz 纯音,在人耳能听见的最小阈限压强 pref=20μPap_{ref} = 20 \\mu Papref=20μPa 为 1 dB1 \\ dB1 dB 值,由此推导得声压级公式: N=20⋅log10(ppref) {\\displaystyle \\begin{aligned} N &= 20 \\cdot \\log_{10} \\left( \\frac{p}{p_{ref}} \\right) \\\\ \\end{aligned} } N=20⋅log10(prefp) 其中, 以 ppp 代表当前目标声音,对应声波的 声压(Acoustic Pressure); 以 prefp_{ref}pref 代表 参考声压(Reference Acoustic Pressure),为规格固定量, pref=20μPap_{ref} = 20 \\mu Papref=20μPa ; 而在 ANSI 的声压级单位系统下,记宋体系响度为 LNL_NLN ,则分贝(dB)与 宋(Sone)存在换算关系: LN=2N−4010 {\\displaystyle \\begin{aligned} L_N &= 2^{\\tfrac{N - 40}{10}} \\\\ \\end{aligned} } LN=210N−40 即: N=40+log2(LN) {\\displaystyle \\begin{aligned} N &= 40 + \\log_2 \\left( L_N \\right) \\\\ \\end{aligned} } N=40+log2(LN) 除 宋(Sone) 以外,另一个常见的体系是 方(Phon)。在该修订里,规定: 40 dB=40 Phon=1 Sone {\\displaystyle \\begin{aligned} 40 \\ dB = 40 \\ Phon = 1 \\ Sone \\\\ \\end{aligned} } 40 dB=40 Phon=1 Sone 一般情况下,宋(Sone)和方(Phon)用于常量标记,而 SPL分贝(dB)用于响度值。 但是,从前文我们得知,自然界中的大部分声音,其本身就是复合的。这种情况下怎么评估它的响度呢? 
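在讨论复合声之前,先用一小段代码小结单一纯音情形下,声压(Pa)、声压级(dB SPL)与宋(Sone)之间的换算。注意由 L_N = 2^{(N-40)/10} 反解声压级时,完整形式为 N = 40 + 10·log₂(L_N);示例中的声压取值为演示假设:

```python
import math

P_REF = 20e-6   # 参考声压 20 μPa(规格固定量)

def spl_from_pressure(p):
    """由声压 p(Pa)计算声压级 N(dB SPL):N = 20 * log10(p / p_ref)"""
    return 20.0 * math.log10(p / P_REF)

def sone_from_spl(n_db):
    """由声压级换算宋:L_N = 2 ** ((N - 40) / 10)"""
    return 2.0 ** ((n_db - 40.0) / 10.0)

def spl_from_sone(l_n):
    """由宋反解声压级:N = 40 + 10 * log2(L_N)(注意系数 10)"""
    return 40.0 + 10.0 * math.log2(l_n)

p = 0.02                          # 演示用的假设声压:0.02 Pa
n = spl_from_pressure(p)          # 约 60 dB SPL
print(f"{p} Pa -> {n:.1f} dB SPL -> {sone_from_spl(n):.2f} Sone")
print(f"1 Sone -> {spl_from_sone(1.0):.1f} dB SPL(应回到 40 dB)")
```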
此时,就需要使用 复合频率下 的声音计算公式了。 复合响度公式(Multi-Source Loudness Formula) 假设一个 单一的自然声(Natural Sound),记为 N∑N_{\\sum}N∑ 由一组声压为 p=[p0, p1, ⋯ , pn]p = [p_0,\\ p_1,\\ \\cdots \\ ,\\ p_n]p=[p0, p1, ⋯ , pn] 的单频率声波组成,有: N∑=10⋅log10(p02+p12+⋯+pn2pref2)=10⋅log10(∑n(pipref)2) {\\displaystyle \\begin{aligned} N_{\\sum} &= 10 \\cdot \\log_{10} \\left( \\frac{ {p_0}^2 + {p_1}^2 + \\cdots +{p_n}^2 }{ {p_{ref}}^2 } \\right) \\\\ &= 10 \\cdot \\log_{10} \\left( \\sum^n \\left( \\frac{p_i}{ p_{ref}} \\right)^2 \\right) \\\\ \\end{aligned} } N∑=10⋅log10(pref2p02+p12+⋯+pn2)=10⋅log10(∑n(prefpi)2) 而工程中的单频率声波代表参数,可能直接为响度,如 L=[L0, L1, ⋯ , Ln]L = [L_0,\\ L_1,\\ \\cdots \\ ,\\ L_n]L=[L0, L1, ⋯ , Ln] 。则带入单频率响度公式,上式可写为: N∑=10⋅log10(∑n(pipref)2)=10⋅log10(∑n10Li10 dB)=10⋅log10(10L010 dB+10L110 dB+⋯+10Ln10 dB) {\\displaystyle \\begin{aligned} N_{\\sum} &= 10 \\cdot \\log_{10} \\left( \\sum^n \\left( \\frac{p_i}{ p_{ref}} \\right)^2 \\right) = 10 \\cdot \\log_{10} \\left( \\sum^n 10^{\\frac{L_i}{10\\ dB}} \\right) \\\\ &= 10 \\cdot \\log_{10} \\left( 10^{\\frac{L_0}{10\\ dB}} + 10^{\\frac{L_1}{10\\ dB}} + \\cdots + 10^{\\frac{L_n}{10\\ dB}} \\right) \\\\ \\end{aligned} } N∑=10⋅log10(∑n(prefpi)2)=10⋅log10(∑n1010 dBLi)=10⋅log10(1010 dBL0+1010 dBL1+⋯+1010 dBLn) 这就是 声音(复合声波)的响度公式。 可虽然 分贝(dB)系统最为广泛且常常被使用,但却 仍然不属于 国际通用的计量体系单位(SI Unit)。 真正被作为科学的单位,是声强(Sound Intensity)。 声强(Sound Intensity) 声强(Sound Intensity) 是对单个声波强度的科学表示,指声波在单位面积下所具有的声压(Acoustic Pressure),对外功率之和。 声强单位为 瓦每平方( W/m2W/m^2W/m2 ),一般被记为 III 表示。有: I=p⋅v⃗ {\\displaystyle \\begin{aligned} I &= p \\cdot \\vec{v} \\\\ \\end{aligned} } I=p⋅v⃗ 其中, 以 ppp 代表当前目标声音,对应声波的 声压(Acoustic Pressure) ; 以 v⃗\\vec{v}v⃗ 代表机械波的做功方向,是个 速度量,每个维度分量单位都为 米每秒(m/s) ; 由于一般我们用声强来计算,理想状态的当前声波能量值。为了简化计算,通常会选择均匀介质情况的理想单点声源,作为背景条件。这种情况下,做工方向 v⃗\\vec{v}v⃗ 就可以被认为是 球面坐标中,单位平方点的向外法向量了。 于是,声强 III 即可转为,由声压 ppp ,传播介质密度 ρ\\rhoρ ,和声速 ccc ,计算表示: I=p2ρc {\\displaystyle \\begin{aligned} I &= \\frac{p^2} {\\rho c} \\\\ \\end{aligned} } I=ρcp2 因为一般都是在空气介质中进行衡量,所以有 ρ≈1.293 kg/m3\\rho \\approx 1.293 \\ kg/m^3ρ≈1.293 kg/m3 ,而 c≈343 m/sc \\approx 343 \\ m/sc≈343 m/s 。 所以,根据声压快速获取对应声音在空气中的声强公式为: I≈p2443.499 W/m2 {\\displaystyle \\begin{aligned} I &\\approx \\frac{p^2} {443.499} \\ W/m^2\\\\ \\end{aligned} } I≈443.499p2 W/m2 使用上式速算时,压强取 帕斯卡(Pa)数量级下的数值即可。 那么,声强和响度是什么关系呢? 
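在回答声强与响度的关系之前,先用代码演示上面的复合响度公式,以及空气介质中由声压速算声强的近似式。示例中的各分量响度与声压均为演示假设:

```python
import numpy as np

def combined_spl(levels_db):
    """复合响度:N = 10 * log10( Σ 10^(Li/10) ),levels_db 为各单频分量的声压级"""
    levels_db = np.asarray(levels_db, dtype=float)
    return 10.0 * np.log10(np.sum(10.0 ** (levels_db / 10.0)))

def intensity_in_air(p):
    """空气中声强速算:I ≈ p^2 / (ρc) ≈ p^2 / 443.499 (W/m^2),p 单位为 Pa"""
    return p ** 2 / 443.499

print(combined_spl([60.0, 60.0]))          # 两个 60 dB 分量叠加 ≈ 63.0 dB
print(combined_spl([50.0, 60.0, 70.0]))    # ≈ 70.5 dB,高响度分量占主导
print(intensity_in_air(0.02))              # 0.02 Pa 对应的声强(W/m^2)
```

可以看到,两个等响分量叠加后仅提高约 3 dB,而多分量叠加时响度基本由最响的分量主导,这正是对数标度带来的直观效果。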
很遗憾,两者分属不同系统,并不存在 直接上的 关联。但存在 间接换算 关系。 声强级(SIL)与 声压级(SPL)的换算 声强级(SIL [Sound Intensity Level]) 类似于 声压级(SPL [Sound Pressure Level]) 的定义,皆是用来 主观标定 响度的单位系统/系统单位。SIL 的单位沿用了 SPL 的 分贝(dB),甚至两者换算公式,都可基本等同。所以,仍然以 NNN 表示声音响度,有: IIref=p2pref2=(ppref)2N=20⋅log10(ppref)=10⋅log10(IIref) {\\displaystyle \\begin{aligned} \\frac{I}{ I_{ref} } = \\frac{p^2}{ {p_{ref}}^2 } &= \\left( \\frac{p}{ p_{ref} } \\right)^2 \\\\ N = 20 \\cdot \\log_{10} \\left( \\frac{p}{ p_{ref} } \\right) &= 10 \\cdot \\log_{10} \\left( \\frac{I}{ I_{ref} } \\right) \\\\ \\end{aligned} } IrefI=pref2p2N=20⋅log10(prefp)=(prefp)2=10⋅log10(IrefI) 以 N∑N_{\\sum}N∑ 表复合,取声压 p=[p0, p1, ⋯ , pn]p = [p_0,\\ p_1,\\ \\cdots \\ ,\\ p_n]p=[p0, p1, ⋯ , pn] ,声强 I=[I0, I1, ⋯ , In]I = [I_0,\\ I_1,\\ \\cdots \\ ,\\ I_n]I=[I0, I1, ⋯ , In] ,则复合响度公式有: N∑=10⋅log10(∑n(pipref)2)=10⋅log10(∑n(IiIref))=10⋅log10(10L010 dB+10L110 dB+⋯+10Ln10 dB) {\\displaystyle \\begin{aligned} N_{\\sum} &= 10 \\cdot \\log_{10} \\left( \\sum^n \\left( \\frac{p_i}{ p_{ref} } \\right)^2 \\right) = 10 \\cdot \\log_{10} \\left( \\sum^n \\left( \\frac{I_i}{I_{ref}} \\right) \\right) \\\\ &= 10 \\cdot \\log_{10} \\left( 10^{\\frac{L_0}{10\\ dB}} + 10^{\\frac{L_1}{10\\ dB}} + \\cdots + 10^{\\frac{L_n}{10\\ dB}} \\right) \\\\ \\end{aligned} } N∑=10⋅log10(∑n(prefpi)2)=10⋅log10(∑n(IrefIi))=10⋅log10(1010 dBL0+1010 dBL1+⋯+1010 dBLn) 至此,两个主客观系统间,达成了转换条件。一般的 pref=20μPap_{ref} = 20 \\mu Papref=20μPa 时,有 Iref=1 pW/m2I_{ref} = 1 \\ pW/m^2Iref=1 pW/m2 。我们用声压级表示响度,而以声强计算能量。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_3_3.html":{"url":"Chapter_1/Language/cn/Docs_1_3_3.html","title":"1.3.3 音色(Timbre)","keywords":"","body":"1.3.3 音色(Timbre) 音色(Timbre),是指声音的主观成色。本身更偏向于乐理而非工程。 注意一个关键的理解偏差:音色不是音质! 
心理声学(Psychoacoustics)有时称其为 音调色(Tone Color) 或 音调质(Tone Quality),转译到中文若用音质来代替,实则是不准确的。音质(Sound Quality),是由信噪比(SNR [Signal to Noise Ratio])决定的工程量,后面章节会有介绍。之所以强调,是因为该点非常容易造成初学者混淆,从而提高学习门槛。 如果说音高(Pitch)和响度(Loudness)都是单一影响参数的代称,那么音色则是一种 复合影响因子 带来的,基于传统声乐经验和历史因素,对体感的 弱标准化 规定描述。也就是说,音色是三要素中,最为主观的一个了。 而不同的音色到底有何种区别呢?这需要从音色本质说起。 音色的频率链(Frequency Series)/ 谐波链(Harmonic Series) 考虑一个来自 ISO 16 标准音阶(Octave)的 纯音 A4(乐理 la,440 Hz),理想情况是 只有一个 频率,即 440Hz 的。但是,实际生活中,假设完美调校的钢琴和其他乐器,演奏的 A4 虽然能够听出来是对应音调,但也能明显可区分来自于不同乐器。甚至不同品牌厂家的同一种乐器,演奏同一音调时,也会有不同听感。 这种似是而非、若即若离的情况,是怎么一回事呢?想要回答这个问题,需要解释两个方面:“似是”的一面,和“若离”的一面。 我们知道,声音都是复合的一组频率。因此,从声波物理性出发: 将决定声音基准音调的单一频率,称为 基波(Fundamental) ; 将决定声音本身特征的衍生频率,称为 谐波(Harmonic) ; “似是”来自于相同的基波,基波决定音调(Note),即标志着声音本身的指向。“若离”来自于 谐波,这是决定一个 声音具体特征 的主要因素。不同声源弹奏同一 乐理音调(Music Note) 时,相同音调理想情况下,基波都是完全一致的。而组成声音的所有谐波差异,才导致了不同听感。 一般的,我们将由 一个基波(Single Fundamental) 和 一组谐波(Multi Harmonics) 共同叠加而成的声音,为 复合音(Complex Sounds)。 如下图,就是来自于ISO 16 标准音阶(Octave)调音的,单一音调 B3 在实际某钢琴上的表现。此钢琴 B3 复合音的组成中,最左侧蜂刺状 246Hz 频率位置 即为 B3 基波,而基波右侧其余蜂刺位则为该复合音谐波。 图 1-9 某钢琴标准 B3 调音的频率响度特征(响度归一化) [8] 而一个复合音中,从低频到高频所有纯音的频率,所构成的数组,就是 频率链(Frequency Series)/ 谐波链(Harmonic Series)。即从 工程角度 所理解的,声音的 音色(Timbre)。 由此,我们可知基波、谐波、音色三者的关系了。 基波(Fundamental) 基波(Fundamental),也称为 第一谐波(First Harmonic),指感观音色对应某指定 基准纯音(Standard Pure Tone) 的频率。基波决定某标准音阶在器乐设备上的准确性。 同一规范(如 A440)下,调校准确的各类声音设备,基波频率完全相等。 基波和频率间的换算为: Note(n)=(122)n−9×440Hz {\\displaystyle \\begin{aligned} Note(n) = \\left( ^{12}\\sqrt{2} \\right)^{n - 9} \\times 440 Hz \\\\ \\end{aligned} } Note(n)=(12√2)n−9×440Hz 其中, 以 nnn 表示 当前音名(Names)距离 C4 的音序(Sequence) ; 而 440Hz 即 A4 标定值,A4 与 C4 标准键(Standard Key) 的音序为 +9 ; 而同样的,当我们已知对应基波的频率,则可以计算出它与 C4 的音序,从而反向查表得到它在乐理上的音调。记目标基波频率为 F(n)F(n)F(n) ,则: n=12 log2(F(n)440 Hz)+9 {\\displaystyle \\begin{aligned} n = 12 \\ \\log_2\\left( \\frac{F(n)}{440\\ Hz} \\right)+9\\\\ \\end{aligned} } n=12 log2(440 HzF(n))+9 此公式,即为工程上常用的 A440 频率音序公式(Frequency Sequence Formula)。 谐波(Harmonic) 谐波(Harmonic) 指自指定基波以 整数倍频率 衍生的纯音声波。基波衍生的谐波一般不会仅有一个。假设基波位于谐波链(Harmonic Series)的第一位,有 i=1i = 1i=1 ,频率为 F(n)=F1F(n) = F_1F(n)=F1 。则位于顺序第 iii 位的谐波频率 F(i)F(i)F(i) 有: F(i)=i×F1 {\\displaystyle \\begin{aligned} F(i) = i \\times F_1 \\\\ \\end{aligned} } F(i)=i×F1 所以,仍然以之前的 钢琴 B3 为例,有: 图 1-10 某钢琴标准 B3 调音的谐波链(响度归一化)示意图 [8] 可见,决定整个谐波链的关键,就在于第一谐波,也就是基波上。而在基波响度相同的情况下,产生的第二、第三、... 
、第 i 谐波,其 数目 和 各自的响度,才确定了声源特色。 至此,声音三要素与工程量映射,就解释清楚了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_4.html":{"url":"Chapter_1/Language/cn/Docs_1_4.html","title":"1.4 声音的解构","keywords":"","body":"1.4 声音的解构 声音,大多为复合声,从前文的介绍中我们可以发现,至少能够从三个角度去构建参考系。即,乐理角度(艺术) 、 心理声学(感观) 、 声乐工程(声音三要素)。 不同角度观察到的,可以认为是同一声音在各自领域平面的投影。而我们通过这种方式,从不同的视角,拼接出了声音本身。所以,声音也可以被称为是某种程度上的高维信息。 并非 在不考虑传播时,直觉上的仅有时频那么简单。 接下来,我们便分别从这三个不同的视角,去看如何处理。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_4_1.html":{"url":"Chapter_1/Language/cn/Docs_1_4_1.html","title":"1.4.1 乐理:音调(Notes) & 五度圈(Circle of Fifths)","keywords":"","body":"1.4.1 乐理:音调(Notes)& 五度圈(Circle of Fifths) 在声音三要素开始的部分,我们已经简单介绍了一些乐理基础概念。而乐理对声音的描述,都是基于 音调(Note) 为出发点的。通过音调指向基音,建立主观参考系下的客观不变量,从而构造统一的联系。结合 系统的 记录方式,完成对一定时间段下,音乐的保存。 所以,乐谱(Musical Notation) 就是一种,以手动编排抄录的方式,进行声音持续化存储的早期人工手段。而 乐理音调(Music Note),以下我们简称为 音调(Note),就是一种粗粒度(相对于数字时代编码调制而言)的固定采样。 依旧采用 八度音(Octave)的音阶体系,首先需要建立乐理(艺术)心理(感观)转换。 音调(Notes)的 音程尺度描述(ISN [Interval Scale Name]) 这个我们已经介绍过。 八度音阶(Octave) 以钢琴音级为 4 时为准,包括音名、半音在内,共有 12 个,即 C、C♯/D♭、D、D♯/E♭、E、F、F♯/G♭、G、G♯/A♭、A、A♯/B♭、B 。为方便说明,我们补充下一级的 C5 到表中,有: C C♯D♭ D D♯E♭ E F F♯G♭ G G♯A♭ A A♯B♭ B C5 261.63(0) 277.18(+1) 293.66(+2) 311.13(+3) 329.63(+4) 349.23(+5) 369.99(+6) 392.00(+7) 415.30(+8) 440.00(+9) 466.16(+10) 493.88(+11) 523.25(+12) 明明是 八度音,却有包扩 C5 在内的 13 个音调。尺度不一太尴尬了,怎么办呢? 音乐艺术先贤们也遇到了同样的问题。于是,根据 两两相邻音调间的音程(Interval),在同音级下,有了不同的 音程尺度描述(ISN [Interval Scale Name])。正好作为转换起点。 在 C4 所在第 4 音级取 ISN。所有音调与 C4 相比音程为: Notes Frequency(Sequence) Interval Scale Name Interval as Notes C4 261.63 (0) 纯一度(P1 [Perfect Unison]) 0 C♯/D♭ 277.18 (+1) 小二度(m2 [Minor Second]) 0.5 D 293.66 (+2) 大二度(M2 [Major Second]) 1 D♯/E♭ 311.13 (+3) 小三度(m3 [Minor Third]) 1.5 E 329.63 (+4) 大三度(M3 [Major Third]) 2 F 349.23 (+5) 纯四度(P4 [Perfect Fourth]) 2.5 F♯/G♭ 369.99 (+6) 增四度(A4)/减五度(d5) 3 G 392.00 (+7) 纯五度(P5 [Perfect Fifth]) 3.5 G♯/A♭ 415.30 (+8) 小六度(m6 [Minor Sixth]) 4 A 440.00 (+9) 大六度(M6 [Major Sixth]) 4.5 A♯/B♭ 466.16 (+10) 小七度(m7 [Minor Seventh]) 5 B 493.88 (+11) 大七度(M7 [Major Seventh]) 5.5 C5 523.25 (+12) 纯八度(P8 [Perfect Octave]) 6 表内抽象的音程名中,出现了一些 非精确量词(Inaccurate Quantifiers) 被使用其中。确切的来说,基础量词有五种,由小到大分别是(注意简写时的 大小写区分 ): 减(d [Diminished]) 小(m [Minor])、纯(P [Perfect])、大(M [Major]) 增(A [Augmented]) 上述量词怎么来的呢?直接意义上,这是两套体系。一套基于 相对音程,一套基于 绝对音程。简单来说,取整数 k∈Zk \\in \\mathbb{Z}k∈Z 表示通用数字级。则 绝对音程(n.AI [Absolute Interval]),采用 减(d [Diminished]) 、 增(A [Augmented]),是取用第 4 级的纯一度(P1)就是 C4 261.63 Hz 作为原点。记原点音调音序为 norin_{ori}nori ,目标音调音序为 ntagn_{tag}ntag ,有: 减 k 度(dk [Diminished k]),意味着 Δn=ntag−nori=2(k−2)\\Delta n = n_{tag} - n_{ori} = 2(k - 2)Δn=ntag−nori=2(k−2) ; 增 k 度(Ak [Augmented k]),意味着 Δn=ntag−nori=2(k−0.5)\\Delta n = n_{tag} - n_{ori}= 2(k - 0.5)Δn=ntag−nori=2(k−0.5) ; 相对音程(n.RI [Relative Interval]),采用 小(m [Minor]) 、 纯(P [Perfect]) 、 大(M [Major]),是一种差值概念。记被比较的音调音序为 ncomn_{com}ncom ,而目标音调音序为 ntagn_{tag}ntag ,有: 小 k 度(mk [Minor k]),指 Δn=(ntag−ncom)%12∈{1, 3, 8, 10}\\Delta n = ({n_{tag} - n_{com}})\\%12 \\in \\{1,\\ 3,\\ 8,\\ 10 \\}Δn=(ntag−ncom)%12∈{1, 3, 8, 10} 时对应 k∈{2, 3, 6, 7}k\\in \\{2,\\ 3,\\ 6,\\ 7 \\}k∈{2, 3, 6, 7} ; 纯 k 度(Pk [Perfect k]),指 Δn=(ntag−ncom)%12∈{0, 5, 7, 12}\\Delta n = ({n_{tag} - n_{com}})\\%12 \\in \\{0,\\ 5,\\ 7,\\ 12 \\}Δn=(ntag−ncom)%12∈{0, 5, 7, 12} 时对应 k∈{1, 4, 5, 8}k\\in \\{1,\\ 4,\\ 5,\\ 8 \\}k∈{1, 4, 5, 8} ; 大 k 度(Mk [Major k]),指 
Δn=(ntag−ncom)%12∈{2, 4, 9, 11}\\Delta n = ({n_{tag} - n_{com}})\\%12 \\in \\{2,\\ 4,\\ 9,\\ 11 \\}Δn=(ntag−ncom)%12∈{2, 4, 9, 11} 时对应 k∈{2, 3, 6, 7}k\\in \\{2,\\ 3,\\ 6,\\ 7 \\}k∈{2, 3, 6, 7} ; 而在 所有音调与 C4 相比的音程表 中,之所以出现了 F♯/G♭ 用绝对音程(Absolute Interval)与 A440 440 Hz 相比,其它采用相对音程(Relative Interval)与 C4 261.63 Hz 相比的原因,正是在 Δn=ntag−ncom=6\\Delta n = n_{tag} - n_{com} = 6Δn=ntag−ncom=6 时,用相对音程的 小(m)、纯(P)、大(M) 无法描述 该音程。所以,不得已 才借用了绝对音程的 增(A)、减(d) 描述法。 现在,我们已知同音级下的音程表示了。不过实际使用中,往往会出现两个参与计算的音调是跨级的情况。虽然两方法都适用于跨越多音级(跨级)的音程计算,但 绝对音程(n.AI)和 相对音程(n.RI)在对此的表达上,还是存在较大差异的。 绝对音程(n.AI)的 跨级计算 绝对音程(n.AI) 因为存在原点而且不区分范围,因此可以在单一方向上持续增,或持续减。不过因为往低频方向持续运动,会可能有负值。 所以,除 Δn=6\\Delta n = 6Δn=6 情况外,我们一般只用它来像高频方向计数。而这种处理使得以 n.AI 公式计算出来是多少 k ,就应该称为增减多少 k 度(AK/dK)。 例如, 从 D4->C6 的音序差 Δn=24−2=2×11→k=13\\Delta n = 24 - 2 = 2 \\times 11 \\rightarrow k=13Δn=24−2=2×11→k=13 ,为 减十三度(d13) ; 从 D4->F6 的音序差 Δn=29−2=2×13.5→k=14\\Delta n = 29 - 2 = 2 \\times 13.5 \\rightarrow k=14Δn=29−2=2×13.5→k=14 ,为 增十四度(A14) ; 从 C4->F6♯/G6♭ 的音序差 Δn=30−0=2×15→k=17\\Delta n = 30 - 0 = 2 \\times 15 \\rightarrow k=17Δn=30−0=2×15→k=17 ,为 减十七度(d17) ; 相对音程(n.RI)的 跨级计算 相对音程(n.RI) 的跨级计算就要麻烦一些。这个麻烦主要体现在相对音程的音程尺度描述(ISN)在带上 Δn=6\\Delta n = 6Δn=6 从绝对音程中借用的 增四度(A4)/减五度(d5)后,也仅有 13 个。 所以,在跨级描述上,相对音程情况需要引入其它的量词用以记录级数差。一个简单的方法就是 在公式基础上,根据跨越的级数,在称为中增加 级数 x 七度 的大小。 例如, 从 D4->C6 的音序差 Δn=(24−2)%12=10→k=7(+7×1)\\Delta n = (24 - 2)\\%12 = 10 \\rightarrow k= 7 \\left(+ 7 \\times 1 \\right)Δn=(24−2)%12=10→k=7(+7×1) ,为 小十四度(m14) ; 从 D4->F6 的音序差 Δn=(29−2)%12=3→k=3(+7×2)\\Delta n = (29 - 2)\\%12 = 3 \\rightarrow k= 3 \\left(+ 7 \\times 2 \\right)Δn=(29−2)%12=3→k=3(+7×2) ,为 小十七度(m17) ; 但当 Δn=6m, m∈Z\\Delta n = 6m, \\ m \\in \\mathbb{Z}Δn=6m, m∈Z 时, 借用 的 增四度(A4)/减五度(d5) 又不能 换回绝对音程来重新计算,该怎么办呢? 相对音程针对这种情况,引入了 倍数(Multiples)来辅助标记。即 m倍增/m倍减。 例如, 从 C4->F6♯/G6♭的音序差 Δn=(30−0)%12=6→A4/d5(×2)\\Delta n = (30 - 0)\\%12 = 6 \\rightarrow A4/d5 \\left(\\times 2 \\right)Δn=(30−0)%12=6→A4/d5(×2) ,有 m=2m = 2m=2 的值,称为 二倍增四度(AAA4)/二倍减五度(ddd5)。即多出来的倍数 m=2m = 2m=2 ,就代表着需要 多写 几个 增(A)或 减(d)。 至此,结合 Δn=6\\Delta n = 6Δn=6 时的倍数描述 和 “±7” 度法,我们就能够从乐理(艺术)上形容跨多音级(Steps)的相对音程了。 不过,这样的算法要求我们知道当前音调的音序。但因为一般情况下,乐谱中采用的都是确认 大/小调 主音(Keytone) 后,对包含音调距离主音音程的符号化记录。所以, 必须要能获取主音的音序才能相对计算出,乐谱中的实际乐符的音程,进而推得音序和标的频率。 大/小调(Major Scale/Minor Scale) 什么是大/小调?大/小调(Major Scale/Minor Scale) 是古典音乐中,对一组参与演奏音调韵律的总结。不同 大/小调所采用的音调是不同的。 这里有相当多的乐理(艺术)细分,为了便于说明,除非特别声明,否则都认为 未指明类型的 大/小调 皆属于 自然音阶(Diatonic Scale)。 其中 大/小调 中的 大/小,虽同名于 相对音程 的 大/小,但两者却 并不是一个概念。大/小调 对大/小 的定义,并不是指音程差,而是指组成 大/小调 的自然音阶(Diatonic Scale)中 包含的一系列古典音调(Classical Tone)。 例如,C 大调(Major C)的主音(Keytone)就是 C4 261.63 Hz 。但总共包含: C4→D4→E4→F4→G4→A4→B4 C4 \\rightarrow D4 \\rightarrow E4 \\rightarrow F4 \\rightarrow G4 \\rightarrow A4 \\rightarrow B4 C4→D4→E4→F4→G4→A4→B4 所以,如果直接算。 从乐谱到我们可以使用的音程尺度名称间,还需要进行一次大/小调到实际音调组间的转换。 之后,才能够利用相对音程公式,完成快速反向计算来得到换算音序值。再用得到的音序值,查询基音频率。 因此,必须要依赖于 快速确定 大/小调 的手段。该手段就是 五度圈查询法。 五度圈(Circle of Fifths)查寻法 回到音乐(艺术)史早期,人们制定了诸如:古典五律、十二平均律等非精确度量衡。而在 十二平均律(12-TET [12-Tone Equal Temperament]) 中,将属于自然音阶(Diatonic Scale)的自然大调(Major Scale)第 4 音级 C–D–E–F–G–A–B 取为标准(此处取现代声学标准,明朝皇族世子朱载堉发明时,近代物理才刚起步,还未有机械波概念,所以仍是依赖于古筝琴律),而对 C-B(12-TET 采用的实际是 等效到同间隔的 C-F ) 间音调进行了 比例分割。 此举启发了人们对古典五律的划分,从而有了 自然大调(Major Scale),即 C大调, 的五度圈(Circle of Fifths)查寻法。这是一种将上文 12 音调以圆圈的形式串联的表示方式。当然,人们创造出该方法的时候,是凭借着历史经验总结而来的。不得令人感叹其中的智慧。 有速查图如下: 图 1-11 五度圈音调表示意图 [9] 此即为最早且被应用至今(如吉他等)的快速跨级查表法。 图中,大写字母代表自然大调(Major), 小写字母代表自然小调(Minor)。 音调(Note)所带的升降号( ♯/♭\\sharp/\\flat♯/♭ ),在音乐(艺术)中,被称作 调号(Key Signatures)。 以此为出发点,转换到同音级处理。就有, 同圈层 的音调,相邻两个音调间的音程(Interval),顺时针时差值为 Δn=(ntag−ncom)=7→k=5\\Delta n = ({n_{tag} - 
n_{com}}) = 7 \\rightarrow k=5Δn=(ntag−ncom)=7→k=5 纯五度(P5),逆时针时为 Δn=(ntag−ncom)=5→k=4\\Delta n = ({n_{tag} - n_{com}}) = 5 \\rightarrow k=4Δn=(ntag−ncom)=5→k=4 纯四度(P4)。称为 相邻调(Adjacent Key)。 五度圈中位于 同位置内外圈 的大小调,两者间的 主音(Keytone) 音程为 小三度(m3),且 主音调号(Key Signatures)相同。称为关系调(Relative Key)。 当我们从内圈向外查找,有音程: a->C(如 A4->C5)有 Δn=(12−9)%12=3(+0)→k=3\\Delta n = (12 - 9)\\%12 = 3(+0) \\rightarrow k=3Δn=(12−9)%12=3(+0)→k=3 ,为 小三度(m3) ; d->C(如 D4->C5)有 Δn=(12−2)%12=10(+0)→k=7\\Delta n = (12 - 2)\\%12 = 10(+0) \\rightarrow k=7Δn=(12−2)%12=10(+0)→k=7 ,为 小七度(m7) ; d->E(如 D4->E5)有 Δn=(16−2)%12=2→k=2(+1×7)\\Delta n = (16 - 2)\\%12 = 2 \\rightarrow k=2(+1\\times7)Δn=(16−2)%12=2→k=2(+1×7) ,为 大九度(M9) ; 所以, 以升/降序来看,五度圈是螺旋上升/下降的。 我们以表中 C♯C\\sharpC♯ 代表着进入了 更上层高音级,而 C♭C\\flatC♭ 代表降至 更下层低音级。则跨越三层的升序大调五度圈,就如下所示: 图 1-12 三层五度圈(升调方向)音调表示意图 自然大/小调,均包含 7 个音调。分别是主音前 1 个音调,和包括主音在内的后 6 个音调。在此基础上,结合五度圈的维度特点,只需要以 滑动窗口来标记对应调位,即可速查大小调中的各个音调: 【,主音,】 例如, 查表得 CCC 大调的基础音调,组成为 [F, C, G, D, A, E, B][F,\\ C,\\ G,\\ D,\\ A,\\ E,\\ B][F, C, G, D, A, E, B] 查表得 F♯F\\sharpF♯ 大调的基础音调,组成为 [B, F♯, C♯, G♯, D♯, A♯, E♯][B,\\ F\\sharp,\\ C\\sharp,\\ G\\sharp,\\ D\\sharp,\\ A\\sharp,\\ E\\sharp][B, F♯, C♯, G♯, D♯, A♯, E♯] 查表得 E♭E\\flatE♭ 大调的基础音调,组成为 [A♭, E♭, B♭, F, C, G, D][A\\flat,\\ E\\flat,\\ B\\flat,\\ F,\\ C,\\ G,\\ D][A♭, E♭, B♭, F, C, G, D] 查表得 C♯C\\sharpC♯ 大调的基础音调,组成为 [F♯, C♯, G♯, D♯, A♯, E♯, B♯][F\\sharp,\\ C\\sharp,\\ G\\sharp,\\ D\\sharp,\\ A\\sharp,\\ E\\sharp,\\ B\\sharp ][F♯, C♯, G♯, D♯, A♯, E♯, B♯] 五度图中一圈(不升降),就是音级为 4 时 ISO 16 标准的 A440 八度音阶 C4 子表。 C C♯D♭ D D♯E♭ E F F♯G♭ G G♯A♭ A A♯B♭ B C5 261.63(0) 277.18(+1) 293.66(+2) 311.13(+3) 329.63(+4) 349.23(+5) 369.99(+6) 392.00(+7) 415.30(+8) 440.00(+9) 466.16(+10) 493.88(+11) 523.25(+12) 而当发生升降时,对于在表中没有对应的额外 Δn\\Delta nΔn 个 ♯/♭\\sharp/\\flat♯/♭ 标志,提升或降低 Δn/2\\Delta n / 2Δn/2 个音级再次查对应 4±Δn24 \\pm \\tfrac{\\Delta n}{2}4±2Δn 音级,对应的 音阶频率子表。 例如, 对于五度圈更上一层的 C♯♯=C5=253.25(+12)C\\sharp \\sharp = C5 = 253.25 (+12)C♯♯=C5=253.25(+12) ,而 C♯♯♯=D♭♯=C♯5=554.37(+13)C\\sharp \\sharp \\sharp = D \\flat \\sharp = C\\sharp 5 = 554.37 (+13)C♯♯♯=D♭♯=C♯5=554.37(+13) 。 至此达成,利用音调在乐理上的音程尺度描述(ISN),以两种参考系的关联,利用公式或搜图,来转换到工程音序频率关系了。从而方便我们根据 乐理音调(Musical Note)查询它的 基波(Fundamental)频率。 为何频率在分析中,显得格外重要呢?因为频率是贯穿三种分析视角的唯一量。 ISN 本身在乐理(艺术)上,是人为认为的尺度平均的。不过,乐理上的平均,是否意味着实际频率的平均呢?结合 A440 频率音序公式(FSF)判断可知, 乐理平均并不意味着均匀的频率划分。这和人耳的听感息息相关。 经过近代心理声学对人耳感观的样本统计测定后,了解到其中的一些端倪。 音调(Notes)的 频率比(Frequency Ratio) 我们发现,以 C4 261.63 Hz 为标准,人对与 C4 频率呈现一定特殊比例的音调,会有更好的听感反馈(详见 等响曲线)。而以某些相应比例,按照从低到高的非线性变化,会使人产生 聆听时的平滑感(Smoothly)。根据这样的研究结果,可见古人间接以 非线性频率比 (虽然发明命名法的时候并无测定,而是后续心理声学补测), 主观确定了音调划分。 仍然采用该音级 4 的例子。有 12 个音调间,距离 C4 基础音调的音程(Interval)和 大致频率比(C4: 当前音调,精确小数点后一位)如下: C4 C♯D♭ D D♯E♭ E F F♯G♭ G G♯A♭ A A♯B♭ B C5 261.63(0) 277.18(+1) 293.66(+2) 311.13(+3) 329.63(+4) 349.23(+5) 369.99(+6) 392.00(+7) 415.30(+8) 440.00(+9) 466.16(+10) 493.88(+11) 523.25(+12) 0 0.5 1 1.5 2 2.5 3 3.5 4 4.5 5 5.5 6 1:1 16:15 9:8 6:5 5:4 4:3 45:32 3:2 8:5 5:3 16:9 15:8 2:1 那么,这一发现有什么作用呢? 
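在说明它的作用之前,先以代码演示上文的 A440 频率音序公式:给定任意基波频率,反推它距 C4 的音序并查得对应音名与音级。音名数组的排列为演示用的约定:

```python
import math

NAMES = ["C", "C#/Db", "D", "D#/Eb", "E", "F",
         "F#/Gb", "G", "G#/Ab", "A", "A#/Bb", "B"]

def freq_to_sequence(freq_hz):
    """A440 频率音序公式:n = 12 * log2(F / 440) + 9(n 为距 C4 的音序)"""
    return 12.0 * math.log2(freq_hz / 440.0) + 9.0

def freq_to_note(freq_hz):
    """将任意基波频率归入最近的音调,返回(音名, 音级, 距 C4 的整数音序)"""
    n = round(freq_to_sequence(freq_hz))
    name = NAMES[n % 12]
    step = 4 + n // 12            # C4 所在音级记为 4,每 12 个音序升/降一级
    return name, step, n

# 演示:261.63 Hz 应落在 C4,440 Hz 应落在 A4,1975.5 Hz 应落在 B6
for f in (261.63, 440.0, 1975.5):
    name, step, n = freq_to_note(f)
    print(f"{f:8.2f} Hz -> {name}{step} (音序 {n:+d})")
```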
它的作用,体现在 创造新的音色。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_4_2.html":{"url":"Chapter_1/Language/cn/Docs_1_4_2.html","title":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz)","keywords":"","body":"1.4.2 乐理:和声(Harmony) & 和弦(Chord)& 调性网络(Tonnetz) 我们对乐理音调到感观转换已有基本认知。但当我们遇到一些频率不在表中,且也不属于表中任意一个独立音调频率倍数(即其它音级),却又悦耳到想要记录的,非隶属单一琴键的声音时。或者想要合理的创造一种不存在于自然中的合成音时。音调频率比关系的平滑听感,就成为了指引。 它使我们可以通过已有音调频率的合理组合,以响度代替融合比例(可以说是古早的混音了,非常感性比较考验演奏者水平),来拟合新的声音。 在乐理中,称之为 和声(Harmony)。 和声(Harmony) & 协和(Consonance)& 不协(Dissonance) 和声(Harmony) 是指将音调以两两形式组和,而产生新的声音的过程。当然,为了给予艺术发挥空间,不会也不能固定响度入内。所以,和声是指参与声音其音色频率链的合成。此处为了通用说明,需要固定图形化时,采用相同大小的抽象响度表示。 不过请注意,在实际在数字合成过程中,还是需要 结合响度构建和声后的新谐波链的。 图 1-13 某钢琴同音级下纯一度(P1)与纯五度(P5)的泛音链和声图 上例为同音级的 P1 + P5 和声(如 C4 + G4)。这样的一组声音同时弹奏时,为人们带来了听感上的和谐。而同音级下的 P1 + M2 和声(如 C4 + D4),则没有这么融洽: 图 1-14 某钢琴同音级下纯一度(P1)与大二度(M2)的泛音链和声图 从两者和更多样本的 频率链重叠情况上,过往的研究者们发现,如果参与合成的声音,在频率链上有 较多的重合谐波 时,人耳会觉得声音 和谐(Harmony) 不突兀。 而同音名不同级的 乐理音调(Musical Note),其 谐波链几乎完全重合,各频率总是相差 2 的整数倍大小。如 C4 + C5 或 F4 + F6 等,几乎可以认为就是一个声音。依此称为 完美协和(Perfect Consonance)。 但相仿 P1 + P5 和声情况的音调组合,其 谐波链存在较多重合,却依然可以被分辨。我们称其为 不完美协和(Inperfect Consonance)。 而相仿 P1 + M2 和声情况的音调组合,其 谐波链几乎很少重合,参与基音相对可辨。我们称之为 不协(Dissonance)。 至于不协情况中,其 谐波链在一定范围内完全无重合的情况,参与基音完全可辨。我们称之为 完全不协(Perfect Dissonance)。 同理,可以扩展至更复杂的和声组合。 显然,协与不协的问题,同人耳对频率的敏感度高度相关。 仍然采用该音级 4 的例子。以 12 个音调间和声情况统计。距离 C4 基础音调的音程(Interval)和 大致频率比(C4: 当前音调,精确小数点后一位)如下: C4 C♯D♭ D D♯E♭ E F F♯G♭ G G♯A♭ A A♯B♭ B C5 P1 m2 M2 m3 M3 P4 A4/d5 P5 m6 M6 m7 M7 P8 0 0.5 1 1.5 2 2.5 3 3.5 4 4.5 5 5.5 6 1:1 16:15 9:8 6:5 5:4 4:3 45:32 3:2 8:5 5:3 16:9 15:8 2:1 表中,橙色 表示与 C4 完美协和,黄色 表示与 C4 不完美协和,蓝色 表示与 C4 不协,靛色 表示与 C4 完全不协。将统计扩展到整个当前音级中两两音调时,就有(音程 P1 省略比例): 可见, 当两音调间音程 为 [P1, P4, P5, P8][P1,\\ P4,\\ P5,\\ P8][P1, P4, P5, P8] 时,两音调和声 完美协和 ; 当两音调间音程 为 [m3, M3, m6, M6][m3,\\ M3,\\ m6,\\ M6][m3, M3, m6, M6] 时,两音调和声 不完美协和 ; 当两音调间音程 为 [M2, A4/d5, m7][M2,\\ A4/d5,\\ m7][M2, A4/d5, m7] 时,两音调和声 不协 ; 当两音调间音程 为 [m2, M7][m2,\\ M7][m2, M7] 时,两音调和声 完全不协 ; 至此,我们便可以利用此规律,使构成复杂和谐音的组成音,满足两两和声协和匹配。继而创造出新的声音。 这种特殊的和声过程,即是和弦(Chord)。 和弦(Chord)& 三和弦(Triad) 以协和(包括完美协和、不完美协和)音程规律,取一组由升调方向选择的三个或更多音调组成的和声,在乐理中被称为 和弦(Chord)。 和弦以成组的两两相邻音间音程差异,分 三度和弦 和 非三度和弦 两个类别。 三度弦,即以三度音程(包括 m3、M3)构成的一组和弦。 根据组成的个数又可以细化为:三和弦(三音) 、 七和弦(四音) 、 九和弦(五音) 、 十一和弦(六音) 、 十三和弦(七音)。 非三度弦,即音间音程非三度。 情况则较为复杂,包括 转位/离调和弦 所代表的一系列和弦。 在工程上,相对较常用的是三度弦。而三度弦分类下,各中和弦概念存在基本规律,可以直接从三和弦向上衍生。因此,为了便于记忆,本书采用三和弦(三音)讲解。至于非三度弦的其它类型,借助对三和弦的理解,需要时再行查阅乐理专业资料即可。 三和弦(Triad) 的组成音有三个,根据 升调 顺序被分别称为 一音(First) 、 三音(Third) 、 五音(Fifth)。有时也称为 根音(R [Root]) 、 中音(M [Mediant]) 、 冠音(T [Top])。 根音(R),即一音(First),指组成音中位于低音位置的音调; 中音(M),即三音(Third),指组成音中与根音音程三度的音调; 冠音(T),即五音(Fifth),指组成音中与根音音程五度的音调; 一般的,我们会结合两种称谓,用 根音(Root) 、 三音(Third) 、 五音(Fifth) 指代三和弦组成。 因为三度、五度包含了共有 m3、M3、d5、P5、A5 的 5 种音程在内的类型。在根音选定时,可以产生 4 种不同的组合方式,有: 大三和弦,取 Root + M3 + P5,记为 RRR ; 小三和弦,取 Root + m3 + P5,记为 rrr ; 增三和弦,取 Root + M3 + A5,记为 R+R^+R+ ; 减三和弦,取 Root + m3 + d5,记为 r∘r^{\\circ}r∘ ; 根音的选择是不受限的,比如取 C4 即 C大调的主音为根音,则有 C4 下的 大/小/增/减三和弦分别为: C=[C, E, G]c=[C, E♭, G]C+=[C, E, G♯]c∘=[C, E♭, G♭] {\\displaystyle \\begin{aligned} C &= [C,\\ E,\\ G] &c &= [C,\\ E\\flat ,\\ G] \\\\ C^+ &= [C,\\ E,\\ G\\sharp] &c^{\\circ} &= [C,\\ E\\flat,\\ G\\flat] \\\\ \\end{aligned} } CC+=[C, E, G]=[C, E, G♯]cc∘=[C, E♭, G]=[C, E♭, G♭] 而取 C大调 的 F4 为根音,则有 F4 下的 大/小/增/减三和弦分别为: F=[F, A, C5]f=[F, A♭, C5]F+=[F, A, C5♯]f∘=[F, A♭, B] {\\displaystyle \\begin{aligned} F &= [F,\\ A,\\ C5] &f &= [F,\\ A\\flat ,\\ C5] \\\\ F^+ &= [F,\\ A,\\ C5\\sharp] &f^{\\circ} &= 
[F,\\ A\\flat,\\ B] \\\\ \\end{aligned} } FF+=[F, A, C5]=[F, A, C5♯]ff∘=[F, A♭, C5]=[F, A♭, B] 这种组合类型是固定的,可以类推至任意音调。理论适用于所有三度音程和弦,即三度弦。 不过,因为音程之于 自然音阶 间的转换原因。通过组合公式,直接计算的方式依旧会显得比较繁琐。能不能参考五度圈对大/小调的查表方式,构建一个相类似的查表法,来快速完成多音调的和弦组合呢? 答案是可以的,调性网络(Tonnetz) 就是答案。 调性网络(Tonnetz) 现代调性网络(Tonnetz)原型,来自于数学大家 莱昂哈德·欧拉(Leonhard Euler,1707~1783) 在早年尝试的,以数学建模构造良好合声的探索 [10] 。欧拉首次采用数学图论方法,解决和弦问题,提出了 欧拉调性网络(Euler's Tonnetz,Tonnetz 是德语,相当于英文的 Tone-net)。 图 1-15 欧拉论文原稿中的调性网络(Euler's Tonnetz)示意图 欧拉调性网络可视化的表示了,协和和弦间的音程关系。从上而下的标识了两种联系。位于 上方的音调,其 左分支 是距离它最近的五度(P5、d5、A5)音程对应音调,而 右分支 是距离它最近的大三度(M3)音程对应音调。例如,F->C(F4->C5) 有 C 是 F 的 P5,F->A(F4->A4) 有 A 是 F 的 M3。全图涵盖了同音级下的一套完整标准十二律。 不过因为范围和和弦上的局限性,欧拉调性网络没有得到太多的应用。 状态一直持续到 19 世纪时期末,被 胡戈·里曼(Hugo Riemann,1849~1919) 打破。 胡戈·里曼结合五度圈查表法有关升降调的螺旋延展性,在对 大小调间和弦二元性(Major/Minor Chord Dualism) 的研究时,发现了大三和弦和小三和弦间,在欧拉调性网络沿音级的五度展开上。可以通过简单的 平移变换(Schritt) 和 倒影变换(Wechsel),得到同源与相邻向上/向下和弦效果。由此,提出了 里曼理论(Riemannian Theory)。并在这之后,将原有两个主要变换中的倒影变换,扩展到了 关系变换 与 导音变换 双变换。结合原有大小和弦二元论的 平移变换,构成三主要变换体系 [11] ,称为 新里曼三元理论(Neo-Riemannian Triadic Theory),简称 新里曼理论(Neo-Riemannian Theory)。 图 1-16 新里曼理论(Neo-Riemannian Theory)的三主要变换 注意,上图中选择三和弦时,必须按照从根音到冠音的相同箭头方向选择。箭头表示了 位于下一位 的组成音。 依托调性网络的几何化,新里曼理论的 三种主要变换,分别是: P变换(P Transformation),即 平行变换(Parallel Transformation)。如上图蓝色箭头标识关键步骤。 P变换只能在完全相同主音的自然音阶,即同一大/小调,内进行。 在已知一则三和弦组成情况下,查询根音(Root)和五音(Fifth)相同的另一组三和弦的方式。再以根音五音连线作为平行四边形对角线,用两组三音(Third)构造平行四边形。、结果中与原三和弦中音相对的另一角,为所求和弦中音。如图中 C=[C→E→G] & c=[C→E♭→G]C = [C \\rightarrow E \\rightarrow G] \\ \\& \\ c = [C \\rightarrow E\\flat \\rightarrow G]C=[C→E→G] & c=[C→E♭→G] 。 P变换,让我们能够快速完成 同主音间 大/小三和弦 的转换。 R变换(R Transformation),即 关系变换(Relative Transformation)。如上图红色箭头标识关键步骤。 R变换只能在主音音程互为小三度(m3)关系的自然音阶,即关系调(Relative Key),间进行。 在已知一则三和弦组成情况下,通过将五音(Fifth)升/降一个五度(P5、d5、A5),再次以移动后的五音(Fifth)与原三和弦三音(Third)查询 大/小调的同位关系三和弦。构成结果平行四边形的另一角,就是升调方向时所求关系大调五音,或降调方向时所求关系小调根音。如图中 c=[C→E♭→G] & E♭=[E♭→G→B♭]c = [C \\rightarrow E\\flat \\rightarrow G] \\ \\& \\ E\\flat = [E\\flat \\rightarrow G \\rightarrow B\\flat]c=[C→E♭→G] & E♭=[E♭→G→B♭] 。 R变换,让我们能够快速完成 关系调间 大三和弦 与 小三和弦 的转换。 L变换(L Transformation),即 导音变换(Leading-Tone Transformation)。如上图靛色箭头标识关键步骤。 L变换只能在主音音程互为升调方向纯五度(P5)关系的自然音阶,即相邻调(Adjacent Key),间进行。 在已知一则三和弦组成情况下,通过将五音(Fifth)升/降一个大三度(M3),查询新位置下的五音所处的三和弦。构成结果平行四边形的另一角,就是升调方向时所求相邻调五音,或降调方向时所求相邻调根音。如图中 A♭=[A♭→C→E♭] & c=[C→E♭→G]A\\flat = [A\\flat \\rightarrow C \\rightarrow E\\flat] \\ \\& \\ c = [C \\rightarrow E\\flat \\rightarrow G]A♭=[A♭→C→E♭] & c=[C→E♭→G] 。 L变换,让我们能够快速完成 相邻调间 大/小三和弦 的转换。 新里曼理论,进一步完善了现代调性网络的音程变换图系统。使得以该理论为根据的几何建模,存在 无限延伸的音调覆盖 和 快速可查 的特点。从而变得足够泛化且实用。由此绘制而成的新里曼理论平面拓扑调性网络,成为了三和弦的快速查表法的基石: 图 1-17 以新里曼理论(Neo-Riemannian Theory)绘制的调性网络 再配合上五度圈的自然音阶快速确定,与音序频率表的音调频率映射关系,即可完成对和弦的音调频率转换。 以频率,打通乐理到心理声学、声乐工程的关系。 至此,我们已经掌握了基础的乐理观测方法,并能够较为客观的评判。而在前文中,我们提到人耳对频率的感知,是促成一切的关键。而心理声学的实验结果,是其客观数据化的前提。 那么,其具体是怎样的测量过程,而结果又是怎样体现的呢? 这就需要提到 等响曲线(Equal Loudness Level Contour) 了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_4_3.html":{"url":"Chapter_1/Language/cn/Docs_1_4_3.html","title":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour])","keywords":"","body":"1.4.3 感观:等响曲线(ELLC [Equal Loudness-Level Contour]) 等响曲线(ELLC [Equal Loudness-Level Contour]) 是反映在面对指定常量响度的稳定纯音时,人耳对各纯音基波频率下的响度感知最小临界点情况 [12] 。作为心理测量值,最终的结果是基于多组样本测量结果所得的 平均值。 等响曲线的测量 有记录可查的最早心理声学测量结果,来自 哈维·弗莱彻(Harvey Fletcher,1884~1981) 和 怀尔登·芒森(Wilden A. 
Munson,1902~1982) 于 1933 年发表的 “响度、其定义、测量和计算” 一文 [13] 。正是该论文,奠定了等响曲线的基本测量方式。 弗莱彻和芒森对于每个频率和强度,以选取的 1000Hz 参考音 为基准。调整参考音的响度,直到听众认为它与测试输入的 “稳定纯音” 响度相同,作为一次有效记录标准。统计了大量样本。 测量中,样本指的是不同的受试者。所以样本的基数决定了标准的有效程度。而样本的输入所用的 “稳定纯音”,则指的是在 固定响度下 的一组,按照基音以一定频率步长 (一般是取三分之一个八度递增,或以 10Hz 递增,前者居多)从人听力下限 20Hz 开始递增至 20000Hz (ISO 226 系列因采用音调测量,频率递增是非等步长的,范围为 20~12500Hz [14] )的 所有纯音音调。 因此,假设选用了 [0, 20, 40, 60, 80, 100][0,\\ 20,\\ 40,\\ 60,\\ 80,\\ 100][0, 20, 40, 60, 80, 100] 共 6 组固定响度,选择 10Hz 频率步长。则每组有 1999 个纯音输入,共记 6×1999=119946 \\times 1999 = 119946×1999=11994 个输入。当然,一般统计采用的是三分之一八度递增,不会有如此多且密集的输入。 虽然样本存在差异,但是输入却可以在一定程度上客观的表示,以减小个体不同造成的误差。在本书前面的章节已经介绍了, 单一音调声音都是复合音(Complex Sounds)。而其 和声(Harmony) 可看作是由一系列不同基音频率下的一组纯音,将各自谐波链按响度叠加组合而成。 这一理论可以延生至自然界中其他种类声音的合成,即声音的合成就是泛音链的合成。当然,也可以作用于 非标准音程(Interval) 的特殊单一音调合成。 频律响度特征(FLF [Frequency Loudness Feature]) 我们可以通过以横坐标为频率而纵坐标为响度,将构成某一时间点上的一个单音的所有频率成分,拆分到各频率混合后的响度标识状态了。 而形成的频率响度曲线,为了 区别于乐理 中的和弦和声相对未量化响度的概念,被称为 该单音在该时刻下 的 频律响度特征(FLF [Frequency Loudness Feature])。通过 FLF,我们可以判断 音色 情况,这点在之前的声音三要素部分时,已有使用。 图 1-18 某低音(Bass)的频律响度特征(FLF)示意连峰图 在真实场景中,我们很少能拿到指定时刻的存粹数据。所以,一般会 统计一小段时间片 下的 频率响度信息,并通过计算这一段时间片内各频率自身的 响度加合求算术平均,来表示该时间片 中间点的时刻,所具有的 频率响度特征。 如下图所示(相关代码实现,参见第五章),响度 、 频率 、 谐波链 情况表露无遗: 图 1-19 多乐器演奏音调 A4 时在 5s 处取 100ms 所得频率响度特征 频率响度信息的来源,则是 来自对相应时间片内的原音频数据,进行离散傅立叶变换做 时频分离 获取。具体原理,会在本书第三章中进行详细阐述。 FLF 反映的是,指定时刻声音本身的构成。 等响曲线的输入,是以 FLF 为标准,按照选取频率处理所得的指定响度,有相对纯粹谐波链的音调。 等响曲线的最新修订 既然是统计所得,那么等响曲线就存在 迭代 和 标准修订。最新一次修订来自于 国际标准化组织(ISO [International Organization for Standardization]) 在 2023 年发布的 ISO 226:2023 标准。该标准联合了来自世界各地的多家研究机构的综合数据结果,统计了从 18 至 25 岁来自欧美和东南亚的大量受试者测量结果求均值所得,是 国际通用标准。 2023 年的再次测量,选择了 [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100][0,\\ 10,\\ 20,\\ 30,\\ 40,\\ 50,\\ 60,\\ 70,\\ 80,\\ 90,\\ 100][0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] 共 11 组固定响度。以 10 方(Phon)为输入响度步长,统计仍然以从 20Hz 按照 三分之一个八度(One-Third Octave) 递增至 12500Hz 的,每个响度分组包含 29 个音调频率输入。共计 11×29=31911 \\times 29 = 31911×29=319 类 输入信号 [15] 。 图 1-20 ISO 226:2023 标准等响曲线 [15] 上图展示了相应结果,注意,横坐标并非是等频率步长的,而是以音程尺度描述(ISN [Interval Scale Name])标记的 三分之一个八度(One-Third Octave) 步长。 最下方的 0 方(Phon)线,表示 人的可听下限,称为 可闻阈(The Threshold of Hearing)。第二小的 10 方(Phon)线,表示人的 最小可辨认下限,称为 静音阈(The Threshold of Quiet)。介于可闻阈和静音阈间的声音,能够听见但不可辨认。最上方的 100 方(Phon)线,则表示 人的听觉痛觉线,称为最大安全听阈上限,或 痛觉阈(The Threshold of Pain)。超过痛觉阈的响度,会使人听觉不适并产生 疼痛感,且在持续一段时间后,对人的听力造成 永久性 的损害。 三者都 相对缺少样本,因此采用了 虚线 标记。由 可闻阈 和 痛觉阈 所围成的区域,被称为安全听阈(Safety Hearing Threshold)。 所有的工程技术,按理来说,皆因该在安全听阈范围内进行。避免对人耳造成损害。而我们该如何衡量这一点,并同时检测此类设备是否符合我们期望的标准呢?只需考察其 频率响应(Frequency Response)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_4_4.html":{"url":"Chapter_1/Language/cn/Docs_1_4_4.html","title":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour])","keywords":"","body":"1.4.4 感观:频响曲线(FRC [Frequency Response Contour]) 频率响应(Frequency Response) 用来指代某系统(既可以是设备,也可以是人,泛指针对频率的感知器)在 接收频率(Frequency)输入时的输出响度(Loudness)(或最佳接收响度) 的特点 [16] 。是描述设备频段内,频率响度尺度 的客观测定结果。 频率响应是对声音的响应。我们需要先了解,怎样描述一个单音在某时刻的特征,才能更好的理解什么是频率响应。虽然两者并不相同。 另外的,声音的频率响应 和 电路学(Circuitology)中的频率响应效应 也是 非等位 的概念(虽然两者有着一样的名称),注意区分差异。电路学频率响应效应不在本书范围。 需要注意的是,前文中介绍的 频律响度特征(FLF) 虽然和 频率响应曲线(FRC,泛指所有频率响应的绘图结果)采用了 相同的坐标系设置,并因横纵坐标一致,而能够同参考系内展示。但是 两者含义完全不同。切勿混淆! 
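为了便于区分两者,这里先给出一个从实际音频中提取某时刻频律响度特征(FLF)的最小代码草图:FLF 来自对声音数据本身做时频分离后,在一小段时间片内求均值;而频率响应描述的是设备的尺度。文件名、取样时间点与窗口参数均为演示假设,完整实现参见第五章:

```python
import numpy as np
import librosa

# 最小示意:取某音频在 t=5s 附近 ±100ms 片段,做 STFT 后沿时间求均值,得到该时刻的 FLF
# "demo.wav"、时间点与窗口长度均为演示用的假设参数
y, sr = librosa.load("demo.wav", sr=None, mono=True)

t_center, half_win = 5.0, 0.1                      # 5s 处,前后各取 100ms
i0 = int((t_center - half_win) * sr)
i1 = int((t_center + half_win) * sr)
segment = y[i0:i1]

spec = np.abs(librosa.stft(segment, n_fft=2048, hop_length=512))   # 幅度谱
flf = librosa.amplitude_to_db(spec.mean(axis=1), ref=np.max)       # 帧均值转相对响度(dB)

freqs = librosa.fft_frequencies(sr=sr, n_fft=2048)
peak = freqs[int(np.argmax(flf))]
print(f"该时间片内响度最高的频率约为 {peak:.1f} Hz")   # 若源为 A4 演奏,应接近 440 Hz 或其谐波
```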
频率 响应(Response)与 响度(Loudness)的辨析 响应是某系统的尺度,而 响度是系统尺度下的值。响度对应的尺度,既可以来自于 考察范围内的某系统响应(例如,某设备),也可以来自于更大范围的相对客观系统(例如,自然界)。考察范围,就是该声音传播感知链中的设备部分。 对于 相对客观系统,因为一般作为 响度的基础标的,当其他系统属于其子系统时, 子母系统尺度转换的响度不需要放缩。而只有 发生于兄弟系统间的尺度变换,需要放缩交互时的响度。 因此,在采用 ANSI/ASA S1.1-2013 或 ISO 226 系列 规格中,以 声压级(SPL) 的相对客观 分贝(dB) 单位表示响度作为前提。如果在 考察范围内传递,不同兄弟系统 下的响度,就需要 将前一级 的响度值,从源尺度变换为新尺度,计算 放缩后的抽象值。 例如,从自然届的客观环境测量某单一频率纯音为 50dB,在当前采样设备上,有该频率下 0~100dB 的响应范围,采样到的该频率为 50dB。但如果采样后,接收该频率的下一级设备只有0~60dB 的响应范围,则经过该二级设备处理后,源频率就变成了约取值 30dB 大小。当然,实际上 并不是 可以通过 计算得到的,而需要进行 频谱测量。但 大致变化的形势,可以等效来看。 即,存在 非精确换算 : Ldev≈FRdevFRori⋅Lori {\\displaystyle \\begin{aligned} L_{dev} \\approx \\frac{FR_{dev}}{FR_{ori}} \\cdot L_{ori} \\\\ \\end{aligned} } Ldev≈FRoriFRdev⋅Lori 以参考值,表示该设备对前一级输入的影响。 而设备频响是如何测定的呢? 其测量的方式为,在保证输入声音的复合响度值不变的前提下,逐步增大指定频率的响度(保证复合响度不变则通时衰减其他频率响度),直到达到复合响度值。最终测得的该频率响度上限,就是该复合响度下的对应频率频率响应上限。 上限可以为负数,代表该复合响度下,无法感知指定频率。 同理,将该频率的响度衰减到 0 并保持复合响度不变时,得到的就是该复合响度下的该频率的频率响应下限。当然,下限值不出意外一般都是 0 ,并不会高于上限。也就是,当上限为 0 或负数时,下限也不存在意义了。 频率响应在不同系统下的作用 为了便于说明,本书将 “某系统” 分成三种:接收声音转换为信号的 收音设备(Hearer),接收信号并输出声音的 放音设备(Speaker),和 末端系统(Terminus)。 对于 收音设备(Hearer),频率响应反应的是,当收音设备 接收到声音信号后,由设备本身决定的当前放音音量下,能够感知到 声音包含频率中,某 单个指定频率 的实际 响度尺度。什么是收音设备呢?诸如,人耳、麦克风、MIDI 测试仪、助听器 等,用来接收声音或临耳(临近末端)播放的东西。 对于 放音设备(Speaker),频率响应反应的是,当放音设备 接收到频率信号后,由设备本身作用而产生 相应频率声音的最大响度,与 输入频率 之间的关系。什么是放音设备呢?简单来说,就是例如:人的声带、音响、麦克风、钢琴等乐器、蜂鸣器、耳机 之类,能够产生声音或提供声音至后级的东西。 对于 末端系统(Terminus),频率响应表示的则是 感知的终点。其自身拥有的 听阈(Hearing Threshold),决定了最终结果。而 听阈就是该系统对声音感知的等响曲线围成的范围。这里的末端系统,指的即为整个传播感知链的最尾端。不一定为人的神经系统,也可以是测量仪器,或者存粹的虚拟端。 频率响应对于三种设备的意义不同。但三者都会以人类较通用的听觉频率范围(20~20000Hz),作为 主要区段(Major Range) 进行考察。 除了末端系统(Terminus)的频率响应可由等响曲线衡量,收放音设备的频率响应情况,则还需要其他方式表示。 收音频响曲线(HFR [Hearer Frequency Response]) 我们将作用于收音设备的频响曲线,称为 收音频响曲线(HFR [Hearer Frequency Response])。HFR 被用来确认 收音设备的优良程度。代表 收音设备的收音频段的频率敏感范围,和响度尺度关系。 而根据情况差异,对收音设备的衡量,既可以按照其 HFR 可以有效地重现 20~20000Hz 的整个频段(如监听麦克),也可以限制在可听频谱内的较小频段(如通讯麦克)。 以 HiFi 收音为例: 图 1-21 AKG C414XLS 麦克风的 HFR 如上图所示,我们选择 AKG C414XLS 来做 全向收音。从 C414XLS 的官方 HFR 上,可以看到在 40 Hz 、80Hz 、160Hz 基音频率的声音采集时,C414XLS 频率响应从对应采集频率起始点位置(横坐标的 基音频率 作垂线至图中 标记的当前频率线焦点)至 20000Hz 频段,基本都控制在了 ±3dB 范围内,且有较少的波动。说明了 C414XLS 有一个近乎完美平坦的频率响应曲线,能够 更好的 保存采集声音的清晰度和立体感。 由此,我们引申出了衡量 HFR 好恶的 主要考察指标: 即,从采集频率位置 开始的 足够平直(Flat) 和 足够光滑(Smoothly)。 平直且光滑的 HFR 代表着,收音设备能够以 相同大小的响度尺度,来 采集 任何落于 频段范围 内的声音,而不会引入较大的设备误差。从而使得接收的声音,在经过本级处理后,不会 输出发生形变的原声采集信号。 由于采集基本是满尺度,响度尺度在 HFR 上基本等同于当前响度了。所以,为了规范和便于区分,通常会在 HFR 中采用相对于原声的响度差,来表示还原程度。即 HFR 的纵坐标,表示的是 采集结果尺度对比原响度差值,称为 相对声压级(Relative SPL)。 放音频响曲线(SFR [Speaker Frequency Response]) 我们将作用于放音设备的频响曲线,称为 放音频响曲线(SFR [Speaker Frequency Response])。SFR 被用来确认 放音设备的优良程度。代表 放音设备的防音频段的响度尺度,和频率稳定范围关系。 同 HFR 类似,衡量 SFR 好恶主要考察指标,几乎与 HFR 一致: 即,从附和误差范围 的起始点后,有 足够平直(Flat) 和 足够光滑(Smoothly) 曲线。 平直且光滑的 SFR 代表着,放音设备能够以 相同大小的响度尺度,来同尺度的 播放 任何落于 频段范围 内的,未超过尺度范围响度 的声音。这同样意味着,该放音设备不会引入较大的设备误差,从而导致接收的输入信号,或者前一级输出声音的频率响度特征,在本级输出发生形变。 因此,理论上的最佳 SFR 就应该是一条直线。 图 1-22 一条在 110~18000Hz 下(±3dB)平直的 SFR 样例 这样的理想状态,基本无法企及。所以,参考 HFR 测定的区间量,SFR 设置了 浮动标准,即 在一定的响度范围内的相对水平 即可。如上图,就是音响某设备的 SFR 测试结果。该设备在 110~18000Hz 有响应均在 ±3dB 的修正内,称 该设备 110~18000Hz(±3dB)平直。这既是它的 SFR 属性。对于人造设备来说 SFR 一般是固定的。 而对于 生物器官,例如动物或人的声带等,衡量 SFR 是没有意义的。生物声带 SFR 受客观个体差异的影响,是独特且不一而同的(相比工业制品)非平直曲线族构成的区域范围。 生物有通过改变声带的大小和形状,来产生不同的声音的能力。这使得其可以 经由训练,来调整自身在某些声音频段上的频率响应,来达到更高或更低的频率稳定的响度增减。这种动态的能力,让生物具有了 动态的频率响应范围。同时,也可以后天调整频率响应表现。 例如,经过训练的歌手,其好听的嗓音究其原因,就是在处于自身主要声音特色频段,且适合自身音量大小的发声时,有着快速变换音调(基音)但始终处于相对光滑平直的 SFR 的能力。 如果我们能够使人造放音设备也具有这样的能力,或许也能实现根据不同的需要,动态调整放音频率响应。不过这样的技术成本太高。且由于需要考虑共振等因素,不一定能够得到我们想要的平滑平直 SFR ,让市面上并没有这样的产品。 大多数音响类产品,在 对自身品质有自信 的情况下,都会给出 SPR 以提供客户参考。 图 1-23 B&C 的 5FG44 喇叭单元官方 
SFR 上图为意大利 B&C Speakers 公司给出的,在空间响度恒定在 92dB 的情况下,测得自家低频驱动器 5FG44 喇叭单元的官方 SFR 。 但 SPR 并不一定都可以这么轻松获取,大多时候我们只能得到 SPR 结果的范围参数。在这种情况下,不参考 SFR 比较两个设备的好坏可用:固定频段比较浮动范围,或者 固定浮动范围比较频段。两者都是快速判断的办法。频段越广,浮动范围越小,则设备越优秀。 现在,让我们进入一个完整的感知过程。 传播感知链 & 频响上下文(Frequency Response Context) 以录音棚采样这一事件举例。在监听的过程中,通常调音监理希望对链路的声源,产生的声音特征,进行完整的保存,直到进入终端(也就是人的神经系统)评估。在这种上下文语境的理想状态下,最终的末端接收的声音频率响度特征,需要 尽可能的和声源频率响度特征保持一致。 假设此时歌唱者发出了一个单音,整个传播感知链如下(实际情况中的曲线要复杂得多): 图 1-24 录音棚采样场景的理想传播感知链模拟 图中, 橙色线,表示 歌唱者该时刻单音 的 频率响度特征(FLF); 红色线,表示作为 收音/放音设备 时的 收音频响曲线(HFR)/放音频响曲线(SFR); 蓝色线,表示 传输过程 中,源单音在该级下的 频率响度特征(FLF); 上例就很好的展示了,三类设备间的关系。对于前一级来说的收音设备,对于其后一级而言,是它的放音设备。 但这种监听模式,即 狭义上的 HiFi(High Fidelity),是否和人直接听到相同的声音的感受一致呢?答案可能和大部分发烧友的直观认知不一致,那就是“不是”。或者说,这种 HiFi 上的听觉感受,更近似于通过骨头传导下,歌唱者自己听到的自己的原声。 同样情况下,假设聆听声音完全一致。不考虑位姿(听声方位不同,也会导致听到的声音不同,这部分不在本书讨论范围内),存在一个站在声源附近,直接通过空气传播,用耳朵来听的聆听者。此时,他的传播感知链如下(图例中略微夸张了效果): 图 1-25 录音棚内听众(假设)场景的理想传播感知链模拟 显然,相同声源的某个音,在不同体系的传播感知链下,有不同的末端系统(Terminus)的直观感受。而 决定不同体系的关键背景要求(Majot Background Requests),就被称为 频响上下文(Frequency Response Context)。 可见,背景条件(即上下文)的不同,对需求的频率响应的衡量方式,也是完全不同的。因此,在实际情况中,背景信息至关重要,决定了我们如何利用频率响应进行有效的分析和处理。界定上下文,往往是开始频响分析的第一步,也是最为关键的一步。 为了方便理解,以频率响应应用的 HiFi 监听领域,来作为下文讲解的 频响上下文。 监听 HiFi 耳机的 SFR 设计原理 这里或许已经有读者存在疑问。即然 平直光滑 是衡量 HFR & SFR 的统一标准。那为何在上文的 HiFi 传播感知链中,理想监听耳机的 SFR 却并不平直呢? 先考虑一个类似的场景,例如电影院的声场营造。 对于影院,理想的放音设备,应尽可能的在接收到指定范围频率的等响度输入时,能够响度恒定(理想)的输出该频段的任何声音。从而在音源上,物理客观的保证对输入的恒等还原。但有时,因为设备本身或者环境因素,我们需要突出或只产生某一个频段的声音时,此时的 SFR 就被用来作为调整的依据,突出一些某频段并减弱一些评断。直到设备的 SFR 被控制在突出该频段下,有最大响度的最佳反馈。 有来自于《声音的重建》其作者的研究工作,在平稳 SFR 输出音响下,各类影院在座位处的 HFR 综合统计 [17] : 图 1-26 不同大小影院的座位平均听感 HFR 统计数据 [17] 上面展示了影院环境下,座位上的 HFR 已经不是平稳的了。因此,为了保证座位处的听感一致于自然环境听感(类似于前文中举例的录音棚内听众直听),就需要让音响的 SFR 在 20~1000Hz 有一定程度的响应(即响度尺度上的)衰减。这么做的结果就是,影院音响的 SFR 会在低频和中频的频段,不够平直。 所以,为了抵消环境或个体等的主客观影响,会调整发音设备,使其 SFR 满足条件。如下: 图 1-27 影院环境根据座位平均听感 HFR 对音响调节结果 SFR 示意图 [17] 同理,当我们处于 HiFi 监听的频响上下文时,由于直贴人耳,以及 HiFi 期望对歌手原声完全还原的目标,也需要对耳机进行调整,使得耳机的 SFR 可以抵消人耳的听阈特征。 这意味着,需要监听耳机尽可能的在接收到指定范围的频率时,能够拟合人耳(或最终感知器)在对应响度下的频响曲线。从而保证,通过前级与本级的 HFR 增减向消,实现传递数据的线性稳定。最终在末端感知节点的源数据还原时,具有目标一致性。 想到这里,首先就是如何获取人耳在对应响度下的频响曲线。这点其实很容易,只需要对照 等响曲线(ELLC),按照相应的输入响度声压级(SPL)转方(Phon)单位后,查找所在曲线即可。查到的曲线(当响度并不在图中,而是落于两曲线间时,需要计算等效曲线),就是 当前响度下的人耳频响曲线。 以 50dB 为例,由于在满足 ISO 标准条件下有 1 dB=1 Phon1\\ dB = 1\\ Phon1 dB=1 Phon 可知,需要查找的等响曲线为 50 方(Phon)等响曲线。 有下图( 橙色 为查找结果): 图 1-28 ISO 226:2023 标准等响曲线截取 50 方(Phon)线 这即为所需 50dB 输入下的人耳平均频响曲线,即 50dB 的人耳 HFR。显然这只是个平均统计数据(非常高端的定制 HiFi 耳模入耳式耳机,除了采样耳道模型来制作耳机音道外,还会为每个人都进行个体 ELLC 测量,并基于测量结果,独立设计专属于个人的耳机 SFR,但这么做不适用于批量生产且极度昂贵)。 那么,想要 抵消掉这种频响情况,让听者能够完整感受到歌唱者的原始声音,该怎么做呢? 
只需要,让生产的耳机在选定的响度输入时,其 SFR 每个频率下的响度尺度,完全与此时人耳 HFR 对应频率下的响度尺度,以 50dB 水平线为轴对称,就能达到效果。如下: 图 1-29 基于 ISO 226:2023 的 50 方(Phon)线设计的入耳耳机 SFR 上图中,绿色线 即为想要在 50dB 响度下达到完全监听 HiFi 效果的入耳式耳机,其理想的 50dB SFR 曲线。入耳式耳机由于没有空胞(即耳罩式耳机,发生单元与耳朵间的空腔)问题需要考虑,等效下待处理的只有人耳 HFR 曲线,因此才会呈现出对称关系。 以该 SFR 调整设备,直到设备频率响应近似如此,就能达到最好效果。 不过,耳机这种人造设备,其频响特性基本是固定的。这代表着,如果我们选定一个响度作为 基准响度(Standard Volume),那么在设备面临或高或低的其他响度输入时,其 SFR 是会有一定程度形变的。也就是说,我们只能尽可能的保证在选定基础响度的一定误差范围内,贴合抵消该响度范围内的人耳 HFR 曲线。 所以,我们需要一定的标准,来方便生产活动的统一产品质量衡量。 哈曼曲线(Harman Target Curve) 就是这类标准之一。 为什么是之一呢?因为人耳的特殊性和厂商各自的特色,不同厂商或研究机构,基于不同的样本集,指定了多种适用于一定范围人群或自身产品特色的 SFR 标准。而 哈曼曲线则属于其中被接受程度最广的标准之一。 搞清楚哈曼曲线,对于其他类似的标准,即可举一反三触类旁通。 哈曼曲线(HTC [Harman Target Curve]) 哈曼曲线(HTC [Harman Target Curve]) 是用类似于前面本书提到 “ 50dB 响度时,入耳式耳机 SFR 抵消 人耳 ISO 226 标准 50Phons HFR” 的目标导向,获取的 人耳 85dB 情况下 的 HiFi 场景,用于设备参考的 主观测量 SFR 标准。 最早的 HTC 2013 标准,是由 肖恩·奥利佛(Sean Olive) 博士 和 哈曼音频实验室(Harman Audio Lab) 的其他研究人员,在 2013 年利用研究所条件,设计了 6 组 双盲试听对比实验(Double-Blind Listening Test) 测得的,经修正后听者 HFR 采样均值曲线。 图 1-30 哈曼曲线 2013 标准实验采用的测试听力设备 [18] 他们对如上表单的听音设备,进行了 每组 10 位不同听力情况听众 的,听众评分和设备频响曲线测试和统计,最终得到了如下结果: 图 1-31 哈曼曲线 2013 测试听力设备听众评分与设备 HFR 结果 [18] 奥利佛博士和其团队,将 评分最高的 HP1~HP4 组的 HFR 数据,进行了基于 感知均衡 情况(图中绿色线即为均衡器调整)的 平均化修正,再将四组结果进行了拟合,得到了光滑的设备 85dB 时的 HFR 人造曲线。 图 1-32 哈曼曲线 2013 和 2015 标准 以此,认为贴近于该曲线的耳机设备,有着满足大多数人最佳听感的主观度量曲线。 不过由于样本量过小,2013 年的测量结果并没有足够的说服力。为了解决说服力问题,在 2015 年、 2017 年,哈曼曲线又经过了两轮样本量级和受试设备的扩充,并重新测定了结果。 而最 新一次的测定就是 2017 年的 HTC 2017标准。相对更具有参考价值: 图 1-33 哈曼曲线 2013 和 2015 标准 但正如 肖恩·奥利佛 本人所言,“It is important for the reader not to draw generalizations from these results beyond the conditions we tested.” (“重要的是,读者不要从这些结果中得出超出我们测试条件的概括。”) [19] 哈曼曲线只能是参考,是存在大量主观作用和客观条件的。只能作为一种主观标准提供有限的意见。而这也和其他类似的耳机 SFR 标准,有着同样的问题。 至于具体是否能先觉的量化每个人的听觉体验呢?或许肖恩博士的另一句话,会有更大的参考价值,那就是 [19]: “It makes perfect sense, at least to me. 
Only then will listeners hear the truth -- music reproduced as the artist intended.”(“至少对我来说是这样的。(不过也正是)只有这样,听众才能听到真相——音乐按照艺术家的意图复制。”) 这就是感官感受和工程测量的不同了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_4_5.html":{"url":"Chapter_1/Language/cn/Docs_1_4_5.html","title":"1.4.5 工程:频谱图(Spectrum)","keywords":"","body":"1.4.5 工程:频谱图(Spectrum) 如果说之前我们介绍的几种对声音的解构方式,或多或少都显得有些唯心的话,那 频谱图(Spectrum)则是一种纯客观的声音度量与观测法了。 频谱图(Spectrum) 是对持续一段时间的某段声音,其频率情况在由 指定频率区间 与 时域 构成 复平面(Complex Plane) 上展开的可视化描述图。同 某时刻声音的频率响度特征(FLF)一样,需要利用傅立叶变换时频分离获取。而从从时间流逝的角度,即时间轴作为 z轴 来观察,两者也存在相互关系。 频率响度特征(FLF)与频谱图的关系 可以认为,如果时间是离散的,频谱图就相当于由该声音持续长度时间的一系列各个时刻 频率响度特征切片,构成的图表。 虽然频谱图中的时间被认为是连续的(实际只是步长较小而已,毕竟考虑计算及性能,会采用从属于傅立叶离散变换的 短时傅立叶变换(STFT [Short-TIme Fourier Transform]) 快速处理),但参考 FLF 的生成方式,只需要 小范围求平均 就能完成切片。 如下: 图 1-34 一组乐器演奏 A4 音调约 12s 的 3D 频谱图 上图就是一张完整的频谱图,如果我们在 5s左右截取前后 100ms 左右数据,并取平均。就能获得其在 5s 左右 ±100ms 范围的频率响度特征(FLF),而这结果在之前的章节中,已经见过了,即: 图 1-35 多乐器演奏音调 A4 时在 5s 处取 100ms 所得频率响度特征 而这张频谱图,就是该 FLF 的声源分析结果(具体的生成用代码,在本书第五章节提供)。 因此,通过某段声音的频谱图分析,我们通过切片手段,能够获取该声源,在任意时刻的频率响度特征。这也是为什么,频率响度特征,有时被称为频率响度切片的原因。 一般的,若无特指,声音的频谱图皆代指该声音的 三维频谱图(3D Spectrum)。 频率响度特征(FLF)与频谱图的关系 显然,频谱图有三个坐标轴,分别是 时间轴(Time Axis)、频率轴(Frequency Axis)、响度轴(Loudness Axis)。三个坐标轴两两构成平面,而这些平面在某种程度上,提供观察声音信息的不同视角: 由 时间轴(Time Axis) 和 频率轴(Frequency Axis) 构成了 时频切面; 由 时间轴(Time Axis) 和 响度轴(Loudness Axis) 构成了 波形切面; 由 频率轴(Frequency Axis) 和 响度轴(Loudness Axis) 构成了 频响切面; 可见,前文中有关声音在乐理和感受上的解构,多发生于 波形切面 和 频响切面(尤其是后者),及其关联平行平面(如 ELLC、FRC 等)的观察窗口。可以说是 对该切面信息主观度量的衍生产物。 语谱图(Spectrogram)与 时频切面(TFS [Time-Frequency Section]) 时频切面(TFS [Time-Frequency Section]) 能够用来获取,在指定响度大小情况下,严格满足该响度的频率随时间的分布关系。由于单独看某一个固定的响度值下的时频关系,并没有太大意义,因此常以某声音完整数据的所有响度时频切片,按照切片所处响度高低用不同颜色表示后叠加,来二维的表示该声音的频谱情况。 这样做的几何意义,即为获得了该声音的频谱图,在时频切面的投影。而通过不同颜色(通常为冷暖过渡色)对原频谱信息进行了完整的降为保存,使得这个投影结果,也可以用来代表原频谱图情况,被称为 声纹图(Voiceprint)。 声纹图(Voiceprint)因此也被称为 二维频谱图(2D Spectrum)。为了区别于 三维频谱图 以免产生混淆,被改称为 语谱图(Spectrogram)。 图 1-36 一组乐器演奏 A4 音调约 12s 的 3D 频谱图 & 投影所得 语谱图(上) 波形图(Waveform)与 波形切面(TLS [Time-Loudness Section]) 波形切面(TLS [Time-Loudness Section]) 是声音信息在经过时频分离后,得到的 从时域观察频域角度的频域维度切片(注意观察方向,垂直于 FLS 情况,时频分离原理在第三章详解)。能够用来获取,指定某频率下,该频率随时间的幅度(即响度)变化情况。 通常而言,如果需要分析指定频率的情况,可以采用如此切割手段。这种处理方式一般被用在降噪模型训练,或一段特定频段频率的综合分析。所以,会取用指定频段的相应所有切片,按照其频段内频率用不同颜色表示后叠加,来二维的表示该频段内的响度时间情况。这种表示方式所构成的声音二维图表,被称为 有限频段波形图(Limited Band Waveform)。 而当我们选择的频段涵盖了整个声音的全部频段(大多为人耳听力的频率范围,即 20~20000Hz)时,就能够得到整个声音的完整波形图了。而这也是最为人熟知的声音图表形式,同时也是该声音 完整的时域信息,即大部分情况下所指的 时域(Time Domain)。 图 1-37 一组乐器演奏 A4 音调约 12s 的 3D 频谱图 & 投影所得 波形图(右) 频响切面(FLS [Frequency-Loudness Section]) 频响切面(FLS [Frequency-Loudness Section]) 是声音信息在经过时频分离后,得到的 从频域观察时域角度的时域维度切片(注意观察方向,垂直于 TLS 情况,时频分离原理在第三章详解)。能够用来获取,指定某时刻下,该时刻的频率构成情况。即 频率响度特征(FLF)。 这个视角我们已经充分的辨析过了,此处亦不再赘言。 但有一点还需强调。 我们以类似获取声音 波形图 和 语谱图 的方式,获得频谱图在频响切面的投影,涵盖了该声音的 完整频域信息,即大部分情况下所指的 频域(Frequency Domain)。 注意 频率响应切片(FLF) 和 频域 的区别,与父子关系(频域切片 和 完整频域)。 图 1-38 一组乐器演奏 A4 音调约 12s 的 3D 频谱图 & 投影所得 完整频域(前) 至此,从乐理角度(艺术)、心理声学(感观)、声乐工程(声音三要素)。读者以具备基本的完整分析一段声音,并初步提取有效数据的认知能力! 
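在进入下一节之前,这里给出一个把同一段声音分别投影为波形图、语谱图与平均频域的最小代码草图,对应上文三个切面的观察视角。文件名与 STFT 参数均为演示假设,其中以各帧平均作为频响切面投影的近似,完整绘图代码参见第五章:

```python
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

# 最小示意:对同一段声音做时频分离,分别观察波形(时域)、语谱图(时频切面投影)
# 与平均频谱(频响切面投影的近似);"demo.wav" 与 STFT 参数均为演示假设
y, sr = librosa.load("demo.wav", sr=None, mono=True)
S = librosa.stft(y, n_fft=2048, hop_length=512)
S_db = librosa.amplitude_to_db(np.abs(S), ref=np.max)

fig, axes = plt.subplots(3, 1, figsize=(8, 9))

# 1) 波形图:时间轴 × 响度轴
axes[0].plot(np.arange(len(y)) / sr, y)
axes[0].set(title="Waveform", xlabel="Time (s)")

# 2) 语谱图:时间轴 × 频率轴,响度以颜色表示
img = librosa.display.specshow(S_db, sr=sr, hop_length=512,
                               x_axis="time", y_axis="hz", ax=axes[1])
axes[1].set(title="Spectrogram")
fig.colorbar(img, ax=axes[1], format="%+2.0f dB")

# 3) 平均频谱:对所有时间帧取平均,频率轴 × 响度轴
freqs = librosa.fft_frequencies(sr=sr, n_fft=2048)
axes[2].plot(freqs, S_db.mean(axis=1))
axes[2].set(title="Averaged Spectrum", xlabel="Frequency (Hz)", ylabel="dB")

plt.tight_layout()
plt.show()
```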
下一节,让我们开始音频的采样与调制,掌握声音是如何从物理信号,转化为可传递数字信号的关键。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_5.html":{"url":"Chapter_1/Language/cn/Docs_1_5.html","title":"1.5 声音数字化","keywords":"","body":"1.5 声音数字化 在本章伊始,我们提到了当下 音频录制(Audio Recording) 技术所处的时代,为 数字处理时代(The Digital era)。在数字时代最为显著的特征,就是从传统的纯物理记录方式,演变成了调制解调配合格式压缩存储的处理过程。 而将声音从物理波转为数字保存,并在需要时提供还原能力的技术,就是 调制解调(Modulation & Demodulation) 技术。 这既是本节讨论的内容。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_5_1.html":{"url":"Chapter_1/Language/cn/Docs_1_5_1.html","title":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source)","keywords":"","body":"1.5.1 数字信号(Digital Signal)& 模拟信号(Analog Signal)& 真实波源(Original Source) 由于从数字处理时代开始,信号间的差异已逐步变大。为了 更好地区分 通过传感器采集的电流数据格式和数模模数转换后的格式,我们将其分为 模拟信号(Analog Signal) 和 数字信号(Digital Signal) 两种: 模拟信号(Analog Signal) 为采用电物理量表示的真实波源的象形连续信息,属于 连续时间信号(CTS [Continuous Time Signal]); 数字信号(Digital Signal) 为有限离散值表示的离散信息,是 量化(Quantification) 后的 离散时间信号(DTS [Discrete Time Signals]); 同时,为了区分两者和数据源的关系,将 信号现实源头 称为 真实波源(Original Source)。 所以,只有真实波源(Original Source)才代表物理世界中的实际波情况。 模拟信号与数字信号,一个是 通过电压电阻等电力学技术采集(Collecting)到的真实波源数据,一个是 通过电子信息技术处理的电压电流转数字码结果。 注意,采集(Collecting) 并不是 采样(Sampling)。两者没有直接联系,属于不同阶段的不同过程。但有时也会将从真实波源获取模拟信号的过程,称为 模拟信号采样(Analog Signal Sampling),需要通过具体上下文来区别。 小心容易混淆。 真实波源一般通过一些 电传感器 来转为模拟信号。这些传感器包括:由多感光电阻单元构成照相机感光器(Camera Sensor)、由驻极体(ECM)单元构成的麦克风传感器、以动圈切割磁感线产生电流的动圈麦克风,简单的压力传感器(Pressure Sensor)等。 简单来说, 模拟信号(Analog Signal) 是 电流信号; 数字信号(Digital Signal) 是 电位信号; 真实波源(Original Source) 是 现实世界里的波(光波、机械波、引力波等); 我们所听到的声音,在物理介质(如空气、水等)中直接传导的信息,在转为电流电压表形后,就可以被认为是模拟信号。数字信号则在自变量(如时间)和因变量(如幅度)上,都是离散且有限的。 但我们 并不能直接简单的将离散时间信号(DST),等同于 数字信号。因为,离散时间信号在不经过量化因变量的操作前,其只是自变量的离散。例如,时间上间隔的从一段声音的模拟信号上截取切片,构成的时序离散的信号,其因变量的波动情况仍然属于自然量描述。 所以,采样自模拟信号的未量化离散时间信号,即为对应数字信号的 中间形态。 由此引申出,模拟信号到数字信号的转换过程: 称为 模数转换(A/D [Analog-to-Digital])。作用于 模数转换(A/D)的设备为 模数转换器(ADC [Analog-to-Digital Converter])。 而从 数字信号到模拟信号的还原过程: 称为 数模转换(D/A [Digital-to-Analog])。作用于 数模转换(D/A)的设备为 数模转换器(DAC [Digital-to-Analog Converter]),即所谓 HiFi 的 解码 DAC。 数模转换和模数转换,并不只局限于音视频的信息转换。其他类型的现实世界信息,也存在同样复杂或简单和电信号互转的过程。由于原理相近,本书选择以音频作为主,不做其他信号的相关展开。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_5_2.html":{"url":"Chapter_1/Language/cn/Docs_1_5_2.html","title":"1.5.2 模数转换(A/D [Analog-to-Digital])","keywords":"","body":"1.5.2 模数转换(A/D [Analog-to-Digital]) 模数转换(A/D) 完成对采集到真实物理信息(如温度、气压、举例等)所得的 模拟信号,到 数字信号的映射。这么做的目的,是为了利用数字信号 可度量、可改动、可计算 的特性,来实现对信息的 操作、保存、传递。 大多数情况下, A/D 会把采样和量化 放到 ADC 单元里 一步完成。为了同时控制这两个变量到同一基准上,ADC 通过引入 固定参考输入(Reference Input),使其频率在合成累加器单元门电路数代表 比特分辨率(Bit Resolution) 作用下,转为十进制的等步长分割。从而可以拆解为由比特分辨率表示的,整数比特离散值,即门电路的电位。控制电位调整参考输入,逼近模拟信号。 最终得到的门电路开关状态以 数字码(Digital Codes) 记录,即为 输出数字信号。 可见,参考输入的电压就是 ADC 所能处理的最大电压。该值通常都来自于各国行业标准采样电压,或由特种设备的内部变压器/脉冲单元/时钟芯片决定。而 ADC 量化采样公式每被执行一次,就是一次量化采样完整过程。采样的频率,来自于设备内部时钟频率,通常是以电脉冲的形式触发。但模拟信号的输出是连续的,因此该时钟频率(即采样频率)的大小,会对采样结果有一定影响。 采样的准确度与采样率设定 根据 香农采样定律(Nyquist–Shannon Sampling Theorem),时钟频率需要为 采样数据源最大频率的至少两倍大小,才能保证采集最大频率时,不会因为非整数倍取样而导致变形。这种变形属于来自于采样过程的 源头干扰,会产生 难以消弭的 影响,例如:在一定距离拍照电子屏时出现的摩尔纹。具体原理相对简单,如下图所示,不再展开赘述(模拟代码见本章节事例)。 图 1-39 香农采样定律取 1.3 倍于被采样频率时的采样失真演示 依此,我们对采样频率的制定,亦有标准公式。强调这里的采样指的是 A/D 过程中的采样。 假设,当前已知一 ADC 设备,想要处理的 模拟信号 脉冲频率范围 为 FAno∈[Fmin, Fmax]F_{Ano} \\in [F_{min},\\ 
F_{max}]FAno∈[Fmin, Fmax] ,该 设备的采样频率 为 FADCF_{ADC}FADC 。则 理想中能够覆盖最大高频模拟信号的无失真频率 FADCF_{ADC}FADC 需满足: FADC≥2⋅Fmax {\\displaystyle \\begin{aligned} F_{ADC} \\ge 2 \\cdot F_{max} \\\\ \\end{aligned} } FADC≥2⋅Fmax 按照该不等式设置的 ADC 采样频率,即可符合要求。上式因此常在工程中被称为 安全采样不等式(Safety Sampling Inequality)。而根据安全采样不等式设定的 FADCF_{ADC}FADC ,称为该设备的 数字信号采样率(Digital Sampling Rate),即 采样率(Samplerate/Sample Rate)。 现在,采样频率的问题解决了。如何处理获取的离散数据,将其转换为数字码标识呢?这需要依赖 A/D 量化公式(A/D Quantization Formula),即量化采样公式的帮助。 量化采样公式(A/D Quantization Formula) 如果记 模拟信号(Analog Signal)的电压(Voltage) 为 VAnoV_{Ano}VAno ,参考输入(Reference Input)的电压(Voltage) 为 VRefV_{Ref}VRef 。合成累加器的门总数,即该 ADC 的 最大比特分辨率(Max Bit Resolution) 为 NNN 。假设模拟信号经过 ADC 处理后,某时刻输出的 数字信号(Digital Signal)十进制表示 为 DDD ,则这几个量间的关系就为: D=VAnoVRef⋅(2N−1) {\\displaystyle \\begin{aligned} D = \\frac{V_{Ano}}{V_{Ref}} \\cdot (2^N - 1) \\\\ \\end{aligned} } D=VRefVAno⋅(2N−1) 此式即为 ADC 量化采样公式,由于 采样不依赖于公式,也被称为 A/D 量化公式(A/D Quantization Formula)。 图 1-40 在 ADC 量化采样公式作用下的 A/D 映射结果 如上,当取用 VRef=6 VV_{Ref} = 6\\ VVRef=6 V 时,有输入模拟信号电压 VAno∈[0, 6]V_{Ano} \\in[0,\\ 6]VAno∈[0, 6] 的数字码映射情况。连续信号通过公式处理,变成了离散值。而 1VRef⋅(2N−1)\\tfrac{1}{V_{Ref}} \\cdot (2^N - 1)VRef1⋅(2N−1) 就是每个十进制下数字码(数字码都是二进制)所能覆盖的电压范围,称之为 1 单位 的 最小显著字节(LSB [Least Significant Bit]) 范围。 而用上例参数的 ADC 对一个时长为 4 个周期且 VAno∈[0, 6]V_{Ano} \\in[0,\\ 6]VAno∈[0, 6] 的正弦模拟信号,进行模数转换。其完整处理的效果如下: 图 1-41 模拟信号经 ADC 量化采样演示 对于一款 ADC 单元,在设计确定了 采样率(Samplerate)、最大比特分辨率(Max Bit Resolution) 和 参考输入(Reference Input) 后,对于该设备的这些相关属性,既成 常数固定。其中,最大比特分辨率(Max Bit Resolution)取值 NNN ,被标注为 ADC 设备的 采样位深(Sampling Bit Depth)。 取值 NNN 为多少,就代表着单个 ADC 上,有多少以 参考输入电压二的幂指倍缩小电压信号 所组成的门后电压单元。 由于参考电压一般要求稳定,所以至少需要以内部元件提供稳定三相电来作为基准。不过,对于精度要求极低的设备,为了电子组件复用和电路板的简化,会采用把采样时钟信号的电压作为参考输入的非常做法。但对于高精度设备(包括麦克风等),时钟信号为高频信号,是严格不能作为参考输入的。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_5_3.html":{"url":"Chapter_1/Language/cn/Docs_1_5_3.html","title":"1.5.3 数模转换(D/A [Digital-to-Analog])","keywords":"","body":"1.5.3 数模转换(D/A [Digital-to-Analog]) 数模转换(D/A) 是对模数转换(A/D)的逆向过程,完成 从数字信号还原至模拟信号 的工作。注意,还原的是模拟信号(Analog Signal),而并非真实波源(Original Source)。对音频来说,转换所得的模拟信号,再经过放音设备播放(如音响、扬声器单元等),成为真实波源。 数模转换公式(D/A Transfer Function) 如果记 数字信号(Digital Signal)十进制表示 为 DDD ,参考输入(Reference Input)的电压(Voltage) 为 VRefV_{Ref}VRef ,合成累加器的门总数,即该 DAC 的 最大比特分辨率(Max Bit Resolution) 为 NNN 。假设数字信号经过 DAC 处理后,某时刻输出的 模拟信号(Analog Signal)电压(Voltage) 为 VAnoV_{Ano}VAno ,则这几个量间的关系就为: VAno=D2N−1⋅VRef {\\displaystyle \\begin{aligned} V_{Ano} = \\frac{D}{2^N-1} \\cdot V_{Ref} \\\\ \\end{aligned} } VAno=2N−1D⋅VRef 此式即为 DAC 数模转换公式,由于 DAC 为参考输入构建波形的内部时钟信号脉冲周期不依赖于公式,也被称为 D/A 转换公式(D/A Transfer Function)。 同 ADC 一致,DAC 中参考输入电压二的幂指倍缩小电压信号,组成了位门后的各个门电路所对应电压输入。其所有输入的周期皆为时钟信号的周期,即周期完全一致。 图 1-42 在 DAC 数模转换公式作用下的 D/A 映射结果 如上(注意坐标轴),当取用 VRef=6 VV_{Ref} = 6\\ VVRef=6 V 有 DAC 最大比特分辨率 N=4N = 4N=4 时,输入数字信号十进制表示 D∈[0, 15]D \\in[0,\\ 15]D∈[0, 15] 的模拟信号还原的理想情况。在没有 DAC 设备误差的情况下,上一小节经过我们模数转换所得时长为 4 个周期的数字信号,就能还原为原 VAno∈[0, 6]V_{Ano} \\in[0,\\ 6]VAno∈[0, 6] 的正弦模拟信号: 图 1-43 数字信号经 DAC 数模转换演示 对于一款 DAC 单元,在设计确定了 时钟频率(Clock Frequency)、最大比特分辨率(Max Bit Resolution) 和 参考输入(Reference Input) 后,对于该设备的这些相关属性,既成 常数固定。其中,最大比特分辨率(Max Bit Resolution)取值 ,被标注为 DAC 设备的 解析位深(Analytical Bit Depth),即俗称的解析力。 同样的,想要达到 较好的还原 模拟信号效果,DAC 的 时钟频率(Clock Frequency),需要和 ADC 的工业标准保持一致。因此,有时也被用 ADC 的采样率(Samplerate)的称谓代指,即所谓 DAC 采样率。这种称谓其实是不准确的。 而在多级设备的放音场景,为了保证包括 DAC 在内的整条解码放音链路上设备的时钟频率一致,常需要我们提供外侧时钟信号(Clock Signal),来避免由于设备间的差异,而导致还原后的模拟信号,在传递和还原真实波源时,发生周期上的挤压/拉伸形变。 Copyright © Since 2021 李述博 
(Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_5_4.html":{"url":"Chapter_1/Language/cn/Docs_1_5_4.html","title":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM)","keywords":"","body":"1.5.4 脉冲编码调制(PCM)& 脉冲密度调制(PDM) 实际上,为了避免理解中的混淆,我们在上文中介绍的 模数转换(A/D)和 数模转换(D/A)方式,都是基于 脉冲编码调制(PCM [Pulse-Code Modulation]) 进行的。能够完成数模模数转换的方法,除了 PCM 法外,还有 脉冲密度调制(PDM [Pulse-Density Modulation]),以及一系列不同出发点的调制模式,比如 脉冲带宽调制(PDM [Pulse-Width Modulation]) 等。由于在本领域内相对非主流,或是已经属于相对落后技术,亦不再讨论。 本节主要以相对具有代表性的 PCM 与 PDM 进行比对。 脉冲编码调制(PCM [Pulse-Code Modulation]) 是通过将模拟信号的 电压幅度,以 离散数字码 的形式等效表示,从而转换为数字信号。其在转换前后 时序(周期)上是一致的。转换后的数字信号,幅度变化拟合原有模拟信号幅度变化轮廓。 脉冲密度调制(PDM [Pulse-Density Modulation]) 则是将模拟信号的 电压幅度,以 一段时间内的高密度脉冲数量 来表示,从而转换为数字脉冲。其在 时序(周期)上是差异的,转换后的数字信号,幅度二元变化(只有 0/1 值)。 主要的不同,来自于对模拟信号 幅度抽象模式。 PCM & PDM 异同辨析 基于 PCM 的 A/D、D/A 过程,数字信号是二维信号,时序信息与幅度信息依旧保持为两个维度。而基于 PDM 的 A/D、D/A 过程,数字信号是一维信号,原模拟信号的时序信息和幅度信息,被叠加到同一维度上,以采样频率对应周期长度进行了转后数字信号单一维度上的分片。 相应的,对于 PCM 法所得结果幅度切割程度的重要指标,采样位深(Sampling Bit Depth),则 不存在于 PDM 法中。 PDM 采用 过采样系数(Oversampling Ratio) 配合 数字信号采样率(Digital Sampling Rate) 的方式,来表示 采样分辨率(Sampling Resolution)。即 PDM 和 PCM 的采样率,在意义上是不一样的: PCM 采样率,代表在一个时钟信号周期内,设备对模拟信号采样的次数; PDM 采样率,代表在一个时钟信号周期内,设备对模拟信号一次采样的幅度累计上限; 因此,PDM 设备在一个时钟信号周期内,仅仅数字化模拟信号 一个时刻。PCM 设备在一个时钟信号周期内,则数字化模拟信号 多个时刻。 需要注意的是,PDM 采样率(Samplerate) 决定了 PDM 设备的 可采样幅度范围,但这 并不 意味着可以等价于设备的时钟频率,这是两个概念。仍然记该 PDM 设备 参考输入(Reference Input) 大小为 VRefV_{Ref}VRef ,数字信号采样率(Digital Sampling Rate) 为 FADCF_{ADC}FADC ,过采样系数(Oversampling Ratio) 为 SrS_{r}Sr ,而 采样率(Digital Sampling Rate) 为 FFF 。则顺序 iii 的 二元数字信号(0-1 Digital Signal) 值 DiD_iDi 与几个量间的关系有: ∑i=0Sr⋅FDi={1, VAnoVRef>00, VAnoVRef=0 {\\displaystyle \\begin{aligned} \\sum_{i=0}^{S_r \\cdot F} D_i &= \\begin{cases} 1 &, \\ \\frac{V_{Ano}}{V_{Ref}} > 0 \\\\ 0 &, \\ \\frac{V_{Ano}}{V_{Ref}} = 0 \\end{cases} \\\\ \\end{aligned} } i=0∑Sr⋅FDi=⎩⎪⎪⎨⎪⎪⎧10, VRefVAno>0, VRefVAno=0 一个时钟信号周期内 Di=1D_i = 1Di=1 累积个数,就是 PDM 数字信号的 脉冲密度(Pulse Density)。我们记脉冲密度为 IpI_pIp ,原模拟信号被采样时间点为 ttt 则: Ip=(∑Di⋅(Sr⋅F))t {\\displaystyle \\begin{aligned} I_p = \\left( \\sum D_i \\cdot (S_r \\cdot F) \\right)_{t} \\\\ \\end{aligned} } Ip=(∑Di⋅(Sr⋅F))t 所以,对 PDM 设备来说,IpI_pIp 才代表了原模拟信号在 ttt 时的 等效振幅(即电压),有: VAno(t)=Ip(t)⇔∑i=C⋅tC⋅t + Sr⋅FDi {\\displaystyle \\begin{aligned} V_{Ano}(t) = I_p(t) \\Leftrightarrow \\sum_{i =C \\cdot t}^{C \\cdot t \\ +\\ S_r \\cdot F} D_i \\\\ \\end{aligned} } VAno(t)=Ip(t)⇔i=C⋅t∑C⋅t + Sr⋅FDi 其中,时钟频率(Clock Frequency) 记为 CCC 。 所以,PDM 是完全不同于 PCM 的方法论。 而不论是 PCM 还是 PDM,其理想情况下都可以保持转换还原前后,原模拟信号不发生改变。 对于 PDM 来说,最显著的特点就是在同等情况下,能够提供 比 PCM 更细腻的分辨率,但缺点也很明显,即 更窄的动态范围(时钟周期性和等效较低的对原模拟信号的采样频率)。 此外,PDM 受分时分区的采样频率,和通过电压控制的开关门电路累计计数关系,而易受外界和设备自身影响,导致容易引入内外噪音干扰。不过由于只需要按频率对应的一个周期内,累计幅度时发送单一信号(1⋅VRef1 \\cdot V_{Ref}1⋅VRef),无幅度累计发送单一信号(0⋅VRef0 \\cdot V_{Ref}0⋅VRef)的方式,转换数字信息。而使 PDM 的构造显然要简单于 PCM 方式的 ADC、DAC,这让用 PDM 方式构造的该类设备,具有较低能耗和低造价(制造简单)的优势。 由于这些原因,PDM 设备常被用在一些低电力和相对精度较低的需求场景,如电器控制单元、LED灯驱动器、一些微型麦克风设备等,相对更靠近使用端的设备。 相比之下,PCM 的处理方式,显然更容易相对完整的保存原有模拟信号信息。 音视频工程场景中,我们常处理的音频信号,基本为 PCM 方式获取的数字信号。 对于想要进行调整的 PDM 数字信号,通常需要转换为 PCM 数字信号后,再行以 PCM 更具优势的直接编辑方式,进行相关操作。而位于计算机体系内用来实现音频存储的数字信号基础类型,亦为 PCM 类型的数字信号。 由此可见 PCM 数字信号的重要性。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_6.html":{"url":"Chapter_1/Language/cn/Docs_1_6.html","title":"1.6 音频的存储","keywords":"","body":"1.6 音频的存储 经过上一小节,我们已经能够将大多数声音的模拟信号,转为 PCM 数字信号的音频数据。而接下来,在现代计算机系统内,这些数据具体该怎么进行 储存保存 呢?考虑到其本身 已经为数字码(Digital Codes) 
格式,一种直接使然的思路,就是什么都不再进行变动,采用直接写入到磁盘中的方式,保存原始数字信号。 这就是音频存储的基础格式,PCM 音频格式。 什么是音频格式? Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_6_1.html":{"url":"Chapter_1/Language/cn/Docs_1_6_1.html","title":"1.6.1 音频格式(Audio Format)","keywords":"","body":"1.6.1 音频格式(Audio Format) 音频格式(Audio Format),也被称为 音频文件格式(Audio File Format)。其被用来指代,对当前目标 音频数字信号(Digital Signal) 数据,进行保存至数字存储设备中的 处理方式和最终数据结果。 即,音频格式 包含了两部分重要的信息:压缩算法(Compress Formula) 和 存储格式(Data Format)。两者共同构成了 音频格式 的核心属性。 不过,由于存储格式大都是由压缩算法决定,且采用相同于原数字信号本身的数字码表示方式进行存储。可以说压缩算法的差异,才是决定不同音频格式间差异的关键。 而音频的存储格式,在这一点上,仅仅作为压缩算法的运算结果,并不起主导作用。 三者关系如下所示: 所以,根据格式本身所采用的压缩算法类型,音频格式可以分为 三大种类: 未压缩音频格式(Uncompressed [Uncompressed Audio Format]),不采用任何压缩算法直接存储,例如前文提到的 PCM 音频格式; 无损压缩音频格式(Lossless [Lossless Compression Audio Format]),采用无损压缩算法,对数字信号进行压缩后的存储格式,例如 FLAC 音频格式; 有损压缩音频格式(Lossy [Lossy Compression Audio Format]),采用有损压缩算法后,得到的存储格式,例如著名的 MP3 音频格式; 显然,想要理解这几类的划分,从 压缩算法 入手,是个较好的切入点。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_6_2.html":{"url":"Chapter_1/Language/cn/Docs_1_6_2.html","title":"1.6.2 无压缩编码格式(Uncompressed Encode)","keywords":"","body":" Hex Data Display .hex-container { text-align: center; } .hex-data { display: inline-block; text-align: left; font-weight: bold; font-family: monospace; white-space: pre; } 1.6.2 未压缩音频格式(Uncompressed Audio Format) 未压缩音频格式(Uncompressed [Uncompressed Audio Format]) 即 没有经过任何压缩算法处理,而直接将数字信号的数字码,作为存储格式保存的音频格式。因此,未压缩音频格式的存储格式(Data Format) 与 原数字信号 在音频数据内容部分,具有 完全一致的数字码值。 常见的未压缩音频格式,除了前文反复提到的 PCM 格式外,还有 PDM、WAV、AIFF、AU 等。PCM、PDM 格式自不必多提,而在系统中常用的 WAV 、SND/AU、AIFF等 则需额外引申。 首先的一个疑问就是,为什么会有这么多未压缩音频格式? 
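在回答这个问题之前,不妨先用一小段示意代码,直观感受一下上文所说的"不做任何变动、直接写盘"的 PCM 音频格式(以下为演示草例,文件名、频率与幅度等参数均为自拟假设,并非固定规范):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    const int    sample_rate = 44100;   // 采样率(Hz)
    const int    channels    = 2;       // 双声道,交错存放(LRLR...)
    const double freq        = 440.0;   // 演示用 440 Hz 正弦波(假设参数)
    const double amplitude   = 0.3;     // 幅度系数,避免满幅削波
    const double pi          = 3.14159265358979323846;

    std::vector<int16_t> pcm;
    pcm.reserve(static_cast<size_t>(sample_rate) * channels);
    for (int i = 0; i < sample_rate; ++i) {              // 共 1 秒时长
        double t = static_cast<double>(i) / sample_rate;
        auto s = static_cast<int16_t>(amplitude * 32767.0 * std::sin(2.0 * pi * freq * t));
        for (int c = 0; c < channels; ++c) pcm.push_back(s);
    }

    // "什么都不再变动":不写任何头部,直接把数字码原样写入磁盘
    FILE* fp = fopen("tone_440hz_s16le_stereo.pcm", "wb");
    if (!fp) return 1;
    fwrite(pcm.data(), sizeof(int16_t), pcm.size(), fp);
    fclose(fp);
    return 0;
}
```

这样得到的 .pcm 文件本身不携带采样率、位深、通道数等任何描述信息,播放或解析时必须由使用者另行约定。带着这一点,再来看下面的答案会更容易理解。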
未压缩音频格式种类的产生 如此结果的产生,主要来自于 两个重要因素:调制模式的差异,和 描述信息的不同。 调制模式的差异,造成了如 PCM、PDM 之类的区分,这也是上节末尾我们所谈到的(其具体原理,已经于前面的章节中讲解,如有疑问可以回顾),是来自于不同 AD/DA 方法论的区别。 描述信息的不同,则指向该格式,是否携带了 描述音频的头部信息(Header Information) 用来标记当前音频文件对应音频数据的全局附加信息。而这,则是来自于 系统文件规范(System File Specifications) 指定的人为因素。 WAV 音频格式 自微软在早期 IBM 时代提出了 资源交换档案标准(RIFF [Resource Interchange File Format]) 规范后,所有于 Windows 系统内的数据存储文件,都需要按照: 【分块描述信息块(Chunk Descriptor)】+【复合数据子块(sub-Chunks)】 的形式,完成数据封装。而音频数据所对应的,即为 波形格式数据(Wave File Format),也就是所谓 WAV(.wav) 格式。同时,随着 Windows 系统的极大普及,WAV 格式也成为了一种于多系统间的通用基础音频格式类型。 Windows 系统以后缀来区别具体归属 RIFF 类型。而不论是否采用 PCM 调制模式,想要存储且不采用压缩算法的音频数字信号,都需要按 RIFF 要求进行封装。当然,现行的 WAV 格式文件,基本都是对 PCM 数据的 RIFF 套壳。 RIFF 规定,WAV 数据格式包含了三部分数据,分别是: 分块描述信息块(Chunk Descriptor),存放基于 RIFF 体系的当前块标记; 音频格式子块(fmt sub-Chunk),存放当前存储音频相关数据的附加信息; 音频数据子块(data sub-Chunk),存放当前存储音频的存储数据; 不同区域,包含的信息(即各自参数)的 目标作用域 是不同的。分块描述信息块,主要是基于 RIFF 体系的 相对宏观存储信息记录,目的是为了方便于计算机系统据此进行数据归纳处理。而 音频格式子块 和 音频数据子块 才是对该 数字信号 代表的 音频实际信息的描述和保存。 因此,三部分各有参数标记各自重点。 分块描述信息块(Chunk Descriptor) 主要包含 3 个属性,分别是: Params Range(bytes) Details ChunkID 0x00~0x03 (4) 标记当前块 ID,固定存储 'RIFF' 四个大写字母的 ASCII 码,即 == 0x52494646 ChunkSize 0x04~0x07 (4) 记录当前块除 ChunkID 和 ChunkSize 属性外,完整文件的总体大小(bytes),== 4 + (8 + Subchunk1Size) + (8 + Subchunk2Size) Format 0x08~0x0b (4) 标记当前 RIFF 文件类型,WAV 固定存储 'WAVE' 四个大写字母的 ASCII 码,即 == 0x57415645 音频格式子块(FMT sub-Chunk) 主要包含 8 个属性和 2 个额外字段,分别是: Params Range(bytes) Details Subchunk1ID 0x0c~0x0f (4) 标记当前 子块-1 的 ID(即子块类型),固定存储 'fmt' 三个小写字母的 ASCII 码,即 == 0x666d7420 Subchunk1Size 0x10~0x13 (4) 记录当前子块除 ID 和 Size 属性外的大小(bytes),而对于存储 PCM 数字信号,该值恒定 == 16 bytes AudioFormat 0x14~0x15 (2) 音频格式类型,非常用参数,因为当本身为 != 1 的值时,代表着文件存储的音频数据,采用了对应标记值的压缩算法。此时一般会采用对应的格式后缀。对于 PCM 格式,该值恒定 == 1 NumChannels 0x16~0x17 (2) 存储音频数据的通道数,单通道(Mono == 1),双通道(Stereo == 2),N 通道(== N) SampleRate 0x18~0x1b (4) 数字信号采样率,注意不同调制类型需要考虑前文提到的差异,对 PCM 来说就是 ,有该值 == 8000 | 11025 | 24000 | 44100 等 ByteRate 0x1c~0x1f (4) 比特率,即当前全通道单采样周期所得数据的传输率 == SampleRate * NumChannels * BitsPerSample/8 BlockAlign 0x20~0x21 (2) 全通道数据单次采样的对齐大小,即一次全通道采样的有效数据大小,固定 == NumChannels * BitsPerSample/8 BitsPerSample 0x22~0x23 (2) 代表来自于 数模模数转换 的 采样位深(Sampling Bit Depth)/ 最大比特分辨率(Max Bit Resolution),该值单位为 bits,有 == 8 | 16 | 32 bits 等 ExtraParamSize 0x24~0x25 (2) 额外参数信息大小,如无则不占用字节大小,非常用参数,原因同 AudioFormat,对于 PCM 来说,该值始终 == 0,且字段不存在 ExtraParams 0x26~0x26+X (X) 额外参数内容,同上,对 PCM 始终 X == 0 需要注意的是,音频格式子块(FMT sub-Chunk)中的 ExtraParamSize 和 ExtraParams 并不是始终存在的。对于 以 PCM 数字信号数据为主要载荷信息的 WAV 格式,该两个字段在 fmt 子块中,是不存在。 即,ExtraParamSize 和 ExtraParams,在 WAV 中并不占用任何有效数据字段。 音频数据子块(DATA sub-Chunk) 主要包含 3 个属性,分别是: Params Range(bytes) Details Subchunk2ID 0x24~0x27 (4) 标记当前 子块-2 的 ID(即子块类型),固定存储 'data' 四个小写字母的 ASCII 码,即 == 0x64617461 Subchunk2Size 0x28~0x2b (4) 记录当前子块除 ID 和 Size 属性外的大小(bytes),而对于存储 PCM 数字信号,该值为数字信号数据大小 == PCM-data-size bytes Data 0x2c~0x2c+X (X) 当前 PCM 数字信号的数字码信息,共计 X bytes 所以,音频数据子块(DATA sub-Chunk) 其实就是 PCM 音频格式时,被存储到计算机系统中的 PCM 存储文件(.pcm)有效数据部分。 三个 WAV 的组成部分以固定顺序排布,如下所示: 图 1-44 WAV 音频格式的完整结构成分 共同构成了一则有效的 WAV 音频格式文件。 现在,让我们再来看一段 72 bytes 的 WAV 音频文件(十六进制格式单字节展开): 52 49 46 46 24 08 00 00 57 41 56 45 66 6d 74 20 10 00 00 00 01 00 02 00 22 56 00 00 88 58 01 00 04 00 10 00 64 61 74 61 00 08 00 00 00 00 00 00 24 17 1e f3 3c 13 3c 14 16 f9 18 f9 34 e7 23 a6 3c f2 24 f2 11 ce 1a 0d 按照上述划分,就能得到各自子块的信息了: 图 1-45 演示用 72 bytes 的 WAV 音频文件解析 从上可知,样例其实是从一段 2048 Bytes 的 PCM 音频对应 WAV 文件 中,从头 截取 72 Bytes 数据 组成的。所以,利用头部信息来交验数据完整性,或取得更早阶段(即调制阶段)的信息,在 WAV 这种 具有 分块描述信息块(Chunk Descriptor) 的音频格式(Audio Format)里成为了可能。 这也是为何类 WAV 结构音频格式(包括将要提到的 SND/AU 和 AIFF 等),会代替了直接以 PCM 在电脑中进行非工程化存储的原因。 
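结合上面的字段表与 72 bytes 的十六进制样例,这里给出一个按标准 44 字节头部解析 WAV 基本信息的演示草例(假设性代码:仅处理 AudioFormat == 1 的标准 PCM WAV,并假定运行在小端字节序机器上;真实文件还可能在 fmt 与 data 之间插入其他子块,稳健的解析器应逐块扫描而非一次性读取固定结构):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

#pragma pack(push, 1)
struct WavHeader {                 // 标准 44 字节 PCM WAV 头部(结构体名为演示自拟)
    char     chunk_id[4];          // 'RIFF'
    uint32_t chunk_size;           // 4 + (8 + Subchunk1Size) + (8 + Subchunk2Size)
    char     format[4];            // 'WAVE'
    char     subchunk1_id[4];      // 'fmt '
    uint32_t subchunk1_size;       // PCM 时为 16
    uint16_t audio_format;         // PCM == 1
    uint16_t num_channels;         // 通道数
    uint32_t sample_rate;          // 采样率
    uint32_t byte_rate;            // SampleRate * NumChannels * BitsPerSample / 8
    uint16_t block_align;          // NumChannels * BitsPerSample / 8
    uint16_t bits_per_sample;      // 采样位深
    char     subchunk2_id[4];      // 'data'
    uint32_t subchunk2_size;       // PCM 数据字节数
};
#pragma pack(pop)

int main(int argc, char** argv) {
    if (argc < 2) { fprintf(stderr, "usage: %s file.wav\n", argv[0]); return 1; }
    FILE* fp = fopen(argv[1], "rb");
    if (!fp) { perror("fopen"); return 1; }

    WavHeader h{};
    if (fread(&h, sizeof(h), 1, fp) != 1) { fprintf(stderr, "file too short\n"); return 1; }
    fclose(fp);

    // 校验三处固定标记:'RIFF' / 'WAVE' / 'fmt '
    if (memcmp(h.chunk_id, "RIFF", 4) || memcmp(h.format, "WAVE", 4) ||
        memcmp(h.subchunk1_id, "fmt ", 4)) {
        fprintf(stderr, "not a canonical PCM WAV header\n");
        return 1;
    }
    printf("channels=%u, sample_rate=%u Hz, bit_depth=%u, pcm_bytes=%u\n",
           (unsigned)h.num_channels, (unsigned)h.sample_rate,
           (unsigned)h.bits_per_sample, (unsigned)h.subchunk2_size);
    return 0;
}
```

用它解析正文中的 72 bytes 样例,即可得到双声道、22050 Hz、16-bit、data 子块共 2048 bytes 的结果,与图 1-45 的解析一致。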
不过,这也引申出了 两种截然相反的,有关未压缩音频格式的 制定思路:缩减头文件信息减少复杂度 ,和 增加头文件所能涵盖的辅助数据。典型代表,分别是 SND/AU 和 AIFF 音频格式。 SND/AU 音频格式 SND/AU 是一种极简的,携带有描述信息的未压缩音频格式。该格式由已于 2009 年被 甲骨文(Oracle)收购的 美国昇阳电子有限公司(Sun Microsystems, Inc) 提出,用于解决麦克风录制转换后的 PCM 数字信号,快速简易存储的问题。 SND/AU 音频格式(.snd/.au) 不以 块(Chunk)/子块(sub-Chunk) 形式对关键数据进行分离,而是 直接将存储分为三个部分区段,分别是: 头信息区段(Header),存储必要的最基本音频描述信息; 变长辅助信息区段(Variable-length Informational Field),存储需要的额外信息; 音频数据区段(Data),存放当前存储音频的存储数据; 初看之下可能感觉同 WAV 格式并无太大差异,然而事实并非如此。SUD/AU 的相关音频数据的参数,以及自身有关文件系统的标志,都被 集中于头信息字段的 6 个固定参数中。而涉及音频本身,诸如版权信息、作者名称等数据,则并未规范如何存储,只指定了必须放入 变长辅助信息区段 的要求。这使 系统并不需要管理这部分信息。而音频数据区段,则只能存放 PCM 数字信号的数字码数据。 所以,变长辅助信息区段(Variable-length Informational Field) 只规定 必须占用 4 bytes 大小,并在有 额外信息 时,以整数字节增加,如:5(4+1) bytes、6(4+2) bytes 等。而 音频数据区段(Data)则紧随其后,直接以 PCM 采样按通道(Channels)数,交替分 blocks 存储即可(同 WAV 的 data 部分)。 头信息区段(Header) 主要包含 6 个属性,分别是: Params Range(bytes) Details magic 0x00~0x03 (4) 标记当文件类型,固定存储 '.snd' 四个字符文件后缀的 ASCII 码,即 == 0x2e736e64 hdr_size 0x04~0x07 (4) 记录音频数据起始偏移(bytes),用于快速数据,有 == 24 + Informational_Field_Size(bytes) data_size 0x08~0x0b (4) 本用于记录数字信号数据大小,但由于可通过,文件大小 - hdr_size 算得,因此可取 == 0xffffffff 表示无记录/ == n 表示 n bytes 大小 encoding 0x0c~0x0f (4) 用于标记具体存储的 PCM 数据,所采用的标准见下方表格,只可取 == 1, 2, 3, 4, 5, 6, 7, 23, 24, 25, 26, 27 sample_rate 0x10~0x13 (4) 数字信号采样率,由于 SUD 只能存 PCM,对 PCM 来说就是 ,有该值 == 8000 | 11025 | 24000 | 44100 等 channels 0x14~0x17 (4) 存储音频数据的通道数,单通道(Mono == 1),双通道(Stereo == 2),N 通道(== N) 不难发现,我们认为的 比特率(bitrate),或者至少该有的 采样位深(Sampling Bit Depth) 信息,并没有直接体现在头信息字段的参数中。这 并不 意味着没有包含该信息,而是 SND/AU 音频格式,通过 固定格式可支持类型 的方式,将这部分信息 封入了头信息字段的 encoding 子段 里,间接表示 了。 而 SND/AU 所支持的 PCM 采样规格,总计有 12 种,如下: Type Name ID Details 8 bit ISDN u-law 1 采样位深 为 8-bit 电话信号 uLaw 有损传输压缩算法 8 bit linear PCM 2 采样位深 为 8-bit 的线性 PCM 调制 16 bit linear PCM 3 采样位深 为 16-bit 的线性 PCM 调制 24 bit linear PCM 4 采样位深 为 24-bit 的线性 PCM 调制 32 bit linear PCM 5 采样位深 为 32-bit 的线性 PCM 调制 32 bit IEEE floating point 6 采样位深 为 32-bit 的 IEEE 归一化浮点 PCM 数据 64 bit IEE floating point 7 采样位深 为 64-bit 的 IEEE 归一化浮点 PCM 数据 4 bit CCITT G721 ADPCM 23 采样位深 为 4-bit 的 ITU G721 自适应 PCM 规格 CCITT G722 ADPCM 24 采样位深 为 4-bit 的 ITU G722 自适应 PCM 规格 CCITT G723 ADPCM 25 采样位深 为 4-bit 的 ITU G723 自适应 PCM 规格 5 bit CCITT G723 ADPCM 26 采样位深 为 5-bit 的 ITU G723 自适应 PCM 规格 8 bit ISDN a-law 27 采样位深 为 8-bit 电话信号 aLaw 有损传输压缩算法 通过 标记 encoding 取指定 ID 的方式,锚定规定好并确认具体参数的规格档次,来简化了头内容。当然弊端也很明显。由于选定的规格,并指定了档次,使得 相关参数是固定的,无法使用同规格下的其他参数组,而 无法进行动态扩展。这一部分仅了解即可,如使用到相应详细参数,再行查阅。 三个 SND/AU 的组成部分以固定顺序排布,如下所示: 图 1-46 SND/AU 音频格式的完整结构成分 较之 WAV 格式,简化了大量块信息。 但也正是因为这些原因,使得工程上在处理 SND/AU 格式时,需要花费额外的工作,来处理被固定的信息成分。这相当于另一种通过规定来实现的压缩手段了,变相的增加了系统处理资源消耗。因此,除了在 NeXT 系统上得到了大范围应用外,现如今 SND/AU 格式已成为逐步被淘汰的一种类型。 而与之相对的,WAV 和 AIFF 则仍被大量使用在 Windows/Linux 和苹果 MacOS/iOS 系统中。让我们不得不考虑,过渡的简化信息,是否仍有必要。 AIFF 音频格式 音频交换文件格式(AIFF [Audio Interchange File Format]),即 AIFF 音频格式(.aif/.aiff),正如刚刚所提,是一种被使用在 MacOS/iOS 上的未压缩音频格式。是一种隶属于 交换文件格式(IFF [Interchange File Format]) 文件管理体系的 文件格式(File Format)。该格式的特点相比 WAV 的 RIFF 分块体系而言,有着 更为复杂的子块类别。极大提升了能够涵盖辅助信息的广度,并以此为 苹果/Linux 等系统的文件管理,提供了更为方便的归类参考项。 AIFF 音频格式,从整体角度包含量种成分: 文件格式块(FORM Chunk),用以描述服务于系统文件管理的文件本身信息; 附属信息子块(INFO sub-Chunks),一系列不同类型的持续存储子块; 附属信息子块 也被称为 本地信息块(Local Chunks),所有的 本地信息块 都以 参数值的形式,保存于 文件格式块的 chunks 数组参数属性中,作为数组值存储。 于是,由这两类共同构成了一个完整的 AIFF 文件结构,如下: 图 1-47 AIFF 音频格式的完整结构成分(文件结构)简图 从此处即可看出,IFF 体系与 RIFF 体系的差异了。IFF 体系下,子块是以树状从属关系,挂载在 IFF 文件格式块的。而 RIFF 则是 分块描述信息块 和 子块 同属一级。 IFF 文件格式块(FORM Chunk) 主要包含 4 个属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记文件起始占位符,固定存储 'FORM' 四个大写字母的 ASCII 码,即 == 0x464f524d ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 
属性外,完整文件的总体大小(bytes),== 4 + Sum(Local Chunk Size) fromType 0x08~0x0b (4) 标记当前 IFF 文件类型,AIFF 固定存储 'AIFF' 四个大写字母的 ASCII 码,即 == 0x41494646 chunks -- 用于存储 本地信息块,即子块,的完整数据见下方表格,只可取 == Sum(Local Chunk Size) 所有 IFF 文件都有以上属性,而不同 IFF 文件区别,主要存在于 本地信息块的差异上,即 chunks 数组内容的不同。 对于 AIFF 音频格式来说,它的子块情况是什么样的呢? AIFF 将子块(sub-Chunk)拓展到了共计 12 种。且所有子块,在 AIFF 文件中,只能存在一份,或完全不存在。有(按优先程度,非组装顺序,见后文): Type Details Common Chunk 通用信息(子)块,用于存放有关文件本身包含所有子块的通用参数记录 Sound Data Chunk 音频数据(子)块,用于存放音频数据,即当前 PCM 数字信号的数字码信息 Marker Chunk 标记信息(子)块,用于存放有关当前音频的标记(如发行公司等)信息 Instrument Chunk 乐器信息(子)块,用于存放直接作用于当前音频数据的声乐信息 Comment Chunk 评论信息(子)块,用于存放用户等人的交互评价信息 Name Chunk 命名文字(子)块,用于存放当前文件命名信息 Author Chunk 作者文字(子)块,用于存放当前文件作者信息(区别于标记) Copyright Chunk 版权文字(子)块,用于存放当前文件版权信息 Annotation Chunk 声明文字(子)块,用于存放当前文件声明信息 Audio Recording Chunk 录制信息(子)块,用于存放音频录制采用的设备相关信息 MIDI Data Chunk 迷笛(MIDI)数据(子)块,用于存放需迷笛系统处理的通用数据 Application Specific Chunk 应用信息(子)块,用于存放经软件调整音频后,想要持续存储的调整设定参数 如果说 SND/AU 是精简的一端,那 AIFF 无疑将尽可能多的信息装填到了音频文件中。这种复杂的数据归纳,使 AIFF 格式中,出现了 多级数据结构。 我们分别来看一下,各个分块中的参数。 通用信息块(Common Chunk) 主要包含 6 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'COMM' 四个大写字母的 ASCII 码,即 == 0x434f4d4d ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值固定 == 18 numChannels 0x08~0x09 (2) 存储音频数据的通道数,单通道(Mono == 1),双通道(Stereo == 2),N 通道(== N) numSampleFrames 0x0a~0x0d (4) 用于标记音频数据在数模转换时的有效采样个数,即总音频帧数 == 音频的全通道总采样次数 / 通道数 sampleSize 0x0e~0x0f (2) 即 采样位深(Sampling Bit Depth)/ 最大比特分辨率(Max Bit Resolution),该值单位为 bits,有 == 8 | 16 | 32 bits 等 sampleRate 0x10~0x13 (4) +6 (extendable) 数字信号采样率,由于 SUD 只能存 PCM,对 PCM 来说就是 ,有该值 == 8000 | 11025 | 24000 | 44100 等 音频数据块(Sound Data Chunk) 主要包含 5 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'SSND' 四个大写字母的 ASCII 码,即 == 0x53534e44 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 8 + X offset 0x08~0x0b (4) 全通道数据单次采样的偏移大小,无需偏移则为 0 即一次全通道采样的有效数据大小,存储起始偏移,== offset_per_sample blockSize 0x0c~0x0f (4) 全通道数据单次采样的对齐大小,无需对齐则为 0 即一次全通道采样的有效数据大小,固定 == numChannels * sampleSize/8 soundData 0x10~0x10+X (X) 当前 PCM 数字信号的数字码信息,共计 X bytes 标记信息块(Marker Chunk) 主要包含 4 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'MARK' 四个大写字母的 ASCII 码,即 == 0x4d41524b ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 2 + Sum(sub-Data Size) numMarkers 0x08~0x09 (2) 当前附加标记总数,即标记子数据体的个数,== sub-Data number Markers 0x0a~0x0a + (numMarkers * perMarkerSize) 当前附加标记构成的 数组(Array),子数据体 Marker 的持有者,标记作用于总采样的每个独立采样上,时序顺序标记 Maker (bytes) Sub-Detail id (4) 当前标记唯一ID position (4) 作用于哪个采样的数组序号 markerName (str) 当前标记命名(字符串) 乐器信息块(Instrument Chunk) 主要包含 11 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'INST' 四个大写字母的 ASCII 码,即 == 0x494e5354 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 20 baseNote 0x08 (1) 乐器片段的基准乐理音调(Note),需配合 detune 该值采用 迷笛(MIDI)数字音调,范围为 0~127,而迷笛数字音调中,C4 == 60 detune 0x09 (1) 指定乐器片段演奏音高(Pitch),该值并未采用美体系,而直接取 音分(Cent)计数,范围为 -50~+50 ,在 baseNote 基础上乐理偏移 lowNote 0x0a (1) 指定乐器片段的最低兼容乐理音调(Note),该值采用 迷笛(MIDI)数字音调,范围为 0~127,lowNote highNote 0x0b (1) 指定乐器片段的最高兼容乐理音调(Note),该值采用 迷笛(MIDI)数字音调,范围为 0~127,highNote >= baseNote + MIDI (detune) lowVelocity 0x0c (1) 指定乐器片段的最低兼容播放速度(下限),该值采用 迷笛(MIDI)播放速度,范围为 0~127,音乐只能以大于等于该值的速度播放 highVelocity 0x0d (1) 指定乐器片段的最高兼容播放速度(上限),该值采用 迷笛(MIDI)播放速度,范围为 0~127,音乐只能以小于等于该值的速度播放 gain 0x0e~0x0f (2) 指定乐器片段的音高(Loudness)增减益,该值采用 声压级(SPL)取 n = -32768~+32767 ,代表在当前播放音量基础上,增减 n dB sustainLoop 0x10~0x19 (6) 指定乐器片段 持续播放部分 的循环和帧数据位置设定 Looper 
(bytes) Sub-Detail playMode (2) 记录当前循环模式 beginLoop (2) 记录循环起点的采样序号 endLoop (2) 记录循环终点的采样序号 releaseLoop 0x1a~0x23 (6) 指定乐器片段 持续播放部分 的循环和帧数据位置设定 Looper (bytes) Sub-Detail playMode (2) 记录当前循环模式 beginLoop (2) 记录循环起点的采样序号 endLoop (2) 记录循环终点的采样序号 乐器信息块 是一种用来记录音乐背景节奏,或者特殊效果器的附属信息子块。其中的 “乐器” 并不是由该块本身所指定的,而是来自于 sustainLoop 和 releaseLoop 所标定的,来自于 音频数据块(Sound Data Chunk) 的 Looper 子参数 指定 序号 beginLoop~endLoop 范围 的音频帧构成的原声片段中,采样到全部相关乐器的集合。 片段分两部:持续播放片段 和 收尾播放片段。即该信息块最后两个参数所指定的信息。 持续播放片段(sustainLoop),被用于通过序号(间接)存放声音正常播放过程中,在进入结束阶段(Release Phase)前,需要循环播放的音频帧区段。 收尾播放片段(releaseLoop),被用于通过序号(间接)存放声音正常播放过程中,在进入结束阶段(Release Phase)后,需要循环播放的音频帧区段。 而何时进入所谓 结束阶段(Release Phase),是由 标记信息块(Marker Chunk)标记数组(Markers) 中的 “结束阶段”标记(Marker) 决定的。因此,标记信息块的重要程度,要高于乐器信息块。 那么,循环的播放模式,即 循环模式(playMode) 都有哪些呢? 主要有 3 种,在无自定义情况下,分别是: 无循环模式(NoLooping),标记 0 ,表示片段只需单次顺序播放即可; 前向循环模式(ForwardLooping),标记 1 ,播完后从头部重新开始顺序播放; 前后循环模式(ForwardBackwardLooping),标记 2 ,播完后反向时序播放,以此循环; 三种模式的直观效果如下: 图 1-48 AIFF 乐器信息块 循环模式示意图 所以,乐器信息块 的 “乐器” ,实则为该指代片段数据中,用于乐理节奏或乐理意义上背景节拍器(Metronome)的乐器组合的抽象代称。 至于,乐理基调(baseNote)、偏移音高(detune)、最低音调(lowNote)、最高音调(highNote)、播速下限(lowVelocity)、播速上线(highVelocity)、音高增减益(gain)参数,则都是对整个信息块中,全部循环片段的 补充修饰,以 方便达到最佳放音效果。 评论信息块(Comment Chunk) 主要包含 4 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'COMT' 四个大写字母的 ASCII 码,即 == 0x434f4d54 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 2 + Sum(sub-Data Size) numComments 0x08~0x09 (2) 当前附加评论总数,即评论子数据体的个数,== sub-Data number Comments 0x0a~0x0a + (numComments * CommentSize) 当前附加评论构成的 数组(Array),子数据体 Comment 的持有者,评论以时间戳而非顺序定位,可关联 Marker Comment (bytes) Sub-Detail timestamp (4) 评论指向时间戳,单位 ms marker (4) 评论关联标记 ID count (4) 评论文字总字数 text (str) 当前位置评论(字符串) 评论信息块 存放的 评论信息,其索引标记和其他块中 以音频帧序列号 的方式 有所不同。是 直接采用音频时间戳来标记的。注意区分差异。 同时,评论内容可以通过 marker 参数,挂靠到 标记信息块 的标记中。这让相关评论数据能够同音频产生 一定程度的直接交互,该点即为评论信息块使用中的优势。 在评论信息块之后的优先级顺序中,出现了 一连 4 个文字块(Text Chunk),分别是: 命名文字块(Name Chunk)、 作者文字块(Author Chunk)、 版权文字块(Copyright Chunk)、 声明文字块(Annotation Chunk)。这 4 个文字块,拥有着相同的参数体系,为 包含 3 种属性的单层结构。 我们放在一起说明: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定(注意空格) 命名文字块 == 'NAME' == 0x4e414d45作者文字块 == 'AUTH' == 0x41555448版权文字块 == '(c) ' == 0x28232920声明文字块 == 'ANNO' == 0x414e4e4f ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == text_size == X text 0x08~0x08+X (X) 当前块的文字信息(字符串),填写相关类型块的文字信息,如:'Author: Mr.M' etc. 
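顺带一提,虽然 AIFF 各类本地信息块的内容差异很大,但它们都共享【ckID(4 bytes)+ ckSize(4 bytes)+ 块数据】的头部布局,因此可以用一个统一的游标循环顺序枚举。下面是一个演示草例(假设性代码,read_be32 等函数名为自拟;按 IFF 惯例,ckSize 采用大端序存储,且奇数长度的块在数据后补 1 字节对齐):

```cpp
#include <cstdint>
#include <cstdio>

// 读取 4 字节大端序整数(演示自拟的辅助函数)
static uint32_t read_be32(FILE* fp) {
    unsigned char b[4] = {0};
    if (fread(b, 1, 4, fp) != 4) return 0;
    return (uint32_t(b[0]) << 24) | (uint32_t(b[1]) << 16) |
           (uint32_t(b[2]) << 8)  |  uint32_t(b[3]);
}

int main(int argc, char** argv) {
    if (argc < 2) { fprintf(stderr, "usage: %s file.aiff\n", argv[0]); return 1; }
    FILE* fp = fopen(argv[1], "rb");
    if (!fp) { perror("fopen"); return 1; }

    char ck_id[5] = {0};
    char form_type[5] = {0};
    if (fread(ck_id, 1, 4, fp) != 4) return 1;       // 文件格式块 ckID,应为 'FORM'
    uint32_t form_size = read_be32(fp);              // 文件格式块 ckSize
    if (fread(form_type, 1, 4, fp) != 4) return 1;   // formType,应为 'AIFF'
    printf("%s / %s, ckSize = %u bytes\n", ck_id, form_type, form_size);

    // 顺序枚举 chunks 数组中的各本地信息块:'COMM'、'SSND'、'MARK' 等
    while (fread(ck_id, 1, 4, fp) == 4) {
        uint32_t ck_size = read_be32(fp);
        printf("  chunk %-4s  %u bytes\n", ck_id, ck_size);
        // 跳过块数据;奇数长度的块按惯例补 1 字节对齐
        if (fseek(fp, (long)(ck_size + (ck_size & 1)), SEEK_CUR) != 0) break;
    }
    fclose(fp);
    return 0;
}
```

这样先列出全部本地信息块,再按需进入关心的块内解析细部参数,是处理 AIFF 这类多块结构时常见的做法。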
于 4 个文字块后的 3 类信息块,相对于 AIFF 持有的 PCM 数据来说,则并不是特别重要。当然,此处的重要性是相对于音频数据本身而言的,并不是指该 3 类信息块完全没有意义。 实际上,该 3 类信息块,即 录制信息块(Audio Recording Chunk)、 迷笛数据块(MIDI Data Chunk)、 应用信息块(Application Specific Chunk),对于分别所处的 音频工程协会(AES)规格领域、 迷笛编辑器领域、 指定的系统应用 来说,都有 至关体系设定之直接存储、操作、保存的重要性。 因此,当传递的 AIFF 文件 有涉及该三类领域时,这 3 个信息块的作用是无可替代的。不过,如非非录音师、调音师或专业乐理工程师的话,则仅需要做简单了解即可。 录制信息块(Audio Recording Chunk) 主要包含 3 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'AESD' 四个大写字母的 ASCII 码,即 == 0x41455344 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值固定为 == 24 AESChannelStatus 0x08~0x20 (24) 该值用于协助 AES 实时数字音频传输(转录时),来自 AES3-1-2009 (r2019) 规定,通常只需关注位于字节第 0、2、3、4 位的预强调(Pre-emphasis)辅助值 [20] 。该值自音源生成后,就是固定参数。这里不做展开,具体见参考文献 AES3-1-2009 (r2019) 规定 迷笛数据块(MIDI Data Chunk) 主要包含 3 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'MIDI' 四个大写字母的 ASCII 码,即 == 0x4d494449 ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == MIDI_data_size == X MIDIData 0x08~0x08+X (X) 该值用于协助 MIDI 音频演奏/编辑系统,存储一系列位于 MIDI 体系下的关键编辑数据,部分可能重叠于 乐器信息块(Instrument Chunk)内的数据,但其他数据则更丰富且复杂,需要配合迷笛解析器或硬件设备使用 对于 迷笛数据块 中,可能与 乐器信息块 产生冲突的数据,在 无 迷笛(MIDI)设备(涵盖软硬件) 的情况下,乐器信息块 的优先级高于 迷笛数据块。而当 存在 迷笛解析器 的情况下,两套块设备内的同类信息,将以不同的 迷笛配置(MIDI Profile) 形式,展现在解析软件中。 应用信息块(Application Specific Chunk) 主要包含 3 种属性,分别是: Params Range(bytes) Details ckID 0x00~0x03 (4) 标记当子块类型,固定存储 'APPL' 四个大写字母的 ASCII 码,即 == 0x4150504c ckSize 0x04~0x07 (4) 记录当前块除 ckID 和 ckSize 属性外 的子块大小,单位(bytes),该值为 == 4 + App_data_size == 4+X signature 0x08~0x0b (4) 标记指向系统应用签名(Application Signature),应用签名是已发布应用的唯一标识,每个应用都不同,但该字段标记的是系统级别应用的简写。在 IFF 管理体系下,该值为 4 bytes,通过这个字段,我们能准确定位数据归属的目标系统应用所在,如 Apple II Applications 有 == 'pdos' == 0x70646F73 data 0x0c~0x0c+X (X) 系统应用的相关数据,具体内容由签名指定的系统应用处理 到此, 12 种 附属信息子块(INFO sub-Chunks),即 本地信息块(Local Chunks),的作用介绍完毕。而各个子块的 信息内容优先级,便有如下顺序: 图 1-49 AIFF 乐器信息块 信息优先级排序 信息优先级高的子块,在出现同类信息的情况下,会被优先参考。不过对于特殊情况,也需要注意体系内差异。 而如果我们按照 AIFF 格式,去同样 封装一段 PCM 数字信号数据 时,它的文件结构有: 图 1-50 完整 AIFF 音频格式文件的文件结构示意图 显然,AIFF 相较于 WAV、SND/AU 来说,更加的复杂。这也是为何 AIFF 格式的运用没有 WAV 更为宽泛的原因。但富余而详细的子数据块,也使 AIFF 在多体系任务系统下,会更加的游刃有余。 借此,常用的 三种未压缩编码格式(或者说两种,即 WAV 和 AIFF) 与 PCM 基础格式,共同构成了 音频格式的地基。 但如此直接或相对直接的对 PCM 数据的存放方式,还是会有 大量空间占用浪费。于是,为了进一步缩减音频数据,在计算机系统中的持续化存储问题,工程师们开始采用压缩算法来提高空间利用率。这带来了携带压缩算法的,无损压缩编码格式(Lossless [Lossless Compression Audio Format]) 和 有损压缩编码格式(Lossy [Lossy Compression Audio Format])。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_6_3.html":{"url":"Chapter_1/Language/cn/Docs_1_6_3.html","title":"1.6.3 无损压缩编码格式(Lossless Encode)","keywords":"","body":" Hex Data Display .hex-container { text-align: center; } .hex-data { display: inline-block; text-align: left; font-weight: bold; font-family: monospace; white-space: pre; } 1.6.3 无损压缩编码格式(Lossless Compression Audio Format) 无损压缩编码格式(Lossless [Lossless Compression Audio Format]) 是采用 无损压缩算法(Lossless Compression Method)对 PCM 数字信号数据,进行封装保存的音频格式(Audio Format)。 无损压缩算法(Lossless Compression Method) 无损压缩算法(Lossless Compression Method) 是对原始数字信号经过算法压缩后,仍可以通过算法本身的逆运算,完全一致的还原回原始数字信号的算法。属于在压缩和解压缩过程中,都 不会丢失任何原始数据的可逆压缩算法(Reversible Compression Method)。[4] 常用无损压缩算法主要为 四类,分别是: 熵编码算法(Entropy Coding),采用如 哈夫曼编码(Huffman Coding)[21] 、香农-范诺编码(Shannon–Fano Coding)[22] 、算数编码(Arithmetic Coding)[23] 等。此类算法通过调整信息熵,为高出现频次信息分配较短字节位,而低出现频次信息分配较长字节位的方式,缩减整体所占字节空间大小。 预测编码算法(Predictive Coding),如 线性预测编码(LPC [Linear Predictive Coding])[24]、自适应差分脉冲编码调制(ADPCM [Adaptive Differential Pulse Code Modulation)[25] 
等。这类算法通过预测下一个数据点的值,并仅存储预测误差,从而减少数据量。除了 ADPCM 外,一些诸如 差分脉冲编码调制(DPCM [Differential Pulse Code Modulation])等的主要被运用于 数模模数转换 的调制方法,也是可以被在此处的。这种调制类方法,一般通过存储相邻采样点之间的差值来减少数据量,当运用于压缩时,也可归类至预测编码算法分类。 变换编码算法(Transform Coding),如 离散傅里叶变换(DFT)、离散余弦变换(DCT) 等。该类算法通过将时域信号转换为频域信号,来更有效地表示和压缩音频数据。由于其关键程度,在本书第三章中,会重点讲解。 复合算法(Hybrid),是指一类 采用了多种类型常规算法,按一定处理流排布,共同进行压缩的算法类型。大部分无损压缩编码格式,都属于此类。比如,结合了熵编码和预测编码 FLAC、ALAC,以及多种算法混合处理的 APE。 另外需要区别一点。从 原始波源(Original Source) 到 数字信号(Digital Signal) 的过程是 有损的。但 这与此处的压缩算法毫无关联。 通过前面章节的讲解,我们可以认识到,模拟信号本身采样自原始波源的过程其实是有损的,而从模拟信号到数字信号的过程,依然也是有损的。最简单来看,单 A/D、D/A 中的 硬件比特分辨率(Bit Resolution),就可能因存在从 连续到离散值再回到模拟连续值 过程,而 引入损失。这一过程的损失被称为 采样损失(Sampling Loss)。 所以,无损压缩算法虽然没有损失,但算法接收并处理的信号本身,就已经有一定的数据丢失了。不过,相比有损算法而言,该损失可以通过部署更优质的硬件设备来降低损失量,且相对更适合在采集模拟信号过程考察。因此,与之算法因素,采样损失并不在格式中计入。 回到格式本身。无损压缩编码格式(Lossless) 最常见的主要有 三种,分别是 FLAC(.flac)、ALAC(.m4a) 和 APE(.ape)。但因为 APE 的处理流及算法闭源,与 ALAC 的平台兼容性问题,FLAC 成为当下主流,全平台兼容且具有三者中最高压缩率(30%~60%)的,无损压缩编码格式首选。 因此,本书以 FLAC 为主,介绍 无损压缩编码格式 类型的处理过程和结构特性。其他类型触类旁通,不再另行赘述。 FLAC 音频格式 开放无损编码格式(FLAC [Free Lossless Audio Codec]),即 FLAC 音频格式(.flac),是由 开放无损(音频)编码组织(Xiph.Org Foundation.) 提供的一种,针对音频数据进行压缩存储的无损音频格式。由于是复合算法,其处理流水线如下(红线编码,绿线解码,解码逆运算): 图 1-51 FLAC 音频格式编解码执行作业流水线 分块(Blocking) 是 将输入音频分解成多个连续的块(Block)的步骤。在 FLAC 中,这些块的大小是可变的。而块的最佳大小,通常受包括 采样率、随时间变化的频谱特性 等多种因素影响。虽然 FLAC 允许在一个流中使用不同的块大小,但我们仍需要参考编码器的建议,使用固定的块大小。另一方面,固定的块大小也能便于解码时的处理。 通道间去相关(Interchannel Decorrelation) 是 针对多通道(Stereo、Multi-Channel)情况 进行的,以选择的指定 去相关策略(Decorrelation Strategy) 计算新组值代原有通道数据,来 减小原始信息冗余的辅助压缩手段。 去相关策略(Decorrelation Strategy)一般有三种,即: 对称去相关(Symmetric Decorrelation)、主成分分析(PCA)、奇艺值分解(SVD)。三者都是可逆的,而 对称去相关 则是其中最快最简便的算法。 记分块后有 (C1,C2)(C_1, C_2)(C1,C2) 数据,对称去相关会根据分组的组内 平均值(Mean) 和 差值(Sub),生成该组的中间信号与侧信号结果 (M,S)(M, S)(M,S) 代替原 (C1,C2)(C_1, C_2)(C1,C2) 。有: M=C1+C22,S=C1−C22 {\\displaystyle \\begin{aligned} M = \\frac{C_1 + C_2}{2} \\quad , \\quad S = \\frac{C_1 - C_2}{2} \\\\ \\end{aligned} } M=2C1+C2,S=2C1−C2 即 简单的线性变换。理所当然,其去相关的去数据冗余和降维能力也 相对较弱。 三种策略该如何选择呢?我们可以依据下表进行决定: Strategy Features When to use? 
Example Symmetric FastestLow Complexity simple linear transformations简单线性变化场景 一般双通道音频 PCA SlowerHigh Complexity when needs dimensionality reduction and feature extraction需要降维和特征提取场景 需要所有通道的基本分类特征信息,用于模型 SVD SlowestHighest Complexity when needs precise matrix decomposition需要精确矩阵分解的场景 需要矩阵化全通道特征张量,用于模型 可见,除非后续步骤中涉及模型或想要更高压缩比的结果,否则选择 对称去相关 基本已能满足大多数需求。注意,解码时需要逆运算。 预测(Prediction) 则是将去相关性后的块,通过尝试找到信号的 相近数学解集,来 转换块的保留数据。一般而言,解集通常都比原始信号要 小得多。由于预测方法对编码器和解码器都是已知的,因此只需在压缩流中包含预测器的参数即可。FLAC 目前只支持四种不同类别的内置已定义好的预测器,但预留了额外的改进空间,以便添加其他方法。而从设计上讲,FLAC 允许预测器类别在块与块之间,甚至在块的各个通道之间变化。而解码时,亦需要采用相同预测方法做逆运算。 残差编码(Residual Coding) 是 必须的校准步骤,该步骤的目的是确认,预测器是否能准确的使用预测结果,描述输入的去相关块信号。此时,必须对原始信号和预测信号之间的差异(即误差,或残差信号)进行无损编码。 怎么判断预测器结果是否满足要求呢?粗略的方法,是 通过判断 残差信号(Residual Signal)所需的每个样本位数,是否少于原始信号。少于则预测有效,否则无效。而当差值过大时,通常意味着,编码器需要用 调整块大小、改变块数目、切换预测器、改变去相关方法 的流程内改动,来 重新生成预测结果。 所以,残差编码的作用,相当于整个编码过程的 自动化结果检验。同理于解码。 在经过这些步骤后,我们就得到了 用于 FLAC 格式持续化存储的数据,包含两部分: 【预测编码数据(Prediction Data)】+【残差信号(Residual Signal)】 这即是 FLAC 格式下,实际用于保存的 一个完整 音频数据块(Audio Data Block) 构成。存储的音频由一系列此种数据块,按时序排列组成。再配合 FLAC 格式文件结构的头部信息,共同组成了 FLAC 文件。 那么,一个完整的 FLAC 文件,其 文件结构 是什么样的呢?如图: 图 1-52 完整 FLAC 音频格式文件的文件结构示意图 [26] 从简图中可以看出,FLAC 文件结构仍然采用二分,以: 【元数据信息块(Metadata Blocks)】+【音频数据块(Audio Data Blocks)】 的方式,进行信息区域划分。 元数据信息块(Metadata Blocks) 是包含 流信息块 和 附属信息块 在内的,一系列 对音频数据本身特征进行描述 的 存储容器集合。和未压缩音频 AIFF 格式较为相同,FLAC 的元数据信息块对数据的组织方式,采用了分类封装。而原本用于标记文件格式的 ID 字段,被从块中独立拿出,以 恒定占用 FLAC 格式文件头部 4 字节(Bytes)的形式,锚定当前数据结构信息。 即,所有 FLAC 音频格式文件都有头部唯一字段(注意大小写): Params Range(bytes) Details FileID 0x00~0x03 (4) 标记当前文件 ID,固定存储 'fLaC' 四个大小写字母的 ASCII 码,即 == 0x664c6143 至于其他被记录的关键或非关键额外信息,按照相关成分,被分为 7 种不同种类的 基础内构元数据块 和 1 个无效标记块,分别是: Block Type Mark(bit) Details STREAMINFO 0 :0000 000 通用流信息块,必位于首位,用于记录音频流基本信息(比特率、采样率等) PADDING 1 :0000 001 对齐填充块,无内容(全部为 0)用于填充空间,用于在不重新编码音频数据的情况下添加或修改元数据 APPLICATION 2 :0000 010 应用信息(子)块,用于存放经软件调整音频后,想要持续存储的调整设定参数 SEEKTABLE 3 :0000 011 标记信息(子)块,用于存放快速定位音频流中特定位置的查找表 VORBIS_COMMENT 4 :0000 100 评论信息(子)块,包含用户定义的标签信息,用于存放用户等人的交互评价信息 CUESHEET 5 :0000 101 CUE 表(子)块,用于存放音轨的索引信息,即类比 CD 的 CUE 表 PICTURE 6 :0000 110 图像数据(子)块,用于存放当前音频的专辑封面图片等图像信息 [Reserved] 7~126 保留(子)块,预留的 7~126 号标签,为未来扩展或自定义扩展而用 [Invalid] 127 :1111 111 无效标记(子)块,是无效的元数据块类型用于唯一标识错误 在 FLAC 中,元数据块的基本组成高度一致,皆为: 【元数据头(Metadata Header)】+【元数据块数据(Metadata Block Data)】 的形式,不似于 AIFF 中的 ckID 来标记不同类型块,FLAC 采用元数据头中的固定标记位,以 类型序号 标识 元数据块的种类。即,并不以 ASCII 码标记的固定类型值作为头部信息。由此而来的好处是,FLAC 的元数据头,能够以相对统一的结构定义,并包含更多有效信息。 每个元数据块的 固定头部(Metadata Header),以下简称 头部(Header),始终为 4 字节(4 bytes),包含 3 个关键字段: Params Range(bytes) Details Last block flag 0x00 (1 bit)x--- ---- 标记当前块是否为最末位,占第一字节第七位 1 bit当块为最末位时该位为 1 ,否则为 0 Block Type 0x00 (7 bits)-xxx xxxx 块类型标记位,占第一字节的 剩余 7 bits即上表中的块类型 Mark Block Length 0x01~0x03 (3) 块大小,记录当前块的总字节长度(不含头部),24 位 而 所有的元数据块皆有如下结构: 图 1-53 FLAC 音频格式的元数据信息块统一结构示意图 现在,让我们顺序了解各类分块的关键参数(包含元数据头)。方便与系统起见,我们仍然将 元数据块(Metadata Block) 称为 块(Chunk)。 通用流信息块(STREAMINFO Chunk) 主要包含 10 种属性,分别是: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 STREAMINFO,首字节有 [flag bit] 000 0000 Min Block Size 0x04~0x05 (2) 最小块大小(以样本为单位),通常为 16 或 4096 Max Block Size 0x06~0x07 (2) 最大块大小(以样本为单位),通常为 16 或 4096 Min Frame Size 0x08~0x0a (3) 最小帧大小(以字节为单位),表示音频帧的最小字节数 Max Frame Size 0x0b~0x0d (3) 最大帧大小(以字节为单位),表示音频帧的最大字节数 Sample Rate 0x0e~0x10 (2.5 = 20 bits) 数字信号采样率,由于 SUD 只能存 PCM,对 PCM 来说就是 ,有该值== 8000 | 11025 | 24000 | 44100 等 Num of Channels 0x10 (3 bits) xxx- ---- 存储音频数据的通道数,单通道(Mono == 1),双通道(Stereo == 2),N 通道(== N) Bits per Sample 0x10~0x11 (5 bits) ---x xxxx 即 采样位深(Sampling Bit Depth)/ 最大比特分辨率(Max Bit Resolution),该值单位为 bits,有== 8 | 16 | 32 bits 等 
Total Samples 0x11~0x15 (4.5 = 36 bits) 用于标记音频数据在数模转换时的有效采样个数,即总音频帧数== 音频的全通道总采样次数 / 通道数 MD5 Signature 0x16~0x26 (16) 完整性 MD5 签名,用于验证音频数据完整性的 MD5 哈希值,128 位。通过验证 MD5 是否和预期一致,快速检测完整性 对齐填充块(PADDING Chunk) 主要包含 2 种属性,分别是: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 PADDING,首字节有 [flag bit] 000 0001 Padding Data 0x04~0x04+X (X) 填充数据,全部为零,用于在不重新编码音频数据的情况下添加或修改元数据 应用信息块(APPLICATION Chunk) 主要包含 2 种属性,分别是: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部, 对于 APPLICATION,首字节有 [flag bit] 000 0010 Application ID 0x04~0x07 (4) 标记指向系统应用签名(Application Signature), 应用签名是已发布应用的唯一标识, 即注册的应用程序 ID,用于标识特定的应用程序 Application Data 0x08~0x08+X (X) 系统应用的相关数据, 具体内容由签名指定的系统应用处理,长度 X 字节 标记信息块(SEEKTABLE Chunk),也可称为索引表块,主要包含 2 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 SEEKTABLE,首字节有 [flag bit] 000 0011 Seek Points 0x04~0x04 + (numSeekPoints * perSPSize) 由查找点构成的 数组(Array),子数据体 SeekPoint 持有者,类似 AIFF 的 Markers 标记作用于总采样的每个独立采样上,时序顺序标记 SeekPoint (bytes) Sub-Detail Sample Number (8) 查找点对应的采样数 Byte Offset (8) 查找点对应的字节偏移量 Sample Offset (2) 查找点对应的采样数偏移量 评论信息块(VORBIS_COMMENT Chunk),主要包含 5 种属性和 1 种 子数据体(sub-Data Info),本身具有 两层数据结构模型: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,VORBIS_COMMENT,首字节有 [flag bit] 000 0100 Vendor Length 0x04~0x07 (4) 标记厂商字符串长度,记为 len 厂商字符串的长度,表示厂商字符串的字节数 Vendor String 0x08~0x08+len 标记厂商字符串,字符串长度为 Vendor Length 值,用来记录当前音频的发行商等信息 User Comment List Length 0x08+len~0x08+len+4 (4) 记录当前评论个数,值为几,就有几条评论 User Comment List 0x08+len+4 ~ 0x08+len+4 + (numComments * perCommSize) 由评论构成的 数组(Array),子数据体 Comment 持有者,不同 AIFF 的 Comment FLAC 的该子数据体记录,包括的评论所有音频额外信息键值对字符串,如 \"TITLE=Example\" Comment (bytes) Sub-Detail Comment Length (4) 评论字符串长度( N 字节) Comment String (N) 评论键值对字符串 CUE 表块(CUESHEET Chunk),主要包含 7 种属性和 2 种 子数据体(sub-Data Info),本身具有 三层数据结构模型: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 CUESHEET,首字节有 [flag bit] 000 0101 Media Catalog Number 0x04~0x43 (64) 记录媒体目录号,表示光盘的媒体目录号 Lead-in Samples 0x44~0x4b (8) 引导样本数,表示光盘引导区的样本数 Is CD 0x4c (1) 是否为 CD,1 表示是 CD,0 表示不是 CD Reserved 0x4d~0x5f (19) 保留字段,全部为零 Number of Tracks 0x60 (1) 总轨数,表示光盘上的总轨数,即声轨,并非通道数 各声轨间独立,是可以在播放上重叠的 Track Information 0x61 ~ 0x61 + (numTrackInfo * perTInfoSize) 由声轨构成的 数组(Array),子数据体 TrackInfo 持有者,记录每个声轨的信息,包括轨号、轨偏移、ISRC、轨索引等 TrackInfo (bytes) Sub-Detail Track Offset (8) 轨偏移,轨道的字节偏移量 Track Number (1) 轨号,即轨道的编号 ISRC (12) 声轨的国际标准录音代码 Track Type (1) 轨类型,轨道的类型 Pre-emphasis (1) 标记是否使用预加重 Reserved (3) 保留开关字段,全部为零 Track Index (N)N = (num*TrackIndexSize) 由轨索引构成的 数组(Array),子数据体 TrackIndex 持有者,记录声轨索引信息 TrackIndex (bytes) Sub-Detail Index Offset (8) 索引偏移,索引字节偏移量 Index Number (1) 索引号,即索引的编号 Reserved (3) 保留字段,全部为零 图像数据(PICTURE Chunk),主要包含 12 种属性,为: Params Range(bytes) Details Header 0x00~0x03 (4) 元数据块的固定头部,对于 PICTURE,首字节有 [flag bit] 000 0110 Picture Type 0x04~0x07 (4) 图片类型,表示图片的用途,例如封面、背面等 MIME Type Length 0x08~0x0b (4) MIME 类型字符串的长度,表示 MIME 类型字符串字节数,值为 X0 单位 bytes MIME Type 0x0c~0x0c+X0 (X0) MIME 类型字符串,表示图片的 MIME 类型,例如 \"image/jpeg\" 或 \"image/png\" ,字符串长度由上一条属性记录 Description Length 0x0c+X0~0x0c+X0+4 (4) 描述字符串的长度,表示描述字符串的字节数,值为 X1 单位 bytes Description last_at~last_at+X1 (X1) 描述字符串,表示图片的描述信息,例如 \"Album Cover\" Width last_at~last_at+4 (4) 图片宽度,单位为像素,例如值 512 ,即 512 像素(Pixels) Height last_at~last_at+4 (4) 图片高度,单位为像素,例如值 512 ,即 512 像素(Pixels) Color Depth last_at~last_at+4 (4) 色深,单位为位(bit),表示每个像素的位数例如值 24,表示单个像素颜色为 24 位,详见下一章 Colors Used last_at~last_at+4 (4) 每像素的颜色数,表示图片使用的颜色数,即颜色总数,例如值 16,像素值为索引,取色自 16 种色的调色板而取 0 则表示,颜色完全由像素自身决定 Picture Data Length 
last_at~last_at+4 (4) 图片数据的长度,表示图片数据的字节数,值为 N 单位 bytes值指向下一字段,当前块的图片数据所占总字节长度 Picture Data last_at~last_at+N (N) 图片数据(逐行扫描),当前块的实际图片二进制数据,每个像素值取 色深(ColorDepth)值代表的位数 剩下的 保留块(Reserved Chunk) 和 无效块(Invalid Chunk) 类型,因其数据结构定义为无 或 自定制。实际使用中,可根据当前工程情况,内部协议设定加以利用。 至此,对于 FLAC 音频格式,我们就能完整解析了。让我们来看一段 138 bytes 的 FLAC 音频文件数据(十六进制格式单字节展开)事例: 66 4c 61 43 00 00 00 22 00 10 00 10 00 04 00 00 10 00 ac 44 50 00 00 06 ba a8 d4 1d 8c d9 8f 00 b2 04 e9 80 09 98 ec f8 42 7e 86 01 f4 35 00 00 00 03 00 00 00 0a 69 6d 61 67 65 2f 6a 70 65 67 00 00 00 0b 41 6c 62 75 6d 20 43 6f 76 65 72 00 00 02 00 00 00 02 00 00 00 18 00 00 00 00 00 00 01 f4 00 按照上述划分,获取对应子块信息,有: 图 1-54 演示用 138 bytes 的 FLAC 音频文件数据解析 可见,样例只是一段 FLAC 数据的元数据部分,且包含了 STREAMINFO 和 PICTURE 这两个元数据块。同时,PICTURE 的图片数据 并不在上述数据 中。而从 图片数据的长度(Picture Data Length) 和 其他字段携带的信息可知,该图片数据为 512 x 512 的 128000 字节 24 位 JPEG 数据。而原音频,从 STREAMINFO 解读 可得,未在上例中包含的音频数据块中包含的音频,为 采样率 44100 Hz 的 16-bits 双声道立体声(Stereo)总计 44100 个采样值(即 1s 长度)的压缩后数据块数组。 作为无损压缩编码音频格式的代表,FLAC 具有重要的地位。它能够在不丢失任何原始音频信息的情况下,极大的减少文件大小。这使得它被广泛的应用在了高保真音频存储和传输过程中。其 无损特性确保了音频在解码后与原始音频完全一致,令其成为了 音频发烧友 和 专业音频制作 的首选格式。 同样的,该特点也是无损压缩编码音频格式,最为显著具的优势。 然而,尽管无损压缩如 FLAC 提供了最高的音质保真度,但其文件大小仍然相对较大。在许多应用场景中,如 流媒体 和 便携设备存储(尤其是在随身听时代,早期有限的存储空间情况),依然 不够便利。因此,具有更大压缩比的有损压缩编码音频格式,如 MP3 和 AAC 便成为了一种 可以接受的替代方案。这些格式 通过舍弃人耳不易察觉的音频信息,进一步减小文件大小,同时在音质和压缩率之间取得平衡。 虽然为人所带来的听觉感受,介于此,会相对有所衰减。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/Docs_1_6_4.html":{"url":"Chapter_1/Language/cn/Docs_1_6_4.html","title":"1.6.4 有损压缩编码格式(Uncompressed Encode)","keywords":"","body":" Hex Data Display .hex-container { text-align: center; } .hex-data { display: inline-block; text-align: left; font-weight: bold; font-family: monospace; white-space: pre; } 1.6.4 有损压缩编码格式(Lossy Compression Audio Format) 有损压缩编码格式(Lossy [Lossy Compression Audio Format]) 是一种 通过舍弃部分音频数据来减少文件大小的音频压缩技术。其采用 有损压缩算法(Lossy Compression Method)对 PCM 数字信号数据,进行封装保存的音频格式(Audio Format)。 有损压缩算法(Lossy Compression Method) 有损压缩算法(Lossy Compression Method) 是对原始数字信号经过算法压缩后,仍可以通过算法本身的逆运算,相对近似的还原回原始数字信号的算法。因为算法会丢失部分数据导致音质下降,所以,有损压缩算法属于不可逆压缩算法(Irreversible Compression Method)。而按处理流分类来看,被使用在压缩/解压处理上的有损压缩算法,只有复合算法(Hybrid)类型。 对于不可逆压缩算法而言,其对 数字信号的编码(Encoding)关键步骤,大都为 采用数学逼近算法,去寻找复杂曲线的解集,用 记录解集本身 来替代原有数字信号的数字码信息 进行保存。这个求解过程就可能 引入一定的损失。其 解码(Decoding)处理,则是根据解集,结合采用算法重新合成至原有数据。 此时,还原出的数据,是 算法拟合的结果而非原始样本,即非原有数字信号的结果。 从数学角度来看,此类算法大都选择以 离散傅里叶变换(DFT)、小波变换(Wavelet Transform) 等技术的 降低算力消耗近似解方案,作为算法核心。以傅立叶族举例,在实际应用中,当选定解空间的 傅立叶基底函数族 并不是无穷时,离散傅立叶基所表示的原值,本就 会有一定损失(见本书第三章详解)。此外,来自数字信号的 有损采样(Lossy Sampling) 过程的 采样损失(Sampling Loss),依旧会存在。 所以,不可逆压缩算法是会有一定误差的。而误差的引入则来自于算法自身,一经处理后无法消除。 不过,算法带来的 压缩优势极为巨大。大部分采用有损压缩算法的音频格式,都 至少能达到 10 : 1 的压缩比(CR [Compression Ratio])即减少约 90% ,甚至更小。而极致压缩比下,算法对音频音质带来的影响,对大多数情况和使用者来说,却可忽略不计。 因此,采用有损压缩算法的有损压缩编码格式,在进入音频数字化时代后,被大量且广泛的应用于商业音频产品中。其中,普及程度最广并具有最高硬件兼容性的 MP3 格式,就是最具代表的该类类型。 MP3 音频格式 MP3(MPEG-1 Audio Layer III),即 MP3 音频格式(.mp3)。是于 1987 年,由德国 弗劳恩霍夫应用研究促进协会(Fraunhofer [Fraunhofer Society for the Advancement of Applied Research]) 研究所主导完成的 音频有损压缩格式。并于 1993 年第一代 MPEG-1 标准制定中,获得委员会认同,确立为 通用 MPEG-1 和 MPEG-2 标准的音频规格部分。 图 1-55 弗劳恩霍夫应用研究促进协会的 logo PS:弗劳恩霍夫协会是业内标杆的多机构联合体,为音视频技术的发展做出了巨大贡献。 此外,随着 2017 年 MP3 专利到期(在此之间 MP3 由多个复杂的专利体系构筑专利网进行了版权保护),该格式彻底成为了开源开放且兼具标准和广泛适用性的音频格式。 MP3 的处理流水线如下(红线编码,绿线解码,解码逆运算): 图 1-56 MP3 音频格式编解码执行作业流水线(简) 接下来,我们依据编码的流程顺序,对几个环节(包括解码的环节)进行梳理。 分组(Blocking) 和 重组(Reassemble) 是互逆的两个过程,分组是将输入的 PCM 音频信号分解成多个连续的片段的步骤。每片段通常包含 1152 个采样点,目的是将音频信号 分割成适合后续处理的小块,继而 
提高压缩效率和音质。它的逆向过程即为重组。重组是将多个片段的时域信号拼接的步骤。解码器逐片段提取和处理压缩数据,逐步重建原始音频信号,直至恢复完整的音频信号(注意,被还原得到的 PCM 已 不完全相同于 原输入 PCM)。 为什么是 1152 个采样点呢?这是因为在 MPEG-1 针对音频 DCT 处理的实验上,发现 1152 个采样点能够在编码效率和质量上,达到最佳平衡点。且能够在保证音质的前提下减少计算复杂度。直到 MPEG-4 将音频更改为 AAC 音频格式,并取用了更为合理的 1024 个采样点设定。 分窗(Windowing) 和 去窗口化(De-windowing/Inverse Windowing) 互逆。 分窗是对每个分块片段,应用加权处理的步骤。常用的 窗口函数 包括 汉宁窗(Hanning Window)、 汉明窗(Hamming Window) 和 黑曼窗(Blackman Window)。几个算法都是对分组后样本片段的缩放处理,目的是 减少频谱泄漏,提高频谱分析的精度。记分组总样本数为 NNN ,而当前分块的某个采样点值为 nnn ,则对 nnn 分窗处理的结果 w(n)w(n)w(n) 有: {Hanning: w(n)=0.5⋅(1−cos(2πnN−1))Hamming: w(n)=0.54−0.46⋅cos(2πnN−1)Blackman: w(n)=0.42−0.5⋅cos(2πnN−1)+0.08⋅cos(4πnN−1) {\\displaystyle \\begin{aligned} \\begin{cases} Hanning &: \\ w(n) = 0.5 \\cdot \\left( 1 - cos \\left( \\frac{2\\pi n}{N - 1}\\right) \\right) \\\\ Hamming &: \\ w(n) = 0.54 - 0.46 \\cdot cos \\left( \\frac{2\\pi n}{N - 1}\\right) \\\\ Blackman &: \\ w(n) = 0.42 - 0.5 \\cdot cos \\left( \\frac{2\\pi n}{N - 1}\\right) + 0.08 \\cdot cos \\left( \\frac{4\\pi n}{N - 1}\\right) \\end{cases} \\\\ \\end{aligned} } ⎩⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎧HanningHammingBlackman: w(n)=0.5⋅(1−cos(N−12πn)): w(n)=0.54−0.46⋅cos(N−12πn): w(n)=0.42−0.5⋅cos(N−12πn)+0.08⋅cos(N−14πn) 通过 对帧的两端进行平滑过渡,减少边界效应。当处于解码流程时,去窗口化直接对编码过程的分窗 窗口函数(Windowing Method)做逆运算即可。当然,窗口函数并不只上面的三种,上述列出的仅为较常用的算法。 在下一步中的 离散余弦变换(DCT)和其逆变换,是整套 MP3 体系的核心之一。DCT 的作用,是 将时域信号转换为频域信号并提取频率成分,以便于 随后的子代分析 与 心理声学模型处理后的数据量化使用。原理不在本章节展开(见第三章)。 经过 DCT 所得的频域数据,被用作 子带滤波(Subband Filtering) 的输入。子带滤波是一种信号处理技术,用于 将输入信号分解成多个频率子带(Subband),每个子带包含特定频率范围内的信号成分。而结合我们在模数转换(A/D)提到的 香农采样定律(Nyquist–Shannon Sampling Theorem) 可知,有来自于 A/D 采样率(Samplerate/Sample Rate)不变的条件下, DCT 过程中的 基底函数族函数,函数可取用的 最大频率值为该采样率值的一半。所以,子带划分依据采样率的半值,以固定步长来切割出各个子带范围。 记子带计划分组数为 MMM ,每个子带的频率范围长度为固定值 SSS ,则: S=SampleRate2M {\\displaystyle \\begin{aligned} S = \\frac{SampleRate}{2M} \\\\ \\end{aligned} } S=2MSampleRate 我们以最常用的 44.1 kHZ 采样率 为例,其子带滤波后的子带划分,在 MP3 格式里,该采样率下需要分为 M=32M = 32M=32 组 [27] ,即: S=SampleRate2M=44.1 kHz2×32≈0.689 kHz {\\displaystyle \\begin{aligned} S = \\frac{SampleRate}{2M} = \\frac{44.1\\ kHz}{2 \\times 32} \\approx 0.689\\ kHz \\\\ \\end{aligned} } S=2MSampleRate=2×3244.1 kHz≈0.689 kHz 所以,通过子带滤波后的子带划分情况为 均匀的 32 组: 图 1-57 MP3 音频格式经过子带滤波后的子带分组情况 但正如本书在 等响曲线(ELLC) 和 收音频响曲线(HFR) 章节所提,人耳对声音的感知本身就是动态的。声音的 瞬时特征、由突发强信号对弱信号的心理性掩盖(即掩蔽效应)、超阈值噪音(即痛觉阈)所产生的噪音遮蔽 等,都会使人对上述 不同子带分段代表频率的感知能力发生变化。以等长划分的子带,显然 不能充分的 表示该变化所带来的影响,进而 无法做到,在筛选掉一些不必要的频段的同时,增强有效频段。 心理声学模型的作用,在此得到了体现。 通过在编码阶段引入 心理声学模型(Psychoacoustic Model) 来 动态的调整子带滤波阶段的划分结果。我们就能进一步的压缩有效数据。在 MPEG 的设定中,层(Layer)的概念是对一套不同编码复杂度和压缩效率级别的方案的代称。MP3 来自于 MPEG-1 的音频层级3(Audio Layer III)。以分窗后的数据为输入,在 Layer III 的规格下,音频需要经过一个 相对独立的复杂流水线处理过程: 图 1-58 MP3 音频格式在 MPEG-1 中的子带及心理声学模型工作流(立体声)[27] 那么,假设经过该流水线后,新的子带集:取低频部分,去除 子带1、子带2 和 子带3。中频部分,略微调整子 带10 到 子带20 的频率范围。高频部分,去除子 带30、子带31 和 子带32,调整 子带29 的频率范围。就有: 图 1-59 MP3 音频格式经过心理声学模型动态处理后的子带分组情况(模拟) 至于 每个子带内所含的对应频率数据,再辅助以 量化处理 与 哈夫曼编码(Huffman Coding)的熵编码(Entropy Coding)处理,完成流程末尾的数据压缩后。按照如下的数据结构封装后,即可得到最终的 MP3 音频数据帧(Audio Frame) 了: 图 1-60 MP3 音频格式的数据组帧后的单帧示例 量化和哈夫曼编码,相信读到此处的朋友都较为熟悉,无需赘言。其结果数据,被用于装填当前帧的帧数据(Frame Data)部分。相关的 完整帧长度(包含头部的完整帧长度),则可以通过如下公式计算: if use [Layer I]:Frame Length=(12×BitrateSampling Rate+Padding Byte)×4.if use [Layer II] or [Layer III]:Frame Length=(144×BitrateSampling Rate)+Padding Byte {\\displaystyle \\begin{aligned} &if\\ use \\ [Layer\\ I] :\\\\ &\\quad \\quad Frame\\ Length = \\left( \\frac{12 \\times Bitrate}{Sampling\\ Rate} + Padding\\ Byte \\right) \\times 4 \\\\ &\\quad .\\\\ &if\\ use \\ [Layer\\ II]\\ or\\ [Layer\\ III] :\\\\ &\\quad \\quad Frame\\ Length = \\left( 
\\frac{144 \\times Bitrate}{Sampling\\ Rate} \\right) + Padding\\ Byte \\end{aligned} } if use [Layer I]:Frame Length=(Sampling Rate12×Bitrate+Padding Byte)×4.if use [Layer II] or [Layer III]:Frame Length=(Sampling Rate144×Bitrate)+Padding Byte 而 组帧(Framing),简而言之,就是 按格式对数据封装的过程,注意 只占 4 字节的 帧头(Frame Header)高密度音频辅助信息。其中包含如下字段(索引对应单位为 bit ): Params Range(bits) Details Sync Word 0~11 (12) 固定头部【同步字】标签,表示当前音频帧的开始,固定为 0xFFF Version 12~13 (2) MPEG 版本,00 为 MPEG-2.5,10 为 MPEG-2,11 为 MPEG-1,01 保留 Layer 14~15 (2) MPEG 层级,01 为 Layer III,10 为 Layer II,11 为 Layer I,00保留 Protection Bit 16 (1) CRC 交验状态标志,0 指启用 CRC 校验,1 指关闭 CRC 校验,CRC 校验用于检测帧数据的传输错误 Bitrate Index 17~20 (4) 表示音频数据 比特率 的相关查表索引,比特率索引的值范围从 0001 到 1110,对应的比特率从32 kbps到320 kbps。0000表示免费模式,1111保留 Sample Rate Index 21~22 (2) 表示音频数据 采样率 的相关查表索引,00 为 44.1 kHz,01 为 48 kHz,02 为 32 kHz,11保留 Padding Bit 23 (1) 填充字节启用标志,0 表示不使用填充,1 表示使用填充,填充位用于确保帧的长度一致 Private Bit 24 (1) 预留私有标志位,私有位由应用程序自行定义和使用,不影响音频数据的解码 Channel Mode 25~26 (2) 指示音频数据的声道模式,00 为立体声(Stereo),01 为联合立体声(IS),10 为双声道(Dual),11 为单声道(Mono) Mode Extension 27~28 (2) 辅助表示联合立体声的类型,00 为禁用,01 为强制立体声,10 和 11 保留 Copyright 29 (1) 版权状态标志位,取 0 则音频数据不受版权保护,取 1 则音频数据受版权保护 Original 30 (1) 原始媒体标志位,取 0 则音频数据是复制品,取 1 则音频数据是原始录音或原始媒体 Emphasis 31~32 (2) 预强调(Pre-emphasis)处理类型 的相关查表索引,00 为 无强调,01 为 50/15 微秒(50/15 µs)滤波,10 为 保留字段,11 则采用 ITU-CCITT J.17 标准 这些信息多 以查表法 代替了在未压缩音频格式和无损压缩音频格式中,对音频基础信息的数值存储方式。使每组 MP3 帧数据,都能携带这一部分信息,方便了音频以流的形式传输。 那么 MP3 音频格式,是否单纯的只有 MP3 数据帧构成呢?显然不是。 MP3 的文件结构,依旧为 两部分组成(简单示意): 【MP3 ID3 标签(MP3 ID3 Tags)】+【MP3 帧数据(MP3 Audio Frames)】 其中,MP3 ID3 标签,被用于做包括 歌曲标题、艺术家、专辑、年份、流派、评论 等信息的记录。根据使用位置和复杂度分类,可以分类两种: ID3v1 用于 MP3 文件末尾,固定 128 字节(Bytes),最多只能包含一个; ID3v2 用于 MP3 文件开头,长度可变不固定,记录复杂数据并存在多个不同版本; 可见,ID3 标签对 MP3 的意义,几乎等同于 FLAC 的元数据块 或 未压缩音频格式中的信息块,对本身音频格式的作用和地位。只是并不保有音频基础信息而已。 ID3v1 是第一版的 ID3 标签规范,也是通用性最好且最简的 ID3 标签,固定有 7 个字段: Params Range(bytes) Details Identifier 0x00~0x02 (3) 标记当前标签 ID,固定存储 'TAG' 四个大写字母的 ASCII 码,即 == 0x544147 Title 0x03~0x1e (30) 音频标题,记录该音频标题描述,固定 30 字节长度 Artist 0x1f~0x3e (30) 音频艺术家名称,记录创作该音频的艺术家名称,固定 30 字节长度 Album 0x3f~0x5e (30) 音频专辑名,记录该音频所在专辑名称,固定 30 字节长度 Year 0x5f~0x62 (4) 音频发行年份,记录该音频发行时间点 Comment 0x63~0x7e (30) 音频短评或附属文字信息,记录该音频的一些简短的额外文字信息 Genre 0x7f (1) 音频流派 的本地相关查表索引,即音频流派归类,采用本地流派列表,记录索引值 ID3v2 则是对 ID3v1 标签的扩展,现已迭代了 ID3v2.2、 ID3v2.3 和 ID3v2.4 三个主要版本。从 ID3v2.2 开始(即首个 ID3v2 标准标签),ID3v2 类标签就采用了类似于 MP3 音频帧 的封装结构,将自身分为 两个部分组成,以便统一于音频的数据封装习惯: 【ID3 标签头(ID3 Tag Header)】+【ID3 标签帧(ID3 Tag Frame)】 三个版本 ID3v2 标签头(Tag Header)的参数基本一致,可用取值上略有差异: Params Range(bytes) Details Identifier 0x00~0x02 (3) 标记当前标签 ID,固定存储 'ID3' 四个大写字母的 ASCII 码,即 == 0x494433 Version 0x03 (1) 标签主版本号,v2.2 固定取 2,即 0000 0010 v2.3 固定取 3,即 0000 0011 v2.4 固定取 4,即 0000 0100 Revision 0x04 (1) 标签副版本号,固定取 0,即 0000 0000 Flag 0x05 (1) 标签标志位,记录采用的标签特性状态,v2.2 v2.3 v2.4:去同步,解决播放器解读 0xFF 值问题,1000 0000有压缩(仅 v2.2),标签是否压缩,0100 0000v2.3 v2.4:扩展头,标签是否包含扩展头信息,0100 0000实验位,标签是否为实验性质标签,0010 0000v2.4:尾部位,标签是否包含标签尾信息,0001 0000标签尾(Footer)为 Identifier 取 \"3DI\" ,而其余同标签头的相同数据结构信息,便于标志 ID3 结尾其他位为后续拓展保留 Size 0x06~0x09 (4) 当前 ID3 标签的数据内容长度,即不包含标签头(Header)和 标签尾(Footer)的其余部分数据长度字节数,例如:扩展头 (20) + 帧1 (30)+ 帧2 (40) = 90 Bytes 在标签头的标志位中,对于 v2.3 和 v2.4 有一个 专用于扩展的数据结构,即 扩展头(Extended Header) 数据。这一结构体常被用来存放一些额外的自定义信息(一般为一些状态标志,做功能启停和记录),放置于 ID3 帧数据队列的首位。 从参数构成上看,ID3v2.3 的可定制控件较为约束: Params Range(bytes) Details Size 0x00~0x03 (4) 扩展头占用字节数,不包含参数自身的 4 Bytes Extended Flags 0x04~0x05 (2) 扩展头标志位,表示当前扩展头特性,此处不展开 Padding Size 0x06~0x07 (2) 对齐标志位,用于填充 0 来对齐数据位数 CRC Data 0x08~+X (X) CRC 交验信息,一般为 2 Bytes 的 CRC 交验值 相比之下,ID3v2.4 的灵活度就要更高一些: Params Range(bytes) Details Size 
0x00~0x03 (4) 扩展头占用字节数,不包含参数自身的 4 Bytes Num of Flag Bytes 0x04 (1) 扩展头标志位总字节数,记为 X ,辅助扩展头标志位 Extended Flags (X) 扩展头标志位,表示当前扩展头特性,此处不展开 同样的,ID3v2 标签帧(Tag Frame) 的数据结构,在几个版本间也有一定差异。 对于 ID3v2.2 有 (注意版本): Params Range(bytes) Details Tag Frame Identifier 0x00~0x02 (3) 标记当前标签帧 ID,固定存储 对应类型的 ASCII 码,具体类型见后续表 Tag Frame Size 0x03~0x04 (2) 当前 ID3 标签帧 的数据内容长度,记为 X Bytes 不包括帧头部信息的字节数,即 头部 6 Bytes 例如:帧1 (30) Size = 30-6 = 24 Bytes = X Tag Frame Flags 0x05 (1) 标签帧标志位(位标记),有:有压缩,标记当前标签帧数据是否压缩,1000 0000 有加密,标记当前标签帧数据是否加密,0100 0000 有分组,标记当前标签帧属于一组分组,0010 0000 Tag Frame Grouping ID 0x06 (1) 标签帧分组标记,动态(可有可无)根据 Flags [有分组] 情况,如有分组,则记录分组 ID 分组 ID 相同的 标签帧,属于一组数据 Tag Frame Contents 0x05~+X (X) or 0x06~+X (X) 当前标签帧的实际数据,例如:\"A Lovely Song\" 对于 ID3v2.3 和 ID3v2.4 有 (注意版本): Params Range(bytes) Details Frame Identifier 0x00~0x03 (4) 标记当前标签帧 ID,固定存储 对应类型的 ASCII 码,具体类型见后续表 Frame Size 0x04~0x07 (4) 当前 ID3 标签帧 的数据内容长度,记为 X Bytes 不包括帧头部信息的字节数,即 头部 10 Bytes 例如:帧1 (30) Size = 30-10 = 20 Bytes = X Tag Frame Status Flags 0x08 (1) 标签帧状态标志位(位标记),有: 标签保留,如修改标签时是否保留此帧,1000 0000 文件保留,如修改文件时是否保留此帧,0100 0000 只读帧,标记当前标签帧是否只能读取,0010 0000 Tag Frame Format Flags 0x09 (1) 标签帧格式标志位(位标记),这里有区分, v2.3: 有压缩,标记当前标签帧数据是否压缩,1000 0000 有加密,标记当前标签帧数据是否加密,0100 0000 有分组,标记当前标签帧属于一组分组,0010 0000 v2.4: 有分组,标记当前标签帧属于一组分组,1000 0000 有压缩,标记当前标签帧数据是否压缩,0100 0000 有加密,标记当前标签帧数据是否加密,0010 0000 去同步,解决播放器解读 0xFF 值问题,0001 0000 原长度,标记该帧是否含有原数据长度,0000 1000 Tag Frame Grouping ID 0x0a (1) 标签帧分组标记,动态(可有可无) 根据 Flags [有分组] 情况,如有分组,则记录分组 ID 分组 ID 相同的 标签帧,属于一组数据 Tag Frame Data Length Indicator 0x0b~0x0d (4) or 0x0b~0x0e (4) 标签帧原数据长度,动态(可有可无) 根据 Flags [原长度] 情况,状态开启则记录数据原长 这一属性一般配合压缩使用,不包含自身 4 Bytes 例如:压缩前 20 Bytes,则该值记录 20 Bytes Tag Frame Contents 0x0a~+X (X) or 0x0b~+X (X) or 0x0e~+X (X) 当前标签帧的实际数据,例如:\"A Lovely Song\" 那么标签帧有哪些类型呢,或者说 ID3v2 的标签帧(Tag Frame),其数据首位 帧 ID 标签 都有哪些? 我们有下表: Tag Frame Type Details Title 标题标签,此类的内容记录标题字符串,固定存储 \"TIT2\" 对应的 ASCII 码 0x54495432 Artist 艺术家标签,此类的内容记录艺术家字符串,固定存储 \"TPE1\" 对应的 ASCII 码 0x54504531 Album 专辑标签,此类的内容记录专辑字符串,固定存储 \"TALB\" 对应的 ASCII 码 0x54414c42 Year 年份标签,此类的内容记录发行年份字符串,固定存储 \"YEAR\" 对应的 ASCII 码 0x59454152 Comment 评论标签,此类的内容记录评论或额外信息键值对字符串,固定存储 \"COMM\" 对应的 ASCII 码 0x434f4d4d Genre 流派标签,此类的内容记录当前音频流派字符串,固定存储 \"TCON\" 对应的 ASCII 码 0x54434f4e Track Number 音轨标签,此类的内容记录当前音轨数(即大多为曲目数),固定存储 \"TRCK\" 对应的 ASCII 码 0x5452434b Attached Picture 关联图片标签,此类的内容记录封面图数据,固定存储 \"APIC\" 对应的 ASCII 码 0x41504943 依具上述固定值,检测不同标签帧种类的内容信息,即可参考 FLAC 的类似元数据块类型举一反三,读取标签帧内容信息(Tag Frame Contents)。对 MP3 格式来说,标签帧内容多为编解码自设定,或着 较为存粹的数据内容(即非多级结构的纯数据值)。不再单独举例。 于是,一个完整的 MP3 文件构成,有如下图数据: 图 1-61 完整 MP3 音频格式文件的文件结构举例 显然,MP3 文件结构并不像本书之前介绍的种类中,包含一个文件头部 4 Bytes 的固定文件类型标记。这是因为,MP3 标签的标签头(Tag Header)和 MP3 数据帧的帧头(Frame Header)同步字(Sync Word),都足以表明当前文件为 MP3 格式。 所以,MP3 文件结构,从数据抽象的角度来看更为精炼。 不过,随着 先进音频编码(AAC [Advanced Audio Coding]) 格式的出现,现在主流的音频流媒体传输,如对音频压缩有需要的工程中,多数选择以 AAC 进行相关音频的硬件抽象层(HAL)封装。较少在流传输中采用 MP3,虽然 AAC 只是半开源。 至此,在详细介绍了MP3格式之后,我们基本了解了音频保存与还原过程。 历经音频的基础知识到声波和声音的三要素,再到声音的解构和数字化处理,以及音频的存储格式。我们终于对音频的各个方面都有了相对深入的认识。而音频相关的基本概念和格式属性,到这里,已经在本书中完成了系统性梳理。 为了帮助开发者们在后续实践中更好地应用这些知识,章节末尾,作者列出了 常用的音频相关开源库 作为本章的句号,供大家参考和使用: FLAC C/C++ Library. by Xiph.Org Foundation. https://xiph.org/flac/api/index.html LAME (LAME Ain't an MP3 Encoder). C/C++. http://lame.sourceforge.net/ MAD (MPEG Audio Decoder). C/C++. https://www.underbit.com/products/mad/ BASS (Basic Audio Stream System). C/C++. 
http://www.un4seen.com/ Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_1/Language/cn/References_1.html":{"url":"Chapter_1/Language/cn/References_1.html","title":"【参考文献】","keywords":"","body":"一、【参考文献】 [1] Fitz M P. Fundamentalsof Communications Systems[M]. 2007. [2] Obaidat M S, Boudriga N A. Fundamentals of performance evaluation of computer and telecommunication systems[M]. John Wiley & Sons, 2010. [3] Feynman R P, Leighton R B, Sands M. The feynman lectures on physics; vol. i[J]. American Journal of Physics, 1965, 33(9): 750-752. [4] Stevens S S. A scale for the measurement of a psychological magnitude: loudness[J]. Psychological Review, 1936, 43(5): 405. [5] O'Shaughnessy, Douglas. Speech Communications: Human and Machine[M]. 1999. [6] Slaney M. Auditory toolbox: A matlab toolbox for auditory modeling work[J]. Interval Res. Corp. Tech. Rep, 1998, 10: 1998. [7] Terminology A. American national standard[J]. ANSI S1, 2006: 1-1994. [8] Azamian, Mohammadali & Kabir, Ehsanollah. (2019). Synthesizing the note-specific atoms based on their fundamental frequency, used for single-channel musical source separation. Multimedia Tools and Applications. 78. 10.1007/s11042-018-7060-8. [9] Grove G, Sadie S, Tyrrell J, et al. The new Grove dictionary of music and musicians[J]. (No Title), 1980. [10] Capecchi D. Leonhard Euler between mathematics and natural philosophy: An introduction to natural science Anleitung zur Naturlehre[J]. Handbook of the History and Philosophy of Mathematical Practice, 2020: 1-53. [11] Cohn R. Introduction to neo-riemannian theory: a survey and a historical perspective[J]. Journal of Music Theory, 1998: 167-180. [12] Suzuki Y, Takeshima H. Equal-loudness-level contours for pure tones[J]. The Journal of the Acoustical Society of America, 2004, 116(2): 918-933. [13] Fletcher H, Munson W A. Loudness, its definition, measurement and calculation[J]. Bell System Technical Journal, 1933, 12(4): 377-430. [14] International Organization for Standardization. Acoustics: Normal Equal-loudness-level Contours[M]. ISO, 2023. [15] Suzuki Y, Takeshima H, Kurakata K. Revision of ISO 226\" Normal Equal-Loudness-Level Contours\" from 2003 to 2023 edition: The background and results[J]. Acoustical Science and Technology, 2024, 45(1): 1-8. [16] Smith, Steven W. (1997). The Scientist and Engineer's Guide to Digital Signal Processing. California Technical Pub. pp. 177–180. ISBN 978-0966017632. [17] Toole F. The measurement and calibration of sound reproducing systems[J]. Journal of the Audio Engineering Society, 2015, 63(7/8): 512-541. [18] Olive S, Welti T. The relationship between perception and measurement of headphone sound quality[C]//Audio Engineering Society Convention 133. Audio Engineering Society, 2012. [19] Olive S, Welti T. The relationship between perception and measurement of headphone sound quality, from his blog, https://seanolive.blogspot.com/2013/04/the-relationship-between-perception-and.html, Monday, April 22, 2013. [20] AES11-2009 (r2019): AES recommended practice for digital audio engineering - Synchronization of digital audio equipment in studio operations, Audio Engineering Society, https://www.aes.org/tmpFiles/aessc/20240506/aes03-set-2009-r2019-i.pdf, 2009 [21] HUFFMAN, D. A. 1952. A method for the construction of minimum-redundancy codes. In Proceedings of the Institute of Electrical and Radio Engineers 40, 9 (Sept.), pp. 1098-1101. [22] Connell J B. 
A huffman-shannon-fano code[J]. Proceedings of the IEEE, 1973, 61(7): 1046-1047. [23] Rissanen J, Langdon G G. Arithmetic coding[J]. IBM Journal of research and development, 1979, 23(2): 149-162. [24] O'Shaughnessy D. Linear predictive coding[J]. IEEE potentials, 1988, 7(1): 29-32. [25] Ramamoorthy V, Jayant N S. Enhancement of ADPCM speech by adaptive postfiltering[J]. AT&T Bell Laboratories technical journal, 1984, 63(8): 1465-1475. [26] Roberts Family. FLAC Metadata Structure [EB/OL]. [2023-10-23]. https://www.the-roberts-family.net/metadata/flac.html. [27] Theile, Günther; Stolle, Gerhard; 1992; MUSICAM-Surround: A Universal Multichannel Coding System Compatible with ISO 11172-3 PDF; Institut fur Rundfunktechnik, Munich, Germany; Paper 3403; Available from: https://aes2.org/publications/elibrary-page/?id=6731 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Apex_2_Introduce.html":{"url":"Chapter_2/Language/cn/Apex_2_Introduce.html","title":"二、色彩的运用与存储","keywords":"","body":"二、色彩的运用与存储 引言 自人类对世界有认知开始,从寄思于物的艺术创作,日常生活的打扮穿着,再到科学研究对物理规律的探索,色彩始终伴随左右。什么是色彩?色彩是如何被应用到视觉工程的? 本章节主要整理说明了,部分关键光学与色彩学概念的应用和推导。通过对当代计算机图像有关颜色处理发展史的梳理,以期为工程上应用于单一图像处理、色彩权衡对比等工作,和理论上深入理解图像规格标准迭代及原理,提供必要知识图谱。 图像本身是颜色的载体,因此对图像的讨论,也就是对色彩(颜色)的讨论。 关键字:色彩基础、色彩空间、色彩格式、配色函数、色度、色温 目录 2.1 色彩基础 2.2 颜色三要素(Three Elements of Color) 2.2.1 色调(Hue) 2.2.2 饱和度(Saturation) 2.2.3 光亮度(Luminance) 2.3 色彩的衡量 2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化 2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space) 2.3.3 经典三原色函数(Trichromatic Primaries Functions) 2.3.4 经典三刺激函数(Tristimulus Values Functions) 2.3.5 现代色彩体系(Modern Color System) 2.4 色彩的对比 2.4.1 色域(Color Gamut ) 2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram) 2.4.3 色差(Chromatic Aberration) 2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature) 2.4.5 标准光源(Standard Illuminants)& 白点(White Point) 2.4.6 显色指数(Color Rendering Index) 2.5 经典色彩空间(Classical Color Space) 2.5.1 光学三原色色彩空间(RGB) 2.5.2 颜料三原色色彩空间(CMY / CMYK ) 2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space) 2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space) 2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space) 2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* Color Space) 2.5.7 颜色三要素色彩空间(HSV / HSI / HSL) 2.6 色彩的存储 2.6.1 色彩格式(Color Format)与色彩存储 2.6.2 RGB 体系色彩格式 2.6.3 YUV 体系色彩格式 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_1.html":{"url":"Chapter_2/Language/cn/Docs_2_1.html","title":"2.1 色彩基础","keywords":"","body":"2.1 色彩基础 1666年,艾萨克·牛顿(Isaac Newton,1642 - 1726) 通过光的色散实验,发现了太阳光可以分解成依次为红、橙、黄、绿、蓝、靛、紫的单色光,并可以由单色光复合而成白光 ,由此提出了 牛顿颜色原理(Newton's theory of colour)。并于 1705 年结合他在光学领域的其他发现与猜想,编著为《光学》[1]。在此之前,亚里士多德提出的白光为一种纯粹光源才是学界共识。色散试验的伟大,在于为人们揭示了人类视觉感知色彩形式的光学物理特性。人们首次接触到了光谱(Spectrum)概念。此后,人们对光谱进行了大量基于颜色观测的研究,并逐步完成了奠基色彩学(Color Science)的理论归纳总结。 人们发现,如果我们将由红到紫的 可见光谱(380nm - 780nm) 首尾相连,那么就能够得到一个 360 度的连续可分色表。这个表被称之为色轮(Color Wheel) [2]。色轮中, 0 度表示红色,360 度表示紫色。环的圆心,即正中央则为纯白。 在此基础上,色彩学就颜色的合成,产生了三大理论:加法混合论、减法混合论、中性混合论。从物理意义上讲,加法混合论代表着自然界中自发光物体的光源色彩混合,减法混合论代表着反光物体反射光色彩混合情况,中性混合论依赖人类生理特征进行的色彩还原形式。加法混合论和减法混合论分别在光学领域和艺术领域,得到了广泛的应用。所以,加法混合论所采用的红(Red)、绿(Green)、蓝(Blue)三基色被称为光学三原色(RGB),减法混合论所采用的深红(Cyan)、青(Magenta)、黄(Yellow)三基色被称为颜料三原色(CMY)。 图 2-1 色轮(Color Wheel)与颜色(Vienna,1772)[2] 1802年,托马斯·杨(Thomas Young,1773 - 1829) 在对可见光谱范围内光线波长测量时,发现人眼对红绿蓝三色光波极为敏感。杨确定了人眼中存在 3 种能够感知不同波长的光感神经纤维,佐证了光学三原色的生理基础,并粗略的测定了人的三色感知范围 [3]。 1850年,赫尔曼·冯·亥姆霍兹(Hermann 
von Helmholtz,1821 - 1894) 在杨的研究基础上,经实验确定了杨理论(Young's theory)中所提及三色感知的光感神经纤维,就是后续被我们所熟知的视锥细胞(cone cells),并对三类视锥细胞敏感的红、绿、蓝三色所对应光波波长进行了重测定。由此,进一步推动了三色理论(trichromatic theory)雏形的形成 [4] [5]。人们为了纪念两位的贡献,也将三色理论称为 杨-亥姆霍兹理论(Young–Helmholtz theory)。 图 2-2 赫尔曼·冯·亥姆霍兹的三色理论,关于视锥细胞感知范围的手稿 受限于当时的科研器材水平,亥姆霍兹很遗憾的没有确切的办法,测量到三类视锥细胞可感知的确切波长范围。不过现代医学领域的研究,已相对准确的得到了答案。我们的眼睛基于此三种颜色的波形叠加组合,形成了能够覆盖从紫到红(360nm - 780nm)的 312nm - 1050nm 可观测波长范围 [6]。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_2.html":{"url":"Chapter_2/Language/cn/Docs_2_2.html","title":"2.2 颜色三要素(Three Elements of Color)","keywords":"","body":"2.2 颜色三要素(Three Elements of Color) 1853 年,赫尔曼·格拉斯曼(Hermann Günter Grassmann,1809 - 1877) 基于三色理论,取一组红绿蓝三色光源,尝试还原其他类型视觉单色(monochromat)的实验。这就是著名的光谱的 色度特性实验 (The Colorimetric Properties of the Spectrum) [7]。实验过程经过对红绿蓝三色灯源的水平位置调整,来间接的调整了三色最终组合情况。对比则选用了契合目标结果的参考光源。根据实验结果,格拉斯曼发现,确实可以用一个变权三元一次等式来对所有可见单色光源进行基于光学三原色(RGB)的合成。但是这样的合成是有条件的,对于部分特殊的颜色,例如 橄榄绿(Vibrantgreen),就需要将红色光源摆放到隔板左边靠近对比光源的位置,才能使目标色在目标采样区域合成出来。他将这种现象称为 负色匹配('negative' colors matching)。而在此次试验中,格拉斯曼得到大量需要使用 1 个或 2 个 负色才能匹配的单色。这种现象的出现,在于当时的物理实验设备并不能很好的找到,合适作为人眼感知波峰基准值的光学三原色(RGB)波长,导致需要通过较多负拟合的方式,来人为的处理三相波叠加的还原它色问题。不过这并不影响实验有奠基理论产出。 图 2-3 赫尔曼·格拉斯曼(Hermann Günter Grassmann,1809 - 1877) 1854年,格拉斯曼结合 光谱色度特性试验 的结果,在牛顿颜色混合理论的基础上,总结归纳出了 格拉斯曼颜色定律(Grassmann's law),奠定了光学理论下现代色度学基础 [8] 。定律包含五条,分别为: 1)补色律,指任何一种颜色都有另一种同它混合产生白和灰的颜色; 2)间色律,指混合任何两种非补色便可产生一种新的混合色或介于两者之间的中间颜色; 3)代替律,指任何不同颜色混合产生的颜色可相互替代; 4)相加律,指混合色的总光亮度为组成混合色的各颜色光亮度的总和; 5)混合律,人的视觉只能分辨颜色的色调、光亮度、饱和度三种变化。 这些规律仅适用于色光的加法混合理论。即在基色体系中,只适用于光学三原色(RGB)。格拉斯曼在规律中,首次提出了 色调(Hue)、饱和度(Saturation)、光亮度(Luminance) 的重要性,这三个属性继而被称为 颜色的三要素(Three Elements of Color) [9] 。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_2_1.html":{"url":"Chapter_2/Language/cn/Docs_2_2_1.html","title":"2.2.1 色调(Hue)","keywords":"","body":"2.2.1 色调(Hue) 色调(Hue) 也被称为色相,指颜色实际种类。换一种角度来说。色调是对人眼可观察颜色的基础分类。通过色调,结合其他两个颜色的三要素,我们能够准确的描述自然界中能够形成的任意混合色。 格拉斯曼在混合律 中,以色轮作为环形颜色索引表,对色轮上颜色进行了基于几何弧度的划分,使颜色的色调能够用其与相对基准色的逆时针夹角表示。色调的作用在于,可以将任意两个环上选定颜色的权重看作物理重量,利用两点连线后线段质量中心与圆环圆心连线的延长线,来推算最终结果。 图 2-4 格拉斯曼的混合律颜色推算演示 图中,O 代表理想白点(White Point),D 代表混合后对应单色; 假设我们以选定颜色与0度的夹角,对应的弧度表示该颜色本身。现有两个颜色,分别为 (RA,GA,BA)(R_{A},G_{A},B_{A})(RA,GA,BA) 和 (RB,GB,BB)(R_{B},G_{B},B_{B})(RB,GB,BB) ,那么取权重 (WAC,WCB)(W_{AC},W_{CB})(WAC,WCB) , W=WAC+DCB=1W = W_{AC} + D_{CB} = 1W=WAC+DCB=1 。对于 D 点的颜色 (RD,GD,BD)(R_{D},G_{D},B_{D})(RD,GD,BD) 就有: RD=WACRA+WCBRBGD=WACGA+WCBGBBD=WACBA+WCBBB {\\displaystyle \\begin{aligned} R_{D} = W_{AC} R_{A} + W_{CB} R_{B} \\\\ G_{D} = W_{AC} G_{A} + W_{CB} G_{B} \\\\ B_{D} = W_{AC} B_{A} + W_{CB} B_{B} \\end{aligned} } RD=WACRA+WCBRBGD=WACGA+WCBGBBD=WACBA+WCBBB 混合律是对加法混合论的一次成功拓展,此时已经隐约可以看到最初色度图的理论雏形了。不过这时对颜色的索引还停留在比较初级的阶段。现代学界和工业界已普遍采用 色度(Chromaticity),配合 颜色空间(Color Space),来代替描述颜色种类。色调更多的被用于艺术和设计领域。 另一方面,随着 现代色彩体系(Modern Color System) 的在细分领域的逐步分化,部分颜色空间的规格出发点,也对色调(Hue)和饱和度(Saturation)代表的概念本身进行了充分的抽象,形成了诸如 LAB、LUV 和 颜色三要素(HSL)等经典的色彩空间方案。为当代计算机工业体系中,艺术设计、数据传输和工程计算方面的贯通,提供了较大的帮助(可参见后文 2.5 经典色彩空间 )。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_2_2.html":{"url":"Chapter_2/Language/cn/Docs_2_2_2.html","title":"2.2.2 饱和度(Saturation)","keywords":"","body":"2.2.2 饱和度(Saturation) *饱和度 是指颜色的浓淡程度。以其对比标准的不同,被区分为 光学饱和度(Colorfulness) 和 
感官饱和度(Saturation) [10]。光学饱和度多用于工程,感官饱和度则多用于艺术设计中。 光学饱和度指标,被定义为标准白点与实际颜色的强度分量与白点到其纯色分量的长度比; 感官饱和度指标,被定义为一个区域的颜色与其当前亮度的充盈配比; 从定义的角度来看,显然感官饱和度的主观成分较大。虽然色彩的光学饱和度和感官饱和度在概念上面存在较大差异。但是实际工程实践中,这两个通常被混为一谈(虽然这么做并不严谨)。工程师们经常以光学饱和度(Colorfulness)为主,将两个概念统称为饱和度(Saturation)。因此,我们这里使用的饱和度,即代指光学饱和度(Colorfulness)[11]。 在描述的格拉斯曼颜色推算过程中,我们提到过。将其单独抽出来看: 图 2-5 格拉斯曼的饱和度定义说明 其中,D点就是推算颜色 (RD,GD,BD)(R_{D},G_{D},B_{D})(RD,GD,BD) 的最大饱和度,O点则是纯白光 OpureO_{pure}Opure 。 OC 代表白色分量强度,记为 DaD_{a}Da ; CD 代表纯色分量强度,记为 DbD_{b}Db ; 则, D=Da+Db=1D = D_{a} + D_{b} = 1D=Da+Db=1 ,记为总强度。 假设 C点的颜色为 (RC,GC,BC)(R_{C},G_{C},B_{C})(RC,GC,BC),我们就有: RC=DaOpure+DbRD=(1−Db)Opure+DbRDGC=DaOpure+DbGD=(1−Db)Opure+DbGDBC=DaOpure+DbBD=(1−Db)Opure+DbBD {\\displaystyle \\begin{aligned} R_{C} = D_{a} O_{pure} + D_{b} R_{D} = (1-D_{b}) O_{pure} + D_{b} R_{D} \\\\ G_{C} = D_{a} O_{pure} + D_{b} G_{D} = (1-D_{b}) O_{pure} + D_{b} G_{D} \\\\ B_{C} = D_{a} O_{pure} + D_{b} B_{D} = (1-D_{b}) O_{pure} + D_{b} B_{D} \\end{aligned} } RC=DaOpure+DbRD=(1−Db)Opure+DbRDGC=DaOpure+DbGD=(1−Db)Opure+DbGDBC=DaOpure+DbBD=(1−Db)Opure+DbBD 而 DbD_{b}Db 就是饱和度 SSS 。整个格拉斯曼颜色混合律就可以用一个公式表示了: C=(1−S)Opure+S(WACRA+WCBRB)=Opure+S(WACRA+WCBRB−Opure) C = (1-S) O_{pure} + S (W_{AC}R_{A}+W_{CB}R_{B}) = O_{pure} + S (W_{AC}R_{A}+W_{CB}R_{B} - O_{pure}) C=(1−S)Opure+S(WACRA+WCBRB)=Opure+S(WACRA+WCBRB−Opure) 如果记白点 O 为无穷小(0)。那么整个式子就可以简化为: C=S(WACRA+WCBRB)=S⋅D C = S (W_{AC}R_{A}+W_{CB}R_{B}) = S \\cdot D C=S(WACRA+WCBRB)=S⋅D 在已知白点(White Point)和选定色的情况下。依据格拉斯曼饱和度取值,人们可以计算得期望的渐变色泽,从而快速调色。 同 色调(Hue) 一样, 饱和度(Saturation) 也处于简单系统中,不方便体系下的量化。因此,饱和度的概念在现代学界和工业界中,同样也普遍被色度(Chromaticity)配合颜色空间(Color Space)代替表示,以便于工程量化计算。 现代色彩体系(Modern Color System) 中的部分方案,对饱和度概念进行了有效的利用转换(可参见后文 2.5.7 颜色三要素色彩空间 )。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_2_3.html":{"url":"Chapter_2/Language/cn/Docs_2_2_3.html","title":"2.2.3 光亮度(Luminance)","keywords":"","body":"2.2.3 光亮度(Luminance) 光亮度(Luminance) 也被称为辉度,是指固定光所照射单位平面面积光照区域的物理发光强度,单位是尼特( NitNitNit ),代表烛光每立方米( cd/m2cd/m^2cd/m2 ,candela per square metre)。光亮度属于光度学(Luminosity)概念。区别于亮度(Brightness)这种用来形容人生理光强直接感受的主观描述,光亮度是从可见光谱范围计量的物理量。 光亮度的计算依赖于发光强度度量。而 发光强度(Luminous Intensity) 则是用于表示光源给定方向上单位立体角内光通量的物理量,又被称为光强或光度,单位是烛光( cdcdcd , candelacandelacandela )。 如果记光亮度为 LvL_{\\mathrm {v}}Lv ,发光强度为 IvI_{\\mathrm {v}}Iv ,那么两者单位间的关系为 1 Nit=1 cd/m2 1 \\ Nit = 1 \\ cd/m^2 1 Nit=1 cd/m2 光亮度的测量方法在格拉斯曼时期,并没有太好的量化标准,因此更多的是作为一个参数来配合其他要素进行颜色描述的。现如今,对于光亮度的国际统一测量标准如下图所示: 图 2-6 光亮度测量实验与关键变量示意图 其中, 记 Σ\\SigmaΣ 代表光源,SSS 代表接受光线的物体照射表面, 记 dΣ{d\\Sigma}dΣ , 代表发光源上包含到达照射表面指定定向光线出发点的无穷小面积, 记 dSdSdS 代表照射表面上包含指定出发点的光源定向光线照射目标点的无穷小面积, 记 dΩΣd\\Omega_\\SigmadΩΣ , 代表光线出发点与 dSdSdS 所构锥体立体角(Solid Angle)的球面度(sr: Steradian), 记 dΩSd\\Omega_SdΩS , 代表光线接受点与 dΣd\\SigmadΣ 所构锥体立体角(Solid Angle)的球面度(sr: Steradian), 记 nΣn_\\SigmanΣ 代表 dΣd\\SigmadΣ 的法向量, θΣ\\theta_\\SigmaθΣ 代表 nΣn_\\SigmanΣ 与指定定向光线的夹角, 记 nSn_SnS 代表 dSdSdS 的法向量, θS\\theta_SθS 代表 nSn_SnS 与指定定向光线的夹角, 如果取国际通用单位制,且光线在传播过程中经过的介质为无损介质的话,那么就存在如下的光亮度计算公式: LvΣ=d2ΦvdΣdΩΣcosθΣ=d2ΦvdSdΩScosθS=LvS {\\displaystyle L_{\\mathrm {v}_{\\Sigma } }={\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {v} }}{\\mathrm {d} \\Sigma \\,\\mathrm {d} \\Omega _{\\Sigma }\\cos \\theta _{\\Sigma }}}={\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {v} }}{\\mathrm {d} S\\,\\mathrm {d} \\Omega _{S}\\cos \\theta _{S}}}=L_{\\mathrm {v}_{\\mathrm {S}}}} LvΣ=dΣdΩΣcosθΣd2Φv=dSdΩScosθSd2Φv=LvS 取出入面积及立体角相等,记同等出入面积为 AAA ,立体角为 Ω\\OmegaΩ ,照射角为 θ\\thetaθ ,则有: dΩ=dΩΣ=dΩSdθ =dθΣ =dθSdA=dΣ =dS 
{\\displaystyle \\begin{aligned} &{d} \\Omega = {d} \\Omega _{\\Sigma } = {d} \\Omega _{S} \\\\ &{d} \\theta \\ = {d} \\theta _{\\Sigma }\\ = {d} \\theta _{S} \\\\ &{d} A = {d} {\\Sigma }\\ \\ = {d} {S} \\\\ \\end{aligned} } dΩ=dΩΣ=dΩSdθ =dθΣ =dθSdA=dΣ =dS Lv=d2ΦvdAdΩcosθ L_{\\mathrm {v}} = {\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {v} }}{\\mathrm {d} A\\,\\mathrm {d} \\Omega \\cos \\theta }} Lv=dAdΩcosθd2Φv 公式中, 以 Φv\\Phi _{\\mathrm {v} }Φv 代表 光通量(Luminous Flux) , 单位是流明( lmlmlm ,lumenlumenlumen ),是指标度可见光对人眼的视觉刺激程度,是光度学下的人眼视觉特性导出量(规格量)。1 cd1\\ cd1 cd 点光源在单位立体角( 1 sr1\\ sr1 sr )下的光通量为 1 lm1\\ lm1 lm , 即 1 lm=1 cd⋅sr1 \\ lm = 1 \\ cd \\cdot sr1 lm=1 cd⋅sr 。光通量计算公式是: Iv=dΦvdΩ→Φv=∫ΣIv⋅dΩ {I _{\\mathrm {v}}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {d} \\Omega}} \\rightarrow {\\Phi _{\\mathrm {v}}} = \\int _{\\Sigma } I_v \\cdot {d} \\Omega Iv=dΩdΦv→Φv=∫ΣIv⋅dΩ 如果记 EvΣE_{\\mathrm {v}_{\\Sigma }}EvΣ 为单位光源面积发出的光通量即 光出射度(Luminous Exitance),记 EvSE_{\\mathrm {v}_{S }}EvS 为单位受照面积接受的光通量即 光照度(Illumination)。那么在无损截止情况下 EvΣ=EvSE_{\\mathrm {v}_{\\Sigma }} = E_{\\mathrm {v}_{S }}EvΣ=EvS ,我们记为 EvE_{\\mathrm {v}}Ev 。被称为光照射度,单位是勒克斯( luxluxlux , lxlxlx )。 1 lx=1 lm/m21 \\ lx = 1 \\ lm/m^21 lx=1 lm/m2 有: Ev=dΦvdA E_{\\mathrm {v}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {d} A}} Ev=dAdΦv 则 d2Φv{\\mathrm {d} ^{2}\\Phi _{\\mathrm {v} }}d2Φv 代表由 dΣd\\SigmadΣ 发出的光线,在 dΩΣd\\Omega_\\SigmadΩΣ 为球面度的立体角下的全方向光通量,即: d2Φv=dEv⋅dA d^{2}\\Phi _{\\mathrm {v} } = dE_{\\mathrm {v}} \\cdot dA d2Φv=dEv⋅dA 那么整个公式就可以化简为: Lv=dEvdΩ⋅cosθ {\\displaystyle L_{\\mathrm {v} }={\\frac {\\mathrm {d} E _{\\mathrm {v} }}{d \\Omega \\cdot \\cos \\theta }}} Lv=dΩ⋅cosθdEv 这个公式就是我们在光度学角度,用来计算物体 理想亮度的标准公式。 如果需要计算介质造成的损耗,那么公式需要引入 光展量(Etendue),即在材质折射率下的光束所通过的面积和光束所占有的立体角的积分。我们计 GGG 代表光展量, nnn 代表折射率,则光展量公式: G=∫Σ∫SdG→dG=n2⋅dAdΩcosθ {\\displaystyle G=\\int _{\\Sigma }\\!\\int _{S}\\mathrm {d} G} \\rightarrow {\\mathrm {d}G }=n^{2} \\cdot {\\mathrm {d} A\\,\\mathrm {d} \\Omega \\cos \\theta } G=∫Σ∫SdG→dG=n2⋅dAdΩcosθ 对于无损介质,折射率 n=1n=1n=1 。因此,整个亮度公式在知道光展量的情况下,就可以简化为: Lv=n2dΦvdG=dΦvdG∣n=1 {\\displaystyle L_{\\mathrm {v} }=n^{2}{\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {d} G}}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {d} G}}|_{n=1} Lv=n2dGdΦv=dGdΦv∣n=1 光亮度不会影响物体的色彩信息,而仅代表物体本身发光的强度。决定物体本身颜色信息的,则是物体所具有的色调和饱和度属性。 光度单位体系是一套反映视觉亮暗特性的光辐射计量单位,被选作基本量的不是光通量而是发光强度,因此这套公式只适用于可见光范围。对光的更为客观的描述则依赖于辐射度学的相关概念。辐射度学从黑体辐射与能量密度的学角度出发更换了物理学参照物,将光度学系统提出的度量理念适用范围,扩展到了包含长短波的完整电磁波段。进而间接的促成了色温概念在色彩学的应用。这个会在后文中有关颜色度量的章节额外说明。 由于光亮度的这种自成体系的特性。在颜色的三要素的应用中,它通常被分离单独处理。所以,现代工程体系中不会直接的应用光度学里的光亮度公式,而是采用 辐射亮度(Radiance) 的科学物理量,结合 色温(Color Temperature),或 色彩空间(Color Space)如 HSL 的色彩强度(Intensity) 的自设定参数,等概念平替。 至此,色彩学的经典理论:格拉斯曼颜色定律,所引申出的重要配参已准备完毕。问题随之而来: 我们能否将描述自然现象的参考标准,应用在有局限的实际生产活动中。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_3.html":{"url":"Chapter_2/Language/cn/Docs_2_3.html","title":"2.3 色彩的衡量","keywords":"","body":"2.3 色彩的衡量 光学三要素的出现,让人们在对颜色的客观描述上,能够凭借传统色彩学体系内的参数,进行有限程度的量化。但这并不足以适用于除科学计算和测定外的批量工程作业。毕竟在算力限定的条件下,我们不可能对每一寸光的每一个物理量都进行独立的计算。同时,大量繁琐且模糊的设定也 无法便于简化,而我们也需要获得 能够将感官上的体验和客观上的物理值联系起来的方法论。 如果能够将光波本身和颜色建立起直接的可量化的转换关系,就能够解决表示上的问题了。这就是 配色函数 的由来。 于是,首先需要做的是 获得科学证明,以 提供函数构建理论上的支持。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_3_1.html":{"url":"Chapter_2/Language/cn/Docs_2_3_1.html","title":"2.3.1 
辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化","keywords":"","body":"2.3.1 辐射亮度(Radiance)& 色温(Color Temperature)& 颜色的量化 辐射亮度(Radiance) 也被称为辐亮度,是用于描述指定辐射源,单位方向上辐射强弱的客观物理量。 辐射度学(Radiometry) 和 光度学(Luminosity),都是对电磁辐射能量进行计量的学科。不同之处在于,辐射度学是物理电磁波能量角度的客观计量,光度学是人眼视觉的主观因素后的相应计量。因此,相比于之前在颜色三要素里提及的 光亮度(Luminance),辐射度学的 辐射亮度(Radiance) 其实才更贴近光亮度的物理本质。 而人们是如何通过辐射度学对能量的定义,将光的波长和颜色对应起来的呢?这就需要提到色温的概念了。 色温(Color Temperature) 是由物体本身的黑体辐射决定的一个物理量,计量单位为 K(开尔文温度)。它被定义为,绝对黑体从绝对零度(-273.15℃)开始加温后所呈现出的颜色。由于颜色本身就是一个主观量,而颜色又是由光的波长决定的,不同的色温本质上对应的是不同波长的光。所以,如果我们将色温这个纯粹的辐射度学概念延伸应用到了色彩领域,就能利用色温代表意义本身,建立起两个体系之间的联系了。 辐射度学与光度学的单位转换 同光亮度,辐射亮度的计算也需要依赖于辐射强度度量。 辐射强度(Radiant Intensity) 是用于表示光源给定方向上单位立体角内辐射通量的物理量,单位是瓦特每球面度( W/srW/srW/sr )。辐射通量(Radiant Flux)是指单位时间内通过某一截面的辐射能,位是瓦特( WWW )。 记辐射亮度为 LeL_{\\mathrm {e}}Le ,辐射强度为 IeI_{\\mathrm {e}}Ie ,辐射通量为 Φe\\Phi _{\\mathrm {e}}Φe ,辐射照射度 EeE _{\\mathrm {e}}Ee 。那么四者间的关系为: Ie=dΦedΩ→Φe=∫ΣIe⋅dΩEe=dΦedA→d2Φe=dEe⋅dALe=d2ΦedAdΩcosθ=dEedΩ⋅cosθ {\\displaystyle \\begin{aligned} &{I _{\\mathrm {e}}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {e} }}{\\mathrm {d} \\Omega} \\rightarrow \\Phi _{\\mathrm {e}}} = \\int _{\\Sigma } I_e \\cdot {d} \\Omega \\\\ &E_{\\mathrm {e}} = {\\frac {\\mathrm {d} \\Phi _{\\mathrm {e} }}{\\mathrm {d} A}} \\rightarrow \\mathrm {d} ^{2}\\Phi _{\\mathrm {e} } = \\mathrm {d} E_{\\mathrm {e}} \\cdot \\mathrm {d} A \\\\ & L_{\\mathrm {e}} =\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {e} }}{\\mathrm {d} A\\,\\mathrm {d} \\Omega \\cos \\theta } =\\frac {\\mathrm {d} E _{\\mathrm {e} }}{d \\Omega \\cdot \\cos \\theta } \\\\ \\end{aligned} } Ie=dΩdΦe→Φe=∫ΣIe⋅dΩEe=dAdΦe→d2Φe=dEe⋅dALe=dAdΩcosθd2Φe=dΩ⋅cosθdEe 公式中,辐射源面积为 AAA ,立体角为 Ω\\OmegaΩ ,照射角为 θ\\thetaθ ,概念基本等同光亮度公式同位参数。 显然,光亮度和辐射亮度的差异只在于参考系上。从有效范围上看,光亮度属于辐射亮度仅考虑可见光谱区域的特殊情况。为了使两个体系能够转换,1979年第十六届国际计量大会 上,人们对发光强度单位坎德拉进行了指定。现在我们说说的一单位坎德拉,即指代发光频率为 HzHzHz 的单色光,在垂直光源表面的定向单位幅角下,测量的辐射强度。即: 1 cd=1/683 W/sr=1 lm/sr → 1 W=683 lm 1 \\ cd = 1/683 \\ W/sr = 1 \\ lm / sr \\ \\ \\rightarrow \\ \\ 1 \\ W = 683 \\ lm 1 cd=1/683 W/sr=1 lm/sr → 1 W=683 lm 因此,记光辐转化率为 KKK ,单位为 lm/Wlm/Wlm/W ,则 KKK 、 Φe\\Phi _{\\mathrm {e}}Φe 与 Φv\\Phi _{\\mathrm {v}}Φv 存在两者之间的转换关系: Φv=K⋅ΦeK=683 lm/W {\\displaystyle \\Phi_v = K \\cdot \\Phi_e \\quad \\quad K = 683 \\ lm/W} Φv=K⋅ΦeK=683 lm/W 带入光亮度 LvL_{\\mathrm {v}}Lv 与辐射亮度 LeL_{\\mathrm {e}}Le 的公式,可得: Lv=K⋅Le {\\displaystyle L_{\\mathrm {v}} = K \\cdot L_{\\mathrm {e}} } Lv=K⋅Le 如此就可以通过 KKK 来完成,辐射度学和光度学间计量的转换了。 我们知道光度学中的不同颜色,本质是波长的不同。而不同的波长在辐射度学中,则代表为不同的能量密度。只要求得对应颜色光的能量密度,就能反向推算对应颜色光的波长了,进而可以将感知到的颜色用实际物理量标定。 借此,以主观感受的客观测量值,人为映射量化建立联系。 至于能量密度的测定,则可以经由物理学体系的黑体辐射定律揭示而出。 从色温到颜色 - 颜色的波长标定 色温(Color Temperature) 是由物体本身的黑体辐射决定的一个物理量,计量单位为 K(开尔文温度)。它被定义为,绝对黑体从绝对零度(-273.15℃)开始加温后,在辐射到达指定复合波情况时所具有的温度。 1900年在德国物理学会上,著名的德国物理学大师 马克思·普朗克(Max Karl Ernst Ludwig Planck,1858 - 1947),公布了自己在电磁波能量问题上的假设,这就是在物理学界影响深远的《论正常光谱中的能量分布》报告。报告的细部由同年普朗克发表的两篇论文组成,分别是《关于维恩频谱方程的改进论》(On an Improvement of Wien's Equation for the Spectrum) [23] 和《关于正常光谱中的能量分布的理论》(On the Theory of the Energy Distribution Law of the Normal Spectrum)[24] 。这两篇理论统一了之前由“紫外灾变”问题分割的,高频有效的维恩位移定律和低频有效的瑞利-金斯公式,并直接促成了量子理论的奠基和近代物理学革命。 记 λ\\lambdaλ 代表电磁波长,vvv 代表 λ\\lambdaλ 的频率, TTT 代表色温, ccc 为光速,普朗克黑体辐射定律(Planck's law|Blackbody radiation law) 的能量密度公式提出: uλ (λ,T)=8πhcλ5⋅1ehcλkT−1=4πc⋅Ie(v)=8πhv3c5⋅1ehvkT−1=uv (v,T) {\\displaystyle \\begin{aligned} u_{\\lambda }\\ (\\lambda,T) ={\\frac {8\\pi hc}{\\lambda^{5}}} \\cdot {\\frac {1}{e^{\\tfrac{hc} {\\lambda kT}}-1}} ={\\frac {4\\pi}{c}} \\cdot I_e (v) ={\\frac {8\\pi hv^3}{c^{5}}} \\cdot {\\frac {1}{e^{\\tfrac{hv} {kT}}-1}} ={u_{v }\\ (v,T)} 
\\\\ \\end{aligned} } uλ (λ,T)=λ58πhc⋅eλkThc−11=c4π⋅Ie(v)=c58πhv3⋅ekThv−11=uv (v,T) 公式中, ccc 为光速, 有 hhh 为 普朗克常数 取 (6.62607015⋅10−34) J⋅s(6.62607015 \\cdot 10^{-34})\\ J\\cdot s(6.62607015⋅10−34) J⋅s ,国际计量大会通过决议值, 有 kkk 为 玻尔兹曼常数 取 (1.380649⋅10−23) J/K(1.380649 \\cdot 10^{-23})\\ J/K(1.380649⋅10−23) J/K ,国际计量大会通过决议值, 当已知黑体辐射源,其单位立方体所含能量与光波长关系如下图所示: 图 2-7 黑体辐射强度与波长分布曲线示意图 图上能明显看到,当物体处于不同色温时,其黑体辐射的总能量被分配到了不同波长光波携带。最终辐射波的情况,则是由不同区段的波长叠加而成,其叠加的强度则和对应波长携带的能量强度正相关。我们取 360nm - 780nm 可见光谱(Visible Spectrum) 范围,那么上图就有如下的展示了: 图 2-8 可见光谱范围内黑体辐射与波长分布曲线示意图 显然,色温高于 5000k 的物体在短波段出现了极大的富集程度,色温低于 5000k 的物体则是长波较为密集。所以自然界中的高温物体在人眼观察中往往偏向蓝白色,相关色温低温的物体则多呈现橙红色。 记色温为 T0T_{0}T0 , T0T_{0}T0 对应的颜色为 C0C_{0}C0 光亮度 L0L_{0}L0 , C0C_{0}C0 对应可见光范围总辐射强度为 IeI_{e}Ie ,光强度 IvI_{v}Iv 。单位面积辐射能为 QQQ ,存在映射函数 Mapping(C0, L0)=QMapping(C_0,\\ L_0) = QMapping(C0, L0)=Q 。 据电磁波辐射能公式有: Q=Le⋅dA=1K⋅Iv⋅dΦvdA2cosθ⋅dA=∫360nm780nmuλ (λ,T0)⋅dλ≈∑360nm780nmuλ (T0)⋅λ {\\displaystyle \\begin{aligned} &Q = {L_e} \\cdot dA = {\\frac {1}{K}} \\cdot {I_v} \\cdot {\\frac {\\mathrm {d} \\Phi _{\\mathrm {v} }}{\\mathrm {dA^2} \\cos{\\theta }}} \\cdot dA = \\int _{360nm} ^{780nm} u_{\\lambda }\\ (\\lambda,T_0) \\cdot {d} {\\lambda} \\approx \\sum _{360nm} ^{780nm} u_{\\lambda }\\ (T_0) \\cdot {\\lambda} \\end{aligned} } Q=Le⋅dA=K1⋅Iv⋅dA2cosθdΦv⋅dA=∫360nm780nmuλ (λ,T0)⋅dλ≈360nm∑780nmuλ (T0)⋅λ 取 1 sr1\\ sr1 sr 单位发光 1 lm1\\ lm1 lm 单位光通量,即 Iv=1 cdI_{v} = 1\\ cdIv=1 cd 。 假设所有区段的电磁波在传播方向上相同,且法线方向。则上式可化为: Q=1K⋅Lv⋅dA=1K⋅IvdA=∑360nm780nmuλ (T0)⋅λ → Q=Lv⋅∑360nm780nmuλIvλ⋅K=Lv⋅∑360nm780nmuλIeλ {\\displaystyle \\begin{aligned} &Q = {\\frac {1}{K}} \\cdot {L_v} \\cdot {dA} = {\\frac {1}{K}} \\cdot {\\frac {I_v}{dA}} = \\sum _{360nm} ^{780nm} u_{\\lambda }\\ (T_0) \\cdot {\\lambda} \\ \\ \\rightarrow \\ \\ Q = {L_v} \\cdot \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_v}} \\lambda \\cdot {\\mathrm K } = {L_v} \\cdot \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda \\\\ \\end{aligned} } Q=K1⋅Lv⋅dA=K1⋅dAIv=360nm∑780nmuλ (T0)⋅λ → Q=Lv⋅360nm∑780nmIvuλλ⋅K=Lv⋅360nm∑780nmIeuλλ 那么带入映射函数,我们就有: Mapping(C0,L0)=L0⋅∑360nm780nmuλIeλ=F(C0,L0) {\\displaystyle \\begin{aligned} &Mapping(C_0, L_0) = {L_0} \\cdot \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda = F(C_0, L_0) \\\\ \\end{aligned} } Mapping(C0,L0)=L0⋅360nm∑780nmIeuλλ=F(C0,L0) C0=Convert(∑360nm780nmuλIeλ)=F(∑360nm780nmuλIeλ) {\\displaystyle \\begin{aligned} &C_0 = Convert( \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda ) = F( \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda ) \\\\ \\end{aligned} } C0=Convert(360nm∑780nmIeuλλ)=F(360nm∑780nmIeuλλ) 可见,只要选取合适的转换函数 F(C)F(C)F(C) ,我们就可以将色温为 T0T_{0}T0 时对应的颜色,以 F(C0, L0)F(C_0,\\ L_0)F(C0, L0) 的形式表述到函数所在参考系中。因此,这个用于颜色匹配的转换函数 F(C)F(C)F(C) ,就被称为 配色函数(Color-Matching Functions)。 只要能找到适合的 F(C)F(C)F(C) 使颜色能够被统一的衡量,就能制定工业标准,正式开始现代化的工程实践了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_3_2.html":{"url":"Chapter_2/Language/cn/Docs_2_3_2.html","title":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space)","keywords":"","body":"2.3.2 配色函数(Color Matching Functions)& 色彩空间(Color Space) 配色函数(Color Matching Functions),又被称为 色匹配函数,狭义上是用来完成从物理计量到色彩学计量的转换函数的代称。广义上,我们将用来描述一个人为设定的色彩系统里,用于量化颜色的函数称为配色函数。 我们知道,脱离参考系的函数是没有意义的,于是色彩空间概念伴随而生。通过色彩空间,颜色能够被人为设置条件下的单一系统表达。 那么,什么是色彩空间呢? 
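在回答这个问题之前,不妨先用一段示意代码把 2.3.1 的推导落到数值上:按正文给出的普朗克黑体辐射公式与常数,计算指定色温在 360nm - 780nm 可见光范围内的能量密度分布,这正是后文配色函数所要处理的输入。以下为依据正文公式的最小示意(Python),函数与变量命名、采样步长均为说明所设,并非标准库接口:

```python
import math

# 物理常数(取自正文:国际计量大会决议值)
h = 6.62607015e-34   # 普朗克常数, J·s
k = 1.380649e-23     # 玻尔兹曼常数, J/K
c = 2.99792458e8     # 真空光速, m/s

def planck_u(lambda_m: float, T: float) -> float:
    """普朗克黑体辐射定律:波长 lambda_m(米)、色温 T(开尔文)下的能量密度 u(λ, T)。"""
    return (8 * math.pi * h * c / lambda_m ** 5) / (math.exp(h * c / (lambda_m * k * T)) - 1.0)

def visible_spectrum_energy(T: float, step_nm: float = 5.0):
    """在 360nm - 780nm 可见光范围内采样能量密度,返回 (波长nm, u) 列表。"""
    samples = []
    nm = 360.0
    while nm <= 780.0:
        samples.append((nm, planck_u(nm * 1e-9, T)))
        nm += step_nm
    return samples

if __name__ == "__main__":
    # 低色温在长波段富集、高色温在短波段富集,与图 2-8 的趋势一致
    for T in (3000.0, 6500.0):
        spd = visible_spectrum_energy(T)
        peak_nm, _ = max(spd, key=lambda p: p[1])
        print(f"T = {T:.0f} K, 可见光段能量密度峰值约在 {peak_nm:.0f} nm")
```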
色彩空间(Color Space) 又被称为 色彩模型(Color Model),是对使用一组抽象参数结合配色函数(Color-Matching Functions),来表示颜色的数学系统的统称。色彩空间更多的是对学科理论的实践,我们可以将其看为对色彩学最为直观的规格应用。从设计出发点来看,色彩空间大体分为两类:设备相关(Device-Dependent) 色彩空间,和 设备无关(Device-Independent) 色彩空间。 设备相关(Device-Dependent)色彩空间,是指颜色的表达依赖于物理设备本身情况和指定主观参数的色彩空间。诸如:IBM RGB、CMY/CMYK,配色函数可表示颜色范围依赖设备本身性能。 设备无关(Device-Independent)色彩空间,是指一类不依赖于物理设备的客观描述色彩空间。诸如:CIE RGB、CIE XYZ、CIE 1960 LAB、CIE 1960 UCS、HSL、CIE 1964 UVW,设备的选取并不影响色彩空间范围内的颜色表示。 所以,色彩空间虽然是用来理解颜色概念的有力工具,但它本身可能并不客观。需要选定一个,能够统一无论主客的不同数学系统对颜色描述的,基础色彩空间。以此来系统化整个色彩模型体系。直接以光学三原色为基础的设备无关色彩空间,相对的能更好满足这一点,并在简化表达上具有无可替代的优势。基于此,经典三原色函数诞生了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_3_3.html":{"url":"Chapter_2/Language/cn/Docs_2_3_3.html","title":"2.3.3 经典三原色函数(Trichromatic Primaries Functions)","keywords":"","body":"2.3.3 经典三原色函数(Trichromatic Primaries Functions) 1921 年左右,威廉·大卫·赖特(W. David Wright,1906 - 1997) [26] 与 约翰·吉尔德(John Guild,1889 - 1976) [27] 分别对光学三原色的基本度量系数进行了更为科学的测定,并分别于1928年 、1932年以论文形式发表了自己的结果。这两个实验,为 CIE 经典三原色函数(Trichromatic Primaries Functions)标准 的制定提供了极为关键的帮助。 我们将代表不同可见光波长对人眼视锥细胞的刺激程度的函数,称为色感函数,也就是选取人眼为传感器的 光谱响应函数(SPF [Spectral Response Function])。由色感函数在可见光波段所构成的曲线,称为色感曲线。由实验所拟合的三原色的色感曲线,在 435.8nm(蓝)、 546.1nm(绿)、 700nm(红)处达到最大峰值,如下图: 图 2-9 CIE 1931 RGB 采用的三原色色感函数 CIE 在两者实验的基础上,确定了以 光谱功率分布(SPD [Spectral Power Distribution]) 为基准的混合波三色分离函数: R=∫0∞S(λ)r‾(λ)dλG=∫0∞S(λ)g‾(λ)dλB=∫0∞S(λ)b‾(λ)dλ {\\displaystyle \\begin{aligned} &{\\displaystyle R =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {r}}(\\lambda )\\,d\\lambda } \\\\ &{\\displaystyle G =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {g}}(\\lambda )\\,d\\lambda } \\\\ &{\\displaystyle B =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {b}}(\\lambda )\\,d\\lambda } \\\\ \\end{aligned} } R=∫0∞S(λ)r(λ)dλG=∫0∞S(λ)g(λ)dλB=∫0∞S(λ)b(λ)dλ 其中, 以 r‾(λ){\\overline {r}}(\\lambda )r(λ) 、 g‾(λ){\\overline {g}}(\\lambda )g(λ) 、 b‾(λ){\\overline {b}}(\\lambda )b(λ) 即为基准三原色实验测得的拟合结果的色感函数,存在关系: ∫0∞r‾(λ)dλ=∫0∞g‾(λ)dλ=∫0∞b‾(λ)dλ {\\displaystyle \\int _{0}^{\\infty }{\\overline {r}}(\\lambda )\\,d\\lambda =\\int _{0}^{\\infty }{\\overline {g}}(\\lambda )\\,d\\lambda =\\int _{0}^{\\infty }{\\overline {b}}(\\lambda )\\,d\\lambda } ∫0∞r(λ)dλ=∫0∞g(λ)dλ=∫0∞b(λ)dλ 有 S(λ)S(\\lambda )S(λ) 为目标波长 λ\\lambdaλ 的光谱功率分布函数: S(λ)=Le(λ)θ=2∘≈d2Φe(λ)dAdΩ=dEe(λ)dΩ {\\displaystyle S(\\lambda) = L_{\\mathrm {e}}(\\lambda)_{\\theta=2^{\\circ}} \\approx {\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {e} }(\\lambda)}{\\mathrm {d} A\\,\\mathrm {d} \\Omega }} ={\\frac {\\mathrm {d} E _{\\mathrm {e} }(\\lambda)}{d \\Omega }} } S(λ)=Le(λ)θ=2∘≈dAdΩd2Φe(λ)=dΩdEe(λ) SPD 公式式中,LeL_{\\mathrm {e}}Le 为辐射亮度, Φe\\Phi _{\\mathrm {e}}Φe 为辐射通量为, EeE _{\\mathrm {e}}Ee 为辐射照射度。 通过这几个属于 辐射度学(Radiometry) 中的可被测量物理量,指定波长 的光线,就能被相对化表示为: Ray(λ)=C(R,G,B) Ray(\\lambda)= C(R,G,B) Ray(λ)=C(R,G,B) 由于 CIE RGB 所采用的改进后的配色实验,仍然存在亥姆霍兹配色实验里就存在的红光波段的负色匹配。 因此还需要进一步改进才能用于工业应用。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_3_4.html":{"url":"Chapter_2/Language/cn/Docs_2_3_4.html","title":"2.3.4 经典三刺激函数(Tristimulus Values Functions)","keywords":"","body":"2.3.4 经典三刺激函数(Tristimulus Values Functions) CIE 在 1931 年同年提出 CIE XYZ 色彩空间,尝试通过人为设计的色感函数 [12] [13],来避 RGB 的 负色匹配 问题。为了区别于 CIE RGB 中,通过实验测定拟合而得的三原色色感函数。我们将新的函数称为 CIE 三刺激函数(Tristimulus Values Functions),用来代替原有 r‾(λ){\\overline {r}}(\\lambda )r(λ) 、 g‾(λ){\\overline 
{g}}(\\lambda )g(λ) 、 b‾(λ){\\overline {b}}(\\lambda )b(λ) ,记为 x‾(λ){\\overline {x}}(\\lambda )x(λ) 、 y‾(λ){\\overline {y}}(\\lambda )y(λ) 、 z‾(λ){\\overline {z}}(\\lambda )z(λ) 。三个刺激函数对应的刺激曲线如下图: 图 2-10 CIE 1931 XYZ 采用的三原色色感函数 CIE 在三个刺激函数为基准下,确定了的不同波长光的三刺激值分离函数: X=∫0∞S(λ)x‾(λ)dλY=∫0∞S(λ)y‾(λ)dλZ=∫0∞S(λ)z‾(λ)dλ {\\displaystyle \\begin{aligned} &{\\displaystyle X =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {x}}(\\lambda )\\,d\\lambda } \\\\ &{\\displaystyle Y =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {y}}(\\lambda )\\,d\\lambda } \\\\ &{\\displaystyle Z =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {z}}(\\lambda )\\,d\\lambda } \\\\ \\end{aligned} } X=∫0∞S(λ)x(λ)dλY=∫0∞S(λ)y(λ)dλZ=∫0∞S(λ)z(λ)dλ 其中, 有 r‾(λ){\\overline {r}}(\\lambda )r(λ) 、 g‾(λ){\\overline {g}}(\\lambda )g(λ) 、 b‾(λ){\\overline {b}}(\\lambda )b(λ) 是将理想刺激值峰值 (μ,σ1,σ2)(\\mu ,\\sigma _{1},\\sigma _{2})(μ,σ1,σ2) ,带入高斯公式所得,这和 RGB 色感函数的拟合有一定的不同。峰值 (μ,σ1,σ2)(\\mu ,\\sigma _{1},\\sigma _{2})(μ,σ1,σ2) 中, μ\\muμ 代表峰值波长, σ1\\sigma _{1}σ1 代表 μ\\muμ 值左侧生效范围偏移量, σ2\\sigma _{2}σ2 代表 μ\\muμ 值右侧生效范围偏移量。XYZ 在度量峰值上取用了理想状态值,有: g(λ; μ,σ1,σ2)={exp(−12(λ−μ)2/σ12),λμ,exp(−12(λ−μ)2/σ22),λ≥μ. {\\displaystyle g(\\lambda;\\ \\mu ,\\sigma _{1},\\sigma _{2}) = {\\begin{cases} \\exp {\\bigl (}{-{\\tfrac {1}{2}}(\\lambda-\\mu )^{2}/{\\sigma _{1}}^{2}}{\\bigr )}, &\\lambdag(λ; μ,σ1,σ2)={exp(−21(λ−μ)2/σ12),exp(−21(λ−μ)2/σ22),λμ,λ≥μ. 推导而出: x‾(λ)=1.056g(λ; 599.8, 37.9, 31.0)+0.362g(λ; 442.0, 16.0, 26.7)−0.065g(λ; 501.1, 20.4, 26.2)y‾(λ)=0.821g(λ; 568.8, 46.9, 40.5)+0.286g(λ; 530.9, 16.3, 31.1)z‾(λ)=1.217g(λ; 437.0, 11.8, 36.0)+0.681g(λ; 459.0, 26.0, 13.8) {\\displaystyle \\begin{aligned} &{\\displaystyle {\\overline {x}}(\\lambda ) = 1.056g(\\lambda ;\\ 599.8,\\ 37.9,\\ 31.0)+0.362g(\\lambda ;\\ 442.0,\\ 16.0,\\ 26.7)-0.065g(\\lambda ;\\ 501.1,\\ 20.4,\\ 26.2) } \\\\ &{\\displaystyle {\\overline {y}}(\\lambda ) = 0.821g(\\lambda ;\\ 568.8,\\ 46.9,\\ 40.5)+0.286g(\\lambda ;\\ 530.9,\\ 16.3,\\ 31.1) } \\\\ &{\\displaystyle {\\overline {z}}(\\lambda ) = 1.217g(\\lambda ;\\ 437.0,\\ 11.8,\\ 36.0)+0.681g(\\lambda ;\\ 459.0,\\ 26.0,\\ 13.8) } \\\\ \\end{aligned} } x(λ)=1.056g(λ; 599.8, 37.9, 31.0)+0.362g(λ; 442.0, 16.0, 26.7)−0.065g(λ; 501.1, 20.4, 26.2)y(λ)=0.821g(λ; 568.8, 46.9, 40.5)+0.286g(λ; 530.9, 16.3, 31.1)z(λ)=1.217g(λ; 437.0, 11.8, 36.0)+0.681g(λ; 459.0, 26.0, 13.8) 而 S(λ)S(\\lambda )S(λ) 仍然为为目标波长 λ\\lambdaλ 的光谱功率分布函数: S(λ)=Le(λ)θ=2∘≈d2Φe(λ)dAdΩ=dEe(λ)dΩ {\\displaystyle S(\\lambda) = L_{\\mathrm {e}}(\\lambda)_{\\theta=2^{\\circ}} \\approx {\\frac {\\mathrm {d} ^{2}\\Phi _{\\mathrm {e} }(\\lambda)}{\\mathrm {d} A\\,\\mathrm {d} \\Omega }} ={\\frac {\\mathrm {d} E _{\\mathrm {e} }(\\lambda)}{d \\Omega }} } S(λ)=Le(λ)θ=2∘≈dAdΩd2Φe(λ)=dΩdEe(λ) 同样的,指定波长 λ\\lambdaλ 的光线,就能被相对化表示为: Ray(λ)=C(X,Y,Z) Ray(\\lambda)= C(X,Y,Z) Ray(λ)=C(X,Y,Z) 通过以 (X,Y,Z)(X,Y,Z)(X,Y,Z) 代替 (R,G,B)(R,G,B)(R,G,B) 的度量方式,CIE XYZ 解决了负色匹配问题。为了区别于 (R,G,B)(R,G,B)(R,G,B) 光学三原色的称谓,我们将 (X,Y,Z)(X,Y,Z)(X,Y,Z) 称为 三刺激值(Tristimulus Values)。 不过,CIE 1931 RGB & CIE 1931 XYZ 中对于光学三原色标准波长的测量/设置,在现代光学体系中被认为有所偏颇的。在选取基准波长时,1931 RGB 蓝绿取用气态水银(Hg)放电获谱线产生的峰值波长 435.8nm(蓝)和 546.1nm(绿),而红色却因为人眼在对 700nm 波长附近的颜色感知几乎无变化的情况下,人为介入设定为还原配色实验理想值 700nm。这一历史局限性导致的情况,也被基于 RGB 测定而考量的 XYZ 继承了。以致于为两者的 “均色问题” 埋下了伏笔。 即便如此,经典三原色函数和三刺激函数,仍然为现代色彩体系奠定了基础公式。使我们能够 以数理形式转换对应目标波长的目标光波,到相应的度量衡系统。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 
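作为对上述三刺激函数定义的补充,下面给出一段最小示意代码(Python):按正文列出的系数拼出分段高斯近似的 x̄(λ)、ȳ(λ)、z̄(λ),再对给定光谱功率分布做数值积分得到 (X, Y, Z)。其中积分步长、积分区间与示例所用的等能光谱均为演示用假设,并非 CIE 标准实现:

```python
import math

def g(lam: float, mu: float, s1: float, s2: float) -> float:
    """正文中的分段高斯 g(λ; μ, σ1, σ2):峰值左右两侧取不同宽度。"""
    s = s1 if lam < mu else s2
    return math.exp(-0.5 * ((lam - mu) / s) ** 2)

def xyz_cmf(lam: float):
    """按正文给出的系数近似 CIE 1931 三刺激函数 x̄(λ), ȳ(λ), z̄(λ),λ 单位 nm。"""
    x = 1.056 * g(lam, 599.8, 37.9, 31.0) + 0.362 * g(lam, 442.0, 16.0, 26.7) - 0.065 * g(lam, 501.1, 20.4, 26.2)
    y = 0.821 * g(lam, 568.8, 46.9, 40.5) + 0.286 * g(lam, 530.9, 16.3, 31.1)
    z = 1.217 * g(lam, 437.0, 11.8, 36.0) + 0.681 * g(lam, 459.0, 26.0, 13.8)
    return x, y, z

def spd_to_xyz(spd, step_nm: float = 5.0):
    """对光谱功率分布 S(λ) 做数值积分,得到三刺激值 (X, Y, Z)。
    spd: 接受波长(nm)并返回功率的函数,此处仅作示意参数。"""
    X = Y = Z = 0.0
    lam = 360.0
    while lam <= 780.0:
        s = spd(lam)
        x, y, z = xyz_cmf(lam)
        X += s * x * step_nm
        Y += s * y * step_nm
        Z += s * z * step_nm
        lam += step_nm
    return X, Y, Z

if __name__ == "__main__":
    # 以等能光谱 S(λ) ≡ 1 为例,打印归一化后的色度 (x, y)
    X, Y, Z = spd_to_xyz(lambda lam: 1.0)
    total = X + Y + Z
    print(f"等能光谱色度: x = {X / total:.4f}, y = {Y / total:.4f}")
```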
"},"Chapter_2/Language/cn/Docs_2_3_5.html":{"url":"Chapter_2/Language/cn/Docs_2_3_5.html","title":"2.3.5 现代色彩体系(Modern Color System)","keywords":"","body":"2.3.5 现代色彩体系(Modern Color System) 现代色彩体系(Modern Color System) 的基石,即为 1931 年由前身为国际光度委员会(1900, IPC [International Photometric Commission])的国际照明委员会(CIE [International Commission on Illumination]) 提出的 CIE RGB & CIE YUV 色彩空间。 图 2-11 现代色彩体系(Modern Color System)关系图谱[20] 上图很好的展示了 CIE RGB & CIE XYZ 色彩空间与经典物理学概念和其余色彩空间之间的转换关系。当前被广泛用于流媒体传输和图像颜色信息压缩的 YUV 系列颜色格式(Color Format),便是 CIE RGB 体系下的产物。 既然 CIE RGB 配合 CIE XYZ 色彩空间已经能够达成贯通存粹理论与工程应用的边界,那为什么还要引入或设计其余的色彩空间呢? 其中最典型的问题就在于上文提到的,CIE RGB & CIE XYZ 的“均色问题”。CIE RGB & CIE XYZ 并不能很好的代表人对色彩的直观感受。通俗来讲,就是人对颜色变化的感知是均匀的,而 CIE XYZ 无法将这种主观的均匀感,再不经过参考系转换的情况下,完全等价的表示出来。 所以,CIE 在 1960 年针对性的提出了 “均匀色彩空间”(UCS [Uniform Color Space])色彩空间 [21] [22],来尝试进一步整合相关概念并更换规范化体系。UCS 自诞生后便经过了多次迭代,如 1960 UCS、1976 UCS 等。1976 UCS 对于均色度量非常关键,它还有另外一个更为知名的名称,那就是 CIE LUV 色彩空间。 另一方面,因为受限于设备和技术,很多商业化产品(包括软硬件)根本无法表表示出来全部可见光谱区域。这种情况下,虽然 CIE RGB & CIE XYZ 色彩空间能够起到度量颜色的作用,却不适合用于指定设备来有条件的表示有限的颜色。这也让很多设备供应商,不得不根据自身的软硬件情况,来定制一些色彩模型供给设备使用。诸如 sRGB 就属于此类。 即便如此,在整个现代色彩体系之下,CIE RGB & CIE XYZ 色彩空间仍然是最为通用的度量衡体系。这或许是因为,它们较高的推广程度和便于计算的特性决定的。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_4.html":{"url":"Chapter_2/Language/cn/Docs_2_4.html","title":"2.4 色彩的对比","keywords":"","body":"2.4 色彩的对比 自 1931 年 CIE RGB & CIE XYZ 色彩空间 [12] 被提出后,色彩在工程中的对比标准就被统一在了 CIE 逐步采纳、整理和定义的 一系列规格之下。而 CIE XYZ 色彩空间具有直观客观和正向全可见光色域的特点,使得它更适合被用来作为工业应用的基准体系。所以,我们往往都会将需要处理的颜色数据, 转换到 CIE XYZ 之下进行权衡。 当然,整个 CIE 色彩空间体系,其提出迭代的过程和当下的统治地位也并不是一蹴而就。这里先对工程上由 CIE 规范的关键概念进行介绍。以便于为我们更好的理解后续章节中,不同色彩空间的提出背景和针对性解决的问题困难,提供帮助。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_4_1.html":{"url":"Chapter_2/Language/cn/Docs_2_4_1.html","title":"2.4.1 色域(Color Gamut )","keywords":"","body":"2.4.1 色域(Color Gamut) 色域(Color Gamut) 是一个泛指的广义概念,代表对应色彩空间可被表达的所有颜色构成的区域。不同色彩空间的色域可能是不一样的,所以必须有一个统一的度量衡系统来进行比对。被选中作为度量衡的系统必须能客观的表示颜色的物理信息,并且不受其他主观因素影响。因此,只有设备无关色彩空间可以满足要求。当前最常用的度量衡系统,就是 CIE XYZ 色彩空间。CIE XYZ 色彩空间的色域,涵盖了人眼能够观察到的整个可见光谱范围,被 CIE 称为 CIE 标准观察者色域(CIE Standard Observer Gamut)。简称 标准色域。 通常,我们使用 CIE 色度图 来表示 CIE 标准观察者色域。 图 2-12 CIE 标准观察者色域在 CIE 色度图上的表示 由于 CIE RGB & XYZ 最基本的定义是基于 2° 角 的 视网膜小窝(Fovea Centralis)间隔 来获取的人眼视觉感受效果。因此,通常我们所称的色域以及其相关概念(如色度等),在未明确说明视网膜小窝间隔夹脚的情况下,都是假定指定基于 2° 角的测量结果( 除 2° 角外,相对常用的还有 10° 角 )。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_4_2.html":{"url":"Chapter_2/Language/cn/Docs_2_4_2.html","title":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram)","keywords":"","body":"2.4.2 色度(Chroma)& 色度平面(Chroma Plane)& 色度图(Chroma Diagram) 色度(Chroma|Chromaticity) 是一个泛指的广义概念,是对除 光亮度(Luminance) 之外,由色调和饱和度或其衍生参数组成的颜色信息的统称。现代工程上的色度概念,最早是在 CIE XYZ 色彩空间对标准色度图的推导过程中引入的。 CIE XYZ 将色度定义为:XYZ 色彩空间内代表颜色的三维向量,由指定平面切割和归一化后,沿 Z 轴垂直方向在 XY 轴平面上二维投影向量。这个用于切割降维和压缩参数范围的平面,被称为 色度平面(Chroma Plane|Chromaticity Plane)。整个色彩空间色域在 XY 轴平面的二维投影,被称为 CIE xyY 色度图(CIE xyY Chromaticity Diagram),简称 色度图(Chroma Diagram)。 为什么是 xyY 色度图?因为决定颜色的除了 xy 代表色度外,还需要光亮度(Luminance)关键量。CIE XYZ 直接取用颜色在 XYZ 色彩空间里的 Y 方向分量,代替指代光亮度。 图 2-13 CIE 色度平面切割标准色域并投影色度图的示意图 可见,使用色度的色彩空间,色度的量化和其内部参数的选取息息相关。不同的色彩空间在色度的定义上,存在着表述上的不同。在大多数情况下,CIE XYZ 之后的色彩空间,都会取用 CIE 测定的 700nm 波长标准红点(Red Point) 为 
基准轴正轴方向,来构建自身的色度参数。究其原因是,相同的基准可以便于将自身色域转换到 CIE XYZ 统一度量衡上对比。所以,色度也常常被直接用 CIE XYZ 色彩空间的定义来表示。 CIE XYZ 色彩空间取用 [X=1, Y=1, Z=1] 构成的三棱锥底面所在平面为色度平面。该平面上的 XYZ 坐标系内点存在关系: Plane:{X+Y+Z=1} {\\displaystyle \\begin{aligned} &{\\displaystyle Plane :\\{ {X+Y+Z} = 1 \\}} \\\\ \\end{aligned} } Plane:{X+Y+Z=1} 记 XYZ 色彩空间中存在颜色 (X,Y,Z)(X, Y, Z)(X,Y,Z) 在 XY 平面的投影为 (x,y)(x, y)(x,y) ,则有: Set: (x+y+z)=1 Then:Chromaticity:{(x,y)=(XX+Y+Z,YX+Y+Z)}Luminance:{Y} {\\displaystyle \\begin{aligned} &Set:\\ (x+y+z) = 1 \\ \\ \\ {Then:} \\\\ &{\\displaystyle Chromaticity:\\{ (x,y) = ({\\frac {X}{X+Y+Z}}, {\\frac {Y}{X+Y+Z}}) \\} } \\\\ &{\\displaystyle Luminance:\\{ Y \\} } \\\\ \\end{aligned} } Set: (x+y+z)=1 Then:Chromaticity:{(x,y)=(X+Y+ZX,X+Y+ZY)}Luminance:{Y} 在已知 (x,y,Y)(x, y, Y)(x,y,Y) 的情况下,也可以反向获得 (X,Y,Z)(X, Y, Z)(X,Y,Z) : (X,Y,Z)=(Yy⋅x, Y, Yy⋅(1−x−y) ) {\\displaystyle \\begin{aligned} (X, Y, Z) &= ({\\frac {Y}{y}} \\cdot x, \\ \\ Y, \\ \\ {\\frac {Y}{y}} \\cdot (1-x-y) \\ ) \\end{aligned} } (X,Y,Z)=(yY⋅x, Y, yY⋅(1−x−y) ) 所以,只要根据 (x,y,Y)(x, y, Y)(x,y,Y) 值,就能将色度图上的颜色还原到 XYZ 实际坐标。而其中的 (x,y)(x, y)(x,y) 值,就是 CIE 中颜色色度的表示形式。 那么在颜色能够被统一描述的前提下,颜色间的差异怎么来说明呢? Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_4_3.html":{"url":"Chapter_2/Language/cn/Docs_2_4_3.html","title":"2.4.3 色差(Chromatic Aberration)","keywords":"","body":"2.4.3 色差(Chromatic Aberration) 色差(Chromatic Aberration) 是一个相对概念,指的是两个色度不同的颜色之间的差异。 广义色差(gCA [General Chromatic Aberration]) 不限定用于对比的两个颜色对应的色调,此时的色差计算的是颜色色度的差异。 狭义色差(sCA [Special Chromatic Aberration]) 则要求对比的两个颜色具有相同的色调,此时的色差计算的仅仅是颜色饱和度的变化。因此,狭义色差可以被认为是广义色差的一种特殊情况。 色差的计算为了简洁,通常都选择使用欧式距离表示。记对比的两颜色分别为 C1C_1C1 、 C2C_2C2 ,色差为 CCC ,广义色差为 ΔC\\Delta CΔC ,有: C={gCA:{ΔC=ΔH2+ΔS2≈distance(C1, C2)}sCA:{ΔC∣(ΔH=0)=ΔS2=ΔS≈range(C1, C2)} {\\displaystyle \\begin{aligned} &C={ \\begin{cases} &{\\displaystyle gCA: \\{\\Delta C ={\\sqrt {\\Delta H ^{2} + \\Delta S ^{2} }} \\approx {distance} (C_1,\\ C_2)} \\} \\\\ &{\\displaystyle sCA: \\{ {\\Delta C}|_{({\\Delta H = 0})} = {\\sqrt {\\Delta S ^{2}}} = \\Delta S \\approx {range} (C_1,\\ C_2) \\} } \\end{cases}} \\\\ \\end{aligned} } C={gCA:{ΔC=√ΔH2+ΔS2≈distance(C1, C2)}sCA:{ΔC∣(ΔH=0)=√ΔS2=ΔS≈range(C1, C2)} 带入 CIE XYZ 规则,色差的表示就可以直接以色度 (x,y)(x, y)(x,y) 计算了: C=Δx2+Δy2 {\\displaystyle \\begin{aligned} &C = {\\sqrt {\\Delta x ^{2} + \\Delta y ^{2} }} \\\\ \\end{aligned} } C=√Δx2+Δy2 替换了色调饱和度参数,使广义狭义在公式层面得到了统一。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_4_4.html":{"url":"Chapter_2/Language/cn/Docs_2_4_4.html","title":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature)","keywords":"","body":"2.4.4 色温(Color Temperature)& 相关色温(Correlated Color Temperature) 我们在之前讲述配色函数的理论基础时,已经阐述过色温的概念了。这里做一下回顾。 色温(Color Temperature) 是由物体本身的黑体辐射决定的一个物理量,计量单位为 K(开尔文温度)。它被定义为,绝对黑体从绝对零度(-273.15℃)开始加温后所呈现出的颜色。 CIE 于 1960 UCS 色彩空间 中引入了色温的表示,并根据工业光源特性引入了 相关色温(CCT [Correlated Color Temperature]) 来表示 一系列物理(辐射度)色温的近似值。 色温与 1960 UCS 快速计算 记色温 TTT 有对应复合波长 λT\\lambda_TλT ,色温 TTT 在 CIE XYZ 色彩空间上的颜色为 CT(XT,YT,ZT)C_T(X_T,Y_T,Z_T)CT(XT,YT,ZT) ,则根据前文中对于配色函数理论基础的推导,将波长 λT\\lambda_TλT 带入经典三刺激函数,我们有: CT=FXYZ (λT) , Q=∫0∞S(λ)dλ≈Lv⋅∑360nm780nmuλIeλ=Lv⋅u(λT)XT=∫0∞S(λ)x‾(λ)dλ≈Lv⋅∑360nm780nm(uλIeλ⋅x‾(λ))YT=∫0∞S(λ)y‾(λ)dλ≈Lv⋅∑360nm780nm(uλIeλ⋅y‾(λ))ZT=∫0∞S(λ)z‾(λ)dλ≈Lv⋅∑360nm780nm(uλIeλ⋅z‾(λ)) {\\displaystyle \\begin{aligned} &C_T = F_{XYZ\\ }(\\lambda_T) \\ , \\ \\ \\ Q = \\int 
_{0}^{\\infty }S(\\lambda )\\,d\\lambda \\approx {L_v} \\cdot \\sum _{360nm} ^{780nm} {\\frac {u_{\\lambda}}{I_e}} \\lambda = {L_v} \\cdot {u (\\lambda_T)} \\\\ &X_T =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {x}}(\\lambda )\\,d\\lambda \\approx {L_v} \\cdot \\sum _{360nm} ^{780nm} ({\\frac {u_{\\lambda}}{I_e}} \\lambda \\cdot {\\overline {x}}(\\lambda )) \\\\ &Y_T =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {y}}(\\lambda )\\,d\\lambda \\approx {L_v} \\cdot \\sum _{360nm} ^{780nm} ({\\frac {u_{\\lambda}}{I_e}} \\lambda \\cdot {\\overline {y}}(\\lambda )) \\\\ &Z_T =\\int _{0}^{\\infty }S(\\lambda )\\,{\\overline {z}}(\\lambda )\\,d\\lambda \\approx {L_v} \\cdot \\sum _{360nm} ^{780nm} ({\\frac {u_{\\lambda}}{I_e}} \\lambda \\cdot {\\overline {z}}(\\lambda )) \\\\ \\end{aligned} } CT=FXYZ (λT) , Q=∫0∞S(λ)dλ≈Lv⋅360nm∑780nmIeuλλ=Lv⋅u(λT)XT=∫0∞S(λ)x(λ)dλ≈Lv⋅360nm∑780nm(Ieuλλ⋅x(λ))YT=∫0∞S(λ)y(λ)dλ≈Lv⋅360nm∑780nm(Ieuλλ⋅y(λ))ZT=∫0∞S(λ)z(λ)dλ≈Lv⋅360nm∑780nm(Ieuλλ⋅z(λ)) 上式就是色温 TTT 在 CIE XYZ 上的表达,这样的表示不够简练。 如果能够找到一个由 XYZ 衍生的色彩空间,能够直接由色温 TTT 值本身计算相应的颜色就好了。借用此空间,我们就能够依据该色彩空间与 XYZ 之间的联系,快速转换色温在该色彩空间上的表示到 XYZ 内,从而间接起到在 XYZ 上精简色温计算的目的。 1960年,CIE 采用了来自柯达实验室的 大卫·麦克亚当(David Lewis MacAdam,1910 - 1998) 在 迪恩·布鲁斯特·朱迪(Deane Brewster Judd,1900 - 1972) 的研究上提出的简化色度、色温与相关色温表示方案,并将方案应用在了 CIE 1931 XYZ 色彩空间上作为对 CIE 1931 XYZ 体系的补充。1960 UCS 对 XYZ 色彩空间的观察角度进行了透视变换,从不同的方向获取了 XYZ 色度平面的投影,以此构建了自身的色度特征。 记 CIE 1960 UCS 中颜色为 (U,V,W)(U, V, W)(U,V,W) ,有: (X, Y, Z)=(32U, Y, 32U−3V+2W)(U,V,W)=(23X, Y, −X+3Y+2Z2) {\\displaystyle \\begin{aligned} &(X,\\ Y,\\ Z) = ({\\tfrac {3}{2}} U, \\ \\ \\ Y, \\ \\ \\ {\\tfrac {3}{2}} U -3V + 2W ) \\\\ &(U, V, W) = ({\\tfrac {2}{3}} X, \\ \\ \\ Y, \\ \\ \\ {\\frac {-X +3Y + 2Z}{2}} ) \\end{aligned} } (X, Y, Z)=(23U, Y, 23U−3V+2W)(U,V,W)=(32X, Y, 2−X+3Y+2Z) 取 (U,V,W)(U, V, W)(U,V,W) 对应色度为 (u,v)(u, v)(u,v) 。存在: (u,v)=(4x12y−2x+3, 6y12y−2x+3)(x,y)=(3u2u−8v+4 , 2v2u−8v+4 ) {\\displaystyle \\begin{aligned} &(u, v) = ({\\frac {4x}{12y-2x+3}}, \\ {\\frac {6y}{12y-2x+3}}) \\\\ &(x, y) = ({\\frac {3u}{2u-8v+4}}\\ \\ , \\ {\\frac {2v}{2u-8v+4}} \\ ) \\end{aligned} } (u,v)=(12y−2x+34x, 12y−2x+36y)(x,y)=(2u−8v+43u , 2u−8v+42v ) 显然,颜色表示在 1960 UCS 和 1931 XYZ 间的坐标转换仅为 线性变化,计算非常便捷。 1960 UCS 有什么优势能让 CIE 大动干戈呢?关键在于 UCS 能够以 切比雪夫近似值 [27] ,逼近范围在 (1000K, 15000K)(1000K, \\ 15000K)(1000K, 15000K) 的色温对应的色度 (u¯(T), v¯(T))(\\bar u(T),\\ \\bar v(T))(u¯(T), v¯(T)) 取值,且控制在 ∣u−u¯∣8⋅10−5\\left|u-{\\bar {u}}\\right|∣u−u¯∣8⋅10−5 和 ∣v−v¯∣9⋅10−5\\left|v-{\\bar {v}}\\right|∣v−v¯∣9⋅10−5 的误差范围。如此误差可称得上相当精确了。有: u¯(T)≈0.860117757+1.54118254⋅10−4⋅T+1.28641212⋅10−7⋅T21+8.42420235⋅10−4⋅T+7.08145163⋅10−7⋅T2v¯(T)≈0.317398726+4.22806245⋅10−5⋅T+4.20481691⋅10−8⋅T21−2.89741816⋅10−5⋅T+1.61456053⋅10−7⋅T2 {\\displaystyle \\begin{aligned} &{\\bar {u}}(T)\\approx {\\frac {0.860117757+1.54118254\\cdot 10^-4\\cdot T+1.28641212\\cdot 10^-7\\cdot T^{2}} {1+8.42420235\\cdot 10^-4\\cdot T+7.08145163\\cdot 10^-7\\cdot T^{2}}} \\\\ &{\\bar {v}}(T)\\approx {\\frac {0.317398726+4.22806245\\cdot 10^-5\\cdot T+4.20481691\\cdot 10^-8\\cdot T^{2}} {1-2.89741816\\cdot 10^-5\\cdot T+1.61456053\\cdot 10^-7\\cdot T^{2}}} \\end{aligned} } u¯(T)≈1+8.42420235⋅10−4⋅T+7.08145163⋅10−7⋅T20.860117757+1.54118254⋅10−4⋅T+1.28641212⋅10−7⋅T2v¯(T)≈1−2.89741816⋅10−5⋅T+1.61456053⋅10−7⋅T20.317398726+4.22806245⋅10−5⋅T+4.20481691⋅10−8⋅T2 上式也被称为 UCS 色温函数。以色温函数计算色度 (u,v)(u, v)(u,v) ,再通过固定三刺激值的 Y=Y0Y = Y_0Y=Y0 来快速的返向计算色温 TTT 的 (X,Y,Z)(X, Y, Z)(X,Y,Z) 表示。有: (X, Y, Z)=(Y0⋅3u2v, Y0, Y0⋅−u−10v+42v) {\\displaystyle \\begin{aligned} &(X,\\ Y,\\ Z) = (Y_0 \\cdot {\\tfrac {3u}{2v}} , \\ 
\\ Y_0, \\ \\ Y_0 \\cdot {\\tfrac {-u-10v+4}{2v}}) \\end{aligned} } (X, Y, Z)=(Y0⋅2v3u, Y0, Y0⋅2v−u−10v+4) 基于这一点,到现在为止 1960 UCS 仍然是色温及相关色温的最佳计算工具。 CIE 利用 UCS 特性,将一系列物理色温的色彩学近似概念引入了 CIE 系统。 普朗克轨迹(Planckian Locus) 如果取 TTT 范围为 (0, ∞)(0, \\ \\infty)(0, ∞) 开尔文,那么由 TTT 在指定色彩空间上的所有对应颜色所构成的轨迹曲线,就被称为 普朗克轨迹(Planckian Locus),也被称为 黑体轨迹(Blackbody Locus)。换而言之,通过将色温 带入色温函数所求的的色度,都在普朗克轨迹上。所以,我们只需要将选定范围色温带入 UCS 色温函数,就能将普朗克轨迹表示到 UCS 色度图上。 图 2-14 普朗克轨迹在 UCS 色度图上的表示 把 UCS 色温函数转到 XYZ 的色度表示,就有: (x¯(T),y¯(T))=(3u¯(T)2u¯(T)−8v¯(T)+4 , 2v¯(T)2u¯(T)−8v¯(T)+4 ) {\\displaystyle \\begin{aligned} &({\\bar {x}}(T), {\\bar {y}}(T)) = ({\\frac {3{\\bar {u}}(T)}{2{\\bar {u}}(T)-8{\\bar {v}}(T)+4}}\\ \\ , \\ {\\frac {2{\\bar {v}}(T)}{2{\\bar {u}}(T)-8{\\bar {v}}(T)+4}} \\ ) \\end{aligned} } (x¯(T),y¯(T))=(2u¯(T)−8v¯(T)+43u¯(T) , 2u¯(T)−8v¯(T)+42v¯(T) ) 此表达式,即色温在 CIE 色度图上的普朗克轨迹函数(Planckian Locus Functions),也被称为 CIE 色温函数。效果如下: 图 2-15 普朗克轨迹在 CIE 色度图上的表示 含有 普朗克轨迹 的 CIE 色度图,让我们能够 直观的表现自然辐射源 在 CIE 标准下的色彩特点。但是人眼对色温的感受并不会如此精准,很多在感知上近似物理色温的颜色,实际色度却在普朗克轨迹外。 如何在色温基础上引入人眼感受特征呢?相信已经有读者注意到图中,与 普朗克轨迹 垂直或交叉的直线 了。这就是解决人眼感受问题而用到 相关色温 和 等色温线 工具。作为 CIE 体系内的标准度量衡工具,相关色温和等色温线必须具有体系内完全可求的特点,即:从指定色度推算相关色温,和从相关色温推算对应色度的能力。 那么,什么是相关色温? 相关色温(CCT)的等色温线(CCT Isotherms)与麦克亚当法 相关色温(CCT [Correlated Color Temperature]) 是指在同等光亮度情况下,于感知上趋同于选定色温的范围内颜色的集合 [28] [29]。通常我们会直接以选定的色温参考系的温度,来代替表示相关色温的温度。从主观角度理解,色温与相关色温在颜色上并无差异,或差异无法被明显察觉。 而由选定色温与其相关色温共同构成色彩空间内的连线,就被称为 等色温线(CCT Isotherms),有时也会被简称为 等温线 [29]。 相关色温在 CIE 中依赖于等温线表示,而等温线依赖于对普朗克轨迹。CIE 采用麦克亚当建议的测量方式,以两个视觉恰克区分临界点间的跨度为单位麦勒德( mrdmrdmrd [Maillard]),记为 mrdmrdmrd 。有麦勒德和色温单位开尔文间换算关系: mrd=106Tc mrd = {\\tfrac {10^6}T_c } mrd=T106c CIE 以麦勒德为度量,来等分普朗克轨迹。例如,取 1 mrd=106K1 \\ mrd = 10^6 K1 mrd=106K 就指以 106K10^6 K106K 为分割步长,取 500 mrd=2000 K500 \\ mrd = 2000\\ K500 mrd=2000 K 就指以 2000 K2000\\ K2000 K 为分割步长。具体麦勒德的选取,依赖于实际应用场景下,对相关色温配色精确程度的要求。麦勒德取值越小,精确程度越低;反之,麦勒德取值越大,精确程度越高;即精确度与麦勒德成正比关系。可知当麦勒德取值趋近于 ∞ mrd\\infty \\ mrd∞ mrd 时,整个普朗克轨迹不再分割而是完全连续。 那么在分割后,CIE 怎么计算 CCT 呢?麦克亚当采用的是垂直取值法,即从当前想要知道相关色温的颜色在 UCS 色度图上表示位置处,向 UCS 色度图中的普朗克曲线做垂线。做得垂线与普朗克轨迹交点处对应的色温,就是当前相关色温对应的物理色温。 记目标相关色温 TcctT_{cct}Tcct 的色度为 (u¯(Tcct), v¯(Tcct))(\\bar u(T_{cct}),\\ \\bar v(T_{cct}))(u¯(Tcct), v¯(Tcct)) , TcctT_{cct}Tcct 对应的物理色温 TcT_cTc 的色度为 (u¯(Tc), v¯(Tc))(\\bar u(T_c),\\ \\bar v(T_c))(u¯(Tc), v¯(Tc)) ,那么在 麦克亚当法(MacAdam's CCT method) 之下,我们需要计算: cct⃗⋅c⃗′=vector (u¯(Tcct)−u¯(Tc), v¯(Tcct−v¯(Tc))⋅vector (u¯′(Tcct), v¯′(Tcct)=0 \\vec{cct} \\cdot \\vec{c}' = vector\\ (\\bar u(T_{cct}) - \\bar u(T_c),\\ \\bar v(T_{cct} - \\bar v(T_c)) \\cdot vector\\ (\\bar u'(T_{cct}) ,\\ \\bar v'(T_{cct}) = 0 cct⃗⋅c⃗′=vector (u¯(Tcct)−u¯(Tc), v¯(Tcct−v¯(Tc))⋅vector (u¯′(Tcct), v¯′(Tcct)=0 式中 cct⃗\\vec{cct}cct⃗ 为从交点指向色温色度的向量, c⃗′\\vec{c}'c⃗′ 为普朗克轨迹对应交点色温处的导数(即切线方向),代入色温函数和欲求相关色温色度,所得 TcT_cTc 即为所求。 由色度推算相关色温(CCT)的罗伯逊算法 使用麦克亚当法计算 CCT 不太好定位交点求值,因此 艾伦·罗伯逊(Alan R. 
Robertson) 在 1968年提出了另一种快速算法:选取两个普朗克轨迹上的色温,以线性插值方法近似计算目标相关色温 [30]。这一方法也被称为 罗伯逊相关色温算法(Robertson's CCT method)。 图 2-16 罗伯逊相关色温算法(Robertson's CCT method)示意图 如图 2-16 所示, (uT,vT)(u_T, v_T)(uT,vT) 代表目标相关色温 TcT_cTc 色度, TiT_iTi 、 Ti+1T_{i+1}Ti+1 代表普朗克轨迹上以指定麦勒德分割的量个最近相邻色温, did_idi 、 di+1d_{i+1}di+1 为 (uT,vT)(u_T, v_T)(uT,vT) 与 TiT_iTi 、 Ti+1T_{i+1}Ti+1 所在等温线的垂直距离, θ1\\theta _1θ1 、 θ2\\theta _2θ2 为延 (uT,vT)(u_T, v_T)(uT,vT) 所做等温线与 TiT_iTi 、 Ti+1T_{i+1}Ti+1 所在等温线的交点处夹脚。有罗伯逊相关色温公式就可以如下表示: 1Tc=1Ti+θ1θ1+θ2(1Ti+1−1Ti) \\frac{1}T_c=\\frac{1}{T_i}+\\frac{\\theta_1}{\\theta_1+\\theta_2} \\left( \\frac{1}{T_{i+1}} - \\frac{1}{T_i} \\right) T1c=Ti1+θ1+θ2θ1(Ti+11−Ti1) 可以等价转换为 did_idi 、 di+1d_{i+1}di+1 表示 θ1\\theta _1θ1 、 θ2\\theta _2θ2 ,即: 1Tc=1Ti+didi−di+1(1Ti+1−1Ti) \\frac{1}T_c=\\frac{1}{T_i}+\\frac{d_i}{d_i-d_{i+1}} \\left( \\frac{1}{T_{i+1}} - \\frac{1}{T_i} \\right) T1c=Ti1+di−di+1di(Ti+11−Ti1) 而 did_idi 、 di+1d_{i+1}di+1 在分割用麦勒德 mrdmrdmrd 固定的情况下,可以表示为: di=(vT−vi)−mrd⋅(uT−ui)1+mrd2 d_i=\\frac{ (v_T-v_i)-mrd \\cdot (u_T-u_i) }{\\sqrt {1+mrd^2}} di=√1+mrd2(vT−vi)−mrd⋅(uT−ui) 带入上式可知: Tc=(di−di+1)⋅Ti⋅Ti+1di⋅Ti−di+1⋅Ti+1 T_c = \\frac{ (d_i-d_{i+1}) \\cdot T_i \\cdot T_{i+1}}{d_i \\cdot T_i -d_{i+1} \\cdot T_{i+1}} Tc=di⋅Ti−di+1⋅Ti+1(di−di+1)⋅Ti⋅Ti+1 显然 罗伯逊相关色温算法虽然化解了麦克亚当法的交点坐标问题,但也不够简便。在不追求过度精度的情况下,是否存在一种足够快捷的算法来达成相关色温的近似取值呢?这便有了 相关色温的快速逼近法。 由色度推算相关色温(CCT)的麦卡米快速逼近算法 1992年,卡尔文·麦卡米(Calvin S. McCamy) 以选定参照点后使用 三次厄尔密样条(Cubic Hermite Spline) 的方法,得到了一组能够在 CIE XYZ 上直接使用的快速逼近公式,进一步简化了相关色温的取值过程 [31] 。为了纪念麦卡米的贡献,CIE 将此快速算法称为 麦卡米算法(McCamy's CCT method)。 由于三次厄尔密样条的准确性依赖于参考点选取的特点。麦卡米优化了 肯尼斯·凯利(Kenneth L. Kelly) 的采样实验,取用了 XYZ 色彩空间上,能够使求得逼近函数更贴近于范围内实际值的关键色度 (0.3320, 0.1858)(0.3320,\\ 0.1858)(0.3320, 0.1858) 作为参考点 [32] 。他将这个关键参考点称为 “震中(Epicenter)”。 如果记震中为 (xe,ye)(x_e, y_e)(xe,ye) 则 (xe=0.3320, ye=0.1858)(x_e = 0.3320,\\ y_e = 0.1858) (xe=0.3320, ye=0.1858) ,记 (x,y)(x, y)(x,y) 为指定希望求得相关色温 TcT_cTc 的颜色色度。取 nnn 使得: n=x−xey−ye {n = \\frac {x-x_e}{y-y_e} } n=y−yex−xe 有麦卡米算法公式: Tc=McCamy(n)=−449⋅n3+3525⋅n2−6823.3⋅n+5520.33 {\\displaystyle T_c = McCamy(n) = -449 \\cdot n^{3} + 3525 \\cdot n^{2}-6823.3 \\cdot n + 5520.33} Tc=McCamy(n)=−449⋅n3+3525⋅n2−6823.3⋅n+5520.33 因为只采用了单点的方式逼近 ,算法在保证 精确度的条件下,仅能用于计算物理色温接近于 间的相关色温。 1999年,哈维尔·埃尔南德斯·安德烈斯(Javier Hernández-Andrés) 等人提出的,“在麦卡米算法基础上,采用指数函数的形式以提升公式适用范围” 的改进建议 [33] 。哈维尔等人在论文中给出了两段测量结果,将估值范围扩展到了 [3000 K, 8⋅105 K][3000 \\ K, \\ 8 \\cdot 10^5\\ K][3000 K, 8⋅105 K] 。其改进的指数估值函数为: Tc=A0+A1⋅e−nT1+A2⋅e−nT2+A3⋅e−nT3 {\\displaystyle T_c = A_{0}+A_{1} \\cdot e^{\\frac{-n}{T_{1}}} + A_{2} \\cdot e^{\\frac{-n}{T_{2}}} + A_{3} \\cdot e^{\\frac{-n}{T_{3}}} } Tc=A0+A1⋅eT1−n+A2⋅eT2−n+A3⋅eT3−n 对应生效范围被分为两段,nnn 值计算同麦卡米,其余固定参照点参数取如下《改进指数估值法适用范围表》[33] 的标定值: 指数改进版虽然提升了估值范围,但同时也提升了算法的复杂度。大部分工程相关色温都在传统麦卡米算法适用范围内,这使得改进方法有些鸡肋。相较于使用范围广但复杂度高的算法,传统麦卡米算法就能胜任,这也是 CIE 暂时没有采纳此建议的原因。不过,CIE 将其列入为对传统麦卡米更广域范围的补充方法中,以被特殊情况使用。 在从已知 CIE 色度获取相关色温的手段已经基本够用的情况下,剩下相对急迫的问题,就是找到从已知相关色温反向求其在 CIE 色度的快速算法了。 由相关色温(CCT)推算色度的反向逼近算法 由相关色温反向计算色度的算法,在 2002年和 2006年前并没有太多突破。一方面是因为,如果已知相关色温,那么我们完全可以将其等效为物理色温带入 CIE 色温函数中,直接以求得的物理色温的色度代替;另一方面,也的确没有找到除了直接使用反向求解外的,在满足精度条件的同时还能降低计算复杂度的近似算法来解决这一问题。 2002年,由 康奉顺(Bongsoon Kang)等人 利用相关色温等温线的特点,用双步逐级进行的三次厄尔密样条差值法,构建了一组误差可接受的求解方程 [34] 。此方法在 2006年,经过金敬熙(Kyounghae Kim)等人的进一步测量和推导后,形成了现有的由 TcT_cTc 求色度 (x,y)(x, y)(x,y) 的快速近似值算法 [34]: x={−0.2661239⋅109Tc3−0.2343589⋅106Tc2+0.8776956⋅103Tc+0.179910 1667K≤Tc≤4000K−3.0258469⋅109Tc3+2.1070379⋅106Tc2+0.2226347⋅103Tc+0.240390 4000K≤Tc≤25000Ky={−1.1063814⋅x3−1.34811020⋅x2+2.18555832⋅x−0.20219683 1667K≤Tc≤2222K−0.9549476⋅x3−1.37418593⋅x2+2.09137015⋅x−0.16748867 
2222K≤Tc≤4000K+3.0817580⋅x3−5.87338670⋅x2+3.75112997⋅x−0.37001483 4000K≤Tc≤25000K {\\displaystyle \\begin{aligned} x &= { \\begin{cases} -0.2661239 \\cdot {\\frac {10^9}{T_c^3}} -0.2343589 \\cdot {\\frac {10^6}{T_c^2}} +0.8776956 \\cdot {\\frac {10^3}T_c} +0.179910 \\ \\ \\ &1667{\\text{K}}\\leq T_c\\leq 4000{\\text{K}}\\\\ -3.0258469 \\cdot {\\frac {10^9}{T_c^3}} +2.1070379 \\cdot {\\frac {10^6}{T_c^2}} +0.2226347 \\cdot {\\frac {10^3}T_c} +0.240390 \\ \\ \\ &4000{\\text{K}}\\leq T_c\\leq 25000{\\text{K}} \\end{cases} } \\\\ y &= { \\begin{cases} -1.1063814 \\cdot x^{3} -1.34811020 \\cdot x^{2} +2.18555832 \\cdot x -0.20219683 \\ \\ \\ &1667{\\text{K}}\\leq T_c\\leq 2222{\\text{K}}\\\\ -0.9549476 \\cdot x^{3} -1.37418593 \\cdot x^{2} +2.09137015 \\cdot x -0.16748867 \\ \\ \\ &2222{\\text{K}}\\leq T_c\\leq 4000{\\text{K}}\\\\ +3.0817580 \\cdot x^{3} -5.87338670 \\cdot x^{2} +3.75112997 \\cdot x -0.37001483 \\ \\ \\ &4000{\\text{K}}\\leq T_c\\leq 25000{\\text{K}} \\end{cases} } \\\\ \\end{aligned} } xy=⎩⎪⎪⎨⎪⎪⎧−0.2661239⋅Tc3109−0.2343589⋅Tc2106+0.8776956⋅T103c+0.179910 −3.0258469⋅Tc3109+2.1070379⋅Tc2106+0.2226347⋅T103c+0.240390 1667K≤Tc≤4000K4000K≤Tc≤25000K=⎩⎪⎨⎪⎧−1.1063814⋅x3−1.34811020⋅x2+2.18555832⋅x−0.20219683 −0.9549476⋅x3−1.37418593⋅x2+2.09137015⋅x−0.16748867 +3.0817580⋅x3−5.87338670⋅x2+3.75112997⋅x−0.37001483 1667K≤Tc≤2222K2222K≤Tc≤4000K4000K≤Tc≤25000K 但是这一套算法,仍然无法代替非精确场景下,直接通过对应物理色温计算普朗克轨迹上色度的方法实用。因此,CIE 也和麦卡米指数逼近的情况一样,仅是将其列入了相关色温在需求精确值情况下的补充。这里有所了解即可。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_4_5.html":{"url":"Chapter_2/Language/cn/Docs_2_4_5.html","title":"2.4.5 标准光源(Standard Illuminants)& 白点(White Point)","keywords":"","body":"2.4.5 标准光源(Standard Illuminants)& 白点(White Point) 相比其他几个概念,色温 无疑要“物理”的多。所以在工业体系中,由色温衍生出的指标有着更多的应用。CIE 以色温参考,从 1960 年后定义了一系列指定色温的可见光光源点(族)来规范工业用光标准,被称为 标准光源(Standard Illuminants)*。 CIE 标准光源主要分为由 A 到 F,外加 LED 的 7 个类别,分别是: A类光源点,代表相关色温 Tc≈2856 KT_{c} \\approx 2856 \\ KTc≈2856 K 的白炽发光体; B类光源点,代表相关色温 Tc≈4874 KT_{c} \\approx 4874 \\ KTc≈4874 K 的正午日光; C类光源点,代表相关色温 Tc≈6774 KT_{c} \\approx 6774 \\ KTc≈6774 K 的平均日光; D类光源族,代表标准日光源,其存在多个指标,常用有( D50、D55、D60、D65、D75 ); E类光源点,代表相关色温 Tc≈5455 KT_{c} \\approx 5455 \\ KTc≈5455 K 的均匀发光体; F类光源族,代表荧光发光体,常用为 F1~F12 和 FL3.1~FL3.15 的共计 27 个阶梯指标; LED光源族,代表相关色温范围在 [2700 K, 6600 K][2700 \\ K, \\ 6600\\ K][2700 K, 6600 K] 的 LED 光源,于 2018 年最新提出; 我们将光源的指定色温带入上一节中讲解过的 CIE 色温函数,就可以求得对应的色度了。下表中列出了常用的 CIE 标准光源的对应结果 [36] [37] [38] [39] : 除了为工业用光服务外,标准光源对设备相关色彩空间也有着至关重要的作用。设备指定的作为白点的标准光源,将会直接影响设备的色彩表示,从而产生不同的设备色域范围。想要理解这一点,首先就需要了解什么是 白点(White Point)。 白点(White Point)与白点选择对设备相关色彩空间的影响 白点(White Point) 是指一个被用于表示色彩空间标准纯白色的色度点。白点的选取直接影响到色彩空间对颜色的偏向。因此,我们将通过调整白点指定色度,来影响色彩空间实际颜色表示的操作,称为 色温白平衡(Color Temperature White Balance)。有关于包含 白平衡(White Balance) 在内的 颜色平衡(Color Balance) 部分,本书将会在后续的特效处理一章中详细展开。 白点除了选用标准光源外,也可以使用任意色度点。不过,因为太阳是一个标准的黑体辐射源,而人对光线颜色的感知多依赖于阳光。因此,白点最常见的还是取 D65 日光光源,或者其他位于普朗克轨迹上的色温所对应的色度。由于 CIE 规定标准光源三刺激值的 YYY 值被指定为 Y=100Y = 100Y=100 ,实际的标准光源在 XYZ 下的颜色表示值 C(X,Y,Z)C(X,Y,Z)C(X,Y,Z) 与依据色温 TTT 直接计算的 CT(XT,YT,ZT)C_T(X_T,Y_T,Z_T)CT(XT,YT,ZT) 间,存在放缩关系: C=100YT⋅CT C = {\\frac{100}{Y_T}} \\cdot C_T C=YT100⋅CT 下表列出了一些常被取用作为 D 系标准光源替代 的,其他普朗克轨迹关键点在 XYZ 上的色度表示: 对应从低到高的颜色变化如图: 图 2-17 从 1000K 到 12000K 色温颜色表示 由于 设备相关色彩空间,在颜色表示上依赖于设备本身,而这种依赖关系的直观体现就是:颜色的存储,往往采用色彩空间内选定颜色与白点的色度向量差值,或类似变体,经过归一化来定义的。这么做也是一种无奈的妥协:由于设备存储介质空间有限,颜色在保存上需要离散化和均匀化,而最广泛使用的 RGB & XYZ 设备无关色彩空间都不能满足这两个要求。于是,存储问题结合色温的特征,使得不同白点的选取将会直接导致,颜色从数据还原至当前设备色彩空间后的色度与实际期望色度的偏差。 
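结合 2.4.4 的 UCS 色温函数与本节标准光源取 Y = 100 的约定,下面给出一段由色温推算白点色度并还原其 XYZ 表示的最小示意(Python)。示例中的色温取值仅为演示(对应普朗克轨迹上的点),并非各标准光源的精确定义值:

```python
def cct_to_uv(T: float):
    """UCS 色温函数:正文给出的有理多项式近似,适用范围约 1000K - 15000K。"""
    u = (0.860117757 + 1.54118254e-4 * T + 1.28641212e-7 * T * T) / \
        (1.0 + 8.42420235e-4 * T + 7.08145163e-7 * T * T)
    v = (0.317398726 + 4.22806245e-5 * T + 4.20481691e-8 * T * T) / \
        (1.0 - 2.89741816e-5 * T + 1.61456053e-7 * T * T)
    return u, v

def uv_to_xy(u: float, v: float):
    """CIE 1960 UCS 色度 (u, v) 转 CIE 1931 色度 (x, y)(正文线性变换)。"""
    d = 2.0 * u - 8.0 * v + 4.0
    return 3.0 * u / d, 2.0 * v / d

def xy_to_xyz(x: float, y: float, Y: float = 100.0):
    """由色度 (x, y) 与光亮度 Y 还原三刺激值,标准光源按惯例取 Y = 100。"""
    return Y / y * x, Y, Y / y * (1.0 - x - y)

if __name__ == "__main__":
    # 色温取值仅作示意,分别接近 A、E、D65 的相关色温
    for T in (2856.0, 5455.0, 6504.0):
        x, y = uv_to_xy(*cct_to_uv(T))
        X, Y, Z = xy_to_xyz(x, y)
        print(f"T = {T:.0f} K -> (x, y) = ({x:.4f}, {y:.4f}), XYZ = ({X:.2f}, {Y:.2f}, {Z:.2f})")
```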
对于设备厂商(或软件供应商),一种可选的白点决策方案是在基于自身产品特性制定相关色彩空间时,首先通过 CIE 色度图将当前设备可表示的颜色边界确定下来,再依据由可表达颜色边界围成的闭包图形中心色度点附近的标准光源来确定白点。从而在一定程度上避免白点导致的定制色彩空间的均色问题。但均色问题从来不是一个能通过白点来解决的单一问题,在这种场景下,最大的影响其实来源自产品本身。所以大多数厂商还是以 CIE 建议,直接指定 D65 来避免这一吃力不讨好的过程。而有关产品本身色域对标准色域表达程度的衡量指标,则被用 显色指数 来更为直观的说明了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_4_6.html":{"url":"Chapter_2/Language/cn/Docs_2_4_6.html","title":"2.4.6 显色指数(Color Rendering Index)","keywords":"","body":"2.4.6 显色指数(Color Rendering Index) 显色指数(CRI [Color Rendering Index]) 是用来指代设备期望表达颜色与实际展示颜色差异程度的指标,单位为 RaRaRa ,取值范围 [0 Ra, 100 Ra][0 \\ Ra, \\ 100 \\ Ra][0 Ra, 100 Ra] 。通俗来讲, 0 Ra0 \\ Ra0 Ra 意味着完全偏差,比如黑洞; 100 Ra100 \\ Ra100 Ra 则代表着 100% 的颜色还原,比如太阳。 CIE 为了量化显色指数的测量标准,在 1995 年给出了一组被称为 CIE 基准颜色(CIE SC [CIE 1995 Standard Color]) 的 测试用例(TCS [Test Color Samples]) 如下: 这组测试用例,在随后的大量验证中被发现是 不太准确的。 2005 年,专注于日光颜色还原的,现如今已被 爱丽色(X-Rite) 收购的 原科尔莫根(Kollmorgen)子公司格雷塔格(Gretag AG)和麦克白(Macbeth),基于自身及爱丽色(X-Rite)生产的分光光度仪和比色计工程报告的统计信息,对比了 CIE 基准颜色标准的弊端 [40] [41] ,并推出了一套新的颜色测试标准用例。这套用例就是随后被广泛使用的 24 色标准色卡(Color Checker),也被称为 麦克白标准色卡(MCC [Macbeth Color Checker])。 图 2-18 MCC 2005 标准色卡 目前工程上大都采用 MCC 作为设备显色指数的测试标准。除了标准 24 色外,还有更为丰富的 160 色。颜色的丰富程度有助于提升测量的准确性,因此,在更为严苛标准下得到的显色指数结果,将更具有代表性。 最新一次的基于 CIE XYZ 的校准结果《显色指数(CRI)MCC 颜色标准测试用例》[42] [43] 如下,可作为工程参考: 那么怎么计算 CRI 呢?最为简单的方法就是计算设备实际显示颜色和目标颜色的色差,并归一化。1964 年,CIE 提出了 UVW 色彩空间(CIE 1964 U* V* W* Color Space),作为对于 1960 UCS 在归一化能力上的补充。UVW 通过引入白点,使 UCS 上表示的颜色能够被以相对白点坐标的形式转换到一个等大的数值范围内,从而解决了显色指数的计算问题。此后,CIE 将 UVW 作为 UCS 的 特定补充方案,计入到了 XYZ 的体系内并 沿用至今。 假设,当前我们测得的颜色 在 CIE 1960 UCS 中的色度 为 (u, v)(u,\\ v)(u, v) ,取 白点 为 (u0, v0)(u_0,\\ v_0)(u0, v0) 。记 CIE 1960 UCS 中 颜色 为 (U,V,W)(U, V, W)(U,V,W) ,对应 CIE 1964 UVW 中坐标为 (U∗,V∗,W∗)(U^*, V^*, W^*)(U∗,V∗,W∗) ,有: (u,v)=(4x−2x+12y+3, 6y−2x+12y+3)(U∗,V∗,W∗)=(13W∗⋅(u−u0), 13W∗⋅(v−v0), 25Y13−17) {\\displaystyle \\begin{aligned} (u, v) &= ({\\frac {4x}{-2x+12y+3}}, \\ {\\frac {6y}{-2x+12y+3}}) \\\\ (U^*, V^*, W^*) &= (13W^{*} \\cdot (u-u_{0}), \\ \\ \\ 13W^{*} \\cdot (v-v_{0}), \\ \\ \\ {25Y^{\\frac {1}{3}}-17}) \\end{aligned} } (u,v)(U∗,V∗,W∗)=(−2x+12y+34x, −2x+12y+36y)=(13W∗⋅(u−u0), 13W∗⋅(v−v0), 25Y31−17) 带入 CIE XYZ 色差计算规则,就有 色差 ΔC\\Delta CΔC 取欧氏距离: ΔC=ΔE(U∗,V∗,W∗)=(ΔU∗)2+(ΔV∗)2+(ΔW∗)2 {\\displaystyle \\begin{aligned} \\Delta C = \\Delta E(U^*, V^*, W^*)={\\sqrt {\\left(\\Delta U^{*}\\right)^{2}+\\left(\\Delta V^{*}\\right)^{2}+\\left(\\Delta W^{*}\\right)^{2}}} \\end{aligned} } ΔC=ΔE(U∗,V∗,W∗)=√(ΔU∗)2+(ΔV∗)2+(ΔW∗)2 基于 CIE 颜色标准规定,我们要求的 显色指数 为 RaRaRa 在 UVW 中有: Ra=100−4.6⋅ΔEUVW=100−4.6⋅ΔC {\\displaystyle \\begin{aligned} Ra = 100 - 4.6 \\cdot \\Delta E_{UVW} = 100 - 4.6 \\cdot \\Delta C \\end{aligned} } Ra=100−4.6⋅ΔEUVW=100−4.6⋅ΔC 虽然 CIE 对 UVW 的定义是基于 CIE SC,但 MCC 仍然可以使用此快速算法。我们将上述整个计算过程统称为 CIE 色度自适应转换(CAT [Chromatic Adaptation Transformation])的 CRI 公式,简称 CIE CAT-CRI。 到此,色彩的度量的关键指标基本介绍完毕。不难发现,每一次色彩关键标准的制定都与设备无关色彩空间的迭代密切相关。每一个设备无关色彩空间的设计,都针对性的解决某一种顺承而来的色彩度量问题。可以说,正是这些设备无关色彩空间,共同构成了色彩衡量发展的里程碑。 现在,我们已经从各个度量指标的演化角度,对概念进行了整理。是时候从发展史出发,来纵观整个过程中这些里程碑式的经典色彩空间了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_5.html":{"url":"Chapter_2/Language/cn/Docs_2_5.html","title":"2.5 经典色彩空间(Classical Color Space)","keywords":"","body":"2.5 经典色彩空间(Classical Color Space) 统一的标准制定和实践的演化推进总是需要循序渐进。而各个 经典色彩空间(Classical Color Space) 就是此领域内的关键节点。 
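在逐一回顾这些经典色彩空间之前,先把上一节显色指数的核心换算用一段示意代码固定下来(Python):按正文公式把 (x, y, Y) 转到 CIE 1964 UVW,并以 Ra = 100 − 4.6·ΔE 计算单个测试色的指数。示例中的白点与测试色数值均为假设;完整的 CRI 计算还需按标准对整组测试用例取均值并做色适应处理,这里仅演示单点换算:

```python
import math

def xy_to_uv(x: float, y: float):
    """CIE 1931 色度 (x, y) 转 CIE 1960 UCS 色度 (u, v)(正文公式)。"""
    d = -2.0 * x + 12.0 * y + 3.0
    return 4.0 * x / d, 6.0 * y / d

def to_uvw(x: float, y: float, Y: float, white_uv):
    """将 (x, y, Y) 转到 CIE 1964 U*V*W*,white_uv 为白点在 UCS 下的色度 (u0, v0)。"""
    u, v = xy_to_uv(x, y)
    u0, v0 = white_uv
    W = 25.0 * Y ** (1.0 / 3.0) - 17.0
    return 13.0 * W * (u - u0), 13.0 * W * (v - v0), W

def cri_single(expected, actual, white_uv):
    """对单个测试色计算 Ra = 100 - 4.6 * ΔE(UVW),expected/actual 为 (x, y, Y)。"""
    U1, V1, W1 = to_uvw(*expected, white_uv)
    U2, V2, W2 = to_uvw(*actual, white_uv)
    dE = math.sqrt((U1 - U2) ** 2 + (V1 - V2) ** 2 + (W1 - W2) ** 2)
    return 100.0 - 4.6 * dE

if __name__ == "__main__":
    d65_uv = xy_to_uv(0.3127, 0.3290)      # D65 白点色度,取常用近似值
    expected = (0.3500, 0.3600, 30.0)      # 假设的期望测试色 (x, y, Y)
    actual   = (0.3470, 0.3630, 29.0)      # 假设的设备实测色
    print(f"单个测试色的 Ra ≈ {cri_single(expected, actual, d65_uv):.1f}")
```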
在色彩的衡量中,我们了解了色彩空间偏重描述的特性。不同色彩空间中的相同颜色,必须得经过适当的映射变化和基准变化,才能相互等价。各类颜色描述,需要依托其描述本身对应的色彩空间来看,才会具有意义。 因此,为了简明扼要的阐述转换关系,此处假设用于例举的经典色彩空间,其 RGB 三色基准波长一致,即都为 CIE 1931 RGB 测定标准值。白点统一取用 D65 。而后文中介绍色彩空间所用的配色函数,如无特殊指定,则都为广义配色函数。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_5_1.html":{"url":"Chapter_2/Language/cn/Docs_2_5_1.html","title":"2.5.1 光学三原色色彩空间(RGB)","keywords":"","body":"2.5.1 光学三原色色彩空间(RGB Color Space) 光学三原色色彩空间(RGB Color Space) 又被称为 光三原色空间 或 RGB 色彩空间。光学三原色色彩空间,是对颜色的加法混合论的有效应用。以光学三原色(RGB)的叠波权重作为三维坐标单位轴,来表示大部分可见光颜色的一种色彩模型。从亥姆霍兹的三色理论之后,光学三原色被广泛的用来表示颜色特性,但并没有形成工程化的系统。 图 2-19 光学三原色色彩空间(RGB Color Space)坐标系 而由格拉斯曼颜色定律可知,人对颜色的感知其实是比较线性的。所以,光学三原色色彩空间的颜色表示非常简洁。如果记目标颜色为 CRGBC_{RGB}CRGB ,那么 配色函数 为: CRGB=R⋅Red+G⋅Green+B⋅Blue=Vector[R,G,B] C_{RGB} = R \\cdot Red + G \\cdot Green + B \\cdot Blue = Vector[R, G, B] CRGB=R⋅Red+G⋅Green+B⋅Blue=Vector[R,G,B] 所有可见光都可以利用此公式表示出来。 光学三原色色彩空间的基准取自 RGB 的锚定,因此 RGB 三色的代表波长选取,将会影响整个光学三原色色彩空间的颜色表示水平。 由于足够简单且便于量化,基于光学三原色色彩空间配色函数的有局限改版模型,如 IBM RGB、Adobe RGB等,被广泛使用于计算机科学。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_5_2.html":{"url":"Chapter_2/Language/cn/Docs_2_5_2.html","title":"2.5.2 颜料三原色色彩空间(CMY / CMYK )","keywords":"","body":"2.5.2 颜料三原色色彩空间(CMY / CMYK Color Space) 颜料三原色色彩空间,根据其是否包含对黑色(Black)的描述,被分为 印刷三分色模型(CMY Color Space) 即 CMY 色彩空间,和 印刷四分色模型(CMYK Color Space) 即 CMYK 色彩空间。其中,CMY 即指代颜料三原色,K 则为 Black 取尾字母,以和纯蓝色(Blue)作为区分。 颜料三原色色彩空间,是对颜色的减法混合论的直接应用。 图 2-20 颜料三原色色彩空间(CMY/CMYK Color Space)坐标系 对于 CMY 色彩空间,如果记目标颜色为 CCMYC_{CMY}CCMY ,那么 配色函数 为: CCMY=C⋅Cyan+M⋅Magenta+Y⋅Yellow=Vector[C,M,Y] C_{CMY} = C \\cdot Cyan + M \\cdot Magenta + Y \\cdot Yellow = Vector[C, M, Y] CCMY=C⋅Cyan+M⋅Magenta+Y⋅Yellow=Vector[C,M,Y] 可以发现 CMY 色彩空间与 RGB 色彩空间,恰好以立方体质心堆成。因此存在转换: CCMY=1−CRGB C_{CMY} = 1 - C_{RGB} CCMY=1−CRGB 印刷三分色模型最早被应用于人们于绘画中。通过对颜料三原色的调色板混合,可以形成不同的颜色。由于 CMY 色彩空间在人类历史长河中,已被应用于绘画创作许久,因此这个颜色空间较难追溯最初的提出者了。不过真正对颜料三原色进行色彩空间的标准化工作,还是在打印机被发明后。 无论是喷墨打印机、照相打印机,还是激光打印机。打印出的结果都是依靠反射光被人们观察到的。这决定了此类型工程和绘画基本一致。早期打印机采用 CMY 色彩空间,并用红、青、黄三色混合,来实现黑色的显示。但是,这样混合出的黑色在显示上偏红黑。为了应对这种现象,人们在工程上引入了独立的黑色墨盒,以求解决黑色的打印问题。因此,为了描述被独立引入的黑色在颜色还原上的转换,提出了 CMYK 色彩空间。 CMYK 色彩空间,对黑色进行了重设。如果记目标颜色为 CCMYKC_{CMYK}CCMYK ,那么配色函数为: CCMYK=C⋅Cyan+M⋅Magenta+Y⋅Yellow+K⋅Black=Vector[C,M,Y,K] C_{CMYK} = C \\cdot Cyan + M \\cdot Magenta + Y \\cdot Yellow + K \\cdot Black = Vector[C, M, Y, K] CCMYK=C⋅Cyan+M⋅Magenta+Y⋅Yellow+K⋅Black=Vector[C,M,Y,K] 由于 CMYK 比 CMY 多一维度K,从 CMY 到 CMYK 的映射就需要进行升维。 记 K=1K = 1K=1 时, CCMYK=Vector[0, 0, 0, 1]C_{CMYK} = Vector[0,\\ 0,\\ 0,\\ 1]CCMYK=Vector[0, 0, 0, 1] ,那么 K≠1K \\neq 1K≠1 时就有: [CMYK]=[(C′−K)/(1−K)(M′−K)/(1−K)(Y′−K)/(1−K)K] ∣ [K=min(C′,M′,Y′), K≠1] {\\begin{bmatrix} C \\\\ M \\\\ Y \\\\K \\end{bmatrix}} = {\\begin{bmatrix} (C^{\\prime} - K) / (1-K) \\\\ (M^{\\prime} -K ) / (1-K) \\\\ (Y^{\\prime} - K) / (1-K) \\\\K \\end{bmatrix}} \\ \\ | \\ \\ [K = min(C^{\\prime}, M^{\\prime}, Y^{\\prime}),\\ \\ K \\neq 1] ⎣⎢⎢⎡CMYK⎦⎥⎥⎤=⎣⎢⎢⎡(C′−K)/(1−K)(M′−K)/(1−K)(Y′−K)/(1−K)K⎦⎥⎥⎤ ∣ [K=min(C′,M′,Y′), K≠1] 而从 CMYK 到 CMY 的映射,就简单了: [C′M′Y′]=[(1−K)⋅C+K(1−K)⋅M+K(1−K)⋅Y+K] {\\begin{bmatrix} C^{\\prime} \\\\ M^{\\prime} \\\\ Y^{\\prime} \\end{bmatrix}} = {\\begin{bmatrix} (1-K) \\cdot C + K \\\\ (1-K) \\cdot M + K \\\\ (1-K) \\cdot Y + K \\end{bmatrix}} ⎣⎡C′M′Y′⎦⎤=⎣⎡(1−K)⋅C+K(1−K)⋅M+K(1−K)⋅Y+K⎦⎤ 而对于 CYMK 色彩空间和 RGB 色彩空间互转,就有需要以 CMY 色彩空间作为桥梁。先根据转换方向,通过 CMY 色彩空间进行 CRGB→CCMYC_{RGB} 
\\rightarrow C_{CMY}CRGB→CCMY 或者 CCMYK→CCMY C_{CMYK} \\rightarrow C_{CMY}CCMYK→CCMY ,再通过 CMY 与 RGB 与 CMYK 的关系,进行间接转换。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_5_3.html":{"url":"Chapter_2/Language/cn/Docs_2_5_3.html","title":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space)","keywords":"","body":"2.5.3 CIE RGB 色彩空间(CIE 1931 RGB Color Space) 在经过大量对理论的实践探索后,人们发现三维坐标系统无疑是从空间和原理上,最为合适构建色彩模型的描述载体。但传统的 RGB 色彩空间由于没有系统,且存在 基准波长校准问题,并不适用于现代工业。 1931年,为了解决工业体系内颜色描述的模型化, 国际照明委员会(CIE [International Commission on Illumination]) 进行了对光学三原色色彩空间抽象汇总的工作。 现在我们所称的 RGB 色彩空间,多指 CIE RGB 色彩空间。CIE RGB 色彩空间最为重要的贡献,是在格拉斯曼颜色实验的基础上确定了光谱三刺激值,以 Red 取 700nm、Green 取 546.1nm、Blue 取 435.8nm 作为光学三原色波长的基准标定,将人眼可见光谱范围内的所有颜色,依据前文中提到的 三原色函数(Trichromatic Primaries Functions) 统一到了模型。 图 2-21 CIE RGB 色彩空间(CIE RGB Color Space)顶点色示意图 CIE RGB 色彩空间的 配色函数 直接采用 了传统三原色色彩空间的配色函数,唯一不同的只在于三原色的选取 : CRGB=R⋅Red700+G⋅Green546.1+B⋅Blue435.8=Vector[R,G,B] C_{RGB} = R \\cdot Red_{700} + G \\cdot Green_{546.1} + B \\cdot Blue_{435.8} = Vector[R, G, B] CRGB=R⋅Red700+G⋅Green546.1+B⋅Blue435.8=Vector[R,G,B] 因此,CIE RGB 也不可避免的继承了光学三原色色彩空间的 负色匹配 问题。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_5_4.html":{"url":"Chapter_2/Language/cn/Docs_2_5_4.html","title":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space)","keywords":"","body":"2.5.4 CIE XYZ 色彩空间(CIE 1931 XYZ Color Space) 1931年,国际照明委员会(CIE [International Commission on Illumination]) 提出,以经过设计的 XYZ 基准坐标系来锚定 RGB 边界的方案可以解决问题。这一映射方案所对应的颜色描述模型,被称为 XYZ 色彩空间(XYZ Color Space) [12] [13] 。 CIE 以线性等式关系构建了 XYZ 系统与 RGB 系统的转换,以三刺激函数(Tristimulus Values Functions)使可见光基于 XYZ 坐标的混合向量全部局限于正象限 [X≥0, Y≥0, Z≥0]。 如果记目标颜色为 CXYZC_{XYZ}CXYZ ,一单位 RGB 到一单位 XYZ 有: 从 R→XR \\rightarrow XR→X 的转换因子为 CrxC_{rx}Crx ,从 G→YG \\rightarrow YG→Y 的转换因子为 CgyC_{gy}Cgy ,从 B→ZB \\rightarrow ZB→Z 的转换因子为 CbzC_{bz}Cbz 那么 XYZ 色彩空间的 配色函数 为: CXYZ=X⋅CrxR+Y⋅CgyG+Z⋅CbzB=Vector[X,Y,Z] C_{XYZ} = X \\cdot C_{rx}R + Y \\cdot C_{gy}G + Z \\cdot C_{bz}B = Vector[X, Y, Z] CXYZ=X⋅CrxR+Y⋅CgyG+Z⋅CbzB=Vector[X,Y,Z] 而从 RGB 到 XYZ 是天然可转的,记转换矩阵为 MRGB2XYZM_{RGB2XYZ}MRGB2XYZ ,那么有映射: CXYZ=MRGB2XYZ⋅CRGB C_{XYZ} = M_{RGB2XYZ} \\cdot C_{RGB} CXYZ=MRGB2XYZ⋅CRGB 即: [XYZ]=[+0.49000+0.31000+0.20000+0.17697+0.81240+0.01063+0.00000+0.01000+0.99000]⋅[RGB] {\\displaystyle {\\begin{bmatrix} X \\\\ Y \\\\ Z \\end{bmatrix}}= {\\begin{bmatrix} +0.490\\,00 & +0.310\\,00 & +0.200\\,00\\\\ +0.176\\,97 & +0.812\\,40 & +0.010\\,63\\\\ +0.000\\,00 & +0.010\\,00 & +0.990\\,00 \\end{bmatrix}} \\cdot {\\begin{bmatrix} R \\\\ G \\\\ B \\end{bmatrix}} } ⎣⎡XYZ⎦⎤=⎣⎡+0.49000+0.17697+0.00000+0.31000+0.81240+0.01000+0.20000+0.01063+0.99000⎦⎤⋅⎣⎡RGB⎦⎤ 而从 XYZ 到 RGB,就相当于反向求逆,因此如下: CXYZ=MRGB2XYZ−1⋅CRGB C_{XYZ} = {M_{RGB2XYZ}}^{-1} \\cdot C_{RGB} CXYZ=MRGB2XYZ−1⋅CRGB 即: [RGB]≈[+2.36461385−0.89654057−0.46807328−0.51516621+1.42640810+0.08875810+0.00520370−0.01440816+1.00920446][XYZ] {\\displaystyle {\\begin{bmatrix}R\\\\G\\\\B\\end{bmatrix}} \\approx {\\begin{bmatrix} +2.364\\,61385 & -0.896\\,54057 & -0.468\\,07328\\\\ -0.515\\,16621 & +1.426\\,40810 & +0.088\\,75810\\\\ +0.005\\,20370 & -0.014\\,40816 & +1.009\\,20446 \\end{bmatrix}}{\\begin{bmatrix}X\\\\Y\\\\Z\\end{bmatrix}}} ⎣⎡RGB⎦⎤≈⎣⎡+2.36461385−0.51516621+0.00520370−0.89654057+1.42640810−0.01440816−0.46807328+0.08875810+1.00920446⎦⎤⎣⎡XYZ⎦⎤ 其中, MRGB2XYZM_{RGB2XYZ}MRGB2XYZ 为测量所得 [12](见前文)推导而出的坐标映射矩阵。 基于此映射关系,所有实际可见波长的 
视觉单色(Monochromat)和混合色 在经过坐标转换后,都可以被描述到由 XYZ 色彩空间。这为统一视觉颜色对比标准和迭代推进色彩空间色设计,创造了有力基础工具。工程中为了表示设备颜色特性,常将设备颜色范围以 XYZ 色彩空间的色度图切面,即 CIE 标准色度图(CIE Standard Observer Chromaticity Diagram) 表示。因此,CIE XYZ 颜色空间的配色函数也被称为 “CIE 标准观测者(CIE Standard Observer )”函数。 但 XYZ 的也继承了 RGB 的 “均匀色差” (即 平均色差 问题) 挑战(见前文)。人眼各类视锥细胞的数目是存在差异的。纯物理描述转换为感知上的情况,在 RGB 与 XYZ 所选基准波长条件下,就会因为人对光学三原色光线的敏感程度不同,产生冷色调区域相近颜色富集,而暖色调相近颜色离散的问题。如果取用广义色差 ,即两个颜色的欧式距离,为色差 ΔC\\Delta CΔC 的话。那么 XYZ 色彩空间中,单位 ΔC\\Delta CΔC 的颜色变化情况就显得不那么均匀。这个就是 平均色差 问题。 如何处理平均色差问题?CIE 和美标给出了不同的思路。CIE 将色差问题,拆分为色度图均匀化和白点取值影响归一化两个问题,区分考虑。提出了着重于细微色差变化的 CIE LAB 色彩空间标准,和偏重标准光源线性归一化的 CIE LUV 色彩空间标准。而美标则以商业出发点,追求色彩还原更接近人眼生理感受,同时还要兼顾工业体系中对色彩信息的精细度要求,进而推进了颜色三要素色彩空间的制定。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_5_5.html":{"url":"Chapter_2/Language/cn/Docs_2_5_5.html","title":"2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* Color Space)","keywords":"","body":"2.5.5 CIE LAB 色彩空间(CIE 1976 L*, a*, b* color space) 1952 年,色彩科学家 里查德·塞瓦尔·亨特(Richard Sewall Hunter,1909–1991) 创建了至今任然是业界最高端颜色解决方案供应商的 亨特联合实验室(HunterLab [Hunter Associates Laboratory]),并在之后提出了著名的 Hunter L,a,b 色彩空间。 Hunter L,a,b 色彩空间结合 CIE XYZ 色彩空间,共同组成了 CIE 1976 LAB 色彩空间的前身。所以,CIE LAB 与 RGB 间需要通过 XYZ 来缔结转换关系。 1976 年,在经过一系列建议的采纳和对 1931 色彩标准体系的完善后,CIE 尝试用一种全新的角度来处理均色问题。CIE 在 Hunter L,a,b 色彩空间的基础上,沿用了 Hunter L,a,b 的色度处理方式与 CIE XYZ 体系结合,将 CIE 标准观察者应用在了 CIE 1976 LAB 色彩空间上。由于 Hunter L,a,b 设定之初的目的,就是将不同颜色间的差异更为显著的客观表示出来,因此 CIE LAB 也继承了这一特点,成为了 设备无关 适合于色差比对的色彩空间。 CIE 1976 LAB 将 XYZ 色度图(非色度平面)在其所在平面,以选定白点为中心拓扑变换为圆形,分别代表:红(Red)、绿(Green)、蓝(Blue)、黄(Yellow) 的 4 个等大象限(扇区),并以平面中心构建了二维坐标系 (a, b)(a,\\ b)(a, b) 。以平面内向量 (a, b)(a,\\ b)(a, b) 来索引实际色度。 我们知道,单纯的靠色度是没办法完全描述颜色特征的。除了色度外,我们还需要引入光亮度因素。CIE LAB 中的依旧沿用了 1960 UCS 和 1931 XYZ 中对光亮度的处理方式,单取由白到黑的 灰度线(Grey Line) 作为了光亮度的刻度。但是对与不同光亮度的切分,LAB 也对 XYZ 原有的亮度表示进行了调整。以在一定程度上保证,每个亮度下切割得到的色度平面都有相对均匀表示。 如果记目标颜色为 CLABC_{LAB}CLAB ,那么 LAB 色彩空间的 配色函数 为: CLAB=L⋆⋅Luminance+Plane(a⋆, b⋆)=Vector[L⋆,a⋆,b⋆] C_{LAB} = L^{\\star } \\cdot Luminance + Plane(a^{\\star },\\ b^{\\star }) = Vector[L^{\\star }, a^{\\star }, b^{\\star }] CLAB=L⋆⋅Luminance+Plane(a⋆, b⋆)=Vector[L⋆,a⋆,b⋆] 记 D65 白点在 XYZ 色彩空间内颜色为 CD65C_{D65}CD65 ,有色温 1960 UCS 快速计算得: CD65 (XD65, YD65, ZD65)≈(95.049, 100, 108.884) {\\displaystyle \\begin{aligned} &C_{D65}\\ (X_{D65},\\ Y_{D65},\\ Z_{D65}) \\approx (95.049,\\ 100,\\ 108.884) \\\\ \\end{aligned} } CD65 (XD65, YD65, ZD65)≈(95.049, 100, 108.884) 如果记目标颜色为 CLABC_{LAB}CLAB ,一单位 XYZ 到一单位 LAB 有: [L⋆a⋆b⋆]=[0+116016+500−500000+200−2000]⋅[F(XXwhite)F(YYwhite)F(ZZwhite)1] {\\displaystyle {\\begin{bmatrix} L^{\\star } \\\\ a^{\\star } \\\\ b^{\\star } \\end{bmatrix}}= {\\begin{bmatrix} 0 & +116 & 0 & 16 \\\\ +500 & -500 & 0 & 0 \\\\ 0 & +200 & -200 & 0 \\\\ \\end{bmatrix}} \\cdot {\\begin{bmatrix} F(\\tfrac{X}{X_{white}}) \\\\ F(\\tfrac{Y}{Y_{white}}) \\\\ F(\\tfrac{Z}{Z_{white}}) \\\\ 1 \\end{bmatrix}} } ⎣⎡L⋆a⋆b⋆⎦⎤=⎣⎢⎢⎡0+5000+116−500+20000−2001600⎦⎥⎥⎤⋅⎣⎢⎢⎡F(XwhiteX)F(YwhiteY)F(ZwhiteZ)1⎦⎥⎥⎤ 即,从 XYZ 到 LAB 有: L⋆=116⋅ F(YYD65)−16a⋆=500⋅(F(XXD65)−F(YYD65))b⋆=200⋅(F(YYD65)−F(ZZD65)) {\\displaystyle \\begin{aligned} L^{\\star }&=116 \\cdot \\ F\\!\\left({\\frac {Y}{Y_{D65}}}\\right)-16 \\\\ a^{\\star }&=500 \\cdot \\left(F\\!\\left({\\frac {X}{X_{D65}}}\\right)-F\\!\\left({\\frac {Y}{Y_{D65}}}\\right)\\right) \\\\ b^{\\star }&=200 \\cdot \\left(F\\!\\left({\\frac {Y}{Y_{D65}}}\\right)-F\\!\\left({\\frac {Z}{Z_{D65}}}\\right)\\right) \\\\ \\end{aligned} } L⋆a⋆b⋆=116⋅ 
F(YD65Y)−16=500⋅(F(XD65X)−F(YD65Y))=200⋅(F(YD65Y)−F(ZD65Z)) 其中: F(n)={n3 n>δ3n3δ2+429 n≤δ3 , δ=629 {\\displaystyle \\begin{aligned} F(n)&={ \\begin{cases} {\\sqrt [{3}]{n}} & \\ \\ \\ n > \\delta ^{3} \\\\ {\\dfrac {n}{3\\delta ^{2}}}+{\\frac {4}{29}} & \\ \\ \\ n \\le \\delta ^{3} \\end{cases} }\\ \\ \\ , \\ \\ \\delta ={\\tfrac {6}{29}} \\end{aligned} } F(n)=⎩⎨⎧3√n3δ2n+294 n>δ3 n≤δ3 , δ=296 而从 LAB 到 XYZ,就相当于反向求逆,因此如下: X=XD65⋅F−1(L⋆+16116+a⋆500)Y=YD65⋅F−1(L⋆+16116)Z=ZD65⋅F−1(L⋆+16116−b⋆200) {\\displaystyle \\begin{aligned} X &= X_{D65} \\cdot F^{-1}\\left({\\frac {L^{\\star }+16}{116}} + {\\frac {a^{\\star }}{500}}\\right) \\\\ Y &= Y_{D65} \\cdot F^{-1}\\left({\\frac {L^{\\star }+16}{116}} \\right) \\\\ Z &= Z_{D65} \\cdot F^{-1}\\left({\\frac {L^{\\star }+16}{116}} - {\\frac {b^{\\star }}{200}}\\right) \\end{aligned} } XYZ=XD65⋅F−1(116L⋆+16+500a⋆)=YD65⋅F−1(116L⋆+16)=ZD65⋅F−1(116L⋆+16−200b⋆) 其中: F−1(n)={n3 n>δ3δ2(n−429) n≤δ , δ=629 {\\displaystyle \\begin{aligned} F^{-1}(n)&={ \\begin{cases} {n^3} & \\ \\ \\ n > \\delta \\\\ {3\\delta ^2}(n-{\\frac {4}{29})} & \\ \\ \\ n \\le \\delta \\end{cases} }\\ \\ \\ , \\ \\ \\delta ={\\tfrac {6}{29}} \\end{aligned} } F−1(n)=⎩⎨⎧n33δ2(n−294) n>δ n≤δ , δ=296 可见,XYZ 与 LAB 间的转换关系,并不是线性的。由于 CIE LAB 中的白点直接参与了转换运算,白点调参对 LAB 的影响程度会更大一些。带入色差公式 ΔC=(Δa⋆)2+(Δb⋆)2{\\displaystyle \\begin{aligned} {\\displaystyle \\Delta C = {\\sqrt {\\left(\\Delta a^{\\star}\\right)^{2}+\\left(\\Delta b^{\\star}\\right)^{2}}}} \\end{aligned} }ΔC=√(Δa⋆)2+(Δb⋆)2 会发现,通过这种方式切割得到的整个人眼可见光色域范围,色差均匀程度依赖于白点的同时,也并非完全均匀。越靠近色度图白点,色差变化越小;越靠近色度图边缘,色差变化越大,不过相较于 XYZ 已有很大改善。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_5_6.html":{"url":"Chapter_2/Language/cn/Docs_2_5_6.html","title":"2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* Color Space)","keywords":"","body":"2.5.6 CIE LUV 色彩空间(CIE 1976 L*, u*, v* color space) 1976 年,在 CIE 采纳 CIE LAB 色彩空间的同年,CIE 以 CIE 1960 UCS 和 CIE 1964 UVW(这两个在前文色彩度量中介绍过,做为补充型色彩空间,用于量化色温和 CRI 到 CIE 标准体系内)为基础 进一步拓展,提出了 CIE LUV 色彩空间。 显然,CIE LUV 提出的目的,是为了将 CIE 1960 UCS 和 CIE 1964 UVW 两个色彩空间的 特性统一 到单一色彩空间。通过整合两者在度量衡相关量方面的计算,来 化解得到目标尺度值后的色彩空间互转问题。我们知道 CIE 1960 UCS 是由 XYZ 拓扑变换而得,CIE 1964 UVW 是由 CIE 1960 UCS 引入白点而得,两者的关键点皆在于平面色度值,而两者区别只在于 UVW 引入了白点。因此,整个问题就变为,找到一个合适的映射函数(狭义配色函数),使得在任何白点取值条件下, CIE LUV 中颜色的色度转 XYZ 皆为线性。 基于此,LUV 对光亮度参数 进行了依托于白点的非线性变化。以此来保证,在不同白点选取下的色度,都能维持同 UCS 和 UVW 一致的线性转换方式。这一操作使 LUV 色彩空间不论白点如何选取,都能有从 LUV 到 XYZ 的色度的线性变换和逆变换。 如果记目标颜色为 CLUVC_{LUV}CLUV ,那么 LUV 色彩空间的 配色函数 为: CLUV=L⋆⋅Luminance+Plane(u⋆, v⋆)=Vector[L⋆,u⋆,v⋆] C_{LUV} = L^{\\star } \\cdot Luminance + Plane(u^{\\star },\\ v^{\\star }) = Vector[L^{\\star }, u^{\\star }, v^{\\star }] CLUV=L⋆⋅Luminance+Plane(u⋆, v⋆)=Vector[L⋆,u⋆,v⋆] 记 D65 白点在 XYZ 色彩空间内颜色为 CD65C_{D65}CD65 ,有色温 1960 UCS 快速计算得: CD65 (xD65, yD65, YD65)≈(0.31271, 0.32902, 100) {\\displaystyle \\begin{aligned} &C_{D65}\\ (x_{D65},\\ y_{D65},\\ Y_{D65}) \\approx (0.31271,\\ 0.32902,\\ 100) \\\\ \\end{aligned} } CD65 (xD65, yD65, YD65)≈(0.31271, 0.32902, 100) 如果记目标颜色为 CLUVC_{LUV}CLUV ,从 XYZ 到 LUV 有: (x,y)=( XX+Y+Z , YX+Y+Z )(u,v)=(4x−2x+12y+3, 9y−2x+12y+3)(u⋆,v⋆,L⋆)=F(Y)⋅(13⋅(u−uD65), 13⋅(v−vD65), 1 ) {\\displaystyle \\begin{aligned} &(x, y) = (\\ \\ \\ {\\frac {X}{X+Y+Z}} \\ \\ \\ \\ , \\ \\ \\ {\\frac {Y}{X+Y+Z}} \\ \\ \\ \\ ) \\\\ &(u, v) = ({\\frac {4x}{-2x+12y+3}}, \\ {\\frac {9y}{-2x+12y+3}}) \\\\ &(u^{\\star }, v^{\\star }, L^{\\star }) = F\\!\\left({Y}\\right) \\cdot ( 13 \\cdot \\left(u-u_{D65}\\right), \\ \\ \\ 13 \\cdot 
\\left(v-v_{D65}\\right), \\ \\ \\ 1\\ ) \\\\ \\end{aligned} } (x,y)=( X+Y+ZX , X+Y+ZY )(u,v)=(−2x+12y+34x, −2x+12y+39y)(u⋆,v⋆,L⋆)=F(Y)⋅(13⋅(u−uD65), 13⋅(v−vD65), 1 ) 其中: L⋆=F(Y)={(293)3⋅YYD65 YYD65≤δ3116⋅YYD653 −16 YYD65>δ3 , δ=629 {\\displaystyle \\begin{aligned} L^{\\star } = F(Y)&={ \\begin{cases} {\\left( {\\frac {29}{3}} \\right)^3 \\cdot {\\frac {Y}{Y_{D65}}}} & \\ \\ \\ {\\frac {Y}{Y_{D65}}} \\le \\delta ^{3} \\\\ {116 \\cdot {\\sqrt [3]{\\frac {Y}{Y_{D65}}}} \\ - 16} & \\ \\ \\ {\\frac {Y}{Y_{D65}}} > \\delta ^{3} \\end{cases} }\\ \\ \\ , \\ \\ \\delta ={\\tfrac {6}{29}} \\end{aligned} } L⋆=F(Y)=⎩⎪⎪⎨⎪⎪⎧(329)3⋅YD65Y116⋅3√YD65Y −16 YD65Y≤δ3 YD65Y>δ3 , δ=296 而从 LUV 到 XYZ,就相当于反向求逆,因此如下: (u,v)=(u⋆13⋅L⋆+uD65 , v⋆13⋅L⋆+vD65 )(x,y)=(9u6u−16v+12 , 4v6u−16v+12 )(X,Y,Z)=F−1(L⋆)⋅(9u4v, 1, 12−3u−20v4v ) {\\displaystyle \\begin{aligned} &(u, v) = ( {\\frac {u^{\\star }}{13 \\cdot L^{\\star }}} + u_{D65}\\ \\ , \\ \\ {\\frac {v^{\\star }}{13 \\cdot L^{\\star }}} + v_{D65} \\ ) \\\\ &(x, y) = ({\\frac {9u}{6u-16v+12}}\\ \\ , \\ {\\frac {4v}{6u-16v+12}} \\ ) \\\\ &(X, Y, Z) = F^{-1}(L^{\\star }) \\cdot ( {\\frac {9 u}{4 v}}, \\ \\ \\ 1, \\ \\ \\ {\\frac {12 - 3 u - 20 v}{4 v}} \\ ) \\\\ \\end{aligned} } (u,v)=(13⋅L⋆u⋆+uD65 , 13⋅L⋆v⋆+vD65 )(x,y)=(6u−16v+129u , 6u−16v+124v )(X,Y,Z)=F−1(L⋆)⋅(4v9u, 1, 4v12−3u−20v ) 其中: Y=F−1(L⋆)={YD65⋅(329)3⋅L⋆ L⋆≤8YD65⋅(L⋆+16116)3 L⋆>8 {\\displaystyle \\begin{aligned} Y = F^{-1}(L^{\\star })&={ \\begin{cases} {Y_{D65} \\cdot \\left( {\\frac {3}{29}} \\right)^3 \\cdot {L^{\\star }}} & \\ \\ \\ L^{\\star } \\le 8 \\\\ {Y_{D65} \\cdot \\left( {\\frac {L^{\\star }+16}{116}} \\right)^3 } & \\ \\ \\ L^{\\star } > 8 \\end{cases} } \\end{aligned} } Y=F−1(L⋆)=⎩⎪⎪⎨⎪⎪⎧YD65⋅(293)3⋅L⋆YD65⋅(116L⋆+16)3 L⋆≤8 L⋆>8 同 LAB,CIE LUV 的优势也在于白点确定后的快速计算。 由于 CIE LUV 并没有针对自身 LUV 色度图所在平面,即 所在平面, 做类似于 LAB 的均匀化拓扑变形。因此,LUV 在色差均匀问题上的表现,要逊于 LAB。 但是,基于 LUV 在选定白点后的线性色彩空间转换特性,LUV 在数据传输和色彩压缩方面却起到了意料之外的表现。其设计思想最终为 YUV 色彩格式的制定打下了理论基础。 既然将色差问题拆分为均匀化和归一化的间接处理方法不太行,那么以颜色三要素角度出发将色差均匀直接做为目标,是否就能得到完美答案呢?之前我们提到,于 LAB 和 LUV 同时期下的挑战者是美标 HSL。HSL 正是探索这一问题答案的先行者,虽然最终得到的结果 可能不尽如人意。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_5_7.html":{"url":"Chapter_2/Language/cn/Docs_2_5_7.html","title":"2.5.7 颜色三要素色彩空间(HSV / HSI / HSL)","keywords":"","body":"2.5.7 颜色三要素色彩空间(HSL [HSV / HSI / HSL(Lightness)]) HSL(Hue,Saturation,Luminance) 色彩空间 又被称为 颜色三要素色彩空间,是对 HSV(Hue,Saturation,Value)色彩空间、 HSI(Hue,Saturation,Intensity)色彩空间、HSL(Hue,Saturation,Lightness)色彩空间的统称,简称 三要素色彩空间。这里的 V(Value)、I(Intensity)、L(Lightness)其实代指的,基本等同于前文中提及的光亮度(Luminance)的简化概念。HSI 色彩空间,在设计理念上趋同于 HSV 色彩空间。HSL(Lightness) 在 HSV 、 HSI上进行了改进整合。因此,通常所称的 HSL 色彩空间,即为 HSL(Lightness)色彩空间。 后文中为了说明,保持 HSL 统一代称,在需要区分说明时,使用 HSV、HSI、HSL(Lightness)独立称谓指定区别。 1938年,为了解决彩色电视信号的转换和传输问题,一位名叫 乔治·瓦伦西(Georges Valensi,1889 - 1980) 的法国电信工程师,提交了以色调与光亮度来进行彩色图片编码传输的解决方案并注册了专利 [14] [15] 。方案中首次引入了使用色调和光亮度,来构建色彩空间的概念。瓦伦西在持有专利的有效时间内,经过反复延长申请,使他的专利权从 1939年 一直持续到了 1971年。尴尬的是,彩色电视机在 1946年才被 约翰·洛吉·贝尔德(J.L.Baird,1888 - 1946) 发明出来。而彩电和彩色信号,真正得到大规模商业化应用和普及的时间节点,几乎到了20世纪70年代。因此,在美电 1953年出台美国安全彩电标准和 1954年推出 RCA彩色电视机之前,瓦伦西几乎没有靠此专利得到任何收益。 图 2-22 乔治·瓦伦西(Georges Valensi)于 1945 年在美专利局注册手稿 [15] 1978年,HSV 色彩空间的概念由 阿尔维·雷·史密斯(A.R Smith,Alvy Ray Smith III,1943 - 今) 提出。HSV 的目的是为了解决计算机显示对 RGB 的主色还原问题。这要求我们提供一种更直观,并更接近经典理论的,可连续变化且动态可分的色彩模型 [16] 。 而于1978年同年,在 乔布洛夫(George H. 
Joblove) 和 格林伯格(Donald Greenberg) 发表的的论文 《为计算机图形设计的色彩空间》 中 [17],也通过引入 HSI 色彩空间,来尝试解决这个问题。论文同时还拿 HSI 与 HSV 做了比对。 为什么认为 HSV 和 HSI 是可以约等的?仅仅只是因为两者近乎先后出现于同年?并不是。最关键的判断,还是来自于 HSV 和 HSI 对颜色空间的定义。可以认为 HSV 和 HSI 的差异,是一种观察角度导致的偏差,是同种概念的参考位选取的不同而导致的。这种差异主要体现在光亮度与饱和度在模型中的处理。两者的解决方案,在这两个色彩要素的计算与设定上,各有优劣。HSI 的饱和度选取方式,让模型更接近人眼对颜色的感知,使颜色从 RGB 转换 HSI 更为便捷。但同时也导致还原相对麻烦。HSV 正好相反。那么是否存在一种模型,可以取弊存优呢? 1979年,在 美国计算机协会(ACM) 旗下的 计算机图形图像特别兴趣小组(SIGGRAPH) 组织的年度会报会议上。 泰克科技有限公司(Tektronix, Inc. US) 的工程师们提出了 HSL(Lightness)色彩空间 [18],尝试综合 HSV 和 HSI 解决色彩感知还原与颜色空间转换问题。 HSL(Lightness)从数学角度上, 以中值对 HSV 和 HSI 的光亮度概念进行了整合,使饱和度的表示得到简化,并保留了 HSI 的视觉感官还原特点。这也使 HSL(Lightness)模型,于 1979 年年末的计算机图形标准委员会(Computer Graphics Standards Committee,CGSC)报告上,被选定作为 三要素色彩空间基础标准 的原因 [19] 。 为了更好的理解这一点,需要分析 HSV、HSI、HSL(Lightness)的异同。 相同的色调拓扑计算 HSV 和 HSI 色彩空间为了计算机色彩还原服务,本身模型基于 RGB 色彩空间的拓扑变化。如果我们将 RGB 色彩空间中的 白点(White Point) 和 黑点(Black Point) 连线,那么我们就能得到一条由白到灰到黑的渐变对角线,这条线被我们称之为 灰度线(Grey Line)。 HSV 和 HSI 以灰度线作为法线,取过黑点的平面为投影平面,将 RGB 色彩空间的单位立方体投影到了此平面上。为了区别于 标准 CIE 色度平面(CIE Chromaticity Plane),这个平面被称为 HSL 色度平面(HSL Chromaticity Plane)。 图 2-23 RGB 色彩空间投影建立 HSL 色度平面(HSL Chromaticity Plane)示意图 HSL 色彩空间,以该平面做为 基准平面。取从 青色(Cyan)指向红色(Red)的连线作为基准轴,取红色为 0°,青色为 180°。 假设 RGB 色彩空间内存在颜色 CRGBC_{RGB}CRGB ,在 HSL 色度平面上的投影为 CRGB′{C_{RGB}}^{\\prime}CRGB′ 。 CRGB′{C_{RGB}}^{\\prime}CRGB′ 与黑点连线和基准轴的逆时针夹角,记为 HHH 。为了更好的表示 CRGBC_{RGB}CRGB 与其 HSL 色度平面投影的关系,瓦伦西曾在自己的专利 [14] [15] 中将, 与黑点连线的长度称为 色相(Chrominance)。在 HSL 中,继承了这一点,记为 CCC 。 图 2-24 HSL 色度平面(HSL Chromaticity Plane)示意图 需要注意的是,引入色相是为了用一个中间变量,把 CRGB′{C_{RGB}}^{\\prime}CRGB′ 的投影平面特性转化为颜色三要素的物理表述 [14] [18] 。色相本身并不是一个标准概念,在此处的意义为白点颜色与选定颜色之间的欧式距离,而 并非指 色度(Chromaticity)。它是 HSL 引入的对同色调下颜色饱和度的代称,即 狭义色差(sCA)。 介于此,为了便于说明,我们 将 HSL 的中间量 CCC 按照更贴近的含义,称为色差。 而 实际上 HHH 就是 色调(Hue),有 HHH 、 CCC 的关系为: M=max(R,G,B)C=max(R,G,B)−min(R,G,B)H=60∘×{undefined,if C=0G−BC+0,if M=RB−RC+2,if M=GR−GC+4,if M=B {\\displaystyle \\begin{aligned} &M=\\max(R,G,B) \\\\ &C =\\text {max} (R,G,B) - \\text {min} (R,G,B) \\\\ &H = 60^ \\circ \\times {\\begin{cases} \\mathrm {undefined} ,& {\\text{if }} C=0 \\\\ {\\frac {G-B} {C}} + 0 ,& {\\text{if }} M=R \\\\ {\\frac {B-R} {C}} + 2 ,& {\\text{if }} M=G \\\\ {\\frac {R-G} {C}} + 4 ,& {\\text{if }} M=B \\end{cases}} \\end{aligned} } M=max(R,G,B)C=max(R,G,B)−min(R,G,B)H=60∘×⎩⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎧undefined,CG−B+0,CB−R+2,CR−G+4,if C=0if M=Rif M=Gif M=B 这样的表示方法有些不尽如人意。因为 RGB 色彩空间在 HSL 色度平面的投影,是一个正六边形。导致了 H′H^{\\prime}H′ 在转换为角度表示上,存在分段的情况。那么如何使其简化为非条件函数表示呢?HSL 采用了 对正六边形投影做了二维拓扑变为单位圆,来处理此问题。 图 2-25 HSL 色度平面(HSL Chromaticity Plane)连续性处理拓扑示意图 取基准轴从黑点指向红色为 X轴正方向,做直角坐标系。 记为 X轴单位长度为 α\\alphaα ,Y轴单位长度为 β\\betaβ ,有: α=R−G⋅cos(60∘)−B⋅cos(60∘)=12(2R−G−B)β=G⋅sin(60∘)−B⋅sin(60∘)=32(G−B) {\\displaystyle \\begin{aligned} &\\alpha =R-G\\cdot \\cos(60^ \\circ)-B\\cdot \\cos(60^ \\circ)={\\tfrac {1}{2}}(2R-G-B) \\\\ &\\beta =G\\cdot \\sin(60^ \\circ)-B\\cdot \\sin(60^ \\circ)={\\tfrac {\\sqrt {3}}{2}}(G-B) \\\\ \\end{aligned} } α=R−G⋅cos(60∘)−B⋅cos(60∘)=21(2R−G−B)β=G⋅sin(60∘)−B⋅sin(60∘)=2√3(G−B) 那么,中间量 CCC 可以表示为: C=α2+β2 {\\displaystyle C ={\\sqrt {\\alpha ^{2}+\\beta ^{2}}}} C=√α2+β2 同时,色调(Hue) HHH 与 CCC 的关系,就可以转化为 HHH 与 α\\alphaα 、 β\\betaβ 的关系了: H=atan2(β,α) {\\displaystyle \\begin{aligned} &H =\\text{atan2} (\\beta ,\\alpha ) \\end{aligned} } H=atan2(β,α) 这样在描述上,就比较简洁了。便于计算机处理。 不同的光亮度与饱和度处理 计算色调之后,HSV、HSI、HSL(Lightness)在 光亮度(Luminance),和 饱和度(Saturation) 的处理上就存在不同了。因为以灰度线为法线的缘故,光亮度较好抽象。记各模型光亮度(Luminance)表示分别为 Lvalue=VL_{value} = VLvalue=V 、 Lintensity=IL_{intensity} = ILintensity=I 、 Llightness=LL_{lightness} = LLlightness=L 。 有 VVV 、 III 、 LLL 与原 RGB 
颜色空间内颜色 CRGBC_{RGB}CRGB 的关系如下: V=max(R,G,B)=MI =avg(R,G,B)=(R+G+B)3L=mid(R,G,B)=(max(R,G,B)+min(R,G,B))2 {\\displaystyle \\begin{aligned} &V = \\max(R,G,B) = M \\\\ &I \\ = \\text{avg} (R,G,B)={\\tfrac {(R+G+B)}{3}} \\\\ &L = \\text {mid} (R,G,B)={\\tfrac {(\\max(R,G,B)+\\min(R,G,B))}{2}} \\end{aligned} } V=max(R,G,B)=MI =avg(R,G,B)=3(R+G+B)L=mid(R,G,B)=2(max(R,G,B)+min(R,G,B)) 如果我们取色调(Hue) HHH 为 50∘50^ \\circ50∘ (偏黄) & 230∘230^ \\circ230∘ (偏蓝)。以色差 CCC 和光亮度构成坐标系,取色差 CCC 为横轴,各模型光亮度参数为纵轴。那么条件下 在 HSV、HSI、HSL(Lightness)的 色差切面(Chrominance Slice),就如下图所示: 图 2-26 HSV、HSI、HSL(Lightness)色差切面(Chrominance Slice)示意图 图中灰色区域为无效值。指定色差 CCC 与光亮度配参构成的切面,需要在坐标范围内避开无效取值。这就意味着 以色差 CCC 作为关键参数的模型,必须以区域限定的方法处理灰区问题。而 HSV、HSI、HSL(Lightness)被设计的目的,是为计算机色彩还原服务的。以条件限定的方式来处理,将会为计算机运算带来大量逻辑判断,极大的影响图片处理效率。因此,色差 CCC 并不能 被直接用作 HSL 的基础参数。这也是为何不以饱和度(Saturation)的称谓,来直接指代色差 CCC 的原因。HSL 中的饱和度概念,与实际颜色三要素的饱和度定义(狭义)存在差异。这里的饱和度,是对实际物理饱和度概念进行衍射拓展后的结果,即广义饱和度。 如何减少这些不必要的运算,得到广义饱和度参数呢?直接的做法是对 色差切面(Chrominance Slice) 进行 一定程度的形变,使得色差切面能够填充整个坐标平面。由于各模型在设定之初,已经通过取用灰度线为投影法线的方式,在几何定义上抽象出纵轴参数 VVV 、 III 、 LLL 。参数 VVV 、 III 、 LLL 直观体现了颜色三要素的光亮度(Luminance)对物理发光强度的描述。因此,只需要做水平方向的拉伸(压缩),用拓扑后的横坐标单位,来替换色差 作为模型的饱和度参数即可。记 各模型饱和度(Saturation)分别为 SHSV=SVS_{HSV} = S_{V}SHSV=SV 、 SHSI=SIS_{HSI} = S_{I}SHSI=SI 、 SHSL=SLS_{HSL} = S_{L}SHSL=SL 。 有 SVS_{V}SV 、 SIS_{I}SI 、 SLS_{L}SL 与 CRGBC_{RGB}CRGB 、色差 CCC 、各自亮度值的关系如下: SV={0,if V=0CV, otherwiseSI={0,if I=01−min(R,G,B)I, if I≠0SL={0,if L=1 or L=0C1−∣2L−1∣, otherwise {\\displaystyle \\begin{aligned} &S_{V}={ \\begin{cases} {0}, &{\\text{if }} V = 0 \\\\ {\\frac {C}{V}}, \\ \\ &{\\text{otherwise}} \\end{cases}} \\\\ &S_{I}={ \\begin{cases} {0}, &{\\text{if }} I = 0 \\\\ {1-{\\frac {\\min(R,G,B)}{I}}}, \\ \\ &{\\text{if }} {I \\neq 0} \\end{cases}} \\\\ &S_{L}={ \\begin{cases} {0}, &{\\text{if }} L = 1 {\\text{ or }} L = 0 \\\\ {\\frac {C}{1-|2L-1|}}, \\ \\ \\ \\ \\ \\ &{\\text{otherwise}} \\end{cases}} \\end{aligned} } SV=⎩⎨⎧0,VC, if V=0otherwiseSI=⎩⎨⎧0,1−Imin(R,G,B), if I=0if I≠0SL=⎩⎨⎧0,1−∣2L−1∣C, if L=1 or L=0otherwise 转换后,的 色差切面(Chrominance Slice) 就 比较连续 了: 图 2-27 HSV、HSI、HSL(Lightness)切面拓扑示意图 很容易看出 HSL(Lightness)在保证自身任意选定色调 HHH 时的色差切面不包含无效区域的同时,还具有 HSI 本身对人眼观察颜色还原的特点。而其计算过程中依赖的条件判断,则可以使用绝对值运算代替。可以说,HSL(Lightness)结合了 HSV、HSI 的优点,且一定程度上避开了两者的缺陷。 三要素色彩空间的配色函数 现在,所有要素准备齐全。如果记目标颜色为 CHSLC_{HSL}CHSL ,则 HSL 配色函数 如下: CHSL=H⋅Hue+S⋅Saturation+L⋅Luminance=Vector[H,S,L] {\\displaystyle C_{HSL} = H \\cdot Hue + S \\cdot Saturation + L \\cdot Luminance = Vector[H, S, L]} CHSL=H⋅Hue+S⋅Saturation+L⋅Luminance=Vector[H,S,L] 如果记 CHSLC_{HSL}CHSL 在 RGB 色彩空间对应颜色为 CRGB=(R,G,B)C_{RGB} = (R, G, B)CRGB=(R,G,B) ,记有 CRGB→CHSLC_{RGB} \\rightarrow C_{HSL}CRGB→CHSL 的转换函数为 FFF ,则 CHSL→CRGBC_{HSL} \\rightarrow C_{RGB}CHSL→CRGB 的反向过程就为 F−1F^{-1}F−1 。有之前使用的通用中间量: α=12(2R−G−B)β=32(G−B) C=α2+β2≈(max(R,G,B)−min(R,G,B))=range(R,G,B) {\\displaystyle \\begin{aligned} &\\quad \\quad \\quad \\quad \\quad \\alpha = {\\tfrac {1}{2}}(2R-G-B) \\quad \\quad \\quad \\beta = {\\tfrac {\\sqrt {3}}{2}}(G-B) \\ \\ \\ \\ \\ \\ \\\\ & C = {\\sqrt {\\alpha ^{2}+\\beta ^{2}}} \\approx (\\max(R,G,B) - min(R,G,B)) = \\text {range} (R,G,B) \\\\ \\end{aligned} } α=21(2R−G−B)β=2√3(G−B) C=√α2+β2≈(max(R,G,B)−min(R,G,B))=range(R,G,B) 存粹使用 α\\alphaα 、 β\\betaβ 会使计算过于复杂,因此中间量 CCC 在处理时大多数都是用原有定义代替。 α\\alphaα 、 β\\betaβ 仅用于角度计算。从之前讲解可知,这样做并不会导致偏色,而只会影响 HSL 色度平面的几何样式。结合之前的完整推导过程,带入 α\\alphaα 、 β\\betaβ 、 CCC ,能得到从 RGB 到 HSL 的映射 FFF 为: FHSV={H=atan2(β,α)S=range(R,G,B)⋅max(R,G,B)−1V=max(R,G,B)FHSI={H=atan2(β,α)S={0,if I=01−min(R,G,B)⋅avg(R,G,B)−1, if I≠0I 
=avg(R,G,B)=(R+G+B)3FHSL={H=atan2(β,α)S=12⋅range(R,G,B)⋅min(L, 1−L)−1L=mid(R,G,B)=(max(R,G,B)+min(R,G,B))2 {\\displaystyle \\begin{aligned} &F_{HSV} ={ \\begin{cases} & H = \\text {atan2} (\\beta ,\\alpha ) \\\\ & S = \\text {range} (R,G,B) \\cdot \\max(R,G,B)^{-1} \\\\ & V = \\max(R,G,B) \\end{cases} } \\\\ &F_{HSI} ={ \\begin{cases} & H = \\text {atan2} (\\beta ,\\alpha ) \\\\ & S = { \\begin{cases} {0}, &{\\text{if }} I = 0 \\\\ 1- {\\min(R,G,B)} \\cdot {\\text {avg} (R,G,B)^{-1}}, \\ \\ &{\\text{if }} {I \\neq 0} \\end{cases}} \\\\ & I \\ = \\text {avg} (R,G,B)={\\tfrac {(R+G+B)}{3}} \\end{cases} } \\\\ &F_{HSL} ={ \\begin{cases} & H = \\text {atan2} (\\beta ,\\alpha ) \\\\ & S = \\tfrac {1}{2} \\cdot \\text {range} (R,G,B) \\cdot {\\min(L,\\ 1 - L)}^{-1} \\\\ & L = \\text {mid} (R,G,B)={\\tfrac {(\\max(R,G,B)+\\min(R,G,B))}{2}} \\end{cases} } \\end{aligned} } FHSV=⎩⎪⎨⎪⎧H=atan2(β,α)S=range(R,G,B)⋅max(R,G,B)−1V=max(R,G,B)FHSI=⎩⎪⎪⎪⎨⎪⎪⎪⎧H=atan2(β,α)S={0,1−min(R,G,B)⋅avg(R,G,B)−1, if I=0if I≠0I =avg(R,G,B)=3(R+G+B)FHSL=⎩⎪⎨⎪⎧H=atan2(β,α)S=21⋅range(R,G,B)⋅min(L, 1−L)−1L=mid(R,G,B)=2(max(R,G,B)+min(R,G,B)) 而从 HSL 到 RGB ,由于色度被作为了传入参数,在转换为 RGB 时就需要处理扇区划分问题。记 ∠RG:H∈[0∘,120∘)\\angle_{RG}: H \\in [0^{\\circ}, 120^{\\circ})∠RG:H∈[0∘,120∘) , ∠GB:H∈[120∘,240∘)\\angle_{GB}: H \\in [120^{\\circ}, 240^{\\circ})∠GB:H∈[120∘,240∘) , ∠BR:H∈[240∘,360∘)\\angle_{BR}: H \\in [240^{\\circ}, 360^{\\circ})∠BR:H∈[240∘,360∘) ,其中 H=0∘H = 0^{\\circ}H=0∘ 或 H=360∘H = 360^{\\circ}H=360∘ 时,有 (R,G,B)=(1, 0, 0)(R,G,B) = (1,\\ 0,\\ 0)(R,G,B)=(1, 0, 0) 。则映射 F−1F^{-1}F−1 为: H, ∠const∈[0∘, 360∘)FHSV−1={k={(∠const+H60∘) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,V)∠const=f∠consthsv=V−VS⋅sector⇒(R,G,B)=FHSV−1(H,S,V)=(f300∘hsv, f180∘hsv, f60∘hsv)FHSI−1={k={(∠const+H60∘) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,I)∠const=f∠consthsi=I−IS⋅sector⇒(R,G,B)=FHSI−1(H,S,I)=(f300∘hsi, f180∘hsi, f60∘hsi)FHSL−1={k={(∠const+H60∘) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,L)∠const=f∠consthsl=L−LS⋅sector⇒(R,G,B)=FHSL−1(H,S,L)=(f300∘hsl, f180∘hsl, f60∘hsl) {\\displaystyle \\begin{aligned} H ,\\ {\\angle_{const}} &\\in [0^ \\circ,\\ 360^ \\circ) \\\\ {F_{HSV}}^{-1} &= { \\begin{cases} & k = \\{(\\tfrac {\\angle_{const} + H}{60^ \\circ})\\ \\bmod\\ 6\\} \\\\ & sector = \\max(0,\\ \\min(k,\\ 4-k,\\ 1)) \\\\ & f(H,S,V)_{\\angle_{const}} = f_{\\angle_{const}} ^{hsv} = V-VS \\cdot sector \\end{cases} } \\\\ &\\Rightarrow (R,G,B) = {F_{HSV}}^{-1}(H,S,V) = (f_{300^ \\circ}^{hsv},\\ f_{180^ \\circ}^{hsv},\\ f_{60^ \\circ}^{hsv}) \\\\ \\\\ {F_{HSI}}^{-1} &= { \\begin{cases} & k = \\{(\\tfrac {\\angle_{const} + H}{60^ \\circ})\\ \\bmod\\ 6\\} \\\\ & sector = \\max(0,\\ \\min(k,\\ 4-k,\\ 1)) \\\\ & f(H,S,I)_{\\angle_{const}} = f_{\\angle_{const}} ^{hsi} = I-IS \\cdot sector \\end{cases} } \\\\ &\\Rightarrow (R,G,B) = {F_{HSI}}^{-1}(H,S,I) = (f_{300^ \\circ}^{hsi},\\ f_{180^ \\circ}^{hsi},\\ f_{60^ \\circ}^{hsi}) \\\\ \\\\ {F_{HSL}}^{-1} &= { \\begin{cases} & k = \\{(\\tfrac {\\angle_{const} + H}{60^ \\circ})\\ \\bmod\\ 6\\} \\\\ & sector = \\max(0,\\ \\min(k,\\ 4-k,\\ 1)) \\\\ & f(H,S,L)_{\\angle_{const}} = f_{\\angle_{const}} ^{hsl} = {L - LS \\cdot sector} \\end{cases} } \\\\ &\\Rightarrow (R,G,B) = {F_{HSL}}^{-1}(H,S,L) = (f_{300^ \\circ}^{hsl},\\ f_{180^ \\circ}^{hsl},\\ f_{60^ \\circ}^{hsl}) \\\\ \\end{aligned} } H, ∠constFHSV−1FHSI−1FHSL−1∈[0∘, 360∘)=⎩⎪⎨⎪⎧k={(60∘∠const+H) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,V)∠const=f∠consthsv=V−VS⋅sector⇒(R,G,B)=FHSV−1(H,S,V)=(f300∘hsv, f180∘hsv, f60∘hsv)=⎩⎪⎨⎪⎧k={(60∘∠const+H) mod 6}sector=max(0, min(k, 4−k, 
1))f(H,S,I)∠const=f∠consthsi=I−IS⋅sector⇒(R,G,B)=FHSI−1(H,S,I)=(f300∘hsi, f180∘hsi, f60∘hsi)=⎩⎪⎨⎪⎧k={(60∘∠const+H) mod 6}sector=max(0, min(k, 4−k, 1))f(H,S,L)∠const=f∠consthsl=L−LS⋅sector⇒(R,G,B)=FHSL−1(H,S,L)=(f300∘hsl, f180∘hsl, f60∘hsl) 即: (R,G,B)=F−1(H,S,L)=(f300∘, f180∘, f60∘) (R,G,B) = {F}^{-1}(H,S,L) = (f_{300^ \\circ},\\ f_{180^ \\circ},\\ f_{60^ \\circ}) (R,G,B)=F−1(H,S,L)=(f300∘, f180∘, f60∘) 可以看出,排除 CRGB→CHSLC_{RGB} \\rightarrow C_{HSL}CRGB→CHSL 后 HSL 代表值的不同外, F−1F^{-1}F−1 并不存在显著差异。这正是因为 HSV、HSI、HSL(Lightness)三者的色彩空间设定,在本质上是一样的而产生的结果。 差异只存在于几何切面的抽象上。 显然 HSL 模型直观地体现了颜色三要素的两个重要事实,即: 亮度与图像的色彩信息无关,色彩信息体现自其色调和饱和度。这使得 HSL 色彩空间更适合在,对需要偏重于颜色三要素基础,进行色彩基础分析和检测的场景。 所以 HSL 的缺陷也很明显。对比 CIE LAB 和 CIE LUV,虽然 HSL 具有较好的对色彩生理学感知还原的特点,但 HSL 在 RGB 转换上却没法像 LAB 与 LUV 一样快速。后者在指定白点后,就能一线性关系将色彩转换到 XYZ 色彩空间,而 XYZ 到 RGB 只需要一个固定矩阵即可。这就意味着 HSL 在 RGB 换算上更为复杂。 另外,HSL 和 LAB 两者,都没有很好的处理到颜色压缩存储和数据传输方面的设计。除了精准调节和对比场景会使用 HSL 外(这种场景 CIE LAB 也能胜任且更精确),HSL 相较于 CIE LAB 和 CIE LUV 色彩空间(尤其是与 LUV 相比)并没有太大的优势。因此,各个组织(包括 CIE、ITU等)至今仍在尝试用更为先进的色彩统一化方案解决“均色问题”。 虽然存在各种弊端,但 HSL 对数据传输的探索和创造性的色彩空间设定,依旧 为后来 ITU 制定 YUV 色彩格式提供了不少思路上的帮助。使现代色彩存储体系,在结合 CIE 1976 UCS(即 LAB 与 LUV)的归一化和 HSL 的坐标设定的基础上,得以得到拓展。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_6.html":{"url":"Chapter_2/Language/cn/Docs_2_6.html","title":"2.6 色彩的存储","keywords":"","body":"2.6 色彩的存储 1960年,来自 贝尔实验室(Bell Laboratories) 的 穆罕默德·阿塔拉(Mohamed M. Atalla,1924 - 2009) 和 姜大元(Dawon Kahng,1931 - 1992),发现 金属氧化物半导体(MOS [Metal Oxide Semiconductor]) 可以借由场效应进行信息存储的现象,成功开发了 金属氧化物半导体场效应晶体管(MOSFET [Metal Oxide Semiconductor Field Effect Transistor])。随后,贝尔实验室(Bell Laboratories)联合喷气推进实验室(Jet Propulsion Laboratory)与其他研究机构,就 “提高图像在计算机处理过程中的效果增强”,提出了一系列用于数字图像处理(Digital Image Processing)的方法。其中 “关于如何利用有限的物理存储空间来保存图片像素点” 的部分,为图片灰度单色存储的提供了可行方案 [44]。 这一系列理论于 1964年 应用在了徘徊者7号月面探测器(Space Detector Ranger 7)的计算机软硬件设计上,并以此得到了 4300 张高分辨率月面摄影。 月面壮举极大的鼓舞了计算机图形学的发展,同时也让图片压缩存储需求开始变得至关重要。在此背景下,1979年,首个 单片数字信号处理器(DSP [Digital Signal Processor]) 诞生了。数字信号处理器通过 离散余弦变换(DCT) 技术对图片进行了 数模转换。该技术使图像像素能够以 0-1 单字节码(1-bit)的形式,存储在计算机晶体管中,形成了最初的 1-bit 灰度单色格式 [45]。 让离散化存储颜色成为了计算机图像像素存储的物理共识。 随着 19世纪 80年代个人电脑的快速发展。灰度图格式也从 单字节码(1-bit),经过 IBM 单色显示屏适配器(MDA [Monochrome Display Adapter]) 2-bit 格式,Commodore 128 所搭载 8563 显示控制器(VDC [Video Display Controller]) 提供的 4-bit 格式,演变到了Apple II 与 IBM 5150 的 8-bit 单色格式。 1981 年,IBM 结合 CIE 1976 UCS 在 RGB 色彩空间上的补充,开发并发布了携带彩色数据编解码 IBM 彩色图形适配器(CGA [Color Graphics Adapter]) 的 IBM 5153。 标志着计算机正式进入了彩色时代。自此开启了计算机 现代色彩格式(Modern Color Format) 的大门。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_6_1.html":{"url":"Chapter_2/Language/cn/Docs_2_6_1.html","title":"2.6.1 色彩格式(Color Format)与色彩存储","keywords":"","body":"2.6.1 色彩格式(Color Format) 色彩格式(Color Format) 包含了计算机对颜色的 存储格式(Data Format) 和 色彩格式空间(Color Format Space) 两部分。 同其他工业设备一样,计算机也受自身软硬件的限制,而需要特定的色彩模式。考虑到其本身是一种仅应用于计算机工业体系内(虽然现在计算机无处不在)的 设备相关色彩空间(Device Dependent Color Space),业内将之称为 色彩格式空间(Color Format Space),简称为 格式空间(Format Space)。 正如前文所提,色彩格式根据参考设备无关色彩空间的不同,被分为 RGB 色彩格式和 YUV 色彩格式。两者理论均衍生自 CIE 1976 UCS 的补充色彩空间方案,并在之后被分别设备相关化。 RGB 色彩格式,即 原色格式(Primaries Format),属于 CIE RGB 色彩空间体系; YUV 色彩格式,即 传输格式(Transport Format),根据 CIE LUV 特性被分属为 CIE XYZ 色彩空间体系。 RGB 与 YUV 共同组成了现代计算机色彩格式的两大分类。 为了更好的进行对比说明,我们用经典的彩色鹦鹉图片,来辅助说明不同色彩格式对图片携带颜色信息的影响。 图 2-28 模板彩色鹦鹉原色图片 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right 
reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_6_2.html":{"url":"Chapter_2/Language/cn/Docs_2_6_2.html","title":"2.6.2 RGB 体系色彩格式","keywords":"","body":"2.6.2 RGB 体系色彩格式 原色格式(Primaries Format),或 RGB 体系色彩格式最大的特点就是在于,其对颜色表示的富集程度和存储空间大小密切相关。可以说 RGB 色彩格式中,每个通道能够占用的存储空间越大,则能够表示的颜色越多。非常的简单粗暴。统一的,RGB 色彩格式的格式空间,即为 归一化的 CIE RGB 色彩空间。 3-bit RGB 3-bit RGB 色彩格式 采用了红绿蓝各 1-bit 的存储格式。因此,3-bit RGB 最多只能表示 23=82^3 = 823=8 种颜色: 图 2-29 4-bit RGBI 可表示的所有颜色 以鹦鹉图为例,在 3-bit RGB 格式下的展示效果如下: 图 2-30 3-bit RGB 表示的鹦鹉图 此格式被广泛运用于 Oric 和 NEC 的 PC-8801 与 PC-9801 机型 上。 4-bit RGBI 1981年,IBM 在其 CGA 中,以 4-bit RGBI 格式 对彩色图片进行了存储。在此格式下,颜色被分为 RGBI 4个通道,每个通道各用 1-bit 表示。因此,RGBI 最多只能表示 23×2=162^3 \\times 2 = 1623×2=16 种颜色: 图 2-31 4-bit RGBI 可表示的所有颜色 以鹦鹉图为例,在 4-bit RGBI 格式下的展示效果如下: 图 2-32 4-bit RGBI 表示的鹦鹉图 此格式 只有 IBM 5153 在使用。 6-bit RGB 6-bit RGB 色彩格式 采用了红绿蓝各 2-bit 的存储格式。因此,6-bit RGB 最多只能表示 (22)3=64(2^2)^3 = 64(22)3=64 种颜色: 图 2-33 6-bit RGB 可表示的所有颜色 以鹦鹉图为例,在 6-bit RGB 格式下的展示效果如下: 图 2-34 6-bit RGB 表示的鹦鹉图 此格式在 IBM 的增强图形适配器(EGA [Enhanced Graphics Adapter])上被首次运用。并在之后伴随了多个 IBM 主机版本。 9-bit RGB 9-bit RGB 色彩格式 采用了红绿蓝各 3-bit 的存储格式。因此,9-bit RGB 最多只能表示 (23)3=512(2^3)^3 = 512(23)3=512 种颜色: 图 2-35 9-bit RGB 可表示的所有颜色 以鹦鹉图为例,在 9-bit RGBI 格式下的展示效果如下 图 2-36 9-bit RGB 表示的鹦鹉图 此格式最早在 1985年 的土星520ST(Atari 520ST)机型 上被使用。 12-bit RGB 12-bit RGB 色彩格式 采用了红绿蓝各 4-bit 的存储格式。因此,12-bit RGB 最多能表示 (24)3=4096(2^4)^3 = 4096(24)3=4096 种颜色: 图 2-37 12-bit RGB 可表示的所有颜色 以鹦鹉图为例,在 12-bit RGBI 格式下的展示效果如下 图 2-38 12-bit RGB 表示的鹦鹉图 此格式被运用在 Apple IIGS、土星 STE 系列 和 世嘉(Sega)Game Gear 游戏机 上。 15-bit RGB 15-bit RGB 色彩格式 采用了红绿蓝各 5-bit 的存储格式。因此,15-bit RGB 最多能表示 (25)3=32,768(2^5)^3 = 32,768(25)3=32,768 种颜色: 图 2-39 15-bit RGB 可表示的所有颜色 以鹦鹉图为例,在 15-bit RGBI 格式下的展示效果如下 图 2-40 15-bit RGB 表示的鹦鹉图 此格式被运用在 索尼的 PS1 游戏机 上。 16-bit RGB(RGB565) 16-bit RGB 色彩格式 采用了红蓝各 5-bit ,而绿色 6-bit 的存储格式。因此,16-bit RGB 最多只能表示 (25)2×(26)=65,536(2^5)^2 \\times (2^6) = 65,536(25)2×(26)=65,536 种颜色: 图 2-41 16-bit RGB 可表示的所有颜色 以鹦鹉图为例,在 16-bit RGBI 格式下的展示效果如下 图 2-42 16-bit RGB 表示的鹦鹉图 此格式被运用在 携带有扩展图形阵列(XGA [Extended Graphics Array])的 IBM 机型 上。 18-bit RGB 18-bit RGB 色彩格式 采用了红绿蓝各 6-bit 的存储格式。因此,18-bit RGB 最多能表示 (26)3=262,144(2^6)^3 = 262,144(26)3=262,144 种颜色: 图 2-43 18-bit RGB 可表示的所有颜色 以鹦鹉图为例,在 18-bit RGBI 格式下的展示效果如下 图 2-44 18-bit RGB 表示的鹦鹉图 此格式被运用在 IBM 8514,以及 IBM 携带视频图像阵列(VGA [Video Graphics Array]) 或 多色图像阵列(MCGA [Multi-Color Graphics Array])显卡 的设备上。 24-bit RGB & 32-bit RGBA8888 24-bit RGB 色彩格式 采用了红绿蓝各 8-bit 的存储格式。因此,24-bit RGB 最多能表示多达 (28)3=2563=16,777,216(2^8)^3 = 256^3 = 16,777,216(28)3=2563=16,777,216 种颜色: 图 2-45 24-bit RGB 可表示的所有颜色 以鹦鹉图为例,在 24-bit RGBI 格式下的展示效果如下 图 2-46 24-bit RGB 表示的鹦鹉图 这一格式最早于 1998年,被应用于 IBM 携带超级视频图像阵列(SVGA [Super Video Graphics Array])显卡 的设备上。由于 24-bit 对应 RGB 三通道各 8-bit 的特性和硬件 RAM 非常契合,使此格式至今仍为最常用的 RGB 色彩格式。配合额外 Alpha 透明度通道,24-bit RGB 色彩格式可以被扩充为 32-bit RGBA8888 色彩格式,进一步提升颜色精细度。 显然,RGB 色彩格式和物理存储空间的扩展紧密相关,其每一次可表示色阶的扩充,都意味着一次存储介质和空间的显著提升。 此特点决定了,当市面上绝大多数显卡的存储及处理能力没有发展的情况下,更细腻的 RGB 色彩格式也不太可能得到推广。同理,广泛应用于图像传输的 YUV 色彩格式则是规格驱动,其更多依赖于传输协议的演变和数据带宽的更新迭代。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/Docs_2_6_3.html":{"url":"Chapter_2/Language/cn/Docs_2_6_3.html","title":"2.6.3 YUV 体系色彩格式","keywords":"","body":"2.6.3 YUV 体系色彩格式 传输格式(Transport Format),即当下我们常用的 YUV 色彩格式也被称为 YCbCr、YPbPr、Y′UV 色彩格式。其中,Y/Y′ 指代光亮度分量,U/Cb/Pb 指代蓝色色度分量,V/Cr/Pr 指代红色色度分量。YUV 色彩格式受启发自,CIE LUV 中用与 xyY 色度图有线性转换关系的 uv 分量表示平面色彩信息的思想,最初被 国际电信联盟无线电通信部门(ITU-R [International 
Telecommunication Union Radiocommunication Sector]) 做为 彩色图像编解码流水线(Color Image Pipeline) 的一部分提出,用来 对图片进行压缩传输。 在之前的讨论中我们知道,CIE LUV 在指定白点情况下,可以直接将其所含色彩经由线性变换转换到 CIE XYZ 色彩空间,再从 CIE XYZ 依托固定转换矩阵,变换到 CIE RGB 色彩空间。将两个过程进行合并可知,存在从 LUV 到 RGB 的固定转换矩阵。因此,做为 CIE LUV 思路衍生下的实践产物,YUV 同样也具有这一特点。不同于 LUV 设备无关,YUV 是设备相关的。其受限于外部因素,对整体色度平面的处理上有一定程度的调整,使 YUV 根据采用规格的不同,有着不同的设备相关化调参。不过设备相关化处理带来的弊端,就是 YUV 相较于 LUV 来说色差变换更为不均匀。 当前 YUV 的常用规格 有三种:BT.601、BT.709、BT.2020。其中,BT.601 最早于 1982 年提出,最新一次修订于 2011年,适用于 标准画质电视(SDTV [Standard Definition Television]) [46] 。BT.709 最早于 1990 年提出,最新一次修订于 2015年,适用于 高清画质电视(HDTV [High Definition Television]) [47] 。BT.2020 最早于 2012 年提出,最新一次修订于 2015年,适用于 超高清画质电视(UHDTV [Ultra-High Definition Television]) [48] 。 YUV 是目前唯一做到了工程意义上打通图像数据压缩、传输和存储的色彩格式。为了便于说明,我们这里假设 Y、U、V 通道皆以当下主流的 8-bit 形式存放数据。 YUV 的数字电信号特征 YUV 被设计的目的主要就是为了进行数据传输,而数据传输就意味着数模信号转换。所以,根据可用电信号区间,YUV 存在两种有效范围:狭隘区间(Narrow Range)、完整区间(Full Range)。 狭隘区间 中,Y通道取值范围为 [16, 235][16,\\ 235][16, 235] ,U、V通道取值范围为 [16, 240][16,\\ 240][16, 240] ; 完整区间 中,Y、U、V通道取值范围均为 [0, 255][0,\\ 255][0, 255] ; 大多数应用场景下,YUV 都以狭隘范围表示,究其原因是由电讯号传输特性决定的。在广播电视系统中,为了防止过高和过低的信号造成 临界电平过载(Critical Level Overload) 现象,会人为的在信号可用模拟电平区段上,预留出一定的 “保护带”,保护最高位和最低位的电平不被使用。为了便于指代,电气学上把“保护带”称为 保护范围(Protection Range),被保护的最高位和最低位电平称为 保护电平(Protection Level),用于指代零信号量的电平被称为 消隐电平(Blanking Level),可用电平区段的上边界称为 白电平(White Level),下边界称为 黑电平(Black Level),黑白电平之间就是 信号电平(Signal Level) 了。 对于 8-bit 传输信号来说,保护电平为 0 mV0 \\ mV0 mV 和 255 mV255 \\ mV255 mV 。Y通道取 16 mV16 \\ mV16 mV 为消隐电平,可用电平区间上下分别预留了 [236, 254][236,\\ 254][236, 254] 和 [1, 15][1,\\ 15][1, 15] 的保护范围;U、V 通道则以 128 mV128 \\ mV128 mV 为消隐电平,可用电平区间上下分别预留了 [241, 254][241,\\ 254][241, 254] 和 [1, 15][1,\\ 15][1, 15] 的保护电平。所有可用的信号电平,分别组成了 Y、U、V 三通道取值范围的狭隘区间。 图 2-47 YUV Y通道信号电平分配图 [49] 图 2-48 YUV U通道信号电平分配图 [49] 图 2-49 YUV V通道信号电平分配图 [49] 对于不需要进行数据传输的场景,就不再需要保护电平了。 此时 8-bit 信号电平可以取用到 [0, 255][0,\\ 255][0, 255] 的完整范围表示。 解释清楚信号范围划分,接下来就该说明 ITU 对于 YUV 色彩格式下的 RGB YUV 颜色互转的定义了。在 YUV 和 RGB 的转换上,狭隘范围(Narrow)和完整范围(Full)并不影响推算,仅影响最终的转换矩阵结果。 YUV 与 RGB 间的相互转换 从工程角度出发,YUV 需要尽可能简单的处理过程。所以,YUV 在采用 LUV 转换思路的基础上结合了 HSL 的坐标处理思想,以 XYZ 坐标系下 xyY 色度图所在平面,截取色域三角形有效范围构建质心坐标的形式,进行了 YUV 色彩格式的格式空间关键要素定义。 不同于 LUV 和 HSL,YUV 并没有对完整的可见光色域进行拓扑变换,而是需要 手动设定 RGB 三原色的代表点和质心,来确定围成的色域范围和坐标系。因此,YUV 的色彩空间天然就是有缺陷的。不过,放弃完整色域换来了 YUV 足够通用的方法论。后续规格上的更新,只用按照工程测定结果来进行色域范围的调整,就能延续同一套计算过程满足新的需求。 这种可根据情况修整的延展性,也是 YUV 被广泛运用的原因之一。 那么,在 YUV 中 RGB 三原色的选取是否就是完全随意的呢? 
答案是否定的。 RGB 三原色代表点的选取,完全依赖于设备本身对三原色的设定。即,设备的 RGB 色彩格式的格式空间决定了设备的三原色。由于不同的设备间差异可能非常大,想要使 YUV 格式通用,就必须在 YUV 体系的色彩格式规格制定时,固定做为标准的 RGB 三色坐标,通过自身格式空间的线性特征,来抹平不同设备间的转换误差。 我们假设 YUV 格式空间中,用于参照的 R点取自 xyY 色度图中坐标 R(xR, yR)R(x_R,\\ y_R)R(xR, yR) ,G点取自 xyY 色度图中坐标 G(xG, yG)G(x_G,\\ y_G)G(xG, yG) ,B点取自 xyY 色度图中坐标 B(xB, yB)B(x_B,\\ y_B)B(xB, yB) 。有下图: 图 2-50 YUV 格式空间在 xyY 色度图上的色域裁剪说明 根据图示可知,落于 RGB 围成三角形范围内的任意点 CCC ,与三角形顶点存在关系: C=B+(gB+rB)=R+(bR+gR)=G+(bG+rG)⇒C−G=b⋅(B−G)+r⋅(R−G) {\\displaystyle \\begin{aligned} &C = B + (gB + rB) = R + (bR + gR) = G + (bG + rG) &\\Rightarrow \\\\ &C - G = b \\cdot (B - G) + r \\cdot (R - G) \\end{aligned} } C=B+(gB+rB)=R+(bR+gR)=G+(bG+rG)C−G=b⋅(B−G)+r⋅(R−G)⇒ 取质心 WWW 为 轴心。指定 YUV 色彩空间下 Y+U+V=1Y + U + V = 1Y+U+V=1 ,选择 U=Cb⋅(B−W)U = C_b \\cdot (B - W)U=Cb⋅(B−W) 、 V=Cr⋅(R−W)V = C_r \\cdot (R - W)V=Cr⋅(R−W) 为坐标轴, CbC_bCb 、 CrC_rCr 为归一化因子。有 YYY 有效区间为 [0, 1][0,\\ 1][0, 1] , UUU 有效区间为 [−Umax, Umax][-U_{max},\\ U_{max}][−Umax, Umax] , VVV 有效区间为 [−Vmax, Vmax][-V_{max},\\ V_{max}][−Vmax, Vmax] 。 这里以 YUV 对应规格选定的 RGB 三色电信号,经过 电位差伽马预矫正(Gamma pre-corrected) 得到的归一化电平测量值 (WR ,WG ,WB)(W_R \\ , W_G \\ , W_B )(WR ,WG ,WB) 为依据 [46] [47] [48] ,取 YUV 光亮度参数有线性公式 Y=WR⋅R+WG⋅G+WB⋅BY = W_R \\cdot R + W_G \\cdot G + W_B \\cdot BY=WR⋅R+WG⋅G+WB⋅B 。则由点 CCC 与质心 WWW 的向量差 C−W=(C−G)−(W−G)C - W = (C -G)-(W-G)C−W=(C−G)−(W−G) 推得: Y=WR⋅R+WG⋅G+WB⋅BU=Umax1−WB⋅(B−Y)V=Vmax1−WR⋅(R−Y) {\\displaystyle \\begin{aligned} Y &= W_R \\cdot R + W_G \\cdot G + W_B \\cdot B \\\\ U &= {\\tfrac {U_{max}} {1 - W_B}} \\cdot (B - Y) \\\\ V &= {\\tfrac {V_{max}} {1 - W_R}} \\cdot (R - Y) \\\\ \\end{aligned} } YUV=WR⋅R+WG⋅G+WB⋅B=1−WBUmax⋅(B−Y)=1−WRVmax⋅(R−Y) 上式即为 YUV 格式空间的狭义配色函数。需要注意的是,测量值 (WR ,WG ,WB)(W_R \\ , W_G \\ , W_B )(WR ,WG ,WB) 是规格强相关的。其取值仅取决于规格中指定的 RGB 三色对应电信号电配比。 根据 RGB 与 YUV 归一化后 Y+U+V=R+G+B=1Y + U + V = R + G + B = 1Y+U+V=R+G+B=1 的数理特征,很容易就能证明 YUV 和 RGB 的线性等价关系: Y+U+V=R+G+B=1=1WG(WG⋅R+WG⋅G+WG⋅B)=1WG(Y+(WG−WR)⋅G+(WG−WB)⋅B)=3Y+WG−WRWG⋅(R−Y)+WG−WBWG⋅(B−Y)=3Y+WG−WRWG⋅1−WRVmax⋅V+WG−WBWG⋅1−WBUmax⋅UR+G+B=(Y+1−WRVmax⋅V)+(Y+WRWG⋅1−WRVmax⋅V+WBWG⋅1−WBUmax⋅U)+(Y+1−WBUmax⋅U) {\\displaystyle \\begin{aligned} Y + U + V &= R + G + B = 1 \\\\ &= {\\tfrac {1}{W_G}}\\left( W_G \\cdot R + W_G \\cdot G + W_G \\cdot B \\right) \\\\ &= {\\tfrac {1}{W_G}}\\left( Y + (W_G - W_R) \\cdot G + (W_G - W_B) \\cdot B \\right) \\\\ &=3Y + {\\tfrac {W_G - W_R}{W_G}} \\cdot (R - Y) + {\\tfrac {W_G - W_B}{W_G}} \\cdot (B - Y) \\\\ &=3Y + {\\tfrac {W_G - W_R}{W_G}} \\cdot {\\tfrac {1 - W_R}{V_{max}} \\cdot V} + {\\tfrac {W_G - W_B}{W_G}} \\cdot {\\tfrac {1 - W_B}{U_{max}} \\cdot U} \\\\ R + G + B &=\\left( Y + {\\tfrac {1 - W_R}{V_{max}} \\cdot V} \\right) + \\left( Y + {\\tfrac {W_R}{W_G}} \\cdot {\\tfrac {1 - W_R}{V_{max}} \\cdot V} + {\\tfrac {W_B}{W_G}} \\cdot {\\tfrac {1 - W_B}{U_{max}} \\cdot U} \\right) + \\left( Y + {\\tfrac {1 - W_B}{U_{max}} \\cdot U} \\right) \\end{aligned} } Y+U+VR+G+B=R+G+B=1=WG1(WG⋅R+WG⋅G+WG⋅B)=WG1(Y+(WG−WR)⋅G+(WG−WB)⋅B)=3Y+WGWG−WR⋅(R−Y)+WGWG−WB⋅(B−Y)=3Y+WGWG−WR⋅Vmax1−WR⋅V+WGWG−WB⋅Umax1−WB⋅U=(Y+Vmax1−WR⋅V)+(Y+WGWR⋅Vmax1−WR⋅V+WGWB⋅Umax1−WB⋅U)+(Y+Umax1−WB⋅U) 线性的变化关系对 YUV 相当重要,这意味着上式可直接以转换矩阵 MRGB2YUVM_{RGB2YUV}MRGB2YUV 表示,有: CRGB=MRGB2YUV−1⋅CYUV=MRGB2YUV−1⋅MRGB2YUV⋅CRGB=E⋅CRGB C_{RGB} = {M_{RGB2YUV}}^{-1} \\cdot C_{YUV} = {M_{RGB2YUV}}^{-1} \\cdot M_{RGB2YUV} \\cdot C_{RGB} = E \\cdot C_{RGB} CRGB=MRGB2YUV−1⋅CYUV=MRGB2YUV−1⋅MRGB2YUV⋅CRGB=E⋅CRGB 这一点保证了不论何种设备,设备间经过 YUV 色彩格式传递的 RGB 数据,在转换前后都有一致的值,维护了数据的准确性。 现在,理论基础得到了佐证。在此条件下,如果已经测得关键参数值,怎样计算转换矩阵 MRGB2YUVM_{RGB2YUV}MRGB2YUV 呢? 
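在进入具体规格的推导之前,可以先用一小段 C 代码勾勒上述狭义配色函数的计算骨架(仅为示意草样:结构体与函数命名为说明而设,示例中 U_max、V_max 的取值并非出自正文,实际数值应以所选规格的测定结果为准):

#include <stdio.h>

/* 依据上文的狭义配色函数,由规格测定的归一化电平配比 (W_R, W_G, W_B)
 * 与归一化因子 U_max、V_max,把归一化 RGB 转换为归一化 YUV */
typedef struct YuvSpec {
    double wr, wg, wb;   /* (W_R, W_G, W_B),如 BT.601 测得 0.299 / 0.587 / 0.114 */
    double umax, vmax;   /* U、V 的归一化因子,由所选规格给定 */
} YuvSpec;

void rgb_to_yuv(const YuvSpec *spec, double r, double g, double b,
                double *y, double *u, double *v) {
    *y = spec->wr * r + spec->wg * g + spec->wb * b;   /* Y = W_R·R + W_G·G + W_B·B   */
    *u = spec->umax / (1.0 - spec->wb) * (b - *y);     /* U = U_max/(1-W_B) · (B - Y) */
    *v = spec->vmax / (1.0 - spec->wr) * (r - *y);     /* V = V_max/(1-W_R) · (R - Y) */
}

int main(void) {
    YuvSpec bt601 = {0.299, 0.587, 0.114, 0.5, 0.5};   /* U_max、V_max 仅为示意取值 */
    double y, u, v;
    rgb_to_yuv(&bt601, 1.0, 0.0, 0.0, &y, &u, &v);     /* 以纯红色为例 */
    printf("Y=%.3f U=%.3f V=%.3f\n", y, u, v);
    return 0;
}

在此骨架上代入规格测定值,并按目标有效区间(狭隘或完整)做量化平移,即可整理出对应的转换矩阵。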
以 BT.601 的狭隘区间(Narrow Range) 为例。规格中取 D65 作为白点和质心 WWW ,测得 (WR ,WG ,WB)(W_R \\ , W_G \\ , W_B )(WR ,WG ,WB) 为 (0.299, 0.587, 0.114)(0.299, \\ 0.587, \\ 0.114)(0.299, 0.587, 0.114) 。经过值域范围平移后,带入狭义配色函数计算,有: (Y−16)⋅255=(+0.299⋅R+0.587⋅G+0.114⋅B)⋅(235−16)(U−128)⋅255=(−0.299⋅R−0.587⋅G+0.886⋅B)⋅(235−16)(V−128)⋅255=(+0.701⋅R−0.587⋅G−0.114⋅B)⋅(235−16) {\\displaystyle \\begin{aligned} (Y-16) \\cdot 255 &= (+0.299 \\cdot R + 0.587 \\cdot G + 0.114 \\cdot B) \\cdot (235 - 16) \\\\ (U-128) \\cdot 255 &= (-0.299 \\cdot R - 0.587 \\cdot G + 0.886 \\cdot B) \\cdot (235 - 16) \\\\ (V-128) \\cdot 255 &= (+0.701 \\cdot R - 0.587 \\cdot G - 0.114 \\cdot B) \\cdot (235 - 16) \\end{aligned} } (Y−16)⋅255(U−128)⋅255(V−128)⋅255=(+0.299⋅R+0.587⋅G+0.114⋅B)⋅(235−16)=(−0.299⋅R−0.587⋅G+0.886⋅B)⋅(235−16)=(+0.701⋅R−0.587⋅G−0.114⋅B)⋅(235−16) 换算一下就能得到 MRGB2YUVM_{RGB2YUV}MRGB2YUV 的表达式: [YUV]BT.601Narrow=[0.2570.5040.098−0.148−0.2910.4390.439−0.368−0.071]⋅([RGB]−[16128128]) {\\begin{bmatrix} Y \\\\ U \\\\ V \\end{bmatrix}}_{BT.601}^{Narrow}= {\\begin{bmatrix} 0.257 & 0.504 & 0.098 \\\\ -0.148 & -0.291 & 0.439 \\\\ 0.439 & -0.368 & -0.071 \\end{bmatrix}} \\cdot \\left( {\\begin{bmatrix} R \\\\ G \\\\ B \\end{bmatrix}} - {\\begin{bmatrix} 16 \\\\ 128 \\\\ 128 \\end{bmatrix}} \\right) ⎣⎡YUV⎦⎤BT.601Narrow=⎣⎡0.257−0.1480.4390.504−0.291−0.3680.0980.439−0.071⎦⎤⋅⎝⎛⎣⎡RGB⎦⎤−⎣⎡16128128⎦⎤⎠⎞ 可见,转换矩阵 MRGB2YUVM_{RGB2YUV}MRGB2YUV 的计算结果,只依赖于规格条件所指定的 (WR ,WG ,WB)(W_R \\ , W_G \\ , W_B )(WR ,WG ,WB) 测定值和 YUV 的取值范围。 其他规格下的计算方式也是一样,并无差异。这里列出 常用的主流规格带入公式后的结果,方便工程参考: 关于 YUV 色彩格式的格式空间部分,到这里就说明完毕。接下来我们来看组成 YUV 色彩格式的数据存储部分。 YUV 的数据存储 目前主流的 YUV 色彩格式的 存储格式族(Data Format Family) 主要有三种,分别是 YUV420、YUV422、YUV444。 YUV420 族 下的存储格式,以 4个Y通道分量共用一组UV分量构成(YYYY U V); YUV422 族 下的存储格式,以 2个Y通道分量共用一组UV分量构成(YY U V); YUV444 族 下的存储格式,三通道分量一一对应(Y U V); 而每一种 YUV 存储格式族,根据 Y通道、U通道、V通道的数据排布,又可以分为:平面(Planar)、半平面(Semi-Planar)、夹层(Interleaved)、打包(Packed) 四种存储的 数据分组类型。 平面(Planar) 类型,Y、U、V 数据独立存放; 半平面(Semi-Planar) 类型,Y通道数据独立存放,UV通道数据交替打包存放; 夹层(Interleaved) 类型,三通道数据以两个Y与一组UV为数据组,封包排列存放; 打包(Packed) 类型,三通道数据以一组YUV为数据组,封包排列存放; 因此,整个 YUV 的存储格式从属关系如下图所示: 图 2-51 YUV 存储格式(Data Format)谱系图 这些 YUV 存储格式最大的特点在于数据组成上。我们用相同颜色表示位于同一组的 YUV 数据。 假设存在一张包含 36 个像素点的 6x6 的图片(为了避免颜色重复,YUV444 用 12个像素点的 6x2 图片)。 以 Y、U、V 分别代表对应通道的有效数据,所有存储格式数据排布 《YUV 存储格式(Data Format)对比说明表》 如下: 显然,从数据的压缩程度上来说,YUV420 族明显具有较高的压缩比。但由于YUV 格式并不是完全无损的,与之相对的问题就是高压缩比导致的图片细节损失。不过图片的细部信息大都存在于灰度图上,而这部分信息主要由 Y 通道保存,因此人眼难以察觉丢失的颜色细节。相比较高压缩比带来的优势,这部分损失可以忽略不计。所以,在音视频数据传输及图像存储中,工程上常常采用 YUV420 族下的色彩格式进行保存。 至此,有关音视频工程中的图片色彩处理部分,基本讲解完毕。下一章我们将利用目前已掌握的音视频知识,来做针对一段音频和一张图片基本分析的工程实践。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_2/Language/cn/References_2.html":{"url":"Chapter_2/Language/cn/References_2.html","title":"【参考文献】","keywords":"","body":"二、【参考文献】 [1] Isaac Newton, Hypothesis explaining the properties of light, Letter from Newton to Henry Oldenburg, dated 14 December 1675. [2] Moses Harris, The Natural System of Colours and Ignaz Schiffermüller, Versuch eines Farbensystems (Vienna, 1772), plate I - project Gutenberg Ignaz Schiffermüller, Versuch eines Farbensystems (Vienna, 1772), plate I. [3] Young, T. (1802). \"Bakerian Lecture: On the Theory of Light and Colours\". Phil. Trans. R. Soc. Lond. 92: 12–48. doi:10.1098/rstl.1803.0004. [4] Glynn, Ian (2010). Elegance in Science. Oxford: Oxford University Press. pp. 147–150. ISBN 978-0-19-957862-7. [5] Stanley Finger (2001). Origins of Neuroscience: A History of Explorations into Brain Function. p. 
100. ISBN 9780195146943. [6] Svaetichin,G. (1956). Spectral response curves from single cones, Actaphysiol. scand. 39, Suppl. 134, 17–46. [7] Schubring, Gert, ed. (1996). Hermann Günther Graßmann (1809–1877): Visionary Mathematician, Scientist and Neohumanist Scholar. Boston Studies in the Philosophy of Science. Vol. 187. Springer. doi:10.1007/978-94-015-8753-2. ISBN 978-94-015-8753-2. ISSN 0068-0346. [8] K-H Schlote, Hermann Günther Grassmann and the theory of hypercomplex number systems, in Hermann Günther Grassmann (1809-1877) : visionary mathematician, scientist and neohumanist scholar (Dordrecht, 1996), 165-173. [9] G Schubring (ed.), Hermann Günther Grassmann (1809-1877) : visionary mathematician, scientist and neohumanist scholar (Dordrecht, 1996). [10] Kirschmann, A., 1896. Color-Saturation and its Quantitative Relations. American Journal of Psychology, 7, 386-404. [11] Schlatter, T., Levinson, D: Visual Usability. Principles and Practices for Designing Digital Applications, 171-211, Morgan Kaufmann, Boston 2013 [12] Smith, Thomas; Guild, John (1931–32). \"The C.I.E. colorimetric standards and their use\". Transactions of the Optical Society. 33 (3): 73–134. [13] CIE (1932). Commission internationale de l'Eclairage proceedings, 1931. Cambridge: Cambridge University Press. [14] FR patent 841335, Valensi, Georges, \"Procédé de télévision en couleurs\", published 1939-05-17, issued 1939-02-06 [15] US patent 2375966, Valensi, Georges, \"System of television in colors\", published 1945-05-15 [16] Smith, Alvy Ray (August 1978). \"Color gamut transform pairs\". Computer Graphics. 12 (3): 12–19. doi:10.1145/965139.807361. [17] Joblove, George H.; Greenberg, Donald (August 1978). \"Color spaces for computer graphics\". Computer Graphics. 12 (3): 20–25. doi:10.1145/965139.807362. [18] Ware Myers (July 1979). \"Interactive Computer Graphics: Flying High-Part I\". Computer. 12 (7): 8–17. doi:10.1109/MC.1979.1658808. S2CID 15344162. [19] Computer Graphics Staff (August 1979). \"Status Report of the Graphics Standards Planning Committee\". ACM SIGGRAPH Computer Graphics. 13 (3): 1–10. doi:10.1145/988497.988498. S2CID 43687764. [20] OpenSource Project, Color-Science, Github, https://github.com/colour-science/colour#32012colour-temperature [21] David L. Fridge, \"Aberration Synthesizer*,\" J. Opt. Soc. Am. 50, 87-87 (1960) [22] Alan J. Werner, \"Luminous Transmittance, and Chromaticity of Colored Filter Glasses in CIE 1964 Uniform Color Space,\" Appl. Opt. 7, 849-855 (1968) [23] Planck, M. (1900a). \"On an Improvement of Wien's Equation for the Spectrum\", Verh. Dtsch. Phys. Ges. Berlin 2, 202 (1900) [24] Planck, M. (1900b). \"On the Theory of the Energy Distribution Law of the Normal Spectrum\", Verh. Dtsch. Phys. Ges. Berlin 2, 237 (1900) [25] Wright, William David (1928). \"A re-determination of the trichromatic coefficients of the spectral colors\". Transactions of the Optical Society. 30 (4): 141–164. doi:10.1088/1475-4878/30/4/301. [26] Guild, J. (1932). \"The colorimetric properties of the spectrum\". Philosophical Transactions of the Royal Society of London. Series A, Containing Papers of a Mathematical or Physical Character. 230 (681–693): 149–187. Bibcode:1932RSPTA.230..149G. doi:10.1098/rsta.1933.0005. JSTOR 91229. [27] Krystek, Michael P. (January 1985). \"An algorithm to calculate correlated colour temperature\". Color Research & Application. 10 (1): 38–40. doi:10.1002/col.5080100109. [28] Borbély, Ákos; Sámson,Árpád; Schanda, János (December 2001). 
\"The concept of correlated colour temperature revisited\". Color Research & Application. 26 (6): 450–457. doi:10.1002/col.1065. Archived from the original on 2009-02-05. [29] Simons, Ronald Harvey; Bean, Arthur Robert (2001). Lighting Engineering: Applied Calculations. Architectural Press. ISBN 0-7506-5051-6. [30] Robertson, Alan R. (November 1968). \"Computation of Correlated Color Temperature and Distribution Temperature\". JOSA. 58 (11): 1528–1535. Bibcode:1968JOSA...58.1528R. doi:10.1364/JOSA.58.001528. [31] McCamy, Calvin S. (April 1992). \"Correlated Color Temperature as an explicit function of chromaticity coordinates\". Color Research & Application. 17 (2): 142–144. doi:10.1002/col.5080170211. plus erratum doi:10.1002/col.5080180223. [32] Kelly, Kenneth L. (August 1963). \"Lines of Constant Correlated Color Temperature Based on MacAdam's (u,v) Uniform Chromaticity Transformation of the CIE Diagram\". JOSA. 53 (8): 999–1003. Bibcode:1963JOSA...53..999K. doi:10.1364/JOSA.53.000999. [33] Hernández-Andrés, Javier; Lee, RL; Romero, J (September 20, 1999). \"Calculating Correlated Color Temperatures Across the Entire Gamut of Daylight and Skylight Chromaticities\" (PDF). Applied Optics. 38 (27): 5703–5709. Bibcode:1999ApOpt..38.5703H. doi:10.1364/AO.38.005703. PMID 18324081. [34] Bongsoon Kang; Ohak Moon; Changhee Hong; Honam Lee; Bonghwan Cho; Youngsun Kim (December 2002). \"Design of Advanced Color Temperature Control System for HDTV Applications\" (PDF). Journal of the Korean Physical Society. 41 (6): 865–871. Archived from the original (PDF) on 2019-03-03. [35] Kim et al., \"Color Temperature Conversion System and Method Using the Same\", issued 2006-04-04 [36] CIE Publication 15.3, CIE 15:2004, ISBN 3-901-906-33-9 [37] Equivalent White Light Sources, and CIE Illuminants (PDF), archived from the original on 2005-05-23, retrieved 2017-12-11 [38] CIE F-series Spectral Data, CIE 15.2:1986, archived from the original on 2011-07-25, retrieved 2017-12-11 [39] Colorimetry, 4th Edition, vol. CIE 015:2018, doi:10.25039/TR.015.2018, ISBN 978-3-902842-13-8 [40] Sándor, Norbert; Schanda, János (September 1, 2006), \"Visual colour rendering based on colour difference evaluations\", Lighting Research and Technology, 38 (3): 225–239, doi:10.1191/1365782806lrt168oa, S2CID 109858508. [41] Masahura NAKAYAMA and Koichi IKEDA: Comparison of Perceived Colour Differences with Colorimetric Colour Differences in Uniform Colour Spaces and Colour Appearance Model, J. Light & Vis. Env. Vol.28, No.2, 2004. [42] ColorChecker Colorimetric Data (PDF), archived (PDF) from the original on 9 October 2012, retrieved 17 April 2013. [43] Charles Poynton (2008). \"ColorChecker (‘Macbeth’) Chart\". poynton.com [44] Azriel Rosenfeld, Picture Processing by Computer, New York: Academic Press, 1969 [45] Dyer, Stephen A.; Harms, Brian K. (13 August 1993). \"Digital Signal Processing\". In Yovits, Marshall C. (ed.). Advances in Computers. Vol. 37. Academic Press. pp. 59–118. doi:10.1016/S0065-2458(08)60403-9. ISBN 978-0120121373. ISSN 0065-2458. LCCN 59015761. OCLC 858439915. OL 10070096M. [46] ITU-R, Rec. ITU-R BT.601-7, \"BT.601 : Studio encoding parameters of digital television for standard 4:3 and wide screen 16:9 aspect ratios\", Article Number E 70000, archived from the original on 2011-03-08 [47] ITU-R, Rec. ITU-R BT.709-6, \"BT.709 : Parameter values for the HDTV standards for production and international programme exchange\", Article Number E 70000, archived from the original on 2015-06-17 [48] ITU-R, Rec. 
ITU-R BT.2020-2, \"BT.2020 : Parameter values for ultra-high definition television systems for production and international programme exchange\", Article Number E 70000, archived from the original on 2015-10-14 [49] 雷霄骅, \"Color format conversion: The simplest example of libswscale based on FFmpeg (YUV to RGB)\", archived (Web: https://blog.csdn.net/leixiaohua1020/article/details/42134965) from the original on 2014-12-28 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Apex_3_Introduce.html":{"url":"Chapter_3/Language/cn/Apex_3_Introduce.html","title":"三、音视频常用基础算法","keywords":"","body":"三、音视频常用基础算法 引言 音视频中最为重要的组成部分,即是音频处理和视频处理。 音频处理应用到的基础理论,来源自:数字信号处理(Digital Signal Process)、数字合成音效(Digital Audio Effects)、语音识别(Voice Recognition)等领域。视频处理应用到的基础理论,来源自:数字信号处理(Digital Signal Process)、计算机图形学(Computer Graphics)、计算机视觉(Computer Vision)等领域。 这些学科在工程中或多或少的交叉使用,甚至本身大都为交叉学科,但最为核心的始终只有两个,即数字信号处理(DSP)和计算机图形学(CG)。所以,在正式开始学习音视频工程技术之前,首先需要回顾部分基础算法的工程特征。 本章节主要对此简单梳理,并结合伪码和 C/C++/Python/GLSL等 工程汇总说明。可以做为最小集合的背景算法知识字典,供开发过程中查阅回顾使用。 关键字:傅立叶变换、滤波算法、区域检测、光流补正、冗余控制 目录 3.1 信号分析的核心算法 - 傅立叶变换 3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT) 3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT) 3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT) 3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach) 3.2 频率信息提取 - 常用滤波算法 3.2.1 高斯滤波(Gauss Filter) 3.2.2 双边滤波(Bilateral Filter) 3.2.3 拉普拉斯滤波(Laplacian Filter) 3.2.4 马尔滤波(Marr Filter) 3.2.5 索贝尔滤波(Sobel Filter) 3.2.6 各向异性扩散(Anisotropic Diffusion) 3.3 时间冗余控制 - 常用特征提取与朴素阈值处理 3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 3.3.2 朴素目标检测结果度量 - IoU & GIoU 3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window) 3.4 空域冗余控制 - 基础光流算法与色度压缩 3.4.1 传统光流法(Classic Optical Flow Methods) 3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow]) 3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow]) 3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling]) 3.5 频域冗余控制 - 基础变换编码 3.5.1 整数离散正余弦变换(DST/DCT) 3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform]) 3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform]) 【在线展示】 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_1.html":{"url":"Chapter_3/Language/cn/Docs_3_1.html","title":"3.1 信号分析的核心算法 - 傅立叶变换","keywords":"","body":"3.1 信号分析的核心算法 - 傅立叶变换 傅立叶变换(FT [Fourier Transform]) [1] 可理解为:任意函数都存在由给定复指数函数空间(Complex Exponential Functions Space)的一组正交基(Orthogonal Bases),使得原函数可以被分解为该复指数函数空间下最大完备解的权重向量形式表示 [2] 。利用原函数与分量函数内积为该方向解分量且正交基内任意两个方向的方向函数内积为 0 的特点,来用解的人为限定有限维度子集逼近函数本身的数学方法 [3] 。这里,描述构成原函数的分量函数集与其所占权重分量(即求得的正交基),共同构成了该函数的傅里叶基(Fourier Basis)[4] [5]。 如果记原函数为 FFF,复指数函数空间为 Fω=[Fω1,Fω2, ... ,Fωn]{\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\omega_1},{\\mathcal {F}}_{\\omega_2},\\ ...\\ ,{\\mathcal {F}}_{\\omega_{n}}]Fω=[Fω1,Fω2, ... ,Fωn],傅里叶基为 F=[f^1,f^2, ... ,f^n]{\\mathcal {F}} = [\\hat{f}_1,\\hat{f}_2,\\ ...\\ ,\\hat{f}_n]F=[f^1,f^2, ... ,f^n],且 nmax=Nn_{max} = Nnmax=N,则这一关系从空间投影变换角度来看 [6],可以表示为: N⋅F=FωT⋅F=[Fω1Fω2⋮Fωn]⋅[f^1,f^2, ... ,f^n] {\\displaystyle \\begin{aligned} N \\cdot F = {\\mathcal{F}_{\\omega}}^T \\cdot {\\mathcal {F}} = { \\begin{bmatrix} \\mathcal{F}_{\\omega_1} \\\\ \\mathcal{F}_{\\omega_2} \\\\ \\vdots \\\\ \\mathcal{F}_{\\omega_n} \\end{bmatrix} } \\cdot [\\hat{f}_1,\\hat{f}_2,\\ ...\\ ,\\hat{f}_n] \\end{aligned} } N⋅F=FωT⋅F=⎣⎢⎢⎡Fω1Fω2⋮Fωn⎦⎥⎥⎤⋅[f^1,f^2, ... 
,f^n] 傅里叶变换被作为基础原理之一运用在数字信号(广义)的处理过程并处于核心地位。而在数字信号处理(DSP)中,我们把所有类型信号都抽象成,由一系列离散化数据构成的函数模型表示。这些函数并不一定都是周期性、单一维度的。这时我们需要一种手段,使得能够用统一的方式描述所有不同表征的函数,从而一致性的交付系统(不一定是电脑)处理。傅里叶变换正是这种化繁为简的理论工具(Theoretical Tools),通过它我们能够将任意信号函数转换为傅里叶级数展开,进而转化为复数平面上一系列构成谐波关系的周期性基础三角函数和表示。傅里叶变化作为对信号进行分解描述的方法论,并不局限于单维声音信号,针对二维图片信号或更高维的数据也能够拓展延伸(即可拓展性)。而这也是我们进行感知数据数字化的理论依据。 因此,理解上式如何被运用是进行学习的关键。那么这在工程上主要体现在哪儿呢?我们需要从最简单的傅里叶变换,即一维傅里叶变换开始了解。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_1_1.html":{"url":"Chapter_3/Language/cn/Docs_3_1_1.html","title":"3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT)","keywords":"","body":"3.1.1 一维傅立叶(1D-FT)与一维离散傅立叶变换(1D-DFT) 信号学中,将沿平面分布的信号称为一维信号(1D Signal),例如音频信号。 一维傅里叶变换,能够将一组满足狄利克雷条件(Dirichlet Theorem)的一维信号分解到周期性复指数波(Complex Exponential Wave)构成的二维向量空间。 从傅里叶级数(FS)到傅里叶变换(FT) 狄利克雷条件 最初被用作傅里叶级数(FS [Fourier Series])在三角函数域上进行分解的充分不必要条件 [2] [7]。在狄利克雷条件描述中,如果选定分析的周期信号 同时满足: 【单周期内,连续或存在有限个第一类间断点】; 【单周期内,存在有限数目的极大值与极小值】; 【单周期内,绝对可积】; 则,此周期信号就一定存在傅里叶三角级数的分解表示。 如果记周期信号函数 s(t)s(t)s(t) 的波长(周期)为 TTT ,角频率(角速度)为 2πT\\tfrac{2\\pi}{T}T2π 。则以信号函数波长 TTT 做可变 n∈[0, N]n \\in [0, \\ N]n∈[0, N] 等分(即步长 Step=1NStep = \\tfrac{1}{N}Step=N1 )选取分离函数。有分离函数(周期)为 Tn\\tfrac{T}{n}nT ,角频率(角速度)为 ωn=2πnT{\\omega_n} = \\tfrac{2\\pi n}{T}ωn=T2πn 。原周期信号函数 s(t)s(t)s(t) 就可以被分解为: s(t)=1N∑n=0Nan⋅cos(2πnTt) + 1N∑n=0Nbn⋅sin(2πnTt)an=∫−T2+T2s(t)⋅cos(ωnt) dt bn=∫−T2+T2s(t)⋅sin(ωnt) dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N} \\sum_{n =0}^{N} a_n \\cdot cos(\\tfrac{2\\pi n}{T}t)\\ \\ \\ \\ \\ \\ +\\ \\ \\ \\ \\ \\frac{1}{N} \\sum_{n =0}^{N} b_n \\cdot sin(\\tfrac{2\\pi n}{T}t) \\\\ a_n &= \\int_{-\\tfrac{T}{2}}^{+\\tfrac{T}{2}} s(t) \\cdot cos(\\omega_n t) \\ dt \\ \\ \\ \\ \\ b_n = \\int_{-\\tfrac{T}{2}}^{+\\tfrac{T}{2}} s(t) \\cdot sin(\\omega_n t) \\ dt \\\\ \\end{aligned} } s(t)an=N1n=0∑Nan⋅cos(T2πnt) + N1n=0∑Nbn⋅sin(T2πnt)=∫−2T+2Ts(t)⋅cos(ωnt) dt bn=∫−2T+2Ts(t)⋅sin(ωnt) dt 如果我们对函数周期进行平移,将区间从 (−T2, +T2)(-\\tfrac{T}{2},\\ +\\tfrac{T}{2})(−2T, +2T) 偏移 +T2+\\tfrac{T}{2}+2T ,即变换到 (0, T)(0,\\ T)(0, T) ,使原周期信号函数 s(t)s(t)s(t) 偏移为奇函数(即 s(−t)=−s(t)s(-t) = - s(t)s(−t)=−s(t) ),而奇函数式可证明是不需要余弦函数项的。此时,就可以进一步化简 s(t)s(t)s(t) 为存粹正弦函数表示: s(t)=1N∑n=0Nbn⋅sin(2πnλt)=1N∑n=0Nbn⋅sin(ωnt)bn=∫0Ts(t)⋅sin(ωnt) dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N} \\sum_{n =0}^{N} b_n \\cdot sin(\\tfrac{2\\pi n}{\\lambda}t) = \\frac{1}{N} \\sum_{n =0}^{N} b_n \\cdot sin(\\omega_n t) \\\\ b_n &= \\int_{0}^{T} s(t) \\cdot sin(\\omega_n t) \\ dt \\\\ \\end{aligned} } s(t)bn=N1n=0∑Nbn⋅sin(λ2πnt)=N1n=0∑Nbn⋅sin(ωnt)=∫0Ts(t)⋅sin(ωnt) dt 简化表示 ωn{\\omega_n}ωn 为 ω{\\omega}ω ,当我们将傅里叶级数从三角函数域,扩展到复变函数域时,基底函数由正余弦函数变为了以 λ=2πω=Tn{\\displaystyle \\begin{aligned} \\lambda = \\tfrac{2 \\pi}{\\omega} = \\tfrac{T}{n}\\\\ \\end{aligned} }λ=ω2π=nT 为周期(波长)的复指数函数 Sω(t)=eiωt{\\displaystyle \\begin{aligned} {\\mathcal {S}}_{\\omega}(t) = e^{i\\omega t}\\\\ \\end{aligned} }Sω(t)=eiωt 。信号函数 s(t)s(t)s(t) 的分解函数就可以表示为: s(t)=1N∑n=0Ns^(2πnT)⋅ei2πnTt=1N∑ω=0ωNs^(ω)⋅eiωt=1N∑n=0Ns^(ω)⋅Sω(t)s^(ω)=∫0Ts(t)⋅e−iωt dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N} \\sum_{n = 0}^{N} \\hat{s}(\\tfrac{2\\pi n}{T}) \\cdot e^{i \\tfrac{2\\pi n}{T}t} = \\frac{1}{N} \\sum_{\\omega = 0}^{\\omega_N} \\hat{s}(\\omega) \\cdot e^{i \\omega t} \\\\ &= \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{s}(\\omega) \\cdot {\\mathcal {S}}_{\\omega}(t) \\\\ \\hat{s}(\\omega) &= \\int_{0}^{T} s(t) \\cdot e^{-i \\omega t} \\ dt \\\\ \\end{aligned} } 
s(t)s^(ω)=N1n=0∑Ns^(T2πn)⋅eiT2πnt=N1ω=0∑ωNs^(ω)⋅eiωt=N1n=0∑Ns^(ω)⋅Sω(t)=∫0Ts(t)⋅e−iωt dt 根据 欧拉公式(Euler's Formula) 可知 eix=cos(x)+i⋅sin(x){\\displaystyle \\begin{aligned} e^{ix} = cos(x) + i \\cdot sin(x) \\end{aligned} }eix=cos(x)+i⋅sin(x) , 带入上式有: s(t)=1N∑n=0Na^ω⋅cos(ωt)+i⋅b^ω⋅sin(ωt)a^ω=s^(−ω)+s^(ω)b^ω=1i⋅(s^(−ω)−s^(ω)) {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{a}_{\\omega} \\cdot cos(\\omega t) + i \\cdot \\hat{b}_{\\omega} \\cdot sin(\\omega t)\\\\ \\hat{a}_{\\omega} &= \\hat{s}(-\\omega) + \\hat{s}(\\omega) \\quad \\quad \\hat{b}_{\\omega} = \\tfrac{1}{i} \\cdot (\\hat{s}(-\\omega)-\\hat{s}(\\omega)) \\end{aligned} } s(t)a^ω=N1n=0∑Na^ω⋅cos(ωt)+i⋅b^ω⋅sin(ωt)=s^(−ω)+s^(ω)b^ω=i1⋅(s^(−ω)−s^(ω)) 转换到欧氏空间下的三角函数表示 Sω(t){\\mathcal {S}}_{\\omega}(t)Sω(t) ,记构成原信号函数 s(t)s(t)s(t) 的复指数函数 Sω(t){\\mathcal {S}}_{\\omega}(t)Sω(t) 的初相为 ∠ϕω\\angle\\phi_{\\omega}∠ϕω ,振幅为 AωA_{\\omega}Aω ,则: Sω(t):∠ϕω=arctan(a^ωb^ω)Aω=(a^ω)2+(b^ω)2 {\\displaystyle \\begin{aligned} {\\mathcal {S}}_{\\omega}(t) : \\quad \\angle\\phi_{\\omega} = \\arctan(\\tfrac{\\hat{a}_{\\omega}}{\\hat{b}_{\\omega}}) \\quad A_{\\omega} = \\sqrt{ (\\hat{a}_{\\omega}) ^2 + (\\hat{b}_{\\omega}) ^2 } \\\\ \\end{aligned} } Sω(t):∠ϕω=arctan(b^ωa^ω)Aω=√(a^ω)2+(b^ω)2 同三角函数域的情况,复变函数域下的傅里叶级数仍然可以进一步精简。我们仍然需要对原函数 s(t)s(t)s(t) 平移 +λ2+\\tfrac{\\lambda}{2}+2λ 并将周期变换到 (0, λ)(0,\\ \\lambda)(0, λ) ,使 s(t)s(t)s(t) 表现为奇函数。由于原信号函数 s(t)s(t)s(t) 必为实函数的特性,会使得 aωa_{\\omega}aω 与 bωb_{\\omega}bω 互为共轭复数。因此在奇函数条件下, aωa_{\\omega}aω 与 bωb_{\\omega}bω 表现为符号相反的纯虚数,此时: a^ω=1⋅[s^(−ω)+s^(ω)]=0 b^ω=1i⋅[s^(−ω)−s^(ω)]=2i⋅s^(−ω)s(t)=1N∑ω=0ωN 0⋅cos(ωt) + i⋅(2i⋅s^(−ω))⋅sin(ωt)= 1N∑n=0Ns^(−ω)⋅sin(ωt) {\\displaystyle \\begin{aligned} \\hat{a}_{\\omega} &= 1 \\cdot [\\hat{s}(-\\omega) + \\hat{s}(\\omega)] = 0 \\ \\ \\ \\ \\ \\hat{b}_{\\omega} = \\tfrac{1}{i} \\cdot [\\hat{s}(-\\omega)-\\hat{s}(\\omega)] = \\tfrac{2}{i} \\cdot \\hat{s}(-\\omega) \\\\ s(t) &= \\frac{1}{N} \\sum_{\\omega =0}^{\\omega_N} \\ \\ \\ \\ 0 \\cdot cos(\\omega t) \\ \\ \\ \\ \\ + \\ \\ \\ \\ i \\cdot (\\tfrac{2}{i} \\cdot \\hat{s}(-\\omega)) \\cdot sin(\\omega t) \\\\ &= \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{s}(-\\omega) \\cdot sin(\\omega t) \\\\ \\end{aligned} } a^ωs(t)=1⋅[s^(−ω)+s^(ω)]=0 b^ω=i1⋅[s^(−ω)−s^(ω)]=i2⋅s^(−ω)=N1ω=0∑ωN 0⋅cos(ωt) + i⋅(i2⋅s^(−ω))⋅sin(ωt)= N1n=0∑Ns^(−ω)⋅sin(ωt) 如果我们将 s^(−ω)\\hat{s}(-\\omega)s^(−ω) 的负号划入公式,并将离散级数扩展到原信号函数 s(t)s(t)s(t) 的连续实数空间上以积分形式表示。则 s(t)s(t)s(t) 与 s^(−ω)\\hat{s}(-\\omega)s^(−ω) 的关系就展现为: s(t)=1N∫0Ns^(ω)⋅sin(ωt) dns^(ω)=∫0Ts(t)⋅sin(−ωt) dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N}\\int_{0}^{N} \\hat{s}(\\omega) \\cdot sin(\\omega t) \\ d{n} \\\\ \\hat{s}(\\omega) &= \\int_{0}^{T} s(t) \\cdot sin(-\\omega t) \\ dt \\\\ \\end{aligned} } s(t)s^(ω)=N1∫0Ns^(ω)⋅sin(ωt) dn=∫0Ts(t)⋅sin(−ωt) dt 这就是傅里叶变换的奇函数表达式,也被称为 正弦傅里叶变换(SFT [Sine Fourier Transform])。 同理,如果我们取偶函数,有 aωa_{\\omega}aω 与 bωb_{\\omega}bω 表现为符号相同的纯实数。即: a^ω=1⋅[s^(−ω)+s^(ω)]=2⋅s^(ω) b^ω=1i⋅[s^(−ω)−s^(ω)]=0s(t)=1N∑ω=0ωN 2⋅s^(ω)⋅cos(ωt) + i⋅0⋅sin(ωt)= 1N∑n=0Ns^(ω)⋅cos(ωt) {\\displaystyle \\begin{aligned} \\hat{a}_{\\omega} &= 1 \\cdot [\\hat{s}(-\\omega) + \\hat{s}(\\omega)] = 2 \\cdot \\hat{s}(\\omega) \\ \\ \\ \\ \\ \\hat{b}_{\\omega} = \\tfrac{1}{i} \\cdot [\\hat{s}(-\\omega)-\\hat{s}(\\omega)] = 0 \\\\ s(t) &= \\frac{1}{N} \\sum_{\\omega =0}^{\\omega_N} \\ \\ \\ \\ {2 \\cdot \\hat{s}(\\omega)} \\cdot cos(\\omega t) \\ \\ \\ \\ \\ + \\ \\ \\ \\ i \\cdot 0 \\cdot sin(\\omega t) \\\\ &= \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ \\ 
\\ \\ \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{s}(\\omega) \\cdot cos(\\omega t) \\\\ \\end{aligned} } a^ωs(t)=1⋅[s^(−ω)+s^(ω)]=2⋅s^(ω) b^ω=i1⋅[s^(−ω)−s^(ω)]=0=N1ω=0∑ωN 2⋅s^(ω)⋅cos(ωt) + i⋅0⋅sin(ωt)= N1n=0∑Ns^(ω)⋅cos(ωt) 采用相同处理,有余 弦傅里叶变换(CFT [Cosine Fourier Transform]) 结果如下: s(t)=1N∫0Ns^(ω)⋅cos(ωt) dns^(ω)=∫−T2+T2s(t)⋅cos(−ωt) dt {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N} \\int_{0}^{N} \\hat{s}(\\omega) \\cdot cos(\\omega t) \\ d{n} \\\\ \\hat{s}(\\omega) &= \\int_{-\\tfrac{T}{2}}^{+\\tfrac{T}{2}} s(t) \\cdot cos(-\\omega t) \\ dt \\\\ \\end{aligned} } s(t)s^(ω)=N1∫0Ns^(ω)⋅cos(ωt) dn=∫−2T+2Ts(t)⋅cos(−ωt) dt 然而工程中的信号并不存在有限周期且并不都能判定奇偶性,这是否意味着我们无法对其进行分解和化简? 答案是否定的。首先来看,针对周期性需要进行的操作。 解构一维信号 - 时频分离(Time-Frequency Separation) 如果我们换个角度就会发现,不存在有限周期只不过是因为周期太长,以至函数周期等于信号完整时长或着趋近无穷而导致的。所以我们分解原函数到对应的复指数函数和,所选择基底复指数函数也趋近于无穷,并使其对应频率从 000 到 ∞\\infty∞ 而周期从极大到极小即可。不过在计算上就需要利用傅立叶变化的空间特征了。 结合上文,记被分解的原信号函数为 f(t)f(t)f(t) 。根据傅立叶基的正交特性,如果存在 F(t){\\mathcal {F}}(t)F(t) 为当前 f(t)f(t)f(t) 的解函数空间,则必然有 f(t)⋅Fω−1(t)f(t) \\cdot {\\mathcal {F}}_{\\omega}^{-1}(t)f(t)⋅Fω−1(t) 内积在时间 ttt 范围为 (0, ∞)(0,\\ \\infty)(0, ∞) 有固定值 f^(ω)\\hat{f}(\\omega)f^(ω) ,使得: f^(ω)=∫0∞f(t)⋅Fω−1(t) dt=∫0∞f(t)⋅e−iωt dt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\int_{0}^{\\infty} f(t) \\cdot {\\mathcal {F}}_{\\omega}^{-1}(t) \\ dt = \\int_{0}^{\\infty} f(t) \\cdot e^{-i \\omega t}\\ dt \\\\ \\end{aligned} } f^(ω)=∫0∞f(t)⋅Fω−1(t) dt=∫0∞f(t)⋅e−iωt dt 以函数空间角度排除 f(t)f(t)f(t) 周期干扰。而复指数波的波函数,顾名思义就是复指数函数,有: f^(ω)=∫−∞+∞aω⋅cos(ωt)+i⋅bω⋅sin(ωt) dt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\int_{-\\infty}^{+\\infty} a_{\\omega} \\cdot cos(\\omega t) + i \\cdot b_{\\omega} \\cdot sin(\\omega t) \\ dt\\\\ \\end{aligned} } f^(ω)=∫−∞+∞aω⋅cos(ωt)+i⋅bω⋅sin(ωt) dt 使 bωb_{\\omega}bω 可取复数域,就可以转换为: f^(ω)=∫−∞+∞aω⋅cos(ωt)+bω⋅sin(ωt) dt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\int_{-\\infty}^{+\\infty} a_{\\omega} \\cdot cos(\\omega t) + b_{\\omega} \\cdot sin(\\omega t) \\ dt\\\\ \\end{aligned} } f^(ω)=∫−∞+∞aω⋅cos(ωt)+bω⋅sin(ωt) dt 由于实际信号并不能严格确定奇偶性,不过对于小于四维的情况下,大多数条件都能保证其本身为实函数(即函数只有实数域取值),因而构成原信号的分离基底函数是存在不同强度和初项的。我们沿用前文中对初相和振幅的定义,记 Fω(t){\\mathcal {F}}_{\\omega}(t)Fω(t) 初相为 ∠ϕω\\angle\\phi_{\\omega}∠ϕω ,振幅为 AωA_{\\omega}Aω ,则有: Fω(t):∠ϕω=arctan(a^ωb^ω) Aω=(a^ω)2+(b^ω)2 {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(t) : \\quad \\angle\\phi_{\\omega} = \\arctan(\\tfrac{\\hat{a}_{\\omega}}{\\hat{b}_{\\omega}}) \\ \\ \\ \\ A_{\\omega} = \\sqrt{ (\\hat{a}_{\\omega}) ^2 + (\\hat{b}_{\\omega}) ^2 } \\\\ \\end{aligned} } Fω(t):∠ϕω=arctan(b^ωa^ω) Aω=√(a^ω)2+(b^ω)2 根据 帕西瓦尔定理(Parseval’s Theorem) 转复数空间,我们会发现 AωA_{\\omega}Aω 就是 f^(ω)\\hat{f}(\\omega)f^(ω) 取 222 范数后的结果,而初项其实就是 f^(ω)\\hat{f}(\\omega)f^(ω) 在 t=0t = 0t=0 时,自身相位在复数空间上与实轴的夹角。即: Fω(t):∠ϕω=∠∣f^(t)∣ =arctan(a^ωb^ω)Aω= ∥f^(t)∥2=(a^ω)2+(b^ω)2 {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(t) &: \\\\ \\angle\\phi_{\\omega} &= \\angle{\\vert \\hat{f}(t) \\vert} \\ = \\arctan(\\tfrac{\\hat{a}_{\\omega}}{\\hat{b}_{\\omega}}) \\\\ A_{\\omega} &=\\ \\ \\Vert \\hat{f}(t) \\Vert _2 =\\sqrt{ (\\hat{a}_{\\omega}) ^2 + (\\hat{b}_{\\omega}) ^2 } \\\\ \\end{aligned} } Fω(t)∠ϕωAω:=∠∣f^(t)∣ =arctan(b^ωa^ω)= ∥f^(t)∥2=√(a^ω)2+(b^ω)2 进而有: Fω(t)=Aω⋅sin(ωt−∠ϕω)=Aω⋅cos(ωt+∠ϕω)=∥f^(t)∥2⋅sin(ωt−∠∣f^(t)∣)=∥f^(t)∥2⋅cos(ωt+∠∣f^(t)∣)f^(ω)=∫0∞f(t)⋅e−iωt dt ⇔ f(t)=1N∫−∞+∞f^(ω)⋅Fω(t) dω {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(t) &= A_{\\omega} \\cdot sin(\\omega t -\\angle\\phi_{\\omega}) = A_{\\omega} \\cdot cos(\\omega t +\\angle\\phi_{\\omega}) \\\\ &= {\\Vert 
\\hat{f}(t) \\Vert _2} \\cdot sin(\\omega t -\\angle{\\vert \\hat{f}(t) \\vert}) = {\\Vert \\hat{f}(t) \\Vert _2} \\cdot cos(\\omega t +\\angle{\\vert \\hat{f}(t) \\vert}) \\\\ \\hat{f}(\\omega) &= \\int_{0}^{\\infty} f(t) \\cdot e^{-i \\omega t}\\ dt \\ \\ \\ \\ \\ \\Leftrightarrow \\ \\ \\ \\ \\ f(t) = \\frac{1}{N} \\int_{-\\infty}^{+\\infty} \\hat{f}(\\omega) \\cdot {\\mathcal {F}}_{\\omega}(t) \\ d \\omega \\\\ \\end{aligned} } Fω(t)f^(ω)=Aω⋅sin(ωt−∠ϕω)=Aω⋅cos(ωt+∠ϕω)=∥f^(t)∥2⋅sin(ωt−∠∣f^(t)∣)=∥f^(t)∥2⋅cos(ωt+∠∣f^(t)∣)=∫0∞f(t)⋅e−iωt dt ⇔ f(t)=N1∫−∞+∞f^(ω)⋅Fω(t) dω 显然,大部分信号都是有限时间下的,且基本都能满足无穷区间的狄利克雷条件,也因此可以使用傅里叶变换分解。 如果频率范围在 ω∈[ω0, ω1]\\omega \\in [\\omega_{0},\\ \\omega_{1}]ω∈[ω0, ω1] ,对于选定的时间点 t=tct = t_ct=tc ,有频率 ω\\omegaω 、原函数 f(t)f(t)f(t) 在 t=tct = t_ct=tc 时的取值 f(tc)f(t_c)f(tc) 、基底函数族 Fω(t){\\mathcal {F}}_{\\omega}(t)Fω(t) 锁定时间 t=tct = t_ct=tc 的变体 Ftc(ω){\\mathcal {F}}_{t_c}(\\omega)Ftc(ω) ,构成该频率范围的 频域投影(FDP [Frequency Domain Projection]); 反之,如果时间范围在 t∈[ t0, t1]t\\in [\\ t_0,\\ \\ t_1]t∈[ t0, t1] ,对于频率范围 ω∈[ω0, ω1]\\omega \\in [\\omega_{0},\\ \\omega_{1}]ω∈[ω0, ω1] ,有时间 ttt 、原函数 f(t)f(t)f(t) 、基底函数族 Fω(t){\\mathcal {F}}_{\\omega}(t) Fω(t),就构成了原函数在该时间范围的 时域投影(TDP [Time Domain Projection])。 两者的区别仅在于观察角度的不同: Frequency Domain Projection: ( ω , f(tc) , Ftc(ω) )Time Domain Projection: ( t , f(t) , Fω(t) )ω∈[ω0, ωn] t ∈[ t0, tn ] {\\displaystyle \\begin{aligned} {Frequency\\ Domain\\ Projection:} &\\ \\ (\\ \\ \\omega\\ ,\\ \\ f(t_c)\\ ,\\ \\ {\\mathcal {F}}_{t_c}(\\omega) \\ \\ ) \\\\ {Time\\ Domain\\ Projection:} &\\ \\ (\\ \\ t\\ \\ ,\\ \\ f(t)\\ \\ ,\\ \\ {\\mathcal {F}}_{\\omega}(t) \\ \\ \\ ) \\\\ {\\omega \\in [\\omega_0,\\ \\omega_n]} \\ \\ \\ \\ & \\ \\ {\\ t\\ \\in [\\ t_0,\\ \\ t_n\\ ]} \\\\ \\end{aligned} } Frequency Domain Projection:Time Domain Projection:ω∈[ω0, ωn] ( ω , f(tc) , Ftc(ω) ) ( t , f(t) , Fω(t) ) t ∈[ t0, tn ] 周期的问题解决了,现在我们能够拿到时频分离(Time-Frequency Separation)的原信号函数信息并可以依此还原信号本身。但积分对于计算机来说任务有些繁重。同时,由于计算机只能处理离散化后的数字信号,因此离散化的算法才能够被计算机有效使用。 所以还需要在此基础上,找到更为便捷的算法实现。 精简运算过程 - 一维离散傅立叶变换(1D-DFT) 如果将积分重新转换为级数形式积化和差表示,并在允许误差范围内取有限子集。那么就能够化解掉大部分运算量,从而得到一个相对理论而言的低时间复杂度算法。这种想法促成了基于计算机运算的一维离散傅立叶(1D-DFT)的诞生。 一维离散傅立叶(1D-DFT [1D-Discrete Fourier Transform])本质上包含了两部分离散化作业,即对时域的离散化(TDD [Time Domain Discrete])和对频域的离散化(FDD [Frequency Domain Discrete])。 时域离散化(TDD) 方面,一维离散傅立叶采用了离散时间傅立叶变化(DTFT [Discrete Time Fourier Transform])中,对时域信号间隔采样的操作。即将: f^(ω)=∫0∞f(t)⋅e−iωt dt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\int_{0}^{\\infty} f(t) \\cdot e^{-i \\omega t}\\ dt \\\\ \\end{aligned} } f^(ω)=∫0∞f(t)⋅e−iωt dt 以时间采样(切片)数量为 n1{n_1}n1 ,转为级数形式: f^(ω)=∑t=t0tn1f(t)⋅e−iωt {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) &= \\sum_{t = t_0}^{t_{n_1}} f(t) \\cdot e^{-i \\omega t} \\\\ \\end{aligned} } f^(ω)=t=t0∑tn1f(t)⋅e−iωt 打破时间上的连续性。需要注意的是,此时频域仍然是连续的。 频域离散化(FDD) 方面,离散傅立叶做的操作就更为直观了。如果在频率采样时就以离散化的方式采样数据,那得到的频域信息天然就是离散的。同样,从某个时刻 t=tct = t_ct=tc 离散化的频域信息上还原当前实际频率,则也是一个线性求和的过程。因此有: f(t)=1N∫−∞+∞f^(ω)⋅Fω(t) dω {\\displaystyle \\begin{aligned} f(t) = \\frac{1}{N} \\int_{-\\infty}^{+\\infty} \\hat{f}(\\omega) \\cdot {\\mathcal {F}}_{\\omega}(t) \\ d \\omega \\\\ \\end{aligned} } f(t)=N1∫−∞+∞f^(ω)⋅Fω(t) dω 以频率采样(切片)数量为 n2{n_2}n2 ,转为级数形式: f(t)=1n2∑ω=ω0ωn2f^(ω)⋅Fω(t) {\\displaystyle \\begin{aligned} f(t) = \\frac{1}{n_2} \\sum_{\\omega = \\omega_0}^{\\omega_{n_2}} \\hat{f}(\\omega) \\cdot {\\mathcal {F}}_{\\omega}(t) \\\\ \\end{aligned} } f(t)=n21ω=ω0∑ωn2f^(ω)⋅Fω(t) 而随着有限采样,基底函数族 Fω(t){\\mathcal {F}}_{\\omega}(t) Fω(t)$ 构成的解函数空间也是有限维的,即: Fω=[Fω1,Fω2, ... 
,Fωn2] {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\omega_1},{\\mathcal {F}}_{\\omega_2},\\ ...\\ ,{\\mathcal {F}}_{\\omega_{n_2}}] \\\\ \\end{aligned} } Fω=[Fω1,Fω2, ... ,Fωn2] 至此,由时域离散化(TDD)与频域离散化(FDD)共同构成离散傅立叶(DFT)的完整表达如下所示: Fω=[Fω1,Fω2, ... ,Fωn2]f^(ω)=∑t=t0tn1f(t)⋅e−iωt ⇔ f(t)=1n2∑ω=ω0ωn2f^(ω)⋅Fω(t) {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\omega_1},&{\\mathcal {F}}_{\\omega_2},\\ ...\\ ,{\\mathcal {F}}_{\\omega_{n_2}}] \\\\ \\hat{f}(\\omega) = \\sum_{t = t_0}^{t_{n_1}} f(t) \\cdot e^{-i \\omega t} \\ \\ \\ \\ \\ &\\Leftrightarrow \\ \\ \\ \\ \\ f(t) = \\frac{1}{n_2} \\sum_{\\omega = \\omega_0}^{\\omega_{n_2}} \\hat{f}(\\omega) \\cdot {\\mathcal {F}}_{\\omega}(t) \\\\ \\end{aligned} } Fω=[Fω1,f^(ω)=t=t0∑tn1f(t)⋅e−iωt Fω2, ... ,Fωn2]⇔ f(t)=n21ω=ω0∑ωn2f^(ω)⋅Fω(t) 经过离散化后的有限采样更适合计算机有限的算力,因此才能被程序化。不过由于并没有保存连续的完整信息,经过离散傅里叶变换后再还原的数据,相对于采样自然源的原始数据终归还是会有一定损失的。但是由于变换与逆变换,并不会导致解构再还原后的数据存在差异。所以离散傅里叶变换被归类为 有损采样(Lossy Sampling)的无损算法(Lossless Algorithm)。 一维离散傅立叶变换(1D-DFT)的 C 语言实现 既然需要做程序化,那么首先需要将离散傅里叶变换的过程抽象化。理清逻辑思路的同时,方便构造迭代器和代码的处理流水线。这里我们使用伪码表示: /** * 1D-DFT [Discrete Fourier Transform] * [How to Use] * * Fo[T] = {...}; * Fn[N] = {}; * dft_1d(&Fo, &Fn, T, N); * [theorem::definitions] * Fo meanings Original Function * Fn meanings Fourier Basis at [n] * pi meanings π * T meanings Periodic of Fo * N meanings Slice of Frequency * Wn meanings Angular Frequency of Basis Fn is Wn = ((2*pi*n)/T) * [theorem::formula] * Fo[t] = sum_{n=0}^{N-1} x Fn[t] * exp( i * ((2*pi*n)/T) * t), 0 同时,我们还需要提供离散傅里叶变换的逆变换(IDFT [Inverse Discrete Fourier Transform])来使得电脑能够还原信息: /** * 1D-IDFT [Inverse Discrete Fourier Transform] * [How to Use] * * Fo[T] = {}; * Fn[N] = {...}; * dft_1d(&Fo, &Fn, T, N); * [theorem::definitions] * Fo meanings Original Function * Fn meanings Fourier Basis at [n] * pi meanings π * T meanings Periodic of Fo * N meanings Slice of Frequency * Wn meanings Angular Frequency of Basis Fn is Wn = ((2*pi*n)/T) * [theorem::formula] * Fo[t] = sum_{n=0}^{N-1} x Fn[t], 0 现在思路有了,只需要以代码实现即可: #include \"stdio.h\" #include \"math.h\" #define PI 3.1415926f typedef struct FBasis { double re_; double im_; double w_; } FBasis; void dft_1d(double *Fo, FBasis *Fn, size_t T, size_t N) { for (int n = 0; n 写完后简单测试一下: int main(void) { FBasis Fn[6] = {}; double Fo[6] = {1, 2, 3, 4, 5, 6}; double iFo[6] = {}; size_t T = sizeof(Fo) / sizeof(double); size_t N = sizeof(Fn) / sizeof(FBasis); printf(\"\\n Original_data: \\n\"); for (int t = 0; t 得到结果和标准几近相同: Original data: 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 DFT_result: 21.000000 + i 0.000000 -3.000003 + i -5.196152 -3.000002 + i -1.732048 -3.000000 + i -0.000002 -2.999996 + i 1.732057 -2.999979 + i 5.196158 IDFT_result: 1.000003 2.000000 2.999999 3.999999 4.999999 6.000000 运行结束。 到这里,我们已经基本掌握了傅里叶变换原理和最基础的应用。 如果拓展傅里叶变换到相对复杂的二维情况,那么和一维时有哪些不同呢? 
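在进入二维情况之前,这里将上文分步给出的一维实现汇总为一份完整可编译的参考清单,便于对照公式与测试输出(示意草样:dft_1d 沿用正文接口,idft_1d 为按同一约定假定的逆变换命名;未做精度与性能上的工程化处理):

#include <stdio.h>
#include <math.h>

#define PI 3.1415926f

/* Fourier Basis:第 n 个分量的实部、虚部与角频率 Wn = 2*PI*n/T */
typedef struct FBasis {
    double re_;
    double im_;
    double w_;
} FBasis;

/* 1D-DFT:Fn[n] = sum_{t=0}^{T-1} Fo[t] * exp(-i * Wn * t) */
void dft_1d(double *Fo, FBasis *Fn, size_t T, size_t N) {
    for (size_t n = 0; n < N; n++) {
        double wn = 2.0 * PI * (double) n / (double) T;
        Fn[n].re_ = 0.0;
        Fn[n].im_ = 0.0;
        Fn[n].w_  = wn;
        for (size_t t = 0; t < T; t++) {
            Fn[n].re_ += Fo[t] * cos(-wn * (double) t);
            Fn[n].im_ += Fo[t] * sin(-wn * (double) t);
        }
    }
}

/* 1D-IDFT:Fo[t] = (1/N) * sum_{n=0}^{N-1} Fn[n] * exp(+i * Wn * t) 取实部 */
void idft_1d(double *Fo, FBasis *Fn, size_t T, size_t N) {
    for (size_t t = 0; t < T; t++) {
        double acc = 0.0;
        for (size_t n = 0; n < N; n++) {
            acc += Fn[n].re_ * cos(Fn[n].w_ * (double) t)
                 - Fn[n].im_ * sin(Fn[n].w_ * (double) t);
        }
        Fo[t] = acc / (double) N;
    }
}

int main(void) {
    double Fo[6]  = {1, 2, 3, 4, 5, 6};
    double iFo[6] = {0};
    FBasis Fn[6]  = {{0}};
    size_t T = sizeof(Fo) / sizeof(Fo[0]);
    size_t N = sizeof(Fn) / sizeof(FBasis);

    dft_1d(Fo, Fn, T, N);      /* 分解:时域 -> 频域 */
    idft_1d(iFo, Fn, T, N);    /* 还原:频域 -> 时域 */

    for (size_t n = 0; n < N; n++) {
        printf("%f + i %f\n", Fn[n].re_, Fn[n].im_);
    }
    for (size_t t = 0; t < T; t++) {
        printf("%f ", iFo[t]);
    }
    printf("\n");
    return 0;
}

以 {1, 2, 3, 4, 5, 6} 为输入时,该清单得到与正文一致的频域分量(n=0 处为 21.000000 + i 0.000000),逆变换也能在浮点误差范围内还原原始序列。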
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_1_2.html":{"url":"Chapter_3/Language/cn/Docs_3_1_2.html","title":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT)","keywords":"","body":"3.1.2 二维傅立叶(2D-FT)与二维离散傅立叶变换(2D-DFT) 信号学中,将沿空间分布的信号称为二维信号(2D Signal)。图像信号就是二维信号。 二维傅里叶变换,能够将一组二维信号分解到 周期性复平面波(Complex Plane Wave) 构成的三维向量空间。 什么是平面波呢?延空间传播的选定波,若有任意时刻的相位相同的点连接起来得到的波阵面(同相位面)为互相平行的平面,就可以被称为 平面波(Plane Wave)。如果平面波同时满足简谐振动(即以正余弦规律振动)的特征,则可称为 平面简谐波(Plane Harmonic Waves)。复平面波则指的是在复数空间下的平面波。 从一维到二维傅里叶变换(2D-FT) 如果说一维信号是由一组数据延单一方向排布组成的数字序列,那么二维信号就是由一组数据延横向和纵向两个方向排布构成的数字平面。在一维信号处理时,我们将复指数函数分解为一系列一维简谐波的组合。同样的处理方式,我们也可以类比应用在二维信号场景,将构成二维信号的相关复平面波分解为在复数空间下的一系列复平面简谐波的聚合,进而把二维信号以相关强度参数,转化为平面简谐波的叠加表示。 一维信号和二维信号仅仅是维度上的差异。因此,结合向量空间,我们引入波的方向矢量,并取其大小等于当前波的角频率来表示波本身,称为波矢 k⃗{\\vec{k}}k⃗ 。将波矢为 k⃗{\\vec{k}}k⃗ 的平面简谐波,称为 k⃗{\\vec{k}}k⃗ 平面波。 对于周期为 TTT 的一维信号,因为时间只能沿着时间轴正向流动,所以此时的 k⃗{\\vec{k}}k⃗ 不存在方向。其基础波函数的波矢 k⃗{\\vec{k}}k⃗ 只有大小,即 ω=∣k⃗∣\\omega = \\vert {\\vec{k}} \\vertω=∣k⃗∣ 。所以在一维傅里叶变换中,我们只考虑了时间与频率的关系,即一维的时频关系。 对于周期为 TTT 的二维信号,以可变 nnn 等分选取作为基础的复平面波,记波函数为 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) ,则波长(周期)λ=Tn\\lambda = \\tfrac{T}{n}λ=nT ,角频率(角速度)为 ω=∣k⃗∣=2πλ\\omega = \\vert {\\vec{k}} \\vert = \\tfrac{2\\pi}{\\lambda}ω=∣k⃗∣=λ2π 。将 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的传播方向 (u,v)(u,v)(u,v) 限定 u∈[−U2, +U2]u \\in [-\\tfrac{U}{2}, \\ +\\tfrac{U}{2}]u∈[−2U, +2U] ,v∈[−V2, +V2]v \\in [-\\tfrac{V}{2}, \\ +\\tfrac{V}{2}]v∈[−2V, +2V] 的范围。则 (u,v)(u,v)(u,v) 与原点的欧式距离,实际代表的是该方向上的分割强度值 nnn ,有: U2 + V2 = T → (uT)2 + (vT)2 = n {\\displaystyle \\begin{aligned} \\sqrt{U^2 \\ \\ + \\ \\ V^2} \\ = \\ T \\ \\ \\ \\rightarrow \\ \\ \\ \\sqrt{(\\tfrac{u}{T})^2 \\ \\ + \\ \\ (\\tfrac{v}{T})^2} \\ = \\ n \\\\ \\end{aligned} } √U2 + V2 = T → √(Tu)2 + (Tv)2 = n 因此,代表 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的波矢 k⃗=(2π⋅uU , 2π⋅vV)=(ξ, η){\\vec{k}} = (\\tfrac{2 \\pi \\cdot {u}}{U} \\ , \\ \\tfrac{2 \\pi \\cdot {v}}{V} ) = (\\xi, \\ \\eta)k⃗=(U2π⋅u , V2π⋅v)=(ξ, η) ,推得: ω=∣k⃗∣=(2π⋅uU)2+(2π⋅vV)2=2πλ→ξ2 + η2 = ω2 {\\displaystyle \\begin{aligned} &\\omega = \\vert {\\vec{k}} \\vert = \\sqrt{({\\tfrac{2 \\pi \\cdot u}{U}})^2 + ({\\tfrac{2 \\pi \\cdot v}{V}})^2} = \\tfrac{2 \\pi}{\\lambda} \\\\ & \\quad \\rightarrow {\\xi}^2 \\ \\ + \\ \\ {\\eta}^2 \\ = \\ {\\omega}^2 \\\\ \\end{aligned} } ω=∣k⃗∣=√(U2π⋅u)2+(V2π⋅v)2=λ2π→ξ2 + η2 = ω2 Fω(x,y)=eik⃗⋅(x,y)T=ei⋅2π(uUx+vVy)=Fξ(x)⋅Fη(y) {\\displaystyle {\\mathcal {F}_{\\omega}(x,y)} = e^{i \\vec{k} \\cdot (x,y)^T } = e^{i \\cdot {2 \\pi} (\\tfrac{u}{U}x+\\tfrac{v}{V}y)} = {\\mathcal {F}_{\\xi}(x)} \\cdot {\\mathcal {F}_{\\eta}(y)} } Fω(x,y)=eik⃗⋅(x,y)T=ei⋅2π(Uux+Vvy)=Fξ(x)⋅Fη(y) 上式对复平面波 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的拆解,从数理上表明了,Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 是由沿着 xxx 轴方向的一维波 Fξ(x){\\mathcal {F}}_{\\xi}(x)Fξ(x) 和沿着 yyy 轴方向的一维波 Fη(y){\\mathcal {F}}_{\\eta}(y)Fη(y) 两部分构成。其中,ξ=2π⋅uU\\xi = \\tfrac{2\\pi \\cdot u}{U}ξ=U2π⋅u 为 Fξ(x){\\mathcal {F}}_{\\xi}(x)Fξ(x) 的角频率,η=2π⋅vV\\eta = \\tfrac{2\\pi \\cdot v}{V}η=V2π⋅v 为 Fη(y){\\mathcal {F}}_{\\eta}(y)Fη(y) 的角频率。点位 (x,y)(x,y)(x,y) 在二维信号中代表的是实际像素数据在数字平面上的空间位置信息。所以在处理二维傅里叶变换时,我们需要考虑的是平面空间点 P⃗(x,y)\\vec{P}(x,y)P⃗(x,y) 与 k⃗{\\vec{k}}k⃗ 平面波间的关系,即二维的空频关系。 解构二维信号 - 空频分离(Spacial-Frequency Separation) 记原二维信号的函数表达为 f(x,y)f(x,y)f(x,y) ,有任意点 P⃗(x,y)\\vec{P}(x,y)P⃗(x,y) 可取 x∈[0, W]x \\in [0, \\ W]x∈[0, W] , y∈[0, H]y \\in [0, \\ H]y∈[0, H] ,那么对于二维信号来说,周期 T=W2+H2T= \\sqrt{W^2+H^2}T=√W2+H2。保持 u∈[−U2, U2]u \\in [-\\tfrac{U}{2}, \\ 
\\tfrac{U}{2}]u∈[−2U, 2U] 、v∈[−V2, V2]v \\in [-\\tfrac{V}{2}, \\ \\tfrac{V}{2}]v∈[−2V, 2V] 范围,则 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 沿传播方向角频率 (ξ,η)(\\xi, \\eta)(ξ,η) 就有 ξ∈[−π, +π]\\xi \\in [-\\pi, \\ +\\pi]ξ∈[−π, +π] , η∈[−π, +π]\\eta \\in [-\\pi, \\ +\\pi]η∈[−π, +π] 。则由一维拓展至二维傅里叶级数可知, f(x,y)f(x,y)f(x,y) 与波函数 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的分量权重系数 a^ω(u,v){\\hat{a}_{\\omega}} (u, v) a^ω(u,v) 、 b^ω(u,v){\\hat{b}_{\\omega}} (u, v) b^ω(u,v) 存在: f(x,y)=1U⋅V(∑u=0∞∑v=0∞a^ω⋅cos(k⃗⋅P⃗T) + ∑u=0∞∑v=0∞b^ω⋅sin(k⃗⋅P⃗T))a^ω(u,v)=∫0H∫0Wf(x,y)⋅cos(2π⋅(uUx+vVy)) dx dyb^ω(u,v)=∫0H∫0Wf(x,y)⋅sin(2π⋅(uUx+vVy)) dx dy {\\displaystyle \\begin{aligned} f(x, y) &= \\frac{1}{U\\cdot V} (\\sum_{u =0}^{\\infty} \\sum_{v =0}^{\\infty} {\\hat{a}_{\\omega}} \\cdot cos(\\vec{k} \\cdot \\vec{P}^T)\\ \\ \\ + \\ \\ \\sum_{u =0}^{\\infty} \\sum_{v =0}^{\\infty} {\\hat{b}_{\\omega}} \\cdot sin(\\vec{k} \\cdot \\vec{P}^T)) \\\\ {\\hat{a}_{\\omega}} (u, v) &= \\int_{0}^{H} \\int_{0}^{W} f(x,y) \\cdot cos({2 \\pi} \\cdot (\\tfrac{u}{U}x+\\tfrac{v}{V}y)) \\ dx \\ dy \\\\ {\\hat{b}_{\\omega}} (u, v) &= \\int_{0}^{H} \\int_{0}^{W} f(x,y) \\cdot sin({2 \\pi} \\cdot (\\tfrac{u}{U}x+\\tfrac{v}{V}y)) \\ dx \\ dy \\\\ \\end{aligned} } f(x,y)a^ω(u,v)b^ω(u,v)=U⋅V1(u=0∑∞v=0∑∞a^ω⋅cos(k⃗⋅P⃗T) + u=0∑∞v=0∑∞b^ω⋅sin(k⃗⋅P⃗T))=∫0H∫0Wf(x,y)⋅cos(2π⋅(Uux+Vvy)) dx dy=∫0H∫0Wf(x,y)⋅sin(2π⋅(Uux+Vvy)) dx dy 取 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 初相为 ∠ϕω\\angle\\phi_{\\omega}∠ϕω ,振幅为 AωA_{\\omega}Aω ,则仍然有 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 的简谐波特征表示: Fω(x,y):∠ϕω=∠∣f^(x,y)∣ =arctan(a^ωb^ω)Aω= ∥f^(x,y)∥2=(a^ω)2+(b^ω)2 {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(x,y) &: \\\\ \\angle\\phi_{\\omega} &= \\angle{\\vert \\hat{f}(x,y) \\vert} \\ = \\arctan(\\tfrac{\\hat{a}_{\\omega}}{\\hat{b}_{\\omega}}) \\\\ A_{\\omega} &=\\ \\ \\Vert \\hat{f}(x,y) \\Vert _2 =\\sqrt{ (\\hat{a}_{\\omega}) ^2 + (\\hat{b}_{\\omega}) ^2 } \\\\ \\end{aligned} } Fω(x,y)∠ϕωAω:=∠∣f^(x,y)∣ =arctan(b^ωa^ω)= ∥f^(x,y)∥2=√(a^ω)2+(b^ω)2 因此,带入 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) ,有 f(x,y)f(x,y)f(x,y) 的二维傅里叶变换展开为: Fω(x,y)=Fω(P⃗)=Aω⋅sin(k⃗⋅P⃗T−∠ϕω)=Aω⋅cos(k⃗⋅P⃗T+∠ϕω)=∥f^(x,y)∥2⋅sin(ω⋅v⃗T−∠∣f^(x,y)∣)=∥f^(x,y)∥2⋅cos(ω⋅v⃗T+∠∣f^(x,y)∣)f^(u,v)=∫0H∫0Wf(x,y)⋅e−i(ux+vy) dx dy⇔f(x,y)=1U⋅V∫−V2+V2∫−U2+U2f^(u,v)⋅Fω(x,y) du dv {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega}(x,y) &= {\\mathcal {F}}_{\\omega} (\\vec{P}) = A_{\\omega} \\cdot sin(\\vec{k} \\cdot \\vec{P}^T -\\angle\\phi_{\\omega}) = A_{\\omega} \\cdot cos(\\vec{k} \\cdot \\vec{P}^T +\\angle\\phi_{\\omega}) \\\\ &= {\\Vert \\hat{f}(x,y) \\Vert _2} \\cdot sin(\\omega \\cdot \\vec{v}^T -\\angle{\\vert \\hat{f}(x,y) \\vert}) \\\\ &= {\\Vert \\hat{f}(x,y) \\Vert _2} \\cdot cos(\\omega \\cdot \\vec{v}^T +\\angle{\\vert \\hat{f}(x,y) \\vert}) \\\\ \\\\ \\hat{f}(u,v) &= \\int_{0}^{H} \\int_{0}^{W} f(x,y) \\cdot e^{-i (ux+vy)}\\ dx \\ dy \\\\ &\\Leftrightarrow \\\\ f(x,y) &= \\frac{1}{U\\cdot V} \\int_{-\\tfrac{V}{2}}^{+\\tfrac{V}{2}} \\int_{-\\tfrac{U}{2}}^{+\\tfrac{U}{2}} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\ du \\ dv \\\\ \\end{aligned} } Fω(x,y)f^(u,v)f(x,y)=Fω(P⃗)=Aω⋅sin(k⃗⋅P⃗T−∠ϕω)=Aω⋅cos(k⃗⋅P⃗T+∠ϕω)=∥f^(x,y)∥2⋅sin(ω⋅v⃗T−∠∣f^(x,y)∣)=∥f^(x,y)∥2⋅cos(ω⋅v⃗T+∠∣f^(x,y)∣)=∫0H∫0Wf(x,y)⋅e−i(ux+vy) dx dy⇔=U⋅V1∫−2V+2V∫−2U+2Uf^(u,v)⋅Fω(x,y) du dv 一般情况为了方便起见,常取 U=WU = WU=W 、 V=HV = HV=H ,化简分离参数。上式即为二维傅里叶变换的基本形式。 如果波矢范围在 k⃗∈[k0⃗, k1⃗]\\vec{k} \\in [\\vec{k_0},\\ \\vec{k_1}]k⃗∈[k0⃗, k1⃗] ,对于任意数据平面的像素点 P(x,y)P(x,y)P(x,y) ,有频率 ω=∥k⃗∥2\\omega = \\Vert \\vec{k} \\Vert_2ω=∥k⃗∥2 
传播方向 (u,v)(u,v)(u,v) 、基底函数族 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y) Fω(x,y) 的强度系数 f^ω(u,v)\\hat{f}_{\\omega}(u,v)f^ω(u,v) ,构成该波矢范围的 频域投影(FDP [Frequency Domain Projection]); 反之,如果选定像素点 P(x,y)P(x,y)P(x,y) ,对于波矢范围在 k⃗∈[k0⃗, k1⃗]\\vec{k} \\in [\\vec{k_0},\\ \\vec{k_1}]k⃗∈[k0⃗, k1⃗] ,有平面位置 P(x,y)P(x,y)P(x,y) 、原函数值 f(x,y)f(x,y)f(x,y) 、基底函数族 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y) Fω(x,y) ,构成原函数在该空间(二维)范围的 空域投影(SDP [Spacial Domain Projection])。 两者的区别同一维一样,仅在于观察角度的不同: Frequency Domain Projection: ( u , v , f^ω(u,v) )Spacial Domain Projection: ( x , y , f(x,y) , Fω(x,y) )k⃗∈[k0⃗, k1⃗] P⃗(x,y) ∈[ (0, 0) , (W, H) ] {\\displaystyle \\begin{aligned} {Frequency\\ Domain\\ Projection:} &\\ \\ (\\ \\ u\\ \\ ,\\ \\ v\\ \\ ,\\ \\ \\hat{f}_{\\omega}(u,v)\\ \\ ) \\\\ {Spacial\\ Domain\\ Projection:} &\\ \\ (\\ \\ x\\ \\ ,\\ \\ y\\ \\ ,\\ \\ f(x,y)\\ \\ ,\\ \\ {\\mathcal {F}}_{\\omega}(x,y) \\ \\ ) \\\\ {\\vec{k} \\in [\\vec{k_0},\\ \\vec{k_1}]} \\ \\ \\ \\ & \\ \\ {\\ \\vec{P}(x,y)\\ \\in [\\ (0,\\ 0)\\ \\ ,\\ \\ (W,\\ H)\\ ]} \\\\ \\end{aligned} } Frequency Domain Projection:Spacial Domain Projection:k⃗∈[k0⃗, k1⃗] ( u , v , f^ω(u,v) ) ( x , y , f(x,y) , Fω(x,y) ) P⃗(x,y) ∈[ (0, 0) , (W, H) ] 显然,二维和一维情况的差异很明显且必然:二维傅里叶变换下所获的的分离投影结果位于三维欧式空间,而非一维时的平面(二维)。 精简运算过程 - 二维离散傅立叶变换(2D-DFT) 同一维傅里叶变换需要做时域离散化(TDD)和频域离散化(FDD)来精简运算量。二维傅里叶变换由于引入了新的维度,更需要依赖离散化处理,才能被计算机在有限算力的前提下使用。 二维离散傅里叶变换(2D-DFT)分为 空域离散化(SDD [Spacial Domain Discrete]) 和 频域离散化(FDD [Frequency Domain Discrete])。当然,此处的空域为二维空域(平面),是不包含 zzz 轴的。我们将两者结合称为 空频离散化(SFD [Spacial Frequency Discrete])。 如果取任意点 P⃗(x,y)\\vec{P}(x,y)P⃗(x,y) 可取 x∈[0, 1, ... , W]x \\in [0, \\ 1, \\ \\ ...\\ , \\ W]x∈[0, 1, ... , W] , y∈[0, 1, ... , H]y \\in [0, \\ 1, \\ \\ ...\\ , \\ H]y∈[0, 1, ... , H] ,只取整数位置。同时, u∈[−U2, ... , +U2]u \\in [-\\tfrac{U}{2}, \\ \\ ...\\ , \\ +\\tfrac{U}{2}]u∈[−2U, ... , +2U] 、 v∈[−V2, ... , +V2]v \\in [-\\tfrac{V}{2}, \\ \\ ...\\ , \\ +\\tfrac{V}{2}]v∈[−2V, ... , +2V] ,有离散 k⃗∈[k0⃗, k1⃗, ... , kn⃗]\\vec{k} \\in [\\vec{k_0}, \\ \\vec{k_1}, \\ \\ ...\\ , \\ \\vec{k_{n}}]k⃗∈[k0⃗, k1⃗, ... , kn⃗] , n=UV=HWn = UV = HWn=UV=HW ,则: SDD: f^(u,v)=∑x=0W∑y=0Hf(x,y)⋅e−i(ux+vy)FDD: f(x,y)=1U⋅V∑u=−U/2+U/2∑v=−V/2+V/2f^(u,v)⋅Fω(x,y) {\\displaystyle \\begin{aligned} SDD: \\ \\ \\hat{f}(u,v) &= \\sum_{x = 0}^{W} \\sum_{y = 0}^{H} f(x,y) \\cdot e^{-i (ux+vy)} \\\\ FDD: \\ \\ f(x,y) &= \\frac{1}{U\\cdot V} \\sum_{u=-U/2}^{+U/2} \\sum_{v= -V/2}^{+V/2} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\\\ \\end{aligned} } SDD: f^(u,v)FDD: f(x,y)=x=0∑Wy=0∑Hf(x,y)⋅e−i(ux+vy)=U⋅V1u=−U/2∑+U/2v=−V/2∑+V/2f^(u,v)⋅Fω(x,y) 至此,由空域离散化(SDD)与频域离散化(FDD)共同构成二维离散傅立叶(2D-DFT)的完整表达如下所示: Fω=[Fk0⃗,Fk1⃗, ... ,Fkn⃗]f^(u,v)=∑x=0W∑y=0Hf(x,y)⋅e−i(ux+vy)⇔f(x,y)=1U⋅V∑u=−U/2+U/2∑v=−V/2+V/2f^(u,v)⋅Fω(x,y) {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega} &= [{\\mathcal {F}}_{\\vec{k_0}}, {\\mathcal {F}}_{\\vec{k_1}},\\ ...\\ ,{\\mathcal {F}}_{\\vec{k_n}}] \\\\ \\hat{f}(u,v) &= \\sum_{x = 0}^{W} \\sum_{y = 0}^{H} f(x,y) \\cdot e^{-i (ux+vy)} \\\\ &\\Leftrightarrow \\\\ f(x,y) &= \\frac{1}{U\\cdot V} \\sum_{u=-U/2}^{+U/2} \\sum_{v= -V/2}^{+V/2} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\\\ \\end{aligned} } Fωf^(u,v)f(x,y)=[Fk0⃗,Fk1⃗, ... 
,Fkn⃗]=x=0∑Wy=0∑Hf(x,y)⋅e−i(ux+vy)⇔=U⋅V1u=−U/2∑+U/2v=−V/2∑+V/2f^(u,v)⋅Fω(x,y) 利用上式,既可做算法实现。 二维离散傅立叶变换(1D-DFT)的 C 语言实现 第一步还是将二维离散傅立叶变化的过程抽象化。这里依旧采用伪码表示: /** * 2D-DFT [Discrete Fourier Transform] * [How to Use] * * Fo[W][H] = {...}; * Fn[U][V] = {}; * dft_2d(&Fo, &Fn); * [logistic] * { * result[U][V] = []; // as byte 2D-array * // do SDD: * for u in range(NU-Horizontal_Slices) { * for v in range(NV-Vertical_Slices) { * An = 0; Bn = 0; * // do FDD: * for y in Range(Height) { * for x in Range(Wight) { * Wn = (2 * PI) * Vec; * An = Re += Cos(Wn · VecT) * Fo(t); * Bn = Im += Sin(Wn · VecT) * Fo(t); * }} * result[u][v] = Fn.to_complex_angular_form(An, Bn) * }} * return result; * } * @param original_ Original Function input 2D-array * (image include width & height) * @param analyzed_ Fourier Basis info in 2D */ 同时,二维情况也需要提供离散傅里叶变换的逆变换(IDFT [Inverse Discrete Fourier Transform])来使得电脑能够还原信息: /** * 2D-IDFT [Inverse Discrete Fourier Transform] * [How to Use] * * Fo[W][H] = {}; * Fn[U][V] = {...}; * idft_2d(&Fo, &Fn); * [logistic] * { * result[W][H] = []; // as byte 2D-array * // do SDD: * for y in Range(Height) { * for x in Range(Wight) { * Re = 0; Im = 0; * // do FDD: * for u in range(NU-Horizontal_Slices) { * for v in range(NV-Vertical_Slices) { * Wn = (2 * PI) * Vec;; * An = Re * (Fn[n] · VecT); * Bn = Im * (Fn[n] · VecT); * result[t] += Fn[n].to_value(Wn, An, Bn) / (U * V); * }} * } * return result; * } * @param original_ Original Function input 2D-array * (image include width & height) * @param analyzed_ Fourier Basis analyzed info in 2D */ 接下来只需要根据思路做代码实现即可: #include \"stdio.h\" #include \"math.h\" #define PI 3.1415926f typedef struct FBasis { double re_; double im_; double w_[2]; } FBasis; typedef struct Signal2DOriginal { int GW_; int GH_; double *Fo_; } Signal2DOriginal; typedef struct Signal2DAnalyzed { int NU_; int NV_; FBasis *Fn_; } Signal2DAnalyzed; void dft_2d(Signal2DOriginal *original_, Signal2DAnalyzed *analyzed_) { for (int u = 0; u NU_; ++u) { for (int v = 0; v NV_; ++v) { double An = 0; double Bn = 0; double Un = (2 * PI / analyzed_->NU_) * u ; double Vn = (2 * PI / analyzed_->NV_) * v ; for (int y = 0; y GH_; ++y) { for (int x = 0; x GW_; ++x) { An += cos(Un * x + Vn * y) * original_->Fo_[y * original_->GW_ + x]; Bn += sin(Un * x + Vn * y) * original_->Fo_[y * original_->GW_ + x]; } } FBasis e_ = {An, Bn, {Un, Vn}}; analyzed_->Fn_[u * analyzed_->NV_ + v] = e_; } } } void idft_2d(Signal2DOriginal *original_, Signal2DAnalyzed *analyzed_) { for (int y = 0; y GH_; ++y) { for (int x = 0; x GW_; ++x) { for (int u = 0; u NU_; ++u) { for (int v = 0; v NV_; ++v) { FBasis e_ = analyzed_->Fn_[u * analyzed_->NV_ + v]; original_->Fo_[y * original_->GW_ + x] += ( e_.re_ * cos(e_.w_[0] * x + e_.w_[1] * y) + e_.im_ * sin(e_.w_[0] * x + e_.w_[1] * y) ) / (analyzed_->NU_ * analyzed_->NV_); } } } } } 写完后还是需要简单测试一下: int main(void) { double input_data_[36] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 3.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 4.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 5.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 6.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; FBasis output_data_[36] = {}; double versed_data_[36] = {}; Signal2DOriginal Fo = { 6, 6, input_data_ }; Signal2DAnalyzed Fn = { 6, 6, output_data_ }; Signal2DOriginal iFo = { 6, 6, versed_data_ }; printf(\"\\n Original_data: \\n\"); for (int y = 0; y 得到结果和标准几近相同: Original_data: 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 2.000000 3.000000 4.000000 5.000000 6.000000 1.000000 3.000000 4.000000 5.000000 6.000000 1.000000 
2.000000 4.000000 5.000000 6.000000 1.000000 2.000000 3.000000 5.000000 6.000000 1.000000 2.000000 3.000000 4.000000 6.000000 1.000000 2.000000 3.000000 4.000000 5.000000 DFT_result: 126.000 + i 0.000 -0.000 + i 0.000 -0.000 + i 0.000 0.000 + i 0.000 0.000 + i 0.000 0.000 + i 0.000 -0.000 + i 0.000 -18.000 + i -31.177 0.000 + i 0.000 0.000 + i -0.000 0.000 + i -0.000 0.000 + i -0.000 -0.000 + i 0.000 0.000 + i 0.000 -18.000 + i -10.392 0.000 + i -0.000 0.000 + i -0.000 0.000 + i -0.000 0.000 + i 0.000 0.000 + i -0.000 0.000 + i -0.000 -18.000 + i 0.000 0.000 + i -0.000 -0.000 + i -0.000 0.000 + i 0.000 0.000 + i -0.000 0.000 + i -0.000 0.000 + i -0.000 -18.000 + i 10.392 -0.000 + i -0.000 0.000 + i 0.000 0.000 + i -0.000 0.000 + i -0.000 -0.000 + i -0.000 -0.000 + i -0.000 -18.000 + i 31.177 IDFT_result: 1.000007 2.000001 2.999999 3.999999 5.000001 6.000003 2.000001 2.999998 3.999998 4.999998 5.999999 1.000000 2.999999 3.999998 4.999997 5.999998 0.999998 2.000000 3.999999 4.999998 5.999998 0.999997 1.999999 3.000001 5.000001 5.999999 0.999998 1.999999 3.000000 4.000003 6.000003 1.000000 2.000000 3.000001 4.000003 5.000005 运行结束。 二维离散傅里叶变换到此结束,那么更多维度的傅里叶变换该怎么处理呢?我们只需要拓展波矢 k⃗{\\vec{k}}k⃗ 的维度即可。而多维和一维、二维情况,在离散傅里叶变换的逻辑流程上并没有不同。但是,随着波矢 k⃗{\\vec{k}}k⃗ 的参数维度扩展,我们发现现有的直接计算法实现的离散傅里叶变换,其算法时间复杂度 O{n2}O\\{ n^2\\}O{n2} 已不足以支撑超过二维参数量后的敏捷计算。因此,我们迫切需要一种更快的代替算法。 这就是促成快速傅立叶蝴蝶法工程化的要素。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_1_3.html":{"url":"Chapter_3/Language/cn/Docs_3_1_3.html","title":"3.1.3 傅立叶变化的经典 - 快速傅立叶变换(FFT)","keywords":"","body":"3.1.3 快速傅立叶(FFT [Fast Fourier Transform]) 快速傅立叶是对离散傅立叶的数学逼近。其旨在通过有限点的分布拟合,快速逼近离散傅立叶变换结果。 快速傅立叶变换最早由 高斯(Carl Friedrich Gauss,1777 - 1855) 为了解决天文学中有关于智神星(Pallas)和婚神星(Juno)的位姿计算问题,而在 1805 年提出的 [8] [9] 。不过由于种种意料之外的因素,让该论文并没有被及时的发表。因此,论文在当时也没有获得太多的关注。直到计算机开始兴起,有关傅里叶变换等算法的更为低时间复杂度的要求变的迫切,才让后续研究者们又一次察觉到了这一篇文献(以及包括 19 世纪中叶和 20 世纪初的旁类研究)的贡献 [9] 。 1965 年,来自 IBM 普林斯通实验室的 詹姆士·库利(James Cooley) 和来自普林斯通大学的 约翰·图奇(John Tukey) 教授,联合发表了基于快速傅里叶变换的机器实现 [10] ,首次将该算法迁移到了计算机上。他们的研究提出了,通过采用分治法的思想来减少变换所需步数。这成功的使得,多维信号分析所用的傅立叶算法的时间复杂度算法,降至 。促进了数字信号处理(DSP)和计算机图形学的技术更新 [11] 。所以,为纪念两位的贡献,这套程序化的快速傅里叶变换(FFT [Fast Fourier Transform])方法论,被称为 库利-图奇算法(Cooley-Tukey Algorithm)。库利-图奇算法目标是一维信号,不过高维信号是可以被拆解为低维信号的向量积的,因此 并不影响其泛化。 在库利-图奇算法提出的时候,分治法已经被广泛的用来做计算机数组求最大值(Max)和排序(Sort)的处理当中。虽然离散的数组和周期信号之间,在信息密度和特征上存在较大差异。但如果考虑到周期信号沿传播维度重复,和傅里叶变换傅里叶基的特征,会发现: 如果将一维信号离散傅里叶变换的有限基底函数族 Fω\\mathcal {F}_{\\omega}Fω 构成的傅里叶基看作最小元,那么对其在时域上进行分组重排,也是可行的。从而使信号的一组基底函数基,能够以树状结构分类,并拆解特征表示原信号函数。 这就是库利-图奇算法的关键,在后续的算法的演进过程中逐步被提炼,形成了时域抽取这一核心概念 [11] 。 时域抽取(DIT [Decimation-in-Time]) 时域抽取(DIT [Decimation-in-Time])是从时域(TD [Time Domain])对一维信号进行可逆解构的一种数学工具。 它的工作流包含有两个阶段: 分组离散傅立叶(Grouped DFT) 和 旋转因子转换(Rotation Factor Convert) 时域抽取 - 分组离散傅立叶(Grouped DFT) 分组离散傅立叶(Grouped DFT) 是指,在信号的单个周期 TTT 内,以等间距有限次取 个原始离散采样后。将周期内所有采样点信息以 step=TK=Nstep =\\tfrac {T}{K} = Nstep=KT=N 的步长等分,得到 KKK 组顺序连续的子采样分组,依照组别记为样本子集 [S1,S2, ... ,SK][S_1,S_2,\\ ...\\ , S_K][S1,S2, ... ,SK] 。每组子集都有 Sk∈[fk((k−1)⋅N), fk(k⋅N))S_k \\in [f_k((k-1) \\cdot N),\\ f_k(k \\cdot N))Sk∈[fk((k−1)⋅N), fk(k⋅N)) 的样本取样区间。 此时,记组内索引为 nnn ,有 n∈[1, N]n \\in [1,\\ N]n∈[1, N] 。按照顺序从各组中,取组内索引位置为 nnn 的元素,组成包含数据量为 Fωn\\mathcal {F}_{\\omega_n}Fωn 的基底函数 Fωn\\mathcal {F}_{\\omega_n}Fωn 的波峰数组。可以逐个拟合,得到一组当前一维信号的有限基底函数族 Fω=[Fω1,Fω2, ... ,FωN]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2},\\ ...\\ ,\\mathcal {F}_{\\omega_N}]Fω=[Fω1,Fω2, ... 
,FωN] ,记为当前解的最小傅立叶基。根据一维离散傅立叶变换有: Fω=[Fω1,Fω2, ... ,FωN]T=NKf^(ω)=∑t=0Tf(t)⋅e−iωt ⇔ f(t)=1K∑ω0ωNf^(ω)⋅Fω(t) {\\displaystyle \\begin{aligned} \\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1},\\mathcal {F}_{\\omega_2},& \\ ...\\ ,\\mathcal {F}_{\\omega_N}] \\quad \\quad T = NK \\\\ \\hat{f}(\\omega) = \\sum_{t = 0}^{T} f(t) \\cdot e^{-i \\omega t} \\ \\ \\ \\ \\ &\\Leftrightarrow \\ \\ \\ \\ \\ f(t) = \\frac{1}{K} \\sum_{\\omega_0}^{\\omega_N} \\hat{f}(\\omega) \\cdot \\mathcal {F}_{\\omega}(t) \\\\ \\end{aligned} } Fω=[Fω1,Fω2,f^(ω)=t=0∑Tf(t)⋅e−iωt ... ,FωN]T=NK⇔ f(t)=K1ω0∑ωNf^(ω)⋅Fω(t) 又因 ωn=2πnT{\\omega_n} = \\tfrac{2\\pi n}{T}ωn=T2πn ,强度系数 f^(ω)\\hat{f}(\\omega)f^(ω) 与 f(t)f(t)f(t) 的关系,可以被转换为 f^(n)\\hat{f}(n)f^(n) 与 f(t)f(t)f(t) 的关系: f^(ω)=∑t=0Tf(t)⋅e−iωt→f^(n)=∑t=0Tf(t)⋅e−i2πnTtf(t)=1N∑ω0ωNf^(ω)⋅Fω(t)→f(t)=1N∑n=1Nf^(n)⋅Fω(t)f^(n)=∑t=0Tf(t)⋅e−i2πnTt⇔f(t)=1N∑n=1Nf^(n)⋅Fω(t) {\\displaystyle \\begin{aligned} \\hat{f}(\\omega) = \\sum_{t = 0}^{T} f(t) \\cdot e^{-i \\omega t} &\\rightarrow \\hat{f}(n) =\\sum_{t = 0}^{T} f(t) \\cdot e^{-i \\tfrac{2\\pi n}{T} t } \\\\ f(t) = \\frac{1}{N} \\sum_{\\omega_0}^{\\omega_{N}} \\hat{f}(\\omega) \\cdot \\mathcal {F}_{\\omega}(t) &\\rightarrow f(t) = \\frac{1}{N} \\sum_{n=1}^{N} \\hat{f}(n) \\cdot \\mathcal {F}_{\\omega}(t) \\\\ \\hat{f}(n) =\\sum_{t = 0}^{T} f(t) \\cdot e^{-i \\tfrac{2\\pi n}{T} t } \\quad \\quad &\\Leftrightarrow \\quad \\quad f(t) = \\frac{1}{N} \\sum_{n=1}^{N} \\hat{f}(n) \\cdot \\mathcal {F}_{\\omega}(t) \\end{aligned} } f^(ω)=t=0∑Tf(t)⋅e−iωtf(t)=N1ω0∑ωNf^(ω)⋅Fω(t)f^(n)=t=0∑Tf(t)⋅e−iT2πnt→f^(n)=t=0∑Tf(t)⋅e−iT2πnt→f(t)=N1n=1∑Nf^(n)⋅Fω(t)⇔f(t)=N1n=1∑Nf^(n)⋅Fω(t) 带入 KKK 分组情况( T=NKT = NKT=NK ),上式可化为: f^(n)=∑k=1K∑(k−1)Nt=kN−1f(t)⋅e−i2πnTt⇔f(t)=1N∑n=1Nf^(n)⋅Fω(t) {\\displaystyle \\begin{aligned} \\hat{f}(n) =\\sum_{k=1}^{K}\\sum_{(k-1)N}^{t = kN-1} f(t) \\cdot e^{-i \\tfrac{2\\pi n}{T} t } \\quad \\quad &\\Leftrightarrow \\quad \\quad f(t) = \\frac{1}{N} \\sum_{n=1}^{N} \\hat{f}(n) \\cdot \\mathcal {F}_{\\omega}(t) \\end{aligned} } f^(n)=k=1∑K(k−1)N∑t=kN−1f(t)⋅e−iT2πnt⇔f(t)=N1n=1∑Nf^(n)⋅Fω(t) 即强度系数 f^(n)\\hat{f}(n)f^(n) 存在展开式: f^(n)=∑k=1K∑(k−1)Nt=kN−1f(t)⋅e−i2πnTt=∑t=0N−1f(t)⋅e−i2πtT⋅n+∑t=N2N−1f(t)⋅e−i2πtT⋅n+ ... +∑(K−1)Nt=KN−1f(t)⋅e−i2πtT⋅n=∑t=0N−1f(t)⋅e−i2πtT⋅n+∑t=0N−1f(t+N)⋅e−i2π(t+N)T⋅n+ ... +∑t=0N−1f(t+(K−1)N)⋅e−i2π(t+(K−1)N)T⋅n=∑k=1K∑t=0N−1f(t+(k−1)N)⋅e−i2πtTn⋅e−i2π(k−1)Kn {\\displaystyle \\begin{aligned} \\hat{f}(n) &= \\sum_{k=1}^{K}\\sum_{(k-1)N}^{t = kN-1} f(t) \\cdot e^{-i \\tfrac{2\\pi n}{T} t } \\\\ &= \\sum_{t=0}^{N-1} f(t) \\cdot e^{-i \\tfrac{2\\pi t}{T} \\cdot n } + \\sum_{t=N}^{2N-1} f(t) \\cdot e^{-i \\tfrac{2\\pi t}{T} \\cdot n } + \\ ...\\ + \\sum_{(K-1)N}^{t=KN-1} f(t) \\cdot e^{-i \\tfrac{2\\pi t}{T} \\cdot n } \\\\ &= \\sum_{t=0}^{N-1} f(t) \\cdot e^{-i \\tfrac{2\\pi t}{T} \\cdot n } + \\sum_{t=0}^{N-1} f(t+N) \\cdot e^{-i \\tfrac{2\\pi (t+N)}{T} \\cdot n } + \\ ...\\ + \\sum_{t=0}^{N-1} f(t + (K-1)N) \\cdot e^{-i \\tfrac{2\\pi (t + (K-1)N)}{T} \\cdot n } \\\\ &= \\sum_{k=1}^{K} \\sum_{t=0}^{N-1} f(t+ (k-1)N) \\cdot e^{-i \\tfrac{2\\pi t}{T} n } \\cdot e^{-i \\tfrac{2\\pi (k-1)}{K} n } \\\\ \\end{aligned} } f^(n)=k=1∑K(k−1)N∑t=kN−1f(t)⋅e−iT2πnt=t=0∑N−1f(t)⋅e−iT2πt⋅n+t=N∑2N−1f(t)⋅e−iT2πt⋅n+ ... +(K−1)N∑t=KN−1f(t)⋅e−iT2πt⋅n=t=0∑N−1f(t)⋅e−iT2πt⋅n+t=0∑N−1f(t+N)⋅e−iT2π(t+N)⋅n+ ... +t=0∑N−1f(t+(K−1)N)⋅e−iT2π(t+(K−1)N)⋅n=k=1∑Kt=0∑N−1f(t+(k−1)N)⋅e−iT2πtn⋅e−iK2π(k−1)n 要点就出现在这里,此时,由于有限基底函数族 Fω=[Fω1,Fω2, ... 
,FωN]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2},\\ ...\\ ,\\mathcal {F}_{\\omega_N}]Fω=[Fω1,Fω2, ... ,FωN] 的拟合样本选取自各个分组的对应角标数据,则显然任意 Fωi\\mathcal {F}_{\\omega_i}Fωi 的周期都有 Ti=2πnωi≥NT_i = \\tfrac{2\\pi n}{\\omega_i} \\geq NTi=ωi2πn≥N 且必然有 TimodN=0T_i \\mod N = 0TimodN=0 。因此,强度系数 f^(n)\\hat{f}(n)f^(n) 关于 kkk 的展开式能进一步精简为: f^(n)=∑k=1K(∑t=0N−1f(t+(k−1)N)⋅e−i2πtTn)⋅e−i2π(k−1)Kn=∑k=1Ke−i2π(k−1)Kn⋅[∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn)] {\\displaystyle \\begin{aligned} \\hat{f}(n) &= \\sum_{k=1}^{K} (\\sum_{t=0}^{N-1} f(t+ (k-1)N) \\cdot e^{-i \\tfrac{2\\pi t}{T} n }) \\cdot e^{-i \\tfrac{2\\pi (k-1)}{K} n } \\\\ &= \\sum_{k=1}^{K} e^{-i \\tfrac{2\\pi (k-1)}{K} n } \\cdot [\\sum_{(k-1)N}^{kN-1} \\vert_t \\ f(t) \\cdot \\mathcal {F}_{\\omega}^{-1}(tn) \\quad ] \\\\ \\end{aligned} } f^(n)=k=1∑K(t=0∑N−1f(t+(k−1)N)⋅e−iT2πtn)⋅e−iK2π(k−1)n=k=1∑Ke−iK2π(k−1)n⋅[(k−1)N∑kN−1∣t f(t)⋅Fω−1(tn)] 记 f^k(n)=∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn)\\hat{f}_k(n) =\\sum_{(k-1)N}^{kN-1} \\vert_t \\ f(t) \\cdot \\mathcal {F}_{\\omega}^{-1}(tn)f^k(n)=∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn) ,则 f^k(n)\\hat{f}_k(n)f^k(n) 即为分组样本子集 [S1,S2, ... ,SK][S_1,S_2,\\ ...\\ , S_K][S1,S2, ... ,SK] 在自己的分组样本区间 Sk∈[fk((k−1)⋅N), fk(k⋅N))S_k \\in [f_k((k-1) \\cdot N),\\ f_k(k \\cdot N))Sk∈[fk((k−1)⋅N), fk(k⋅N)) 内,进行离散傅里叶变换的分组强度系数结果。而 e−i2π(k−1)Kne^{-i \\tfrac{2\\pi (k-1)}{K} n }e−iK2π(k−1)n 在样本顺序 nnn 给定时,只与所处分组的组序 kkk 有关,且本身在三角函数空间表现为 n(k−1)n(k-1)n(k−1) 的角度固定值,所以我们记其为旋转因子(Rotation Factor) Rk(n)=e−i2π(k−1)KnR_k(n) = e^{-i \\tfrac{2\\pi (k-1)}{K} n }Rk(n)=e−iK2π(k−1)n 。 将 f^k(n)\\hat{f}_k(n)f^k(n) 、 Rk(n)R_k(n)Rk(n) 带入 f^(n)\\hat{f}(n)f^(n) ,则 f^(n)\\hat{f}(n)f^(n) 最终表现为: R1(n)=1f^(n)=∑k=1KRk(n)⋅f^k(n)=R1(n)⋅f^1(n)+R2(n)⋅f^2(n)+ ... +RK(n)⋅f^K(n)f^(n)=f^1(n)+R2(n)⋅f^2(n)+ ... +RK(n)⋅f^K(n) {\\displaystyle \\begin{aligned} R_1(n) & = 1 \\\\ \\hat{f}(n) &= \\sum_{k=1}^{K} R_k(n) \\cdot \\hat{f}_k(n) = R_1(n) \\cdot \\hat{f}_1(n) + R_2(n) \\cdot \\hat{f}_2(n) + \\ ...\\ + R_K(n) \\cdot \\hat{f}_K(n) \\\\ \\hat{f}(n) &= \\hat{f}_1(n) + R_2(n) \\cdot \\hat{f}_2(n) + \\ ...\\ + R_K(n) \\cdot \\hat{f}_K(n) \\\\ \\end{aligned} } R1(n)f^(n)f^(n)=1=k=1∑KRk(n)⋅f^k(n)=R1(n)⋅f^1(n)+R2(n)⋅f^2(n)+ ... +RK(n)⋅f^K(n)=f^1(n)+R2(n)⋅f^2(n)+ ... 
+RK(n)⋅f^K(n) 上式就是时域抽取(DIT)有关分组离散傅立叶(Grouped DFT)的通用完整过程。单从公式来看,由于切割了样本集,我们只能通过分组离散傅立叶(Grouped DFT)直接求得原一维信号前 NNN 个信号量的傅里叶解。反而因为样本不足的问题,无法直接求得剩余的 (K−1)N(K-1)N(K−1)N 个信号量。 那么我们大费周章的这么做有什么用处呢?原因就在于旋转因子间是存在关系的。 时域抽取 - 旋转因子转换(Rotation Factor Convert) 这个问题,需要从复变函数的三角函数特性来回答。记 Rk(n)R_k(n)Rk(n) 变换到三角函数域,其实部为 aka_kak ,虚部为 bkb_kbk 。则 Rk(n)R_k(n)Rk(n) 可以表示为: Rk(n)=e−i2π(k−1)Kn=ak⋅cos(2π(k−1)Kn)+i⋅bk⋅sin(2π(k−1)Kn) dt {\\displaystyle \\begin{aligned} R_k(n) &= e^{-i \\tfrac{2\\pi (k-1)}{K} n } \\\\ &= a_k \\cdot cos(\\tfrac{2\\pi (k-1)}{K} n) + i \\cdot b_k \\cdot sin(\\tfrac{2\\pi (k-1)}{K} n) \\ dt \\\\ \\end{aligned} } Rk(n)=e−iK2π(k−1)n=ak⋅cos(K2π(k−1)n)+i⋅bk⋅sin(K2π(k−1)n) dt 依此,取 aka_kak 为 yyy 轴、 bkb_kbk 为 xxx 轴。我们假设分组 K=2mK = 2^mK=2m ,信号周期 T=2π⋅MT = 2 \\pi \\cdot MT=2π⋅M 且 Tmod2π=0T \\mod 2\\pi = 0Tmod2π=0 ,有此时步长 N=π2m−1⋅MN = \\tfrac{\\pi}{2^{m-1}} \\cdot MN=2m−1π⋅M 。为便于说明,我们取 M=1M = 1M=1 , m=1m = 1m=1 ,且 n=π6=∠30∘n = \\tfrac{\\pi}{6} = \\angle 30^\\circn=6π=∠30∘ 来进行绘制。实际上 nnn 只能取 [1, N][1, \\ N][1, N] 的整数,但那样会不便于图示,这里取固定角并不影响后续结论。则 Rk(n)R_k(n)Rk(n) 在 akbka_kb_kakbk 构成的平面坐标系上有如下取值范围: 图 3-1 旋转因子的三角函数系取值演示 在图像表示下 Rk(n)R_k(n)Rk(n) 的特性更易察觉,当分组 K=2mK = 2^mK=2m 且 m≥1m \\geq 1m≥1 取整时, 单个 2π2\\pi2π 周期内,以 N=2πKN = \\tfrac{2\\pi}{K}N=K2π 可以分为 2m−12^{m-1}2m−1 组。每组分组都包涵两个子样本集 [Sk ,Sk+2m−1][S_k\\ ,S_{k+2^{m-1}}][Sk ,Sk+2m−1] ,此时,这两个字样本集旋转因子原点对称,有 Rk(n)=−Rk(n+π)n∈[2π(k−1)K, 2πkK]R_k(n) = -R_k(n+\\pi) \\quad n \\in [\\tfrac{2\\pi (k-1)}{K}, \\ \\tfrac{2\\pi k}{K}]Rk(n)=−Rk(n+π)n∈[K2π(k−1), K2πk] 。而对于信号 M>1M > 1M>1 时,间隔为 2π2\\pi2π 的分组有 2M2^M2M 组,且旋转因子取值相同,即 Rk(n)=Rk+2π⋅M(n)R_k(n) = R_{k+2\\pi \\cdot M}(n)Rk(n)=Rk+2π⋅M(n) 。 如果我们取 K=2K = 2K=2 ,即 m=1m = 1m=1 ,对整体信号的 TTT 个样本分为两组,两组原点对称有: f^(n)=f^1(n)+e−iπn⋅f^2(n) =f^1(n)+R2(n)⋅f^2(n)f^(n+π)=f^1(n)+e−iπ(n+π)⋅f^2(n)=f^1(n)−R2(n)⋅f^2(n) {\\displaystyle \\begin{aligned} \\hat{f}(n) &= \\hat{f}_1(n) + e^{-i \\pi n} \\cdot \\hat{f}_2(n) \\quad \\ = \\hat{f}_1(n) + R_2(n) \\cdot \\hat{f}_2(n) \\\\ \\hat{f}(n+\\pi) &= \\hat{f}_1(n) + e^{-i \\pi (n+\\pi)} \\cdot \\hat{f}_2(n) = \\hat{f}_1(n) - R_2(n) \\cdot \\hat{f}_2(n) \\\\ \\end{aligned} } f^(n)f^(n+π)=f^1(n)+e−iπn⋅f^2(n) =f^1(n)+R2(n)⋅f^2(n)=f^1(n)+e−iπ(n+π)⋅f^2(n)=f^1(n)−R2(n)⋅f^2(n) 如果我们取 K=4K = 4K=4 ,即 m=2m = 2m=2 ,对整体信号的 TTT 个样本分为四组,间隔两两原点对称,即相邻组间实虚轴反转,有: Rk(n+π2)=[(k−1)%2]⋅(−1)k−1⋅Rk(n)+[(k−1)%2+1]⋅(−i)k−1⋅Rk(n) {\\displaystyle \\begin{aligned} R_k(n+\\tfrac{\\pi}{2}) = [(k-1)\\%2] \\cdot (-1)^{k-1} \\cdot R_k(n) + [(k-1)\\%2 + 1] \\cdot(-i)^{k-1} \\cdot R_k(n) \\\\ \\end{aligned} } Rk(n+2π)=[(k−1)%2]⋅(−1)k−1⋅Rk(n)+[(k−1)%2+1]⋅(−i)k−1⋅Rk(n) 则 f^(n)\\hat{f}(n)f^(n) 有 n∈[0, π2]n \\in [0, \\ \\tfrac{\\pi}{2}]n∈[0, 2π] 范围的表达式: f^(n)=f^1(n)+ R2(n)⋅f^2(n)+ R3(n)⋅f^3(n)+ R4(n)⋅f^4(n)f^(n+π2)=f^1(n)−iR2(n)⋅f^2(n)− R3(n)⋅f^3(n)+iR4(n)⋅f^4(n)f^(n+π)=f^1(n)− R2(n)⋅f^2(n)+ R3(n)⋅f^3(n)− R4(n)⋅f^4(n)f^(n+3π2)=f^1(n)+iR2(n)⋅f^2(n)− R3(n)⋅f^3(n)−iR4(n)⋅f^4(n) {\\displaystyle \\begin{aligned} \\hat{f}(n) &= \\hat{f}_1(n) + \\ R_2(n) \\cdot \\hat{f}_2(n) + \\ R_3(n) \\cdot \\hat{f}_3(n) + \\ R_4(n) \\cdot \\hat{f}_4(n) \\\\ \\hat{f}(n+\\tfrac{\\pi}{2}) &= \\hat{f}_1(n) - i R_2(n) \\cdot \\hat{f}_2(n) - \\ R_3(n) \\cdot \\hat{f}_3(n) + i R_4(n) \\cdot \\hat{f}_4(n) \\\\ \\hat{f}(n+\\pi) &= \\hat{f}_1(n) - \\ R_2(n) \\cdot \\hat{f}_2(n) + \\ R_3(n) \\cdot \\hat{f}_3(n) - \\ R_4(n) \\cdot \\hat{f}_4(n) \\\\ \\hat{f}(n+\\tfrac{3\\pi}{2})&= \\hat{f}_1(n) + i R_2(n) \\cdot \\hat{f}_2(n) - \\ R_3(n) \\cdot \\hat{f}_3(n) - i R_4(n) \\cdot \\hat{f}_4(n) \\\\ \\end{aligned} } f^(n)f^(n+2π)f^(n+π)f^(n+23π)=f^1(n)+ R2(n)⋅f^2(n)+ 
R3(n)⋅f^3(n)+ R4(n)⋅f^4(n)=f^1(n)−iR2(n)⋅f^2(n)− R3(n)⋅f^3(n)+iR4(n)⋅f^4(n)=f^1(n)− R2(n)⋅f^2(n)+ R3(n)⋅f^3(n)− R4(n)⋅f^4(n)=f^1(n)+iR2(n)⋅f^2(n)− R3(n)⋅f^3(n)−iR4(n)⋅f^4(n) 不论上述哪一种分组方法,我们都可以将求解范围从有限子集 SkS_kSk 中 n∈[2π(k−1)K, 2πkK]n \\in [\\tfrac{2\\pi (k-1)}{K}, \\ \\tfrac{2\\pi k}{K}]n∈[K2π(k−1), K2πk] 的离散傅立叶结果,拓展到完整信号周期 TTT 。而只需要求任意一有限子集 SkS_kSk 的傅立叶基即可。 根据 K=2mK = 2^mK=2m 的不同取值,时域抽取(DIT)过程的时间复杂度,通过计算分片耗时,能够简单得到为 O(K−1Kn⋅log2mn)=O(K−1K⋅2m−1n⋅log2n)O(\\tfrac{K-1}{K}n \\cdot log_{2^m}n) = O(\\tfrac{K-1}{K \\cdot 2^{m-1} }n \\cdot log_2n)O(KK−1n⋅log2mn)=O(K⋅2m−1K−1n⋅log2n) 。 显然,O∣K=2=O(12n⋅log2n)O|_{K=2} =O(\\tfrac{1}{2}n \\cdot log_2n)O∣K=2=O(21n⋅log2n) 、 O∣K=4=O(38n⋅log2n)O|_{K=4} =O(\\tfrac{3}{8}n \\cdot log_2n)O∣K=4=O(83n⋅log2n) 虽然分组间耗时差异不大,但相较于直接对一维信号使用离散傅里叶变换(DFT)的 O(n2)O(n^2)O(n2) 耗时来说,直接减少了一个数量级。这即是快速傅立叶的 “快”。 对于 KKK 取不同值时的时域抽取(DIT),为了做区分,根据 KKK 值的不同被分别称为 双模时域抽取(Radix-2 DIT) 和 四模时域抽取(Radix-4 DIT)。同理,我们将 K=2K = 2K=2 时的库利-图奇算法称为 双模快速傅里叶变换(Radix-2 FFT),将 K=4K = 4K=4 时的库利-图奇算法称为 四模快速傅里叶变换(Radix-4 FFT)。两者差异如上,主要即是在划分导致推算上的不同。 至于为什么快速傅里叶变换又被称为蝴蝶法这点。则和经过时域抽取(DIT)处理后,有限基底函数族 Fω=[Fω1,Fω2, ... ,FωN]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2},\\ ...\\ ,\\mathcal {F}_{\\omega_N}]Fω=[Fω1,Fω2, ... ,FωN] 的对应强度系数 f^(ω)\\hat{f}(\\omega)f^(ω) 与分组 f^k(n)\\hat{f}_k(n)f^k(n) 的换算方式有关。 处理单元最小化 - 交叉求值与“蝴蝶”的由来 以 双模快速傅里叶变换(Radix-2 FFT) 为例。在最简情况下,当样本取 T=2T = 2T=2 ,有 K=2K = 2K=2 且 N=1N = 1N=1 ,基底函数族 Fω=[Fω1,Fω2]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2}]Fω=[Fω1,Fω2] ,此时: ∵f^k(n)=∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn)∴f^(n)=f^1(n) + (−1)n⋅R2(n)⋅f^2(n)=Fω1−1(n)⋅f(0)+ Fω2−1(n)⋅f(1)= f(0) + (−1)n⋅Fω2−1(n)⋅f(1) {\\displaystyle \\begin{aligned} \\because \\hat{f}_k(n) &=\\sum_{(k-1)N}^{kN-1} \\vert_t \\ f(t) \\cdot \\mathcal {F}_{\\omega}^{-1}(tn) \\\\ \\therefore \\hat{f}(n) &=\\quad \\quad \\hat{f}_1(n)\\quad \\ +\\ (-1)^n \\cdot R_2(n) \\cdot \\hat{f}_2(n) \\\\ &= \\mathcal {F}_{\\omega_1}^{-1}(n) \\cdot f(0) + \\quad \\ \\mathcal {F}_{\\omega_2}^{-1}(n) \\cdot f(1) \\\\ &= \\quad \\quad \\ f(0) \\ \\quad +\\ (-1)^n \\cdot \\mathcal {F}_{\\omega_2}^{-1}(n) \\cdot f(1) \\\\ \\end{aligned} } ∵f^k(n)∴f^(n)=(k−1)N∑kN−1∣t f(t)⋅Fω−1(tn)=f^1(n) + (−1)n⋅R2(n)⋅f^2(n)=Fω1−1(n)⋅f(0)+ Fω2−1(n)⋅f(1)= f(0) + (−1)n⋅Fω2−1(n)⋅f(1) 显然,对于足够小的样本,其库利-图奇解的旋转因子 Rk(n)R_k(n)Rk(n) ,就是它所对应的傅里叶基函数与转置因子的乘机,即: Rk(n)=(−1)n⋅Fω2−1(n),k∣n∈int[0,1] R_k(n) = (-1)^n \\cdot \\mathcal {F}_{\\omega_2}^{-1}(n) \\quad , k|n \\in int[0,1] Rk(n)=(−1)n⋅Fω2−1(n),k∣n∈int[0,1] 我们在傅里叶变换章节开始时提到过,傅里叶变换从空间投影变换角度,可以表示为: N⋅F=FωT⋅F=[Fω1Fω2⋮Fωn]⋅[f^1,f^2, ... ,f^n] {\\displaystyle \\begin{aligned} N \\cdot F = {\\mathcal {F}_{\\omega}}^T \\cdot \\mathcal {F} = {\\begin{bmatrix} \\mathcal {F}_{\\omega_1} \\\\ \\mathcal {F}_{\\omega_2} \\\\ \\vdots \\\\ \\mathcal {F}_{\\omega_n} \\end{bmatrix}} \\cdot [\\hat{f}_1,\\hat{f}_2,\\ ...\\ ,\\hat{f}_n] \\\\ \\end{aligned} } N⋅F=FωT⋅F=⎣⎢⎢⎡Fω1Fω2⋮Fωn⎦⎥⎥⎤⋅[f^1,f^2, ... 
,f^n] 那么,在引入了转置因子的情况下,原信号 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的关系就可以被写为: [f(0)f(1)]=[1,+Fω21,−Fω2]⋅[f^(0)f^(1)]=[1,+11,−1]⋅[Fω1,Fω2]⋅[f^(0)f^(1)] {\\displaystyle \\begin{aligned} { \\begin{bmatrix} f(0) \\\\ f(1) \\end{bmatrix} } = { \\begin{bmatrix} 1 \\quad , +\\mathcal{F}_{\\omega_2} \\\\ 1 \\quad , -\\mathcal{F}_{\\omega_2} \\end{bmatrix} } \\cdot { \\begin{bmatrix} \\hat{f}(0) \\\\ \\hat{f}(1) \\end{bmatrix} } = { \\begin{bmatrix} 1 \\quad , & +1 \\\\ 1 \\quad , & -1 \\end{bmatrix} } \\cdot [\\mathcal{F}_{\\omega_1}, \\mathcal{F}_{\\omega_2}] \\cdot { \\begin{bmatrix} \\hat{f}(0) \\\\ \\hat{f}(1) \\end{bmatrix} } \\\\ \\end{aligned} } [f(0)f(1)]=[1,+Fω21,−Fω2]⋅[f^(0)f^(1)]=[1,1,+1−1]⋅[Fω1,Fω2]⋅[f^(0)f^(1)] 而这个过程如果换到拓扑图表示,就是大名鼎鼎的 “蝴蝶” 流造型了 (注意,颜色表示转子输出方向) : 同理,当采用 四模快速傅里叶变换(Radix-4 FFT) 时,有在最简情况下样本取 T=4T = 4T=4 。有 K=4K = 4K=4 且 N=1N = 1N=1 ,基底函数族 Fω=[Fω1,Fω2,Fω3,Fω4]\\mathcal {F}_{\\omega} = [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2}, \\mathcal {F}_{\\omega_3}, \\mathcal {F}_{\\omega_4}]Fω=[Fω1,Fω2,Fω3,Fω4] 。省略同质的推导过程,有原信号 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的关系: [f(0)f(1)f(2)f(3)]=[1,1,1,11,−i,−1,i1,−1,1,−11,i,−1,−i]⋅[Fω1,Fω2,Fω3,Fω4]⋅[f^(0)f^(1)f^(2)f^(3)] {\\displaystyle \\begin{aligned} { \\begin{bmatrix} f(0) \\\\ f(1) \\\\ f(2) \\\\ f(3) \\end{bmatrix} } = { \\begin{bmatrix} 1 ,& &1,& &1,& &1 \\\\ 1 ,& -&i,& -&1,& &i \\\\ 1 ,& -&1,& &1,& -&1 \\\\ 1 ,& &i,& -&1,& -&i \\end{bmatrix} } \\cdot [\\mathcal {F}_{\\omega_1}, \\mathcal {F}_{\\omega_2}, \\mathcal {F}_{\\omega_3}, \\mathcal {F}_{\\omega_4}] \\cdot { \\begin{bmatrix} \\hat{f}(0) \\\\ \\hat{f}(1) \\\\ \\hat{f}(2) \\\\ \\hat{f}(3) \\end{bmatrix} } \\\\ \\end{aligned} } ⎣⎢⎢⎡f(0)f(1)f(2)f(3)⎦⎥⎥⎤=⎣⎢⎢⎡1,1,1,1,−−1,i,1,i,−−1,1,1,1,−−1i1i⎦⎥⎥⎤⋅[Fω1,Fω2,Fω3,Fω4]⋅⎣⎢⎢⎡f^(0)f^(1)f^(2)f^(3)⎦⎥⎥⎤ 四模的 “蝴蝶” 流造型如下 (注意,颜色表示前级数据来源) : 可见,单元的最小化抽象是通用的方法论。 对于多样本情况,只需要层层分解组装即可完成整体的快速处理。由于时间差异并不明显,但转置矩阵复杂度差异较大,因此我们一般选择 双模(Radix-2) 简化整体处理过程。 分批处理层级树 - 单元组装与完整流水线 和简单情况不一样的是,更多的样本采样使得我们没办法通过一次计算就得到最终结果。而在之前的推导过程中我们提到,对于不同子样本集抽参求解 f^k(n)\\hat{f}_k(n)f^k(n) 的过程,其本质也是一个傅里叶变换,只不过在解构过程中被我们以整体进行了代指换元。因此,随着 T=2lT = 2^lT=2l 与 K=2mK = 2^mK=2m 的变化,对信号处理的层数 LayerLayerLayer 也会产生变更有: Layer=logK(T)=lm Layer = \\log_{K}(T) = \\frac{l}{m} Layer=logK(T)=ml 假设样本取 T=4T = 4T=4 ,有 K=2K = 2K=2 ,则 N=2N = 2N=2 ,此时所需层数为 Layer=2Layer = 2Layer=2 。根据其上我们的分析可知,存在整合后的基底函数族为: Fω=[Fω1,Fω2]=[Fω11,Fω12,Fω21,Fω22] \\mathcal{F}_{\\omega} = [\\mathcal{F}_{\\omega_1}, \\mathcal{F}_{\\omega_2}] = [\\mathcal{F}_{\\omega_{11}}, \\mathcal{F}_{\\omega_{12}}, \\mathcal{F}_{\\omega_{21}}, \\mathcal{F}_{\\omega_{22}}] Fω=[Fω1,Fω2]=[Fω11,Fω12,Fω21,Fω22] 使得原信号 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的关系为: ∵f^k(n)=∑(k−1)NkN−1∣t f(t)⋅Fω−1(tn)mark:Ri(n)⋅Fωij−1(n)=Rij(n)∣(T=4,K=2)∴f^1(n)= Fω11−1(n)⋅f1(0)+ (−1)n⋅Fω12−1(n)⋅f1(1)= + (−1)n⋅Fω12−1(n)⋅f(2)= DFT(f1(n))f^2(n)= Fω21−1(n)⋅f2(0)+ (−1)n⋅Fω22−1(n)⋅f2(1)= f(1) + (−1)n⋅Fω22−1(n)⋅f(3)= DFT(f2(n))∴f^(n)=[ f^1(n) + (−1)n⋅R2(n)⋅f^2(n) ]∣(T=8,K=2)= R1(n)⋅DFT(f1(n))+ (−1)n⋅R2(n)⋅DFT(f2(n))= R1(n)⋅Fω11−1(n)⋅f(0) + (−1)n⋅R1(n)⋅Fω12−1(n)⋅f(2)+ (−1)n⋅R2(n)⋅f(1)+R2(n)⋅Fω22−1(n)⋅f(3)⇒f^(n)= R11(n)⋅f(0)+ (−1)n⋅R12(n)⋅f(2)+ (−1)n⋅R21(n)⋅f(1)+ R22(n)⋅f(3) {\\displaystyle \\begin{aligned} \\because \\hat{f}_k(n) =&\\sum_{(k-1)N}^{kN-1} \\vert_t \\ f(t) \\cdot \\mathcal{F}_{\\omega}^{-1}(tn) \\quad \\quad \\quad \\quad \\quad \\quad mark: R_i(n) \\cdot \\mathcal{F}_{\\omega_{ij}}^{-1}(n) = R_{ij}(n) \\vert_{(T=4,K=2)} \\\\ \\therefore \\hat{f}_1(n) =& \\ \\mathcal{F}_{\\omega_{11}}^{-1}(n) \\cdot f_1(0) \\quad +\\ (-1)^n \\cdot 
\\mathcal{F}_{\\omega_{12}}^{-1}(n) \\cdot f_1(1) \\\\ =& \\ +\\ (-1)^n \\cdot \\mathcal{F}_{\\omega_{12}}^{-1}(n) \\cdot f(2) \\\\ =& \\ DFT(f_1(n)) \\\\ \\hat{f}_2(n) =& \\ \\mathcal{F}_{\\omega_{21}}^{-1}(n) \\cdot f_2(0) \\quad +\\ (-1)^n \\cdot \\mathcal{F}_{\\omega_{22}}^{-1}(n) \\cdot f_2(1) \\\\ =& \\ f(1) \\ +\\ (-1)^n \\cdot \\mathcal{F}_{\\omega_{22}}^{-1}(n) \\cdot f(3) \\\\ =& \\ DFT(f_2(n)) \\\\ \\\\ \\therefore \\hat{f}(n) =& [\\ \\hat{f}_1(n)\\ +\\ (-1)^n \\cdot R_2(n) \\cdot \\hat{f}_2(n)\\ ]\\vert_{(T=8,K=2)} \\\\ =& \\ R_1(n) \\cdot DFT(f_1(n)) \\quad +\\ (-1)^n \\cdot R_2(n) \\cdot DFT(f_2(n)) \\\\ =& \\ R_1(n) \\cdot \\mathcal{F}_{\\omega_{11}}^{-1}(n) \\cdot f(0) \\ \\ + \\ (-1)^n \\cdot R_1(n) \\cdot \\mathcal{F}_{\\omega_{12}}^{-1}(n) \\cdot f(2) \\quad + \\\\ & \\ (-1)^n \\cdot R_2(n) \\cdot f(1) \\quad + \\quad \\quad R_2(n) \\cdot \\mathcal{F}_{\\omega_{22}}^{-1}(n) \\cdot f(3) \\\\ \\Rightarrow \\\\ \\hat{f}(n) =& \\quad \\quad \\ R_{11}(n) \\cdot f(0)\\quad \\quad +\\ \\quad (-1)^n \\cdot R_{12}(n)\\cdot f(2)\\quad +\\ \\\\ & (-1)^n \\cdot R_{21}(n) \\cdot f(1) \\quad +\\ \\quad \\quad \\quad \\ R_{22}(n)\\cdot f(3) \\\\ \\end{aligned} } ∵f^k(n)=∴f^1(n)===f^2(n)===∴f^(n)===⇒f^(n)=(k−1)N∑kN−1∣t f(t)⋅Fω−1(tn)mark:Ri(n)⋅Fωij−1(n)=Rij(n)∣(T=4,K=2) Fω11−1(n)⋅f1(0)+ (−1)n⋅Fω12−1(n)⋅f1(1) + (−1)n⋅Fω12−1(n)⋅f(2) DFT(f1(n)) Fω21−1(n)⋅f2(0)+ (−1)n⋅Fω22−1(n)⋅f2(1) f(1) + (−1)n⋅Fω22−1(n)⋅f(3) DFT(f2(n))[ f^1(n) + (−1)n⋅R2(n)⋅f^2(n) ]∣(T=8,K=2) R1(n)⋅DFT(f1(n))+ (−1)n⋅R2(n)⋅DFT(f2(n)) R1(n)⋅Fω11−1(n)⋅f(0) + (−1)n⋅R1(n)⋅Fω12−1(n)⋅f(2)+ (−1)n⋅R2(n)⋅f(1)+R2(n)⋅Fω22−1(n)⋅f(3) R11(n)⋅f(0)+ (−1)n⋅R12(n)⋅f(2)+ (−1)n⋅R21(n)⋅f(1)+ R22(n)⋅f(3) 同理,当 T=8T = 8T=8 ,有 K=2K = 2K=2 ,则 N=4N = 4N=4 ,此时所需层数为 Layer=3Layer = 3Layer=3 。存在整合后的基底函数族: Fω=[Fω1,Fω2]=[Fω11,Fω12,Fω21,Fω22]=[Fω111,Fω112,Fω121,Fω122,Fω211,Fω212,Fω221,Fω222] {\\displaystyle \\begin{aligned} \\mathcal{F}_{\\omega} &= [\\mathcal{F}_{\\omega_1}, \\mathcal{F}_{\\omega_2}] \\\\ &= [\\mathcal{F}_{\\omega_{11}}, \\mathcal{F}_{\\omega_{12}}, \\mathcal{F}_{\\omega_{21}}, \\mathcal{F}_{\\omega_{22}}] \\\\ &= [\\mathcal{F}_{\\omega_{111}}, \\mathcal{F}_{\\omega_{112}}, \\mathcal{F}_{\\omega_{121}}, \\mathcal{F}_{\\omega_{122}}, \\mathcal{F}_{\\omega_{211}}, \\mathcal{F}_{\\omega_{212}}, \\mathcal{F}_{\\omega_{221}}, \\mathcal{F}_{\\omega_{222}}] \\\\ \\end{aligned} } Fω=[Fω1,Fω2]=[Fω11,Fω12,Fω21,Fω22]=[Fω111,Fω112,Fω121,Fω122,Fω211,Fω212,Fω221,Fω222] 使得原信号 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的关系为 (省略同质化过程) : f^(n)= R11(n)⋅f^11(n)∣0,4+ (−1)n⋅R12(n)⋅f^12(n)∣1,5+ (−1)n⋅R21(n)⋅f^21(1)∣2,6+ R22(n)⋅f^22(3)∣3,7=[R111(n)⋅f(0) + (−1)n⋅R112(n)⋅f(4)+ R221(n)⋅f(2)+ (−1)n⋅R222(n)⋅f(6)]feven+[R121(n)⋅f(1) + (−1)n⋅R122(n)⋅f(5)+ R321(n)⋅f(3)+ (−1)n⋅R322(n)⋅f(7)]fodds {\\displaystyle \\begin{aligned} \\hat{f}(n) =& \\quad \\quad \\ R_{11}(n) \\cdot \\hat{f}_{11}(n) \\vert_{0,4} \\quad \\quad + \\quad \\quad \\ (-1)^n \\cdot R_{12}(n)\\cdot \\hat{f}_{12}(n) \\vert_{1,5} \\quad \\quad + \\\\ & \\ (-1)^n \\cdot R_{21}(n) \\cdot \\hat{f}_{21}(1) \\vert_{2,6} \\quad + \\quad \\quad \\ R_{22}(n)\\cdot \\hat{f}_{22}(3) \\vert_{3,7} \\\\ =& [ R_{111}(n) \\cdot f(0)\\ + \\ (-1)^n \\cdot R_{112}(n) \\cdot f(4) + \\ R_{221}(n) \\cdot f(2) + \\ (-1)^n \\cdot R_{222}(n) \\cdot f(6) ]_{f_{even}} \\quad + \\\\ & [ R_{121}(n) \\cdot f(1)\\ + \\ (-1)^n \\cdot R_{122}(n) \\cdot f(5) + \\ R_{321}(n) \\cdot f(3) + \\ (-1)^n \\cdot R_{322}(n) \\cdot f(7) ]_{f_{odds}} \\\\ \\end{aligned} } f^(n)== R11(n)⋅f^11(n)∣0,4+ (−1)n⋅R12(n)⋅f^12(n)∣1,5+ (−1)n⋅R21(n)⋅f^21(1)∣2,6+ 
R22(n)⋅f^22(3)∣3,7[R111(n)⋅f(0) + (−1)n⋅R112(n)⋅f(4)+ R221(n)⋅f(2)+ (−1)n⋅R222(n)⋅f(6)]feven+[R121(n)⋅f(1) + (−1)n⋅R122(n)⋅f(5)+ R321(n)⋅f(3)+ (−1)n⋅R322(n)⋅f(7)]fodds 此时的“蝴蝶”流造型,就要复杂一些了 : 从图上可知,每层都可以被分割为 2la−12^{l_a - 1}2la−1 个蝶形单元,其中 lal_ala 为当前层级。而完整的计算,则需要历经共计 2l/m−12^{l/m} - 12l/m−1 个单元才能完成。 如果我们开始就对总样本集 SSS ,按照奇偶样本分为 S1′=[f(0), f(2), f(4), f(6)]S_1^{\\prime} = [f(0),\\ f(2),\\ f(4) ,\\ f(6)]S1′=[f(0), f(2), f(4), f(6)] 和 S2′=[f(1), f(3), f(5), f(7)]S_2^{\\prime} = [f(1),\\ f(3),\\ f(5) ,\\ f(7)]S2′=[f(1), f(3), f(5), f(7)] 这两个子集。使单一分组求单一解,来方便分离的离散傅里叶变换调用。那么整个蝴蝶图就变成如下样子了 (同色线表示相同流向) : 结果同样一致,可见奇偶分割实质上是一个以 K=2K = 2K=2 为步长的抽样再迭代计算的过程。这点也能够从 K=4K = 4K=4 时,四模对原数据取样 T=8T = 8T=8 会使 f(n)f(n)f(n) 被分为: f^(n)=[R11(n)⋅f(0) + (−1)n⋅R12(n)⋅f(4)]f1/4+[R21(n)⋅f(1) + (−1)n⋅R22(n)⋅f(5)]f2/4+[R31(n)⋅f(2) + (−1)n⋅R32(n)⋅f(6)]f3/4+[R41(n)⋅f(3) + (−1)n⋅R42(n)⋅f(7)]f4/4 {\\displaystyle \\begin{aligned} \\hat{f}(n) =& [ R_{11}(n) \\cdot f(0)\\ + \\ (-1)^n \\cdot R_{12}(n) \\cdot f(4) ]_{f_{1/4}} \\quad + \\\\ & [ R_{21}(n) \\cdot f(1)\\ + \\ (-1)^n \\cdot R_{22}(n) \\cdot f(5) ]_{f_{2/4}} \\quad + \\\\ & [ R_{31}(n) \\cdot f(2)\\ + \\ (-1)^n \\cdot R_{32}(n) \\cdot f(6) ]_{f_{3/4}} \\quad + \\\\ & [ R_{41}(n) \\cdot f(3)\\ + \\ (-1)^n \\cdot R_{42}(n) \\cdot f(7) ]_{f_{4/4}} \\\\ \\end{aligned} } f^(n)=[R11(n)⋅f(0) + (−1)n⋅R12(n)⋅f(4)]f1/4+[R21(n)⋅f(1) + (−1)n⋅R22(n)⋅f(5)]f2/4+[R31(n)⋅f(2) + (−1)n⋅R32(n)⋅f(6)]f3/4+[R41(n)⋅f(3) + (−1)n⋅R42(n)⋅f(7)]f4/4 的情况,得到间接的阐明。 因此,我们可以通过封装固定 KKK 时的最小蝶形单元,采用递归的方式来计算 f(n)f(n)f(n) 与 f^(n)\\hat{f}(n)f^(n) 的相互转换。分组的产生,是由顺序输入在算法作用下经过每层的蝶形单元处理后,导致的必然结果。是一个自然而然的过程而并非强行去做的设定,切勿本末倒置。 而我们期望的是有序的输出,这也就产生了对输入进行排序的要求。 基于数据的优化 - 位反转(Bit Reversal)输入索引重排 经过前面的一系列分析,不难归纳得到:最终算法的输出顺序,是原序列经过 Layer−1Layer - 1Layer−1 层反转的结果。即每个蝶形单元,会反转当前对应字样本周期跨度的一半。 还是采用当 T=8T = 8T=8 ,有 K=2K = 2K=2 时的情形。我们将所有的处理过程排除,以原样本数据序列角标的传递过程来标记处理流,则有: 当代计算机采用的二进制计数,我们将上述样本角标 采用二进制表示,有: 这一现象即被称为 位反转(Bit Reversal)。我们可以利用这一特点,在工程运算过程中每个蝶形单元的数据装配处,以顺序序列对应位反转的角标来取用输入数据,从而保证迭代运算结果的顺序。 一维快速傅立叶变换(1D-FFT)的 C 语言实现 现在,万事俱备。可以进行代码实现了。先用伪码缕清算法程序化思路: /** * 1D-FFT [Fast Fourier Transform] * [How to Use] * * Fo[T] = {...}; * Fn[T] = {}; * fft_1d(&Fo, &Fn, T); * [logistic] * { * result = []; // as byte array * // do Bit-Reversal * Fo_sorted = bit_reversal(Fn, Fn, T); * // do DIT: * for (int layer_at_ = 0; layer_at_ 依然,快速傅立叶变换也需要有逆变换(IDFT [Inverse Fast Fourier Transform]),来帮我们进行数据还原: /** * 1D-IFFT [Inverse Fast Fourier Transform] * [How to Use] * * Fo[T] = {}; * Fn[T] = {...}; * fft_1d(&Fo, &Fn, T); * [logistic] * { * result = []; // as byte array * // do Bit-Reversal * Fo_sorted = bit_reversal(Fn, Fn, T) / T; dont forget divide N(num equal T) [ 到此,快速傅里叶变换的 工程优势 就体现出来了。从上面的工作流可以看出,FFT 和 IFFT 唯一的实现上的不同的地方,就在于两点: 分片计算均值,这个是傅里叶变换的通性; 旋转因子互逆,转换三角函数时的对称性; 这正是我们在之前推倒时,双模快速傅里叶变换(Radix-2 FFT)所利用的最为显著的特征。而其他部分的计算,则可以用相同的流水线进行统一。 所以,一维双模快速傅里叶变换(1D Radix-2 FFT)的工程化,并没有想象中的复杂: #include \"stdio.h\" #include \"math.h\" #define PI 3.1415926f typedef struct Complex { double re_; double im_; Complex operator+(const Complex &b_) const { Complex result_; result_.re_ = re_ + b_.re_; result_.im_ = im_ + b_.im_; return result_; } Complex operator-(const Complex &b_) const { Complex result_; result_.re_ = re_ - b_.re_; result_.im_ = im_ - b_.im_; return result_; } Complex operator*(const Complex &b_) const { Complex result_; result_.re_ = re_ * b_.re_ - im_ * b_.im_; result_.im_ = re_ * b_.im_ + im_ * b_.re_; return result_; } } Rotator, FBasis; void digital_convert(double *digital_, Complex *complex_, size_t size_, bool inverse = false) { if (!inverse) { 
for (int i = 0; i 0) { j = j > 1; } if (j > i) { Complex temp = input_[i]; result_[i] = input_[j]; result_[j] = temp; } } } void butterfly(Complex *target_, int step_, int slice_idx_, bool inverse = false) { int start_at_ = slice_idx_ * 2 * step_; for (int inner_idx_ = 0; inner_idx_ 写完后简单测试一下: int main(void) { FBasis Fn[8] = {}; double Fo[8] = {0, 1, 2, 3, 4, 5, 6, 7}; double iFo[8] = {}; size_t T = sizeof(Fo) / sizeof(double); size_t N = sizeof(Fn) / sizeof(FBasis); printf(\"\\n Original_data: \\n\"); for (int t = 0; t 得到结果和标准基本相同: Original_data: 0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000 FFT_result: 28.000000 + i 0.000000 -4.000001 + i -9.656855 -4.000000 + i -4.000000 -4.000000 + i -1.656854 -4.000000 + i 0.000000 -4.000000 + i 1.656855 -4.000000 + i 4.000000 -3.999999 + i 9.656854 IFFT_result: 0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000 运行结束。 至此,快速傅立叶变换的简单工程化基本完毕。二维情况,可以类比二维离散傅里叶变换的拓展思想,来进行改造。 另外,快速傅立叶变换 并不只有 时域抽取(DIT)、 双模(Radix-2)、四模(Radix-4)等这些处理手段。通用的其他类型,包括并不限于 频域抽取(FIT)、八模(Radix-8)、多模混合(Mixed-Radix)等。但亦可触类旁通。 这些方法共同构成了当今快速傅立叶变换的高性能函数库,甚至 配合硬件的特殊门电路设计,还能够进一步压缩过程中非理论因素的处理耗时。而在工作中,除特殊情况外,通常会在项目允许范畴内引入一些由研究机构校准的快速傅立叶变换函数库,这里按量级列举 三个经典库,以供参考使用之便: 小:Ooura's Mathematical Software Packages. by Takuya Ooura. 中:FXT: a library of algorithms. by Jörg Arndt. 大:FFTW: Fastest Fourier Transform in the West. by Matteo Frigo and Steven G. Johnson. at MIT. Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_1_4.html":{"url":"Chapter_3/Language/cn/Docs_3_1_4.html","title":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach)","keywords":"","body":"3.1.4 傅里叶的硬件优化 - 多常数乘法矩阵逼近(Matrix-MCM Approach) 2011 年, [12]。 【申请 IEEE 授权中】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_2.html":{"url":"Chapter_3/Language/cn/Docs_3_2.html","title":"3.2 频率信息提取 - 常用滤波算法","keywords":"","body":"3.2 频率信息提取 - 常用滤波算法 上一节中,我们就数字信号处理(DSP)的核心算法,傅里叶变换,进行了详细的说明。而在对二维傅里叶变换进行讲解的时候。细心的读者可能已经发现了 图像在空频分布上的一些特点。在分布的频率(波矢 k⃗{\\vec{k}}k⃗ 的频率 ∣k⃗∣=ω\\vert {\\vec{k}} \\vert = \\omega∣k⃗∣=ω )的占比(强度系数 f^ω(u,v)\\hat{f}_{\\omega}(u,v)f^ω(u,v) )中,低频信号的占比高,而高频信号的占比低。 这一现象产生的原因在于: 当一张图片处于色彩变化大且明显的区域时, k⃗{\\vec{k}}k⃗ 平面波在 uvuvuv 平面上的相邻点,单次数值变化的跨度就越大,直观体现就是波矢 k⃗{\\vec{k}}k⃗ 更长,即频率 ω\\omegaω 更高,波长 λ\\lambdaλ 更短。相反,当图片处于色彩变化相对平稳的区域时,相邻两个像素点的色彩差异就越小,单次数值变化的跨度就越小,对应的波矢 k⃗{\\vec{k}}k⃗ 更短,即频率 ω\\omegaω 更低,波长 λ\\lambdaλ 更长。这种变化明显处,往往是图片中的噪点或物体的轮廓位置。显然,色彩差异较小的相邻像素区域,才是占有图片空间较多的部分。 传统的图像处理,即是对图像频率的处理。其本质上是根据不同的目标,提炼出图像中被认为有用的信息,这一步被称为滤波(Filter)。滤波是对信号已有频率的过滤,通过增强(阻塞/增强阻塞)一部分频段,来达到筛选的效果。 因此,由于信息量的关系,滤波算法更多的使用场景是被运用在已得图像的结果上。相较于一维信号,二维信号明显对算法敏捷程度有更低的容忍度。而直接以傅里叶空频分离(SFS)进行科学处理,依旧会有些臃肿。毕竟非分析场景一般不需要特别高的精度,通常只在意一定范围内的频率特征,且并不会对细部有过多的要求。那么有没有满足条件下,针对目标频段的,更实用的变体呢? 
考虑到简易的滤波手段多为均值与阈限共同作用。从算法层面,优化均值与阈限的求与取,就是切入点。如果可以将算法抽象为足够小的有限参数单元,我们就能够以 卷积核(Convolution Nucleus / Convolution Kernel / Sliding Window / Filter) 数学工具,封装整个运算过程。从而充分的利用现代 GPU 设备进行并行计算,批量处理并降低耗时。 欲期望达成此要求,被抽象的有限参数单元必然不能太复杂。 为了便于演示说明,本节采用 OpenGL 的 GLSL 程序片脚本语言,并使用 WebGL 环境预览,来进行算法的演示工作。其他驱动,如 DirectX 的 HLSL 或 Metal 的 MLSL,皆可参照 GLSL 逻辑达到相同效果。 在线演示 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_2_1.html":{"url":"Chapter_3/Language/cn/Docs_3_2_1.html","title":"3.2.1 高斯滤波(Gauss Filter)","keywords":"","body":"3.2.1 高斯滤波(Gauss Filter) 高斯滤波是我们最常用的一种滤波器。 想要理解高斯滤波的作用,首先需要回顾一下 高斯分布(Gaussian Distribution),即 正态分布(Normal Distribution) 的数学特征。高斯分布公式 : f(x,μ)=12π⋅δe−(x−μ)22⋅δ2 f(x,\\mu) = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta} e ^{-\\tfrac{(x-\\mu)^2}{2 \\cdot \\delta^2}} f(x,μ)=√2π⋅δ1e−2⋅δ2(x−μ)2 其在 xxx 为一维时的平面的对应分布如下: 图 3-2 一维正态分布示意图 从图像可见,高斯分布的 μ\\muμ 决定了分部的中心,而 δ\\deltaδ 决定了形变的剧烈程度。而线下曲线面积,则代表了对应区间段内的取值发生概率。从离散角度则指 x∈int[xc−n2,xc+n2]x \\in int[x_c-\\tfrac{n}2, x_c+\\tfrac{n}2]x∈int[xc−2n,xc+2n] 范围内,有 x=xcx = x_cx=xc 的取值概率为 f(xc)f(x_c)f(xc) 。 记原信号为 S(x)S(x)S(x) 。以 ∣target∣1\\vert target \\vert_1∣target∣1 表示归一化操作,则 ∣∑xc−n/2xc+n/2(f(x)⋅S(x))∣1\\vert {\\sum}_{x_c -n/2}^{x_c+n/2}(f(x) \\cdot S(x)) \\vert_1∣∑xc−n/2xc+n/2(f(x)⋅S(x))∣1 代表在当前给定 (δ,μ)(\\delta, \\mu)(δ,μ) 的高斯分布 f(x,μ)f(x, \\mu)f(x,μ) 下,考虑 x=xcx = x_cx=xc 时左右相邻含 xcx_cxc 在内共 nnn 个节点取值情况的 S(xc)S(x_c)S(xc) 的概率均值。我们记 xcx_cxc 为中心点,数据采样数为 TTT ,有: xc∈int[n2,T−n2],n∈intoddsFn(xc)=∣∑xc−n/2xc+n/2(f(x,xc)⋅S(x))∣1 {\\displaystyle \\begin{aligned} x_c \\in &int [\\tfrac{n}{2}, T-\\tfrac{n}{2}], \\quad n \\in int_{odds} \\\\ \\\\ F_n(x_c) &= \\vert {\\sum}_{x_c -n/2}^{x_c+n/2}(f(x, x_c) \\cdot S(x)) \\vert_1 \\\\ \\end{aligned} } xc∈Fn(xc)int[2n,T−2n],n∈intodds=∣∑xc−n/2xc+n/2(f(x,xc)⋅S(x))∣1 上式中,Fn(xc)F_n(x_c)Fn(xc) 即为一维情况下的 nnn 步滑动窗口,也可以称为 n×1n \\times 1n×1 卷积核。通过沿信号的数据顺序,滑动 Fn(xc)F_n(x_c)Fn(xc) 求取原值 xcx_cxc 替换值的操作。我们可以在一定程度上利用分布的概率关系,以调整 δ\\deltaδ 取值的方式来影响核内相邻数据的波动性,进而影响整体波动性达到滤波目的。 取 δ\\deltaδ 越小,波动性越强越激烈,图片越尖锐;反之 δ\\deltaδ 越大,波动性越弱越平缓,图片越模糊。 一维信号早期常用这种手段来一定程度的进行降噪(现今已被优秀和复杂的多的算法替换了)。而二维信号,即图片,在我们之前讲解傅里叶变化时以提到过,和一维主要差别是在维度上。所以当我们记数据采样数为 (W×H)(W \\times H)(W×H) ,有将 xxx 换为向量 x⃗=(x,y)\\vec{x} = (x,y)x⃗=(x,y) 表示: xc∈int[n2,W−n2],yc∈int[n2,H−n2]n∈intoddsFn(xc⃗)=Fn(xc,yc)=∣∑yc−n/2yc+n/2∑xc−n/2xc+n/2(f(x⃗,xc⃗)⋅S(x⃗))∣1 {\\displaystyle \\begin{aligned} x_c \\in &int [\\tfrac{n}{2}, W-\\tfrac{n}{2}], \\quad y_c \\in int [\\tfrac{n}{2}, H-\\tfrac{n}{2}] \\quad n \\in int_{odds} \\\\ \\\\ F_n(\\vec{x_c}) &= F_n(x_c, y_c) =\\vert {\\sum}_{y_c -n/2}^{y_c+n/2}{\\sum}_{x_c -n/2}^{x_c+n/2}(f(\\vec{x}, \\vec{x_c}) \\cdot S(\\vec{x})) \\vert_1 \\\\ \\end{aligned} } xc∈Fn(xc⃗)int[2n,W−2n],yc∈int[2n,H−2n]n∈intodds=Fn(xc,yc)=∣∑yc−n/2yc+n/2∑xc−n/2xc+n/2(f(x⃗,xc⃗)⋅S(x⃗))∣1 则 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 即为二维情况下的 n×nn \\times nn×n 高斯滤波卷积核。同理,更多维情况只需要扩展参数 x⃗\\vec{x}x⃗ 的向量空间即可。 可是看上去,目前的公式算不上简单。但真的是这样吗? 
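在回答之前,不妨先把上式按定义直译成代码,直观感受一下"直接算"时的形态(以下是示意性的 C 语言草稿,函数名、参数与边界处的补零处理都属于本文示例的假设,并非标准实现):

/* 按定义直译的单通道高斯滤波草稿:
 * 对每个中心点 (xc, yc),在 n x n 窗口内以高斯概率加权求均值,权重在窗口内归一化 */
#include <math.h>

/* src/dst 均为 W*H 的单通道灰度数据,n 为奇数窗口边长,delta 为高斯分布的标准差 */
void gauss_filter_naive(const double *src, double *dst,
                        int W, int H, int n, double delta) {
    int r = n / 2;
    for (int yc = 0; yc < H; ++yc) {
        for (int xc = 0; xc < W; ++xc) {
            double sum = 0.0, weight_sum = 0.0;
            for (int dy = -r; dy <= r; ++dy) {
                for (int dx = -r; dx <= r; ++dx) {
                    int x = xc + dx, y = yc + dy;
                    if (x < 0 || x >= W || y < 0 || y >= H) continue; /* 越界点等效补零 */
                    /* 二维高斯权重:只与 (dx, dy) 到窗口中心的距离有关 */
                    double w = exp(-(dx * dx + dy * dy) / (2.0 * delta * delta));
                    sum        += w * src[y * W + x];
                    weight_sum += w;
                }
            }
            dst[yc * W + xc] = sum / weight_sum; /* 窗口内归一化,对应 |...|_1 操作 */
        }
    }
}

可以看到,每个像素都会把窗口内的高斯权重重新算一遍;而正文接下来会说明,当 δ 固定时,这部分权重完全可以预先求好并反复复用,真正的开销其实没有公式看上去那么大。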
假设 n=3n = 3n=3 那么 3×33 \\times 33×3 高斯滤波卷积核,实际描述的是 xc⃗\\vec{x_c}xc⃗ 点周围单位距离内,相邻含 xc⃗\\vec{x_c}xc⃗ 在内共 999 个节点的波动关系,有: Fn(xc⃗)=∣∑xySxy⋅f((xc,yc)−[(−1,−1),(0,−1),(1,−1)(−1,0),(0,0),(1,0)(−1,1),(0,1),(1,1)])∣1=∣∑xySxy⋅f(xc⃗−N3×3⃗)∣1 {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) &= \\vert \\sum_{xy} S_{xy} \\cdot f ( (x_c,y_c) - { \\begin{bmatrix} (-1, -1) ,& \\quad (\\quad 0, -1) ,& \\quad (\\quad 1, -1) \\\\ (-1,\\quad 0) ,& \\quad (\\quad 0,\\quad 0) ,& \\quad (\\quad 1,\\quad 0) \\\\ (-1,\\quad 1) ,& \\quad (\\quad 0,\\quad 1) ,& \\quad (\\quad 1,\\quad 1) \\end{bmatrix} }) \\vert_1 \\\\ &= \\vert \\sum_{xy}S_{xy} \\cdot f ( \\vec{x_c} - \\vec{N_{3 \\times 3}} ) \\vert_1 \\\\ \\end{aligned} } Fn(xc⃗)=∣xy∑Sxy⋅f((xc,yc)−⎣⎡(−1,−1),(−1,0),(−1,1),(0,−1),(0,0),(0,1),(1,−1)(1,0)(1,1)⎦⎤)∣1=∣xy∑Sxy⋅f(xc⃗−N3×3⃗)∣1 一般情况,我们不会在单批(single batch)数据处理时,改变 δ\\deltaδ 的取值。假设 δ\\deltaδ 为标准正态分布取值 δ=1\\delta=1δ=1 ,那么 f(x⃗,μ⃗)f(\\vec{x},\\vec{\\mu})f(x⃗,μ⃗) 有: f(x⃗,μ⃗)=12πe−12(x⃗−μ⃗)2 f(\\vec{x},\\vec{\\mu}) = \\frac{1}{\\sqrt{2\\pi}} e ^{-\\tfrac{1}{2}(\\vec{x}-\\vec{\\mu})^2} f(x⃗,μ⃗)=√2π1e−21(x⃗−μ⃗)2 显然, f(x⃗,μ⃗)f(\\vec{x},\\vec{\\mu})f(x⃗,μ⃗) 在 δ\\deltaδ 取固定值的情况下,只和 (x⃗−μ⃗)(\\vec{x}-\\vec{\\mu})(x⃗−μ⃗) 的计算有关。而由于我们取 μ⃗=xc⃗\\vec{\\mu} = \\vec{x_c}μ⃗=xc⃗ ,在 (x⃗−μ⃗)(\\vec{x}-\\vec{\\mu})(x⃗−μ⃗) 的计算中: ∑(x⃗−μ⃗)=∑(x⃗−xc⃗)=N3×3⃗ \\sum(\\vec{x}-\\vec{\\mu}) = \\sum(\\vec{x}-\\vec{x_c}) = \\vec{N_{3 \\times 3}} ∑(x⃗−μ⃗)=∑(x⃗−xc⃗)=N3×3⃗ 正好消除了变化的 x⃗\\vec{x}x⃗ 的部分,因此 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 可以被化简为: Fn(xc⃗)=∣∑xySxy⋅f(xc⃗−N3×3⃗)∣1=∣∑xySxy⋅f(N3×3⃗)∣1=∑xySxy⋅∣(12πe−12(Δx2+Δy2))xy∣1=∑xySxy⋅∣[0.075,0.124,0.0750.124,1.000,0.1240.075,0.124,0.075]∣1 {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) &= \\vert \\sum_{xy}S_{xy} \\cdot f ( \\vec{x_c} - \\vec{N_{3 \\times 3}} ) \\vert_1 = \\vert \\sum_{xy}S_{xy} \\cdot f (\\vec{N_{3 \\times 3}} ) \\vert_1 \\\\ &= \\sum_{xy}S_{xy} \\cdot \\vert ( \\frac{1}{\\sqrt{2\\pi}} e ^{-\\tfrac{1}{2}(\\Delta x^2+\\Delta y^2)} )_{xy} \\vert_1 \\\\ &= \\sum_{xy}S_{xy} \\cdot \\vert { \\begin{bmatrix} 0.075 ,& \\quad 0.124 ,& \\quad 0.075 \\\\ 0.124 ,& \\quad 1.000 ,& \\quad 0.124 \\\\ 0.075 ,& \\quad 0.124 ,& \\quad 0.075 \\end{bmatrix} } \\vert_1 \\\\ \\end{aligned} } Fn(xc⃗)=∣xy∑Sxy⋅f(xc⃗−N3×3⃗)∣1=∣xy∑Sxy⋅f(N3×3⃗)∣1=xy∑Sxy⋅∣(√2π1e−21(Δx2+Δy2))xy∣1=xy∑Sxy⋅∣⎣⎡0.075,0.124,0.075,0.124,1.000,0.124,0.0750.1240.075⎦⎤∣1 我们只需要依次计算卷积核范围内的点,对应信号值与概率相乘之和即可,即: Fn(xc⃗)=∣0.075⋅S(xc−1,yc−1)+0.124⋅S(xc,yc−1) +0.075⋅S(xc+1,yc−1) +0.124⋅S(xc−1,yc)+1.000⋅S(xc,yc)+0.124⋅S(xc+1,yc) +0.075⋅S(xc−1,yc−1)+0.124⋅S(xc,yc+1) +0.075⋅S(xc+1,yc+1)∣1 {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) = \\vert & 0.075 \\cdot S_{(x_c-1,y_c-1)} + 0.124 \\cdot S_{(x_c,y_c-1)}\\ + 0.075 \\cdot S_{(x_c+1,y_c-1)} \\ + \\\\ & 0.124 \\cdot S_{(x_c-1,y_c )}\\quad + 1.000 \\cdot S_{(x_c,y_c)}\\quad + 0.124 \\cdot S_{(x_c+1,y_c)} \\quad \\ + \\\\ & 0.075 \\cdot S_{(x_c-1,y_c-1)} + 0.124 \\cdot S_{(x_c,y_c+1)}\\ + 0.075 \\cdot S_{(x_c+1,y_c+1)} \\vert_1 \\\\ \\end{aligned} } Fn(xc⃗)=∣0.075⋅S(xc−1,yc−1)+0.124⋅S(xc,yc−1) +0.075⋅S(xc+1,yc−1) +0.124⋅S(xc−1,yc)+1.000⋅S(xc,yc)+0.124⋅S(xc+1,yc) +0.075⋅S(xc−1,yc−1)+0.124⋅S(xc,yc+1) +0.075⋅S(xc+1,yc+1)∣1 为了保证输入输出数据一致。根据卷积核的大小,我们还需要在数据的外围补充一圈空值,以保证感受野等大数据源。如果当前需要处理的数据为 (W×H)=(5×5)(W \\times H) = (5 \\times 5)(W×H)=(5×5) ,即总共 252525 个像素的单通道灰度图。经过 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 大小的高斯卷积核处理后,有如下结果: 不难发现上面的 求值过大,这是因为我们 并没有 使用 δ=1.0\\delta = 1.0δ=1.0 时归一化后的高斯算子: f(N3×3⃗)=∣[0.075,0.124,0.0750.124,1.000,0.1240.075,0.124,0.075]∣1=[0.042,0.069,0.0420.069,0.557,0.0690.042,0.069,0.042] 
{\\displaystyle \\begin{aligned} f(\\vec{N_{3 \\times 3}}) &= \\vert { \\begin{bmatrix} 0.075 ,& \\quad 0.124 ,& \\quad 0.075 \\\\ 0.124 ,& \\quad 1.000 ,& \\quad 0.124 \\\\ 0.075 ,& \\quad 0.124 ,& \\quad 0.075 \\end{bmatrix} } \\vert_1 = { \\begin{bmatrix} 0.042 ,& \\quad 0.069 ,& \\quad 0.042 \\\\ 0.069 ,& \\quad 0.557 ,& \\quad 0.069 \\\\ 0.042 ,& \\quad 0.069 ,& \\quad 0.042 \\end{bmatrix} } \\\\ \\end{aligned} } f(N3×3⃗)=∣⎣⎡0.075,0.124,0.075,0.124,1.000,0.124,0.0750.1240.075⎦⎤∣1=⎣⎡0.042,0.069,0.042,0.069,0.557,0.069,0.0420.0690.042⎦⎤ 当然,也可以直接除以 f(N3×3⃗)f(\\vec{N_{3 \\times 3}})f(N3×3⃗) 矩阵的秩,即 ∣f(N3×3⃗)∣δ=1.0=1.796\\vert f(\\vec{N_{3 \\times 3}}) \\vert_{\\delta = 1.0} = 1.796∣f(N3×3⃗)∣δ=1.0=1.796 ,作用在最终结果上。完成这一步后,整个高斯滤波单元才真正封装完毕。 对一张 (W×H)(W \\times H)(W×H) 的图片,单次标准高斯滤波需要经过 O(N)=((W−(n−2))×(H−(n−2))×8)O(N) =((W-(n-2)) \\times (H-(n-2)) \\times 8) O(N)=((W−(n−2))×(H−(n−2))×8) 次加法运算,外加单独进行的一次 n×nn \\times nn×n 卷积核大小的 f(x⃗,μ⃗)f(\\vec{x},\\vec{\\mu})f(x⃗,μ⃗) 归一化概率计算。而通过计算 f(x⃗,μ⃗)f(\\vec{x},\\vec{\\mu})f(x⃗,μ⃗) 得到的 f(N3×3⃗)f(\\vec{N_{3 \\times 3}})f(N3×3⃗) ,在 δ\\deltaδ 发生改变前都可以无限复用。因此,算法非常快捷。 高斯滤波的简易 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装了。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 没有太多操作,因为关键的部分在 像素程序片(Pixel Shader/Fragment Shader) 上: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform mat3 gaussian_matrix; uniform sampler2D target_texture; void main() { vec3 output_; for (int i = 0; i 完成对算法求和过程的迁移。传入的 高斯算子 gaussian_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。由于采用 Web 展示,此处方法以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_gaussian_kernel(step, delta) { let n = step * 2 + 1; let kernel = new Float32Array(n * n); let factor_1 = 1.0 / (Math.sqrt(2.0 * Math.PI) * delta); let factor_2 = 1.0 / (2.0 * delta * delta); let normalize_div = 0; for (let i = 0; i 至此,一个简单但实用的高斯滤波器就完成了。除了上述这种使用卷积核大小一对一采样的方式外,采用单一方向的高斯滤波滑动窗口,如 v⃗n×1=(vx,vy)orient\\vec{v}_{n \\times 1} = (v_x, v_y)_{orient}v⃗n×1=(vx,vy)orient ,也是一种减少采样数量,从而提高运算效率的方式。但由于只有指定方向的颜色关系参与了运算,单一方向高斯滤波,或者说更为通用的是近乎所有单一方向的滤波器,对数据处理后的结果,都只会表现为固定方向的过滤效果。这会使画面显得有些割裂,因此建议慎重使用。 而如果要求在保证滤波效果的同时,还能精简运算。那么我们就需更为快捷且采样更少的高斯单元了。 高斯滤波的线性插值采加速 一种通用的方式,就是在采样时引入 线性插值(Linear Sampling),减少采样次数。我们用 WWW 代表高斯算子,用 Wij=w(x⃗)W_{ij} =w(\\vec{x})Wij=w(x⃗) 代表高斯算子在 x⃗\\vec{x}x⃗ 所处 N3×3⃗\\vec{N_{3 \\times 3}}N3×3⃗ 中位置的对应 fij(N3×3⃗)f_{ij} ( \\vec{N_{3 \\times 3}})fij(N3×3⃗) 值,用 s(x⃗)s(\\vec{x})s(x⃗) 代表 x⃗\\vec{x}x⃗ 在图片中的像素值。则对于采样 3×33 \\times 33×3 的 N3×3⃗\\vec{N_{3 \\times 3}}N3×3⃗ 来说,由差值公式: sdst(x1⃗,x2⃗)=ssrc(x1⃗)⋅wsrc(x1⃗)+ssrc(x2⃗)⋅wsrc(x2⃗)wsrc(x1⃗)+wsrc(x2⃗) {\\displaystyle \\begin{aligned} s_{dst}(\\vec{x_1},\\vec{x_2}) &= \\frac{s_{src}(\\vec{x_1}) \\cdot w_{src}(\\vec{x_1}) + s_{src}(\\vec{x_2}) \\cdot w_{src}(\\vec{x_2})}{w_{src}(\\vec{x_1}) + w_{src}(\\vec{x_2})} \\\\ \\end{aligned} } sdst(x1⃗,x2⃗)=wsrc(x1⃗)+wsrc(x2⃗)ssrc(x1⃗)⋅wsrc(x1⃗)+ssrc(x2⃗)⋅wsrc(x2⃗) 可知,999 次采样能够两两差值,从而减少到只需 555 次实际的纹理数据读。卷积核的采样位置,取四角记为 [C1,C2,C3,C4]=[S(xc−1,yc−1),S(xc−1,yc+1),S(xc+1,yc−1),S(xc+1,yc+1)][C_1, C_2, C_3, C_4] =[S_{(x_c-1,y_c-1)} , S_{(x_c-1,y_c+1)}, S_{(x_c+1,y_c-1)}, 
S_{(x_c+1,y_c+1)}][C1,C2,C3,C4]=[S(xc−1,yc−1),S(xc−1,yc+1),S(xc+1,yc−1),S(xc+1,yc+1)] 和中心 C0=S(xc,yc)C_0 = S_{(x_c,y_c)}C0=S(xc,yc) ,如下: Samplexy⋅[1,0,10,1,01,0,1]=[C1C2C0C3C4] {\\displaystyle \\begin{aligned} Sample_{xy} \\cdot { \\begin{bmatrix} 1 ,& \\quad 0 ,& \\quad 1 \\\\ 0 ,& \\quad 1 ,& \\quad 0 \\\\ 1 ,& \\quad 0 ,& \\quad 1 \\end{bmatrix} } = { \\begin{bmatrix} C_1 & \\quad & \\quad C_2 \\\\ & \\quad C_0 \\\\ C_3 & \\quad & \\quad C_4 \\end{bmatrix} } \\\\ \\end{aligned} } Samplexy⋅⎣⎡1,0,1,0,1,0,101⎦⎤=⎣⎡C1C3C0C2C4⎦⎤ 则 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 就可以表示为: Fn(xc⃗)=W00⋅C1 +W01⋅C12 +W02⋅C2 +W10⋅C13+W11⋅C0 +W12⋅C24 +W20⋅C3 +W21⋅C34 +W22⋅C4=W00⋅C1 +W01⋅W00⋅C1+W02⋅C2W00+W02 +W02⋅C2 +W10⋅W00⋅C1+W20⋅C3W00+W20 +W11⋅C0 +W12⋅W02⋅C2+W22⋅C4W02+W22 +W20⋅C3 +W21⋅W20⋅C3+W22⋅C4W20+W22 +W22⋅C4=(W00 + W00⋅W01W00 + W02+W00⋅W10W00 + W20)⋅C1 +(W02 + W02⋅W01W00 + W02+W02⋅W12W02 + W22)⋅C2 +(W20 + W20⋅W10W00 + W20+W20⋅W21W20 + W22)⋅C3 +(W22 + W22⋅W12W02 + W22+W22⋅W21W20 + W22)⋅C4 +W11⋅C0 {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) =& W_{00} \\cdot C_1 \\ + W_{01} \\cdot C_{12} \\ + W_{02} \\cdot C_{2} \\ \\ + \\\\ & W_{10} \\cdot C_{13} + W_{11} \\cdot C_{0} \\ \\ + W_{12} \\cdot C_{24} \\ + \\\\ & W_{20} \\cdot C_3 \\ + W_{21} \\cdot C_{34} \\ + W_{22} \\cdot C_{4} \\\\ =& W_{00} \\cdot C_1 \\ + W_{01} \\cdot \\tfrac{W_{00} \\cdot C_1 + W_{02} \\cdot C_2}{W_{00} + W_{02} } \\ \\ + \\\\ & W_{02} \\cdot C_{2} \\ + W_{10} \\cdot \\tfrac{W_{00} \\cdot C_1 + W_{20} \\cdot C_3}{W_{00} + W_{20} } \\ \\ + \\\\ & W_{11} \\cdot C_{0} \\ + W_{12} \\cdot \\tfrac{W_{02} \\cdot C_2 + W_{22} \\cdot C_4}{W_{02} + W_{22} } \\ \\ + \\\\ & W_{20} \\cdot C_{3} \\ + W_{21} \\cdot \\tfrac{W_{20} \\cdot C_3 + W_{22} \\cdot C_4}{W_{20} + W_{22} } \\ \\ + \\\\ & W_{22} \\cdot C_{4} \\\\ =& (W_{00}\\ +\\ \\tfrac{W_{00} \\cdot W_{01}}{W_{00}\\ +\\ W_{02}} + \\tfrac{W_{00} \\cdot W_{10}}{W_{00}\\ +\\ W_{20}})\\cdot C_1 \\ + \\\\ & (W_{02}\\ +\\ \\tfrac{W_{02} \\cdot W_{01}}{W_{00}\\ +\\ W_{02}} + \\tfrac{W_{02} \\cdot W_{12}}{W_{02}\\ +\\ W_{22}})\\cdot C_2 \\ + \\\\ & (W_{20}\\ +\\ \\tfrac{W_{20} \\cdot W_{10}}{W_{00}\\ +\\ W_{20}} + \\tfrac{W_{20} \\cdot W_{21}}{W_{20}\\ +\\ W_{22}})\\cdot C_3 \\ + \\\\ & (W_{22}\\ +\\ \\tfrac{W_{22} \\cdot W_{12}}{W_{02}\\ +\\ W_{22}} + \\tfrac{W_{22} \\cdot W_{21}}{W_{20}\\ +\\ W_{22}})\\cdot C_4 \\ + \\\\ & W_{11} \\cdot C_{0} \\\\ \\end{aligned} } Fn(xc⃗)===W00⋅C1 +W01⋅C12 +W02⋅C2 +W10⋅C13+W11⋅C0 +W12⋅C24 +W20⋅C3 +W21⋅C34 +W22⋅C4W00⋅C1 +W01⋅W00+W02W00⋅C1+W02⋅C2 +W02⋅C2 +W10⋅W00+W20W00⋅C1+W20⋅C3 +W11⋅C0 +W12⋅W02+W22W02⋅C2+W22⋅C4 +W20⋅C3 +W21⋅W20+W22W20⋅C3+W22⋅C4 +W22⋅C4(W00 + W00 + W02W00⋅W01+W00 + W20W00⋅W10)⋅C1 +(W02 + W00 + W02W02⋅W01+W02 + W22W02⋅W12)⋅C2 +(W20 + W00 + W20W20⋅W10+W20 + W22W20⋅W21)⋅C3 +(W22 + W02 + W22W22⋅W12+W20 + W22W22⋅W21)⋅C4 +W11⋅C0 看上去很复杂,但取中心点的二维高斯分布,其 fij(N3×3⃗)f_{ij} (\\vec{N_{3 \\times 3}} )fij(N3×3⃗) 的值是随 xc⃗\\vec{x_c}xc⃗ 中心对称的,有: W0=[W11]W1=[W01=W10=W12=W21]W2=[W00=W02=W20=W22] {\\displaystyle \\begin{aligned} W_0 &= [W_{11}] \\\\ W_1 &= [W_{01} = W_{10} = W_{12} = W_{21}] \\\\ W_2 &= [W_{00} = W_{02} = W_{20} = W_{22}] \\\\ \\end{aligned} } W0W1W2=[W11]=[W01=W10=W12=W21]=[W00=W02=W20=W22] 带入到线性插值 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 表达式,则: Fn(xc⃗)=W0⋅C0+[(W1 + W2)⋅(C1 +C2 +C3 +C4 )] {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) =& W_0 \\cdot C_0 +[(W_1\\ +\\ W_2)\\cdot (C_1 \\ + C_2 \\ + C_3 \\ + C_4 \\ )] \\\\ \\end{aligned} } Fn(xc⃗)=W0⋅C0+[(W1 + W2)⋅(C1 +C2 +C3 +C4 )] 当取 δ=1.0\\delta = 1.0δ=1.0 时,三值得到固定的归一化取值 [W0,W1,W2]=[0.557, 0.069, 0.042][W_0,W_1,W_2] = 
[0.557,\\ 0.069,\\ 0.042][W0,W1,W2]=[0.557, 0.069, 0.042] ,而 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 的表达式就只和采样相关了: Fn(xc⃗)=0.557⋅C0 + 0.111⋅(C1 +C2 +C3 +C4 ) {\\displaystyle \\begin{aligned} F_n(\\vec{x_c}) = 0.557 \\cdot C_0\\ +\\ 0.111 \\cdot (C_1 \\ + C_2 \\ + C_3 \\ + C_4 \\ ) \\\\ \\end{aligned} } Fn(xc⃗)=0.557⋅C0 + 0.111⋅(C1 +C2 +C3 +C4 ) 所以,插值采样的高斯滤波非常精简。只需要略微调整像素程序片(Pixel Shader/Fragment Shader)的实现,而不需要对其他处理进行改动,就能完成改造: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform mat3 gaussian_matrix; uniform sampler2D target_texture; void main() { float gauss_factor = gaussian_matrix[0][0]+gaussian_matrix[0][1]; vec3 output_; output_ += texture2D(target_texture, fs_texcoord.xy ).rgb * gaussian_matrix[1][1]; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(-1, -1) * pixel_bias).rgb * gauss_factor; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(-1, +1) * pixel_bias).rgb * gauss_factor; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(+1, -1) * pixel_bias).rgb * gauss_factor; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(+1, +1) * pixel_bias).rgb * gauss_factor; gl_FragColor = vec4(output_, 1.0); } 加速后的高斯滤波单元,对一张 (W×H)(W \\times H)(W×H) 图片的处理的理论耗时,减少到了原耗时的 0.625⋅O(N)0.625 \\cdot O(N)0.625⋅O(N) 。采样数也同比减少了 37.5%37.5\\%37.5% 。效果上和直算相比,几乎无差别。 高斯滤波的局限性 由于高斯滤波的通用卷积核是 各向同性(Isotropic) 的,在核范围内的各方向向量与中心点的方差,仅和向量终点与核中心点的相对距离有关。因此,高斯滤波并不是没有弊端的。 我们仍然选择 μ=xc⃗\\mu = \\vec{x_c}μ=xc⃗ 为核中心,假设核范围内有不包含 xc⃗\\vec{x_c}xc⃗ 在内的,总计为 NNN 的 nnn 维向量 x⃗=(x1,x2, ... ,xn)∈Rn\\vec{x} = (x_1,x_2,\\ ...\\ ,x_n) \\in \\mathbb{R}^nx⃗=(x1,x2, ... ,xn)∈Rn 的采样数据 SN={Sx1⃗,Sx2⃗, ... ,SxN⃗}S_N = \\{ S_{\\vec{x_1}} , S_{\\vec{x_2}},\\ ...\\ , S_{\\vec{x_N}} \\}SN={Sx1⃗,Sx2⃗, ... ,SxN⃗} 。将高斯滤波卷积核的离散程度,以非概率密度 协方差矩阵(Covariance Matrix) 的 Mcov(x⃗)M_{cov}(\\vec{x})Mcov(x⃗) 形式表示,记 III 为单位对角矩阵,有: Mcov(x⃗)=1N∑i=1NSxi⃗⋅[(x1−xc1)2(x2−xc2)2...(xn−xcn)2]=∑Δx2⋅I∈Rn×n {\\displaystyle \\begin{aligned} M_{cov}(\\vec{x}) &= \\tfrac{1}{N} \\sum_{i = 1}^{N} S_{\\vec{x_i}} \\cdot { \\begin{bmatrix} (x_1-x_{c1})^2 & \\quad & \\quad & \\quad \\\\ & \\quad (x_2-x_{c2})^2 & \\quad & \\quad \\\\ & \\quad & \\quad ... & \\quad\\\\ & \\quad & \\quad & \\quad (x_n-x_{cn})^2 \\end{bmatrix} } \\\\ &= \\sum \\Delta x^2 \\cdot I \\in \\mathbb{R}^{n \\times n} \\\\ \\end{aligned} } Mcov(x⃗)=N1i=1∑NSxi⃗⋅⎣⎢⎢⎡(x1−xc1)2(x2−xc2)2...(xn−xcn)2⎦⎥⎥⎤=∑Δx2⋅I∈Rn×n 多维高斯的协方差矩阵,只有对角线的 方差(Variance)存在非 000 取值,而衡量参数交叠影响的 协方差(Covariance)皆为 000 值。所以,高斯滤波没有考虑维度方位信息带来的数据间的差异,每一个维度仅对自身属性产生影响。因此,高斯核总是中心对称。 这一特征体现在二维信号的处理上时,就表现为经过高斯滤波处理的图片,轮廓细节会有所丢失(物体更不容易分辨,而非单纯颜色变得规整)。同时,也更容易因为算法导致的频率扰动,产生高频变化规律缺失,像素朝核的外边缘等量的分散运动而出现摩尔纹(Moire Pattern)。毕竟图片的高频部分,才是保存轮廓信息的关键。但高斯滤波本质上却是全通量的概率权重控制。 那么有没有能够在一定程度上,既保留高频细节的同时,又能够相对独立的处理低频波动的算法呢? 
考虑问题主要出现在高斯滤波的各向同性,或许可以通过引入高低频差异修饰滤波器,来达成要求。这种做法被称为 边缘保存(Edge Preserving)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_2_2.html":{"url":"Chapter_3/Language/cn/Docs_3_2_2.html","title":"3.2.2 双边滤波(Bilateral Filter)","keywords":"","body":"3.2.2 双边滤波(Bilateral Filter) 双边滤波(Bilateral Filter) 是在高斯滤波基础上,基于 边缘保存(Edge Preserving) 滤波思想,通过一个 空间域(Spatial Domain/Domain)标准高斯滤波 和 灰度值(Gray Range/Range)朴素高斯分布 的共同作用,形成的 高斯滤波变体。 由于二维信号的高频部分,在灰度通道上体现的更为明确(本质起作用的是物理意义上的光亮度信息,人眼主要通过光亮度差异来感知物体轮廓。光亮度的多种衍生抽象,和相关概念是如何迁移数据化到计算机视觉体系内的,会在本书第三章详细讲解)。所以,双边滤波引入对灰度值的高斯,是期望提取核内灰度变化特征,来得到各频率波的核内密度分布情况。 进而对核内标准高斯滤波像素值概率密度结果进行修饰,得到 带有截面的单向滤波卷积核(Single Orientation Filter)。 图 3-3 双边滤波经过灰度裁剪后,在轮廓边缘处的卷积核示意图 [13] 因此,双边滤波属于 混合高斯卷积核(Combined Gaussian Kernel) 滤波器的一种。我们需要分别计算 空间高斯权重(SGW [Spatial Gaussian Weight]) 和 灰度高斯权重(GGW [Gray Gaussian Weight]) 两部分,并混合权重得到最终的双边滤波矩阵。 双边滤波的混合高斯权重 空间高斯权重(SGW),也被称为 领域权重(Domain Weight),记为 Gs(x⃗,μ⃗)G_s(\\vec{x},\\vec{\\mu})Gs(x⃗,μ⃗) ,有波动参数 δs\\delta_sδs 。其本身代表,以选定中心点 μ⃗=xc⃗\\vec{\\mu} = \\vec{x_c}μ⃗=xc⃗ 与卷积核内相邻点的欧式距离,求得的 二维高斯概率分布 结果。即: Gs(x⃗,xc⃗)=12π⋅δse−(x⃗−xc⃗)22⋅δs2=12π⋅δse−(Δx2+Δy2)2⋅δs2 {\\displaystyle \\begin{aligned} G_s(\\vec{x},\\vec{x_c}) = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta_s} e ^{-\\tfrac{(\\vec{x}-\\vec{x_c})^2}{2 \\cdot {\\delta_s}^2}} = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta_s} e ^{-\\tfrac{(\\Delta x^2+\\Delta y^2)}{2 \\cdot {\\delta_s}^2}} \\\\ \\end{aligned} } Gs(x⃗,xc⃗)=√2π⋅δs1e−2⋅δs2(x⃗−xc⃗)2=√2π⋅δs1e−2⋅δs2(Δx2+Δy2) 灰度高斯权重(GGW),也被称为 尺度权重(Range Weight),记为 Gr(x⃗,μ⃗)G_r(\\vec{x},\\vec{\\mu})Gr(x⃗,μ⃗) ,有波动参数 δr\\delta_rδr 。其本身代表,以选定中心点 μ⃗=xc⃗\\vec{\\mu} = \\vec{x_c}μ⃗=xc⃗ 灰度 gray(xc⃗)gray(\\vec{x_c})gray(xc⃗) 与卷积核内相邻点灰度 gray(x⃗)gray(\\vec{x})gray(x⃗) 的方差,求得的 一维高斯概率分布 结果。记 S(x)={r,g,b}S(x) = \\{r,g,b \\}S(x)={r,g,b} 有: Gr(x⃗,xc⃗)=12π⋅δre−(gray(x⃗)−gray(xc⃗))22⋅δr2=12π⋅δre−(Δr2+Δg2+Δb2)2⋅δr2 {\\displaystyle \\begin{aligned} G_r(\\vec{x},\\vec{x_c}) = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta_r} e ^{-\\tfrac{(gray(\\vec{x})-gray(\\vec{x_c}))^2}{2 \\cdot {\\delta_r}^2}} = \\frac{1}{\\sqrt{2\\pi} \\cdot \\delta_r} e ^{-\\tfrac{(\\Delta r^2+\\Delta g^2 +\\Delta b^2)}{2 \\cdot {\\delta_r}^2}} \\\\ \\end{aligned} } Gr(x⃗,xc⃗)=√2π⋅δr1e−2⋅δr2(gray(x⃗)−gray(xc⃗))2=√2π⋅δr1e−2⋅δr2(Δr2+Δg2+Δb2) 以 ∣target∣1\\vert target \\vert_1∣target∣1 表示归一化操作,记混合高斯权重为 W(x⃗,μ⃗)W(\\vec{x},\\vec{\\mu})W(x⃗,μ⃗) ,则: W(x⃗,xc⃗)=∣Gs(x⃗,xc⃗)⋅Gr(x⃗,xc⃗)∣1 {\\displaystyle \\begin{aligned} W(\\vec{x},\\vec{x_c}) &= \\vert G_s(\\vec{x},\\vec{x_c}) \\cdot G_r(\\vec{x},\\vec{x_c}) \\vert_1 \\\\ \\end{aligned} } W(x⃗,xc⃗)=∣Gs(x⃗,xc⃗)⋅Gr(x⃗,xc⃗)∣1 由于,空间高斯权重其实就是标准高斯滤波权重,因此 ∣Gs(x⃗,μ⃗)∣1=f(Nn×n⃗)\\vert G_s(\\vec{x},\\vec{\\mu}) \\vert_1 = f( \\vec{N_{n \\times n}} )∣Gs(x⃗,μ⃗)∣1=f(Nn×n⃗) 。我们沿用上节高斯滤波的设定,取用 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 大小卷积核,滤波函数记为 Bn(xc⃗)B_n(\\vec{x_c})Bn(xc⃗) ,则: Bn(xc⃗)=∣∑xySxy⋅W(xc⃗−N3×3⃗)∣1=∣∑xySxy⋅W(N3×3⃗)∣1=∣∑xySxyGs(x⃗,xc⃗)⋅Gr(x⃗,xc⃗)∣1=∣∑xySxyf(x⃗,xc⃗)∣1⋅[Gr(x⃗,xc⃗)∑Gr(xc⃗)]∈R3×3Bn(xc⃗)=Fn(xc⃗)⋅∣Gr(xc⃗)∣1∈R3×3 {\\displaystyle \\begin{aligned} B_n(\\vec{x_c}) &= \\vert \\sum_{xy}S_{xy} \\cdot W( \\vec{x_c} - \\vec{N_{3 \\times 3}} ) \\vert_1 = \\vert \\sum_{xy}S_{xy} \\cdot W( \\vec{N_{3 \\times 3}} ) \\vert_1 \\\\ &= \\vert \\sum_{xy}S_{xy} G_s(\\vec{x},\\vec{x_c}) \\cdot G_r(\\vec{x},\\vec{x_c}) \\vert_1 \\\\ &= \\vert \\sum_{xy}S_{xy} f(\\vec{x},\\vec{x_c}) \\vert_1 \\cdot [\\frac{ G_r(\\vec{x},\\vec{x_c}) }{\\sum G_r(\\vec{x_c})}] \\in \\mathbb{R}^{3 \\times 3} \\\\ 
B_n(\\vec{x_c}) &= F_n(\\vec{x_c}) \\cdot \\vert G_r(\\vec{x_c}) \\vert_1 \\in \\mathbb{R}^{3 \\times 3} \\\\ \\end{aligned} } Bn(xc⃗)Bn(xc⃗)=∣xy∑Sxy⋅W(xc⃗−N3×3⃗)∣1=∣xy∑Sxy⋅W(N3×3⃗)∣1=∣xy∑SxyGs(x⃗,xc⃗)⋅Gr(x⃗,xc⃗)∣1=∣xy∑Sxyf(x⃗,xc⃗)∣1⋅[∑Gr(xc⃗)Gr(x⃗,xc⃗)]∈R3×3=Fn(xc⃗)⋅∣Gr(xc⃗)∣1∈R3×3 而 ∑Gr(xc⃗)\\sum G_r(\\vec{x_c})∑Gr(xc⃗) 就是一维高斯曲线的线下面积,有 ∑Gr(xc⃗)=1\\sum G_r(\\vec{x_c}) = 1∑Gr(xc⃗)=1 ,所以: Bn(xc⃗)=Fn(xc⃗)⋅Gr(xc⃗)∈R3×3 {\\displaystyle \\begin{aligned} B_n(\\vec{x_c}) &= F_n(\\vec{x_c}) \\cdot G_r(\\vec{x_c}) \\in \\mathbb{R}^{3 \\times 3} \\\\ \\end{aligned} } Bn(xc⃗)=Fn(xc⃗)⋅Gr(xc⃗)∈R3×3 上式中 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 即为高斯滤波核函数 。 可见,适用于高斯滤波 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) 的快速算法,同样也适用于双边滤波 Bn(xc⃗)B_n(\\vec{x_c})Bn(xc⃗) 。 为什么通过核内频率采用朴素高斯分布,能够达到裁切的目的呢?这是因为,当卷积核目标中心点处于图像中物体的轮廓位置附近时,卷积核内的频率分布会出现相对非轮廓区域更为强烈的波动。 而高斯分布,即正态分布,恰恰是一种常用的放缩范围内数据波动的手段。 在标准高斯滤波中,我们通过多维高斯,粗浅的处理了整体数据上的波动性。这种处理方式,相当于将图像经过二维傅里叶变换得到的空域(SD)数据和频域(FD)数据,统一按照全通道空域的像素均值分布情况进行了概率平均。忽略了频域本身所具有的实际意义。而灰度值高斯的作用,就是 间接 的达成抽象频域数据波动特征的目的。 通过降低 δr\\delta_rδr 取值,放大核内频率差异情况。增强高频部分的权重,衰减低频占比。因此,对于双边滤波来说:在满足取 δd\\delta_dδd 越小,波动性越强越激烈,图片越尖锐;反之 δd\\delta_dδd 越大,波动性越弱越平缓,图片越模糊的同时;取 δr\\delta_rδr 越大,高低频差异缩减,边缘越模糊;反之 δr\\delta_rδr 越小,高低频差异被放大,边缘越清晰。 双边滤波的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装了。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 没有太多操作,因为关键的部分在 像素程序片(Pixel Shader/Fragment Shader) 上: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform mat3 gaussian_matrix; uniform float gaussian_range; uniform sampler2D target_texture; float variance(vec3 c1, vec3 c2){ vec3 temp = c2 - c1; return temp[0] * temp[0] + temp[1] * temp[1] + temp[2] * temp[2]; } void main() { vec3 output_; vec4 color_center = texture2D(target_texture, fs_texcoord.xy); for (int i = 0; i 完成对算法求和过程的迁移。传入的 高斯算子 gaussian_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。而 灰度高斯权重 gaussian_range 涉及到实际采样,需要直接传入。由于采用 Web 展示,此处方法以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_gaussian_kernel(step, delta) { let n = step * 2 + 1; let kernel = new Float32Array(n * n); let factor_1 = 1.0 / (Math.sqrt(2.0 * Math.PI) * delta); let factor_2 = 1.0 / (2.0 * delta * delta); let normalize_div = 0; for (let i = 0; i 如上,双边滤波需要固定计算的部分,和标准高斯滤波并无不同。工程中,仅在像素程序片的实现上存在差异。 同理,双边滤波也是可以使用 线性插值(Linear Sampling) 代替部分采样,来进行加速。和标准高斯滤波一样,只需要略微调整像素程序片(Pixel Shader/Fragment Shader)的实现: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform mat3 gaussian_matrix; uniform float gaussian_range; uniform sampler2D target_texture; float variance(vec3 c1, vec3 c2){ vec3 temp = c2 - c1; return temp[0] * temp[0] + temp[1] * temp[1] + temp[2] * temp[2]; } void main() { vec4 color_center = texture2D(target_texture, fs_texcoord.xy); float gauss_factor = gaussian_matrix[0][0]+gaussian_matrix[0][1]; vec3 output_ = texture2D(target_texture, fs_texcoord.xy ).rgb * gaussian_matrix[1][1]; for (int i = 0; i 至此,一个标准双边滤波器,和它的线性采样快速版就完成了。 双边滤波的局限性 双边滤波是否彻底的解决了高斯滤波的局限性问题呢?答案是解决了 一部分。 引入高低频分布密度权重,虽然能够处理图像中物体轮廓边缘模糊现象,达到强度可控的 边缘保存(Edge Preserving)。但由于灰度高斯权重,单一维度单一方向梯度的特点。在利用双边滤波增强高频波权重的同时,也会 增大由标准高斯滤波高频分散运动带来的干扰。这反而会让增强边缘细节过程中产生的 
摩尔纹(Moire Pattern)更加显著。 为处理这个问题,我们相对放松对算力的限制。一个可行的方案是在标准高斯滤波的基础上,通过使用多个方向梯度共同作用,重新构造一个满足 非各向同性(Not Isotropic) 条件的滤波单元 (毕竟非全方位的梯度差异,还无法满足各向异性条件),来保存和引入核内像素移动和频率波传导关系。使我们能够对核内像素所占均值比重进行更为合理的分配,起到缓解效果。 这种多梯度的方式,会增强算法对图像边缘的处理能力,保存边缘的同时增强细节。因此也被称为 边缘锐化(Edge Sharpening)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_2_3.html":{"url":"Chapter_3/Language/cn/Docs_3_2_3.html","title":"3.2.3 拉普拉斯滤波(Laplacian Filter)","keywords":"","body":"3.2.3 拉普拉斯滤波(Laplacian Filter) 拉普拉斯滤波(Laplacian Filter) 是一种基于二阶微分方程的差异扩大化算子(Operator)。其不仅可以从灰度出发用于物体的 边缘锐化(Edge Sharpening),也可以应用于全通道的色彩变化增强,即 广义锐化(Sharpening)。 数学上,一阶微分能够突出原函数连续变化的幅度特征(即原函数斜率),二阶微分则进一步扩大了对这种变化趋势(即导数的斜率)的描述。而基于多参数的二阶偏导数方程,在展示参数本身对趋势影响的同时,也能够说明两两参数间的影响关系。 由于是对趋势的求导,以离散数据逼近信号的二阶微分方程,只需要使用目标相邻采样做差值计算即可,且并不会影响周边点各自的趋势判断。正好符合目标情况卷积核,对核内关系闭环和抗干扰的要求。所以,拉普拉斯滤波以卷积核中心点构建包含全部方向参数(Orient Axis)的平面坐标系,核内采样求得中心点突变权重的二阶导数展式。用它增强核内数据中心的突变特征。 二维拉普拉斯滤波核 对于二维信号,即图片信号,来说。拉普拉斯卷积核只有 xyxyxy 两个方向参数。记原信号为 S(x)S(x)S(x) ,原信号的二阶导数为 ∇2S(x)\\nabla^2 S(x)∇2S(x) 。仍然取用大小 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 ,中心点 xc⃗\\vec{x_c}xc⃗ 的卷积核,记边缘检测拉普帕斯滤波核函数为 Lp(xc⃗)\\mathcal{L}_p(\\vec{x_c})Lp(xc⃗) ,则: Lp(xc⃗)=−K⋅∇2S(xc⃗) {\\displaystyle \\begin{aligned} \\mathcal{L}_p(\\vec{x_c}) = -K \\cdot \\nabla^2 S(\\vec{x_c}) \\\\ \\end{aligned} } Lp(xc⃗)=−K⋅∇2S(xc⃗) 考虑到需要调节边缘检测强弱。我们采用强度因子 K∈(−∞,+∞)K \\in (-\\infty, +\\infty)K∈(−∞,+∞) 作为权重,以便进行敏感度控制。 则 KKK 取正值时增强, KKK 取负值时衰减, 绝对值 ∣K∣\\vert K \\vert∣K∣ 大小表示放缩强度。 记核函数为 Ln(xc⃗)\\mathcal{L}_n(\\vec{x_c})Ln(xc⃗) ,有: Ln(xc⃗)=S(xc⃗)+Lp(xc⃗)=S(xc⃗)−K⋅∇2S(xc⃗) {\\displaystyle \\begin{aligned} \\mathcal{L}_n(\\vec{x_c}) =& S(\\vec{x_c}) + \\mathcal{L}_p(\\vec{x_c}) \\\\ =& S(\\vec{x_c}) - K \\cdot \\nabla^2 S(\\vec{x_c}) \\\\ \\end{aligned} } Ln(xc⃗)==S(xc⃗)+Lp(xc⃗)S(xc⃗)−K⋅∇2S(xc⃗) 若 Lp(xc⃗)\\mathcal{L}_p(\\vec{x_c})Lp(xc⃗) 不计算偏导数在内,即 只处理轴方向二阶导数。我们就可以得到 双通(2-Way)拉普拉斯核 : ∇2S(x)=d2S(xc⃗)dxc⃗2=∂2S∂x2+∂2S∂y2=S(x−1, y) − 2⋅S(x,y) + S(x+1, y) + S(x,y−1) − 2⋅S(x,y) + S(x,y+1)Lp(xc⃗)=−K⋅∑xySxy⋅[0, 1, 01,−4, 10, 1, 0]Ln(xc⃗)=−K⋅∑xySxy⋅[0, 1, 01,−4, 10, 1, 0] + S(xc⃗) {\\displaystyle \\begin{aligned} \\nabla^2 S(x) =& \\tfrac{\\mathrm{d}^2 S(\\vec{x_c})}{\\mathrm{d}{\\vec{x_c}^2}} = \\tfrac{ \\partial^2 S}{\\partial x^2} + \\tfrac{ \\partial^2 S}{\\partial y^2} \\\\ =& S(x-1,\\ y)\\ -\\ 2 \\cdot S(x,y)\\ +\\ S(x+1,\\ y)\\ +\\ \\\\ & S(x,y-1)\\ -\\ 2 \\cdot S(x,y)\\ +\\ S(x,y+1) \\\\ \\mathcal{L}_p(\\vec{x_c}) =& -K \\cdot \\sum_{xy}S_{xy} \\cdot { \\begin{bmatrix} 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\\\ 1 ,& \\quad -4 ,& \\quad \\ \\ 1 \\\\ 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\end{bmatrix} }\\\\ \\mathcal{L}_n(\\vec{x_c}) =& - K \\cdot \\sum_{xy}S_{xy} \\cdot { \\begin{bmatrix} 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\\\ 1 ,& \\quad -4 ,& \\quad \\ \\ 1 \\\\ 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\end{bmatrix} }\\ +\\ S(\\vec{x_c}) \\\\ \\end{aligned} } ∇2S(x)==Lp(xc⃗)=Ln(xc⃗)=dxc⃗2d2S(xc⃗)=∂x2∂2S+∂y2∂2SS(x−1, y) − 2⋅S(x,y) + S(x+1, y) + S(x,y−1) − 2⋅S(x,y) + S(x,y+1)−K⋅xy∑Sxy⋅⎣⎡0,1,0, 1,−4, 1, 0 1 0⎦⎤−K⋅xy∑Sxy⋅⎣⎡0,1,0, 1,−4, 1, 0 1 0⎦⎤ + S(xc⃗) 若 Lp(xc⃗)\\mathcal{L}_p(\\vec{x_c})Lp(xc⃗) 包含对角方向 的影响,即处理偏导数情况,我们就可以得到 四通(4-Way)拉普拉斯核 : ∇2S(x)=d2S(xc⃗)dxc⃗2=∂2S∂x2+∂2S∂x∂y+∂2S∂y∂x+∂2S∂y2=S(x−1, y+0) − 2⋅S(x, y) + S(x+1, y+0) + S(x−1, y−1) − 2⋅S(x, y) + S(x+1, y+1) + S(x+1, y−1) − 2⋅S(x, y) + S(x−1, y+1) + S(x+0, y−1) − 2⋅S(x, y) + S(x+0, y+1) Lp(xc⃗)=−K⋅∑xySxy⋅[1, 1, 11,−8, 11, 1, 1]Ln(xc⃗)=−K⋅∑xySxy⋅[1, 1, 11,−8, 11, 1, 1] + S(xc⃗) {\\displaystyle 
\\begin{aligned} \\nabla^2 S(x) =& \\tfrac{\\mathrm{d}^2 S(\\vec{x_c})}{\\mathrm{d}{\\vec{x_c}^2}} = \\tfrac{ \\partial^2 S}{\\partial x^2} + \\tfrac{ \\partial^2 S}{\\partial x \\partial y} + \\tfrac{ \\partial^2 S}{\\partial y \\partial x} + \\tfrac{ \\partial^2 S}{\\partial y^2} \\\\ =& S(x-1,\\ y+0)\\ -\\ 2 \\cdot S(x,\\ y)\\ +\\ S(x+1,\\ y+0)\\ +\\ \\\\ & S(x-1,\\ y-1)\\ -\\ 2 \\cdot S(x,\\ y)\\ +\\ S(x+1,\\ y+1)\\ +\\ \\\\ & S(x+1,\\ y-1)\\ -\\ 2 \\cdot S(x,\\ y)\\ +\\ S(x-1,\\ y+1)\\ +\\ \\\\ & S(x+0,\\ y-1)\\ -\\ 2 \\cdot S(x,\\ y)\\ +\\ S(x+0,\\ y+1)\\ \\\\ \\mathcal{L}_p(\\vec{x_c}) =& -K \\cdot \\sum_{xy}S_{xy} \\cdot { \\begin{bmatrix} 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad -8 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 \\end{bmatrix} }\\\\ \\mathcal{L}_n(\\vec{x_c}) =& - K \\cdot \\sum_{xy}S_{xy} \\cdot { \\begin{bmatrix} 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad -8 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 \\end{bmatrix} }\\ +\\ S(\\vec{x_c}) \\\\ \\end{aligned} } ∇2S(x)==Lp(xc⃗)=Ln(xc⃗)=dxc⃗2d2S(xc⃗)=∂x2∂2S+∂x∂y∂2S+∂y∂x∂2S+∂y2∂2SS(x−1, y+0) − 2⋅S(x, y) + S(x+1, y+0) + S(x−1, y−1) − 2⋅S(x, y) + S(x+1, y+1) + S(x+1, y−1) − 2⋅S(x, y) + S(x−1, y+1) + S(x+0, y−1) − 2⋅S(x, y) + S(x+0, y+1) −K⋅xy∑Sxy⋅⎣⎡1,1,1, 1,−8, 1, 1 1 1⎦⎤−K⋅xy∑Sxy⋅⎣⎡1,1,1, 1,−8, 1, 1 1 1⎦⎤ + S(xc⃗) 显然,四通拉普拉斯对中心点突变特征能有更好的提炼。如果需要对更多方向进行评估,则需要增大核面积。根据拉普拉斯二阶微分自身的特性可知,大小为 n×nn \\times nn×n 的卷积核,可选评估方向为 2(n−1)2(n-1)2(n−1) 个,相应的需求采样也会成倍扩增。且增大采样面积仅仅是预先提炼出,中心点周边的相邻点的突变情况。用这些点的加权增强值来计算中心点加权增强值。所以,更大的拉普拉斯核只是利用了小核的富集,反而并不一定能够得到更优秀的筛选结果(比如单核内波动,具有复杂高低差变化时)。因此,为了相对保证结果的稳定性,我们一般不会采用超过 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 大小的拉普拉斯卷积核。 拉普拉斯滤波的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装了。 如果是 边缘锐化(Edge Sharpening) 的场景,数据只采用灰度值处理即可。对于 原色格式(Primaries Format)为 CIE RGB 1931 色彩空间 的数据,可按下式用 RGB 快速换算: Grey=0.299⋅R + 0.587⋅G + 0.114⋅B Grey = 0.299 \\cdot R\\ +\\ 0.587 \\cdot G\\ +\\ 0.114 \\cdot B Grey=0.299⋅R + 0.587⋅G + 0.114⋅B 此处演示为了便于说明和展示,选择采用更广泛的适用范围,针对广义锐化(Sharpening)构造像素全通道采样的拉普拉斯滤波器。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 没有太多操作,因为关键的部分在 像素程序片(Pixel Shader/Fragment Shader) 上。依据双通还是四通做一下区分。我们采用两种实现,双通情况下直接计算,有: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform vec2 pixel_bias; uniform mat3 laplacian_matrix; uniform sampler2D target_texture; void main() { vec3 output_; output_ += texture2D(target_texture, fs_texcoord.xy).rgb * ((only_edge? 
0.0 : 1.0) + laplacian_matrix[1][1]); output_ += texture2D(target_texture, fs_texcoord.xy + vec2(-1, -1) * pixel_bias).rgb * laplacian_matrix[0][0]; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(-1, +1) * pixel_bias).rgb * laplacian_matrix[2][0]; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(+1, -1) * pixel_bias).rgb * laplacian_matrix[0][2]; output_ += texture2D(target_texture, fs_texcoord.xy + vec2(+1, +1) * pixel_bias).rgb * laplacian_matrix[2][2]; gl_FragColor = vec4(output_, 1.0); } 四通则采用 for 循环实现,传入双通的 拉普拉斯算子 laplacian_matrix 即可兼容,有: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform vec2 pixel_bias; uniform mat3 laplacian_matrix; uniform sampler2D target_texture; void main() { vec3 output_ = only_edge? vec3(0) : texture2D(target_texture, fs_texcoord.xy).rgb; for (int i = 0; i 上述程序片中,我们通过 only_edge 开关 控制是否只获取边缘信息。而传入的 拉普拉斯算子 laplacian_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。由于采用 Web 展示,此处方法以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_laplacian_kernel(step, way_count, str_factor) { let n = step * 2 + 1; let max_way = (n - 1) * 2; let cur_way = Math.min(way_count, max_way); let way_step = Math.floor(max_way / cur_way); let kernel = new Float32Array(n * n); for (let i = 0; i 至此,双通和四通的标准拉普拉斯广义锐化滤波器程序片就完成了。 拉普拉斯滤波的局限性 从卷积核可以看出,拉普拉斯滤波仍然是固定梯度的。但是否启用对角元素(Diagonal Elements)对卷积核特性还是会有较大的影响的。 双通拉普拉斯,只对于横纵方向上的数据敏感,构成的卷积核为 非各向同性(Not Isotropic) 卷积核。但是在有权重的方向上,数据变化梯度(Gradient)却是等大的。因此,双通拉普拉斯也 非各向异性(Not Anisotropic)。 四通拉普拉斯,由于引入对角线方向代表的 45∘45^{\\circ}45∘ 、 135∘135^{\\circ}135∘ 、 225∘225^{\\circ}225∘ 、 315∘315^{\\circ}315∘ 的计算,使 3×33 \\times 33×3 核心相邻元素所含所有方向上的梯度都成为等大参考值,因此,四通拉普拉斯的卷积核,为 各向同性(Isotropic) 卷积核。 所以,虽然四通拉普拉斯能够更好的提取临界边缘特征,但也会同步的保留并增强高频扰动,从而在结果中留存更多的高频噪音。双通则要相对好一些,但相应的临界特征提取能力也变得更弱。不过,若是能够提升数据源的质量,通过 先行降噪(NRF [Noise Reduction First]) 过滤部分干扰。那么理论上,最终提取产物的质量也会有一定程度的提升。马尔滤波(Marr Filter) 就是对此方向的探索。 同时,拉普拉斯滤波 并非是脱离中心参考值的边缘锐化(Edge Sharpening)算法,对于一些复杂的边缘位置波动情况,会有 边缘扩散(Edge Spread) 的风险。且由于 包含高权重的中心值参与了计算过程,使得拉普拉斯滤波对噪声非常敏感,从而极易丢失边缘方向信息,最终导致检测得到的边缘不连续。基于该情况,部分后续的改进算法采用了 *去中心化(Center Insensitive) 思想,来一定程度上避免问题发生。比如, 索贝尔滤波(Sobel Filter)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_2_4.html":{"url":"Chapter_3/Language/cn/Docs_3_2_4.html","title":"3.2.4 马尔滤波(Marr Filter)","keywords":"","body":"3.2.4 马尔滤波(Marr Filter) 马尔滤波(Marr Filter) 是拉普拉斯滤波采用 先行降噪(NRF [Noise Reduction First]) 的改进算法。利用高斯滤波对频率波动性的处理能力,对图片的高频信息进行模糊过滤。再行使标准拉普拉斯边缘检测,筛选突变明显的剩余高频部分并增强,达到更好的效果 [14] 。 因此马尔滤波也被称为 拉普拉斯-高斯滤波(LoG [Laplacian of Gaussian]),或 马尔-希德雷斯算法(Marr–Hildreth Algorithm)。还是以 ∣target∣1\\vert target \\vert_1∣target∣1 表示归一化操作。我们记高斯滤波核函数为 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) ,记 LoG 的边缘检测核函数为 LoGp(xc⃗){LoG}_p(\\vec{x_c})LoGp(xc⃗) ,有: LoGn(xc⃗)=Lp(xc⃗)∣Fn=−K⋅∇2Fn(xc⃗) {\\displaystyle \\begin{aligned} {LoG}_n(\\vec{x_c}) =& \\mathcal{L}_p(\\vec{x_c})|_{F_n} = -K \\cdot \\nabla^2 F_n(\\vec{x_c}) \\\\ \\end{aligned} } LoGn(xc⃗)=Lp(xc⃗)∣Fn=−K⋅∇2Fn(xc⃗) 其中 KKK 是我们取来控制强度的强度因子,展开简化上式有: LoGp(xc⃗)=−K⋅∑xySxy⋅∣(−1πδ4⋅[1−(Δx2+Δy2)2⋅δ2]⋅e−(Δx2+Δy2)2⋅δ2)xy∣1 {\\displaystyle \\begin{aligned} {LoG}_p(\\vec{x_c}) =& -K \\cdot \\sum_{xy}S_{xy} \\cdot \\vert ( -\\tfrac{1}{\\pi \\delta ^4} \\cdot [1- \\tfrac{(\\Delta x^2+\\Delta y^2)}{2 \\cdot \\delta ^2}] \\cdot e ^{-\\tfrac{(\\Delta x^2+\\Delta y^2)}{2 \\cdot \\delta ^2}})_{xy} \\vert_1 \\\\ 
\\end{aligned} } LoGp(xc⃗)=−K⋅xy∑Sxy⋅∣(−πδ41⋅[1−2⋅δ2(Δx2+Δy2)]⋅e−2⋅δ2(Δx2+Δy2))xy∣1 显然,LoGp(xc⃗){LoG}_p(\\vec{x_c})LoGp(xc⃗) 也满足高斯滤波的特性,在 δ\\deltaδ 确定的情况下具有固定大小的算子。如果选用的高斯核大小为 3×33 \\times 33×3 ,则考虑到最大程度生效的感受野大小,算法的卷积核必须得保证有至少 n×n≥3×3n \\times n \\geq 3 \\times 3n×n≥3×3 的取值。但也不能太大。如果超过核心高斯算子大小的 555 倍,即 n×n≥15×15n \\times n \\geq 15 \\times 15n×n≥15×15 时,会非常容易产生采样元素的过度富集,导致边缘取值偏移和过曝问题。 因此,一般而言 LoGn(xc⃗){LoG}_n(\\vec{x_c})LoGn(xc⃗) 算子的大小会取奇数范围 n×n∈[5×5, 11×11]∣oddn \\times n \\in [5 \\times 5, \\ 11 \\times 11]|_{odd}n×n∈[5×5, 11×11]∣odd , 记为 MLoGM_{LoG}MLoG 。 为了便于说明,我们采用 n×n=9×9n \\times n = 9 \\times 9n×n=9×9 的核大小做计算。当 δ=1.4\\delta = 1.4δ=1.4 且 K=1.0K = 1.0K=1.0 时,未归一化的 MLoGM_{LoG}MLoG 可算得为: MLoG∣δ=1.4K=1.0=[0, 1, 1, 2, 2, 2, 1, 1, 01, 2, 4, 5, 5, 5, 4, 2, 11, 4, 5, 3, 0, 3, 5, 4, 12, 5, 3,−12,−24,−12, 3, 5, 22, 5, 0,−24,−40,−24, 0, 5, 22, 5, 3,−12,−24,−12, 3, 5, 21, 4, 5, 3, 0, 3, 5, 4, 11, 2, 4, 5, 5, 5, 4, 2, 10, 1, 1, 2, 2, 2, 1, 1, 0]9×9 {\\displaystyle \\begin{aligned} M_{LoG}|_{\\delta=1.4}^{K=1.0} =& { \\begin{bmatrix} 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\\\ 1 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 0 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 1 \\\\ 2 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 3 ,& \\quad -12 ,& \\quad -24 ,& \\quad -12 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 2 \\\\ 2 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 0 ,& \\quad -24 ,& \\quad -40 ,& \\quad -24 ,& \\quad \\ \\ 0 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 2 \\\\ 2 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 3 ,& \\quad -12 ,& \\quad -24 ,& \\quad -12 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 2 \\\\ 1 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 0 ,& \\quad \\ \\ 3 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 1 \\\\ 1 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 5 ,& \\quad \\ \\ 4 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 1 \\\\ 0 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 2 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 1 ,& \\quad \\ \\ 0 \\end{bmatrix} } _{9 \\times 9} \\\\ \\end{aligned} } MLoG∣δ=1.4K=1.0=⎣⎢⎢⎢⎢⎢⎢⎢⎢⎢⎢⎢⎢⎡0,1,1,2,2,2,1,1,0, 1, 2, 4, 5, 5, 5, 4, 2, 1, 1, 4, 5, 3, 0, 3, 5, 4, 1, 2, 5, 3,−12,−24,−12, 3, 5, 2, 2, 5, 0,−24,−40,−24, 0, 5, 2, 2, 5, 3,−12,−24,−12, 3, 5, 2, 1, 4, 5, 3, 0, 3, 5, 4, 1, 1, 2, 4, 5, 5, 5, 4, 2, 1, 0 1 1 2 2 2 1 1 0⎦⎥⎥⎥⎥⎥⎥⎥⎥⎥⎥⎥⎥⎤9×9 此时,有 LoGn(xc⃗)∣δ=1.4K=1.0{LoG}_n(\\vec{x_c})|_{\\delta=1.4}^{K=1.0}LoGn(xc⃗)∣δ=1.4K=1.0 可表示如下: LoGn(xc⃗)∣δ=1.4=∑xySxy⋅∣(MLoG∣δ=1.4K=1.0)∣1∈R9×9 {\\displaystyle \\begin{aligned} {LoG}_n(\\vec{x_c})|_{\\delta=1.4} =& \\sum_{xy}S_{xy} \\cdot \\vert (M_{LoG}|_{\\delta=1.4}^{K=1.0}) \\vert_1 \\in \\mathbb{R}^{9 \\times 9} \\\\ \\end{aligned} } LoGn(xc⃗)∣δ=1.4=xy∑Sxy⋅∣(MLoG∣δ=1.4K=1.0)∣1∈R9×9 除了采样不占优势外,马尔滤波核本身在确定 δ\\deltaδ 取值后并不复杂。考虑到最小采样成本,我们一般取用 5×55 \\times 55×5 大小的卷积核。且不建议对马尔滤波核使用线性采样简化运算,否则会扩大误差。 马尔滤波的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + 
vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 程序化马尔滤波的关键处理部分,依旧在 像素程序片(Pixel Shader/Fragment Shader)上和 CPU 的马尔算子的计算上。我们先看像素程序片(Pixel Shader/Fragment Shader)是怎么实现的: precision mediump float; const int n = 5; varying vec4 fs_position; varying vec2 fs_texcoord; uniform vec2 pixel_bias; uniform float marr_matrix[n * n]; uniform sampler2D target_texture; void main() { vec3 output_; for (int i = 0; i 完全就是高斯的像素程序片。或者说,对于以矩阵形式传入的固定算子,在程序片的实现上都是可以复用的。因此,如果遇到类似场景,此类程序片也可以考虑合并或者同态转换。 而传入的 马尔算子 marr_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_marr_kernel(step, delta) { let n = step * 2 + 1; let kernel = new Float32Array(n * n); let factor_1 = 1.0 / (Math.PI * Math.pow(delta, 4)); // trick: normalized skip let factor_2 = 1.0 / (2.0 * delta * delta); let normalize_div = 0; for (let i = 0; i 至此,简易马尔滤波器程序片就完成了。 马尔滤波的局限性 马尔滤波最大问题就在于采样数上。但如果不考虑采样的消耗,其本身也并非毫无缺点。 虽然马尔滤波因 具有对信号数据所携带高频干扰(即高频噪声)的一定抗性,使得算法结果相较于拉普拉斯滤波而言,有较大的改善。但却不能避免非各向异性(Not Anisotropic)引入并增强摩尔纹的缺点。 且马尔滤波更容易受没有针对中心高权重进行处理,而采用大卷积核进一步 增加了中心占比 的影响,出现 边缘扩散 和 非连续 的问题。 不过在取 δ1.0\\delta δ1.0 时,利用高斯算法对波动性的削弱,马尔滤波能够在抑制噪音的同时,进行有限程度并考虑相邻波动特征的边缘增强。这让马尔滤波配合原始数据下,能够达到更自然的滤波效果。所以,我们一般不采用马尔滤波检测边缘,而是使用其处理广义锐化场景。 马尔滤波的广义锐化应用 马尔滤波在广义锐化下的核函数是怎样的呢?参考拉普拉斯滤波,我们只需要替换掉权重部分即可: Ln(xc⃗)=S(xc⃗)+LoGn(xc⃗)=S(xc⃗)−K⋅∇2Fn(xc⃗) {\\displaystyle \\begin{aligned} \\mathcal{L}_n(\\vec{x_c}) =& S(\\vec{x_c}) + {LoG}_n(\\vec{x_c}) \\\\ =& S(\\vec{x_c}) -K \\cdot \\nabla^2 F_n(\\vec{x_c}) \\\\ \\end{aligned} } Ln(xc⃗)==S(xc⃗)+LoGn(xc⃗)S(xc⃗)−K⋅∇2Fn(xc⃗) 这里已经有一些复合函数的感觉了。如果我们将数据源 S(xc⃗)S(\\vec{x_c})S(xc⃗) 更换为高斯滤波结果,为区别于 Fn(xc⃗)F_n(\\vec{x_c})Fn(xc⃗) ,这里我们记为 Gn(xc⃗)G_n(\\vec{x_c})Gn(xc⃗) 。则整个处理函数就成为了,在高斯模糊的基础上再行锐化,达到模糊着色面,增强轮廓边缘的效果。此时的核函数为: Ln(xc⃗)=Gn(xc⃗)−K⋅∇2Fn(xc⃗) {\\displaystyle \\begin{aligned} \\mathcal{L}_n(\\vec{x_c}) =& G_n(\\vec{x_c}) -K \\cdot \\nabla^2 F_n(\\vec{x_c}) \\\\ \\end{aligned} } Ln(xc⃗)=Gn(xc⃗)−K⋅∇2Fn(xc⃗) 以此类推,我们也可以将数据源 S(xc⃗)S(\\vec{x_c})S(xc⃗) 换成其他滤波的结果,将马尔滤波(进一步衍生到所有可行的滤波函数)作为后级处理,构建连续的滤波处理流水线。这种思想,即是 滤波链路(Filter Chain) 技术的概念起源。 所以,应用于锐化的马尔滤波链路,也被称为 马尔锐化(Marr Sharpening),或简称为 朴素锐化(Simple Sharpening) 算法。 马尔锐化的 GLSL 渲染程序片 根据上文的分析,马尔锐化包含两部分:前级数据 和 后级数据。前级数据用于内容主体,后级数据用于叠加锐化。这里我们取用可配置是否采用高斯模糊,作为可选前级数据的程序片方案,对已实现的马尔滤波进行改造。 由于顶点程序片仍然可以被沿用,此处我们单独来看 像素程序片(Pixel Shader/Fragment Shader) 该怎么定义: precision mediump float; const int n = 3; const int m = 5; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform bool marr_blur; uniform vec2 pixel_bias; uniform float gaussian_matrix[n * n]; uniform float marr_matrix[m * m]; uniform float str_factor; uniform sampler2D target_texture; vec3 gauss_operation() { vec3 output_; for (int i = 0; i 显然,作为前级输入的高斯滤波,其滤波核大小并不一定需要和后级处理核大小保持一致。我们依旧采用 强度参数 str_factor,对锐化介入的强度进行了直接调控。而传入的 高斯算子 gaussian_matrix 、 马尔算子 marr_matrix 和 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_gaussian_kernel(step, delta) { let n = step * 2 + 1; let kernel = new Float32Array(n * n); let factor_1 = 1.0 / (Math.sqrt(2.0 * Math.PI) * delta); let factor_2 = 1.0 / (2.0 * delta * delta); let normalize_div = 0; for (let i = 0; i 至此,马尔锐化基本完成。 看来更稳定的边缘检测,还是需要依赖去中心化的索贝尔滤波了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 
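在进入索贝尔滤波之前,这里补充一个按上文 LoG 公式生成马尔算子的 JavaScript 示意草图。循环展开与归一化方式(对绝对值求和后整体归一)为演示用假设,并非唯一实现:
// 示意草图:按 LoG 公式生成 n x n 马尔算子,step 为半径、delta 为波动参数
function calculate_marr_kernel_sketch(step, delta) {
    let n = step * 2 + 1;
    let kernel = new Float32Array(n * n);
    let factor_1 = 1.0 / (Math.PI * Math.pow(delta, 4));
    let factor_2 = 1.0 / (2.0 * delta * delta);
    let normalize_div = 0.0;
    for (let i = 0; i < n; i++) {
        for (let j = 0; j < n; j++) {
            let dx = j - step;                          // 相对核中心的 x 偏移
            let dy = i - step;                          // 相对核中心的 y 偏移
            let r2 = (dx * dx + dy * dy) * factor_2;    // (Δx²+Δy²)/(2δ²)
            let value = -factor_1 * (1.0 - r2) * Math.exp(-r2);
            kernel[i * n + j] = value;
            normalize_div += Math.abs(value);           // LoG 核整体求和趋近 0,故按绝对值归一(演示用)
        }
    }
    for (let k = 0; k < n * n; k++) {
        kernel[k] /= normalize_div;
    }
    return kernel;
}
调用方式与前文的高斯算子一致,例如 calculate_marr_kernel_sketch(2, 1.4) 即可得到 5×5、δ=1.4 的马尔算子。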
"},"Chapter_3/Language/cn/Docs_3_2_5.html":{"url":"Chapter_3/Language/cn/Docs_3_2_5.html","title":"3.2.5 索贝尔滤波(Sobel Filter)","keywords":"","body":"3.2.5 索贝尔滤波(Sobel Filter) 索贝尔滤波(Sobel Filter) 是由 斯坦福人工智能实验室(SAIL [Stanford Artificial Intelligence Laboratory]) 的 艾尔文·索贝尔(Irwin Sobel,1940 - present) 和 格雷·费尔德曼(Gary Feldman,1942 - present) 于 1968 年提出的一种用于 边缘检测(Edge Detection) 的 去中心化(Center Insensitive)一阶离散微分算子 [15] 。 通过在构建 3×33 \\times 33×3 卷积核中,对横纵两个方向距离中心点不同偏移的相邻点,采用不同的方位权重占比的方式,针对性的计算边缘变化影响。其实,是将平面点漂移的方向向量,拆解为以卷积核中心点构建的 xyxyxy 坐标系下的方向分量。通过抽象方向分量的 一维简易高斯分布(1D Simple Gaussian Distribution) 密度函数到方差同位表示,来记录中心点的运动情况。而核内不同取值,则代表垂直于该取值方向的分量高斯分布函数切片,占当前相位的百分比( 归一化后 )。 因此,仍然取用大小 n×n=3×3n \\times n = 3 \\times 3n×n=3×3 ,中心点 xc⃗\\vec{x_c}xc⃗ 的卷积核。记原信号为 S(x)S(x)S(x) ,边缘检测索贝尔滤波核函数为 Sp(xc⃗)\\mathcal{S}_p(\\vec{x_c})Sp(xc⃗) ,则: Sp(xc⃗)=K⋅Gx2+Gy2 {\\displaystyle \\begin{aligned} \\mathcal{S}_p(\\vec{x_c}) =& K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\end{aligned} } Sp(xc⃗)=K⋅√Gx2+Gy2 横向 xxx 轴方向的滤波核函数 GxG_xGx 为: Gx(xc⃗)=Kx⋅[+1, 0, −1+2, 0, −2+1, 0, −1]⋅∑xyxc⃗Sxy∈R3×3 {\\displaystyle \\begin{aligned} G_x(\\vec{x_c}) =& K_x \\cdot { \\begin{bmatrix} +1 ,& \\ \\ 0 ,& \\ \\ -1 \\\\ +2 ,& \\ \\ 0 ,& \\ \\ -2 \\\\ +1 ,& \\ \\ 0 ,& \\ \\ -1 \\end{bmatrix} } \\cdot \\sum_{xy}^{\\vec{x_c}}S_{xy} \\in \\mathbb{R}^{3 \\times 3} \\\\ \\end{aligned} } Gx(xc⃗)=Kx⋅⎣⎡+1,+2,+1, 0, 0, 0, −1 −2 −1⎦⎤⋅xy∑xc⃗Sxy∈R3×3 横向 yyy 轴方向的滤波核函数 GyG_yGy 为: Gy(xc⃗)=Ky⋅[+1, +2, +10, 0,0−1, −2, −1]⋅∑xyxc⃗Sxy∈R3×3 {\\displaystyle \\begin{aligned} G_y(\\vec{x_c}) =& K_y \\cdot { \\begin{bmatrix} +1 ,& \\ +2 ,& \\ +1 \\\\ 0 ,& \\ \\ 0 ,& \\quad 0 \\\\ -1 ,& \\ -2 ,& \\ -1 \\end{bmatrix} } \\cdot \\sum_{xy}^{\\vec{x_c}}S_{xy} \\in \\mathbb{R}^{3 \\times 3} \\\\ \\end{aligned} } Gy(xc⃗)=Ky⋅⎣⎡+1,0,−1, +2, 0, −2, +10 −1⎦⎤⋅xy∑xc⃗Sxy∈R3×3 从上式可知,强度系数 KKK 可以拆分到 xyxyxy 各自方向的子核中,记为 K⃗=(Kx,Ky)\\vec{K} = (K_x,K_y)K⃗=(Kx,Ky) 。则,当 K⃗=(0, 1)\\vec{K} = (0,\\ 1)K⃗=(0, 1) 时 Sp(xc⃗)=K⋅Gy(xc⃗)\\mathcal{S}_p(\\vec{x_c}) = K \\cdot G_y(\\vec{x_c})Sp(xc⃗)=K⋅Gy(xc⃗) 只保留纵向滤波结果,当 K⃗=(1, 0)\\vec{K} = (1,\\ 0)K⃗=(1, 0) 时 Sp(xc⃗)=K⋅Gx(xc⃗)\\mathcal{S}_p(\\vec{x_c}) = K \\cdot G_x(\\vec{x_c})Sp(xc⃗)=K⋅Gx(xc⃗) 只保留横向滤波结果。不过,一般情况下我们不会只进行单边检测,因此方便起见还是采用在整体滤波结果上进行强度控制,即使用 K∈RK \\in \\mathbb{R}K∈R 来调整。 显然,索贝尔滤波是同时具有 梯度方向(Orientate) 和 强度(Magnitude) 的。记方向为 Θ\\ThetaΘ ,强度为 AAA 。则有: A=∣Sp(xc⃗)∣=K⋅Gx2+Gy2Θ=∠Sp(xc⃗) =atan2(Gy, Gx) {\\displaystyle \\begin{aligned} A =& \\vert {\\mathcal{S}_p(\\vec{x_c})} \\vert = K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\Theta =& \\angle \\mathcal{S}_p(\\vec{x_c})\\ = {atan2}(G_y,\\ G_x)\\\\ \\end{aligned} } A=Θ=∣Sp(xc⃗)∣=K⋅√Gx2+Gy2∠Sp(xc⃗) =atan2(Gy, Gx) 此时,有 LoGn(xc⃗)∣δ=1.4K=1.0{LoG}_n(\\vec{x_c})|_{\\delta=1.4}^{K=1.0}LoGn(xc⃗)∣δ=1.4K=1.0 可表示如下: LoGn(xc⃗)∣δ=1.4=∑xySxy⋅∣(MLoG∣δ=1.4K=1.0)∣1∈R9×9 {\\displaystyle \\begin{aligned} {LoG}_n(\\vec{x_c})|_{\\delta=1.4} =& \\sum_{xy}S_{xy} \\cdot \\vert (M_{LoG}|_{\\delta=1.4}^{K=1.0}) \\vert_1 \\in \\mathbb{R}^{9 \\times 9} \\\\ \\end{aligned} } LoGn(xc⃗)∣δ=1.4=xy∑Sxy⋅∣(MLoG∣δ=1.4K=1.0)∣1∈R9×9 因此,用索贝尔滤波也可以得到图像中心像素的 运动漂移信息,可用于 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 中获取像素点梯度矢量的计算方法。此部分我们在随后的章节中进行。 那么,基于索贝尔滤波的边界检测该怎样实现呢? 
索贝尔滤波的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 程序化索贝尔滤波的关键处理部分,依旧在 像素程序片(Pixel Shader/Fragment Shader)上和 CPU 的索贝尔算子的计算上。我们先看像素程序片(Pixel Shader/Fragment Shader)是怎么实现的: precision mediump float; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform vec2 pixel_bias; uniform mat3 sobel_matrix_x; uniform mat3 sobel_matrix_y; uniform sampler2D target_texture; void main() { vec3 output_ = only_edge? vec3(0) : texture2D(target_texture, fs_texcoord.xy).rgb; vec3 color_center_x; vec3 color_center_y; for (int i = 0; i 我们依旧采用 强度参数 str_factor,对锐化介入的强度进行直接调控。而传入的 索贝尔算子分为两个方向记为 sobel_matrix_x 和 sobel_matrix_y。同 相邻像素归一化的偏移距离 pixel_bias 的操作,只需要在执行前由 CPU 计算一次即可。以 JavaScript 语法实现: function pixel_bias(width, height) { return new Float32Array([ 1.0 / width, 1.0 / height ]); } function calculate_sobel_kernel(use_horizontal, str_factor) { let kernel = new Float32Array(use_horizontal ? [ +1.0, 0.0, -1.0, +2.0, 0.0, -2.0, +1.0, 0.0, -1.0 ] : [ +1.0, +2.0, +1.0, 0.0, 0.0, 0.0, -1.0, -2.0, -1.0 ]) for (let i = 0; i 至此,简易索贝尔滤波器程序片就完成了。 索贝尔滤波的局限性 虽然索贝尔滤波通过去中心化检测目标像素点周边的运动情况,检测结果也 相对准确,并摆脱了 由卷积核中心权值造成像素富集而导致对干扰抗性较弱的问题。但也正因此 进一步扩大了边缘扩散(Edge Spread)的风险。且当物体轮廓处的灰度(光亮度)变化过于发散时,算法会有一定程度的丢失,即 对抗弱边缘(Weak Edge)的能力较差。 不过,这些缺点在只需要边缘位置的情况下,可以通过 阈值限定二值化(Thresholding) 来得到一定程度的改善( 这种做法经常出现在机器学习的数据前处理过程中 )。由于一般音视频工程并不会需要如此精度,考虑到索贝尔滤波的快捷、简单、高效和高干扰抗性的特点,算法本身常被用于各种场景下的 边缘数据提取 和 像素信息预测 过程。但本身不适合(也不应该)作为噪音抑制算法使用。 经过几个滤波算法的辨析,我们发现想要真正的有效抑制噪音,达到自然模糊且边缘保存的目的,单纯以多 非各向异性 滤波器组合的形式,还是很难得到同 各向异性 滤波算法相同的效果。 当然,不同的算法各有自身的优势,并非是独一的非此即彼的对立关系。作为工程师,在不同需求下,还是要灵活取用和组合达成所求。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_2_6.html":{"url":"Chapter_3/Language/cn/Docs_3_2_6.html","title":"3.2.6 各向异性扩散(Anisotropic Diffusion)","keywords":"","body":"3.2.6 各向异性扩散(Anisotropic Diffusion) 【待补充】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_3.html":{"url":"Chapter_3/Language/cn/Docs_3_3.html","title":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理","keywords":"","body":"3.3 时间冗余控制 - 常用特征提取与朴素阈值处理 在本节之前,本书已经讲解了如何分离的处理 一维动态音频 和 二维静态图片 信号。如果我们 将一系列图片以时间轴串联,就得到一组由二维静态信号按序构成的二维动态信号。这种类型的信号,被称为 视频流(Visual Stream)。 相较于一维信号,静态二维信号本就具有 信息密度高 的特征。而动态化则会进一步 加剧 其对 算力资源 的消耗。不经合适的方法控制数据,将会产生大量的 冗余信息。 严重不利于数据的保存、传输和处理。 考虑到被采样的运动物体,其前后总是存在时序关联性的客观事实。视频流作为观察物体得到的数据载体,相邻的两个时间节点采样图片,像素值上必然也可以抽象出相应运动特征的 位移向量投影,得到 关联前后数据的变化关系。借此,工程上就可以利用像素的漂移情况,来筛选出未发生改变的数据,从而复用前值以求降低不必要计算和更新,减少消耗。 为此,需要对 运动区域进行检测,并提取运动矢量信息。 在线演示 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_3_1.html":{"url":"Chapter_3/Language/cn/Docs_3_3_1.html","title":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient])","keywords":"","body":"3.3.1 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 在前文中,我们提到了索贝尔滤波(Sobel Filter)卷积核对中心点周边方向信息的提炼,可以被用来获取方向梯度直方图的梯度矢量计算中。那么什么是方向梯度直方图呢? 方向梯度直方图最早的 概念原型(Prototype) 来自于 罗伯特·麦康纳尔(Robert K. 
McConnell) 在 1986 年申请的有关模式识别专利中,对 视野(FoV [Field of View]) 方向性输入产生输出结果差异的判断过程。并于 1994 年 三菱电子研究实验室(Mitsubishi Electric Research Laboratories) 在手势识别应用的区域检测过程中,首次总结为当前称谓 [16] 。最终经过 2005 年 CVPR 顶会参会论文验证,重新确认了 HOG 在动态检测上的高适配度,才开始被人熟知 [17] 。 方向梯度直方图(HOG [Histogram of Oriented Gradient]) 是对用于提炼并描述区域范围内像素漂移情况方法论的概念抽象。是对过程的抽象,而非对结果的抽象。由于本身最终运算能够表示为处理单元形式,因而属于 特征描述算子(Feature Descriptor) 的一种。整体思想则是在单元间隔均匀的卷积核内,使用重叠的局部梯度提炼算法并 录表统计归一化(Normalization),以取得中心点变化方向矢量。方法常结合 阈值限定(Thresholding) 筛选结果,提高运动预测的准确度。 显然,方向梯度直方图并不只适用于索贝尔, 只要能够提供中心点周边梯度变化的大小和方向的算子,都可以被应用于 HOG 的求解中。一个方向梯度直方图是否优秀,最大的影响点就在于梯度提炼的是否精准。 HOG 的标准处理流 HOG 有一套相对固定的标准过程的。基本可以按照如下顺序进行: 数据优化,通过滤波算法(如高斯滤波),减少干扰信息并增强灰度(光亮度)对比; 梯度计算,通过梯度滤波器(如索贝尔滤波)提取图像每个像素的梯度矢量; 分组抽象,指定梯度矢量采样卷积核范围,即分组(Cell) 矢量合并,将分组内所有像素的梯度矢量,以方向投票统计合并权重,获取 HOG 块归一化,指定块大小(由分组为单位),整合 HOG 统计结果并归一化快内分组权重 五个步骤,共同构成了方向梯度直方图方法论本身。 且四五两步概念不同,但密不可分。 数据优化 数据优化的目的是为了增强光亮度变化差异,并减少干扰噪声,从而更好的保存并放大像素梯度变化情况。我们记原信号为 S(x)S(x)S(x) ,记经过滤波降噪和修饰后的灰度(光亮度)数据为 Sg(x)S_g(x)Sg(x) 。从 S(x)S(x)S(x) 到 Sg(x)S_g(x)Sg(x) 的处理过程就不再赘述(见滤波,类比处理)。记经过优化函数 Og(x)O_g(x)Og(x) 处理,以 Sg(x)S_g(x)Sg(x) 获取的优化结果为 So(x)S_o(x)So(x) 。那么,相对简单的处理方式,就是直接对 Sg(x)S_g(x)Sg(x) 进行 伽马矫正(Gamma Correction)来得到 So(x)S_o(x)So(x) 。取伽马因子为 γ\\gammaγ ,矫正系数(Adjust Factor)为 AAA (一般情况 A=1.0A = 1.0A=1.0 为常量),有: Og(x)=Gamma(S)=A⋅S(x)γ {\\displaystyle \\begin{aligned} O_g(x) =& Gamma(S) = A \\cdot S(x)^{\\gamma} \\\\ \\end{aligned} } Og(x)=Gamma(S)=A⋅S(x)γ 伽马矫正(Gamma Correction) 本是用于应对,早期 阴极射线管(CRT [Cathode Ray Tube])显示器 的电子偏转特征,引入的采样源数据非线性转换算法。传统的 CRT 显示器在显示时就会完成对偏转数据的自然逆向过程,而在 液晶显示器(LCD [Liquid Crystal Display]) 上,则需要 主动的实现这一反向运算,否则会面临数据亮度过爆的问题。 由于采样时采用 γ1\\gamma γ1 应用于数据修正, 所以 γ1\\gamma γ1 时的 γ\\gammaγ 值被称为 编码伽马值(Encoding Gamma)。相应的,γ>1\\gamma > 1γ>1 时的 γ\\gammaγ 值被称为 解码伽马值(Decoding Gamma)。而采样到还原的过程中,对伽马矫正的不同运用被分别称为 伽马编码(Gamma Encode) 和 伽马解码(Gamma Decode)。 图 3-4 原数据经过伽马编解码(伽马矫正)的还原过程示意图 伽马矫正本身的作用正是针对原图色彩通道数据,进行非线性的映射。衍生为对图片整体光亮度的调节,因此在灰度值上的体现最为明显。我们利用这种特性,来增强图片的对比信息,放大像素梯度变化。 这一步,通常取用 γ∈[0.45, 1.25]\\gamma \\in [0.45,\\ 1.25]γ∈[0.45, 1.25] 区间内的值,或 γ=0.5\\gamma = 0.5γ=0.5 的原论文推荐值来进行修正。得到用于后续处理的灰度数据源 So(x)S_o(x)So(x) 。 梯度计算 在经过优化得到高对比度的 灰度(光亮度)图 后,就可以利用一些方向梯度卷积核算法,来计算每一个像素点光亮度变换的梯度矢量了。 此时应用边缘检测索贝尔滤波,目的同 HOG 的默认设定中,采用横纵方向均取 单一中线 的简化 普雷维特算子(Prewitt Operator),以求取梯度 方向(Orientate) 和 强度(Magnitude) 的作用一致。显然,并不只有索贝尔算法或普雷维特算法,适用于方向梯度直方图中梯度矢量的计算。只要能够提供中心点周边梯度变化的大小和方向的算子,都可以被应用于 HOG 的此步的求解计算中。 我们记方向为 Θ\\ThetaΘ ,强度为 AAA ,横向 xxx 轴方向的滤波核函数 GxG_xGx ,纵向 yyy 轴方向的滤波核函数 GyG_yGy 。强度系数 KKK 为同态值 K=Kx=KyK= K_x = K_yK=Kx=Ky$ 。此处不含推导展示结论。 记 边缘检测普雷维特滤波核函数 为 Pp(xc⃗)\\mathcal{P}_p(\\vec{x_c})Pp(xc⃗) ,有: Gx=Kx⋅[+1,0,−1] ⋅So(xc⃗)3×1Gy=Ky⋅[+1,0,−1]T⋅So(xc⃗)1×3A=∣Pp(xc⃗)∣=K⋅Gx2+Gy2Θ=∠Pp(xc⃗) =atan2(Gy, Gx) {\\displaystyle \\begin{aligned} G_x =& K_x \\cdot { \\begin{bmatrix} +1 ,& \\quad 0 ,& \\quad -1 \\end{bmatrix} } \\ \\cdot S_o(\\vec{x_c})^{3 \\times 1} \\\\ G_y =& K_y \\cdot { \\begin{bmatrix} +1 ,& \\quad 0 ,& \\quad -1 \\end{bmatrix} ^{T} } \\cdot S_o(\\vec{x_c})^{1 \\times 3} \\\\ A =& \\vert {\\mathcal{P}_p(\\vec{x_c})} \\vert = K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\Theta =& \\angle \\mathcal{P}_p(\\vec{x_c})\\ = {atan2}(G_y,\\ G_x)\\\\ \\end{aligned} } Gx=Gy=A=Θ=Kx⋅[+1,0,−1] ⋅So(xc⃗)3×1Ky⋅[+1,0,−1]T⋅So(xc⃗)1×3∣Pp(xc⃗)∣=K⋅√Gx2+Gy2∠Pp(xc⃗) =atan2(Gy, Gx) 记 边缘检测索贝尔滤波核函数 为 Sp(xc⃗)\\mathcal{S}_p(\\vec{x_c})Sp(xc⃗) ,有: Gx=Kx⋅[+1,0, −1+2,0, −2+1,0, −1]⋅So(xc⃗)3×3Gy=Ky⋅[+1, +2, +1 0,0,0−1, −2, −1]⋅So(xc⃗)3×3A=∣Sp(xc⃗)∣=K⋅Gx2+Gy2Θ=∠Sp(xc⃗) =atan2(Gy, Gx) {\\displaystyle \\begin{aligned} G_x =& K_x \\cdot { \\begin{bmatrix} +1 ,& \\quad \\quad 0 ,& \\quad \\ -1 \\\\ +2 ,& \\quad 
\\quad 0 ,& \\quad \\ -2 \\\\ +1 ,& \\quad \\quad 0 ,& \\quad \\ -1 \\end{bmatrix} } \\cdot S_o(\\vec{x_c})^{3 \\times 3} \\\\ G_y =& K_y \\cdot { \\begin{bmatrix} +1 ,& \\quad \\ +2 ,& \\quad \\ +1 \\\\ \\ \\ \\ 0 ,& \\quad \\quad 0 ,& \\quad \\quad 0 \\\\ -1 ,& \\quad \\ -2 ,& \\quad \\ -1 \\end{bmatrix} } \\cdot S_o(\\vec{x_c})^{3 \\times 3} \\\\ A =& \\vert {\\mathcal{S}_p(\\vec{x_c})} \\vert = K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\Theta =& \\angle \\mathcal{S}_p(\\vec{x_c})\\ = {atan2}(G_y,\\ G_x)\\\\ \\end{aligned} } Gx=Gy=A=Θ=Kx⋅⎣⎡+1,+2,+1,0,0,0, −1 −2 −1⎦⎤⋅So(xc⃗)3×3Ky⋅⎣⎡+1, 0,−1, +2,0, −2, +10 −1⎦⎤⋅So(xc⃗)3×3∣Sp(xc⃗)∣=K⋅√Gx2+Gy2∠Sp(xc⃗) =atan2(Gy, Gx) 更明确的,当我们采用不同算法进行梯度计算时,梯度提炼的结果,将会在较大程度上影响最终得到的方向梯度直方图。是需要更准确、更快捷,还是需要高抗性、低波动,应以实际工程角度考量。根据具体需要来采用不同的边缘检测算法。 而梯度方向和强度的计算则可统一为共识: A=K⋅Gx2+Gy2Θ=∠ [tan−1(GyGx)] {\\displaystyle \\begin{aligned} A =& K \\cdot \\sqrt{ {G_x}^{2} + {G_y}^{2} } \\\\ \\Theta =& \\angle \\ [{tan^{-1}}(\\tfrac{G_y}{G_x})] \\\\ \\end{aligned} } A=Θ=K⋅√Gx2+Gy2∠ [tan−1(GxGy)] 称为 通用卷积核梯度矢量公式(Formula of Kernel Gradient Vector)。 经过此步计算后,灰度数据源 So(x)S_o(x)So(x) 的输入就被转换为原信号为 S(x)S(x)S(x) 的所有像素点,梯度方向数据集 Θ(x)\\Theta(x)Θ(x) 和 梯度强度数据集 A(x)A(x)A(x) 。不过此时的数据量相对较大,不便于计算处理,还需 简化信息量。 分组抽象 & 矢量合并 分组抽象的目的是为了提炼每个像素点的数据,汇总分组内逐个像素特征到分组整体的单元特征。 由于原有梯度方向的平面完整性,以 Θ\\ThetaΘ 范围即便只限定为整数角,也包含 [0∘, 360∘)[0^{\\circ},\\ 360^{\\circ})[0∘, 360∘) 共 360360360 个取值。 这样造成的数据膨胀,不利于有限算力的处理。 因此,以尽可能不损失方向包含实际意义为前提, 将角度按照权重分割 来表示原梯度包含信息,是个不错的办法。 假设我们将 [0∘, 360∘)[0^{\\circ},\\ 360^{\\circ})[0∘, 360∘) 按照 ∠Θ=[Θ0 , ... , Θθ−1]\\angle \\Theta = [\\Theta_0\\ ,\\ ...\\ ,\\ \\Theta_{\\theta-1}]∠Θ=[Θ0 , ... , Θθ−1] 的边界角度,拆分为 θ\\thetaθ 个指定方向。记存在像素点 xc⃗\\vec{x_c}xc⃗ 的梯度 G⃗(xc⃗)=(Ac, Θc)\\vec{G}(\\vec{x_c}) = (A_c,\\ \\Theta_c)G⃗(xc⃗)=(Ac, Θc) 的方向落于角度区间 [Θa, Θb)[\\Theta_a,\\ \\Theta_b)[Θa, Θb) 内,有: Ac=Wa⋅Aa+Wb⋅AbΘc=Wa⋅Θa+Wb⋅ΘbWa=Θc−ΘaΘb−ΘaWb=Θb−ΘcΘb−Θa {\\displaystyle \\begin{aligned} A_c = W_a & \\cdot A_a + W_b \\cdot A_b \\\\ \\Theta_c = W_a & \\cdot \\Theta_a + W_b \\cdot \\Theta_b \\\\ W_a = \\frac{\\Theta_c - \\Theta_a}{\\Theta_b - \\Theta_a} & \\quad \\quad W_b = \\frac{\\Theta_b - \\Theta_c}{\\Theta_b - \\Theta_a} \\\\ \\end{aligned} } Ac=WaΘc=WaWa=Θb−ΘaΘc−Θa⋅Aa+Wb⋅Ab⋅Θa+Wb⋅ΘbWb=Θb−ΘaΘb−Θc 其中 Wa+Wb=1W_a + W_b = 1Wa+Wb=1 ,按照权重 WaW_aWa 、 WbW_bWb 即可拆分 G⃗(xc⃗)\\vec{G}(\\vec{x_c})G⃗(xc⃗) 数据到 Θa\\Theta_aΘa 、 Θb\\Theta_bΘb 角度分量混合表示。记两个角度方向的分量分别为 Ga⃗\\vec{G_a}Ga⃗ 、 Gb⃗\\vec{G_b}Gb⃗ ,则: Ga⃗=(Wa⋅Ac, Wa⋅Θc)Gb⃗=(Wb⋅Ac, Wa⋅Θc)G⃗(xc⃗)=Ga⃗+Gb⃗ {\\displaystyle \\begin{aligned} \\vec{G_a} =& (W_a \\cdot A_c ,\\ W_a \\cdot \\Theta_c) \\\\ \\vec{G_b} =& (W_b \\cdot A_c ,\\ W_a \\cdot \\Theta_c) \\\\ \\vec{G}(\\vec{x_c}) &= \\vec{G_a} + \\vec{G_b} \\\\ \\end{aligned} } Ga⃗=Gb⃗=G⃗(xc⃗)(Wa⋅Ac, Wa⋅Θc)(Wb⋅Ac, Wa⋅Θc)=Ga⃗+Gb⃗ 显然,以 ∠Θ=[Θ0 , ... , Θθ−1]\\angle \\Theta = [\\Theta_0\\ ,\\ ...\\ ,\\ \\Theta_{\\theta-1}]∠Θ=[Θ0 , ... , Θθ−1] 指定方向的矢量合形式表示, G⃗(xc⃗)\\vec{G}(\\vec{x_c})G⃗(xc⃗) 除了 Θa\\Theta_aΘa 、 Θb\\Theta_bΘb 角度外,其余角度分量为 000 , 有: G⃗(xc⃗)=∠Θ(0, ... ,Wa,Wb, ... ,0) {\\displaystyle \\begin{aligned} \\vec{G}(\\vec{x_c}) &= \\angle \\Theta(0, \\ ...\\ , W_a, W_b,\\ ...\\ ,0) \\\\ \\end{aligned} } G⃗(xc⃗)=∠Θ(0, ... ,Wa,Wb, ... ,0) 由于不需要考虑反向的数据还原,核内采样按照 ∠Θ=[Θ0 , ... , Θθ−1]\\angle \\Theta = [\\Theta_0\\ ,\\ ...\\ ,\\ \\Theta_{\\theta-1}]∠Θ=[Θ0 , ... 
, Θθ−1] 的边界角度的方向矢量合形式求和,即可完成分组内的特征整合。记得到分组的 θ\\thetaθ 维特征向量 Cell⃗\\vec{Cell}Cell⃗ ,则: Cell⃗=∑∠G⃗(xc⃗) {\\displaystyle \\begin{aligned} \\vec{Cell} &= \\sum \\angle \\vec{G}(\\vec{x_c}) \\\\ \\end{aligned} } Cell⃗=∑∠G⃗(xc⃗) 那么现在的问题就是如何分组,或者分为几组了。 当采样核为 n×nn \\times nn×n 时,我们取边界整数点出发过核心 (12n, 12n)(\\tfrac {1}{2}n,\\ \\tfrac {1}{2}n)(21n, 21n) 的连线,加上对角线一起作为分组分割线。 由任意两条相邻分割线间的夹角,构成以核心为原点的角度分组。 所以, ∠Θ=[Θ0 , ... , Θθ−1]\\angle \\Theta = [\\Theta_0\\ ,\\ ...\\ ,\\ \\Theta_{\\theta-1}]∠Θ=[Θ0 , ... , Θθ−1] 代表的正是分割线角度。因此,当不区分夹角及其对角方向时,中心角能够分为 θ=n+1\\theta = n + 1θ=n+1 组,称为 无符号梯度(Unsigned Gradient) 分组。当考虑夹角与对角方向互反时,中心角能够分为 θ=2(n+1)\\theta = 2(n+1)θ=2(n+1) 组,称为 有符号梯度(Signed Gradient) 分组。 采样核一般为 n×n=8×8n \\times n = 8 \\times 8n×n=8×8 大小,此时无符号梯度以方向标记,可分为 999 组即: ∠Θ=[0∘, 20∘, 40∘, 60∘, 80∘, 100∘, 120∘, 140∘, 160∘] {\\displaystyle \\begin{aligned} \\angle \\Theta =& [0^{\\circ},\\ 20^{\\circ},\\ 40^{\\circ},\\ 60^{\\circ},\\ 80^{\\circ},\\ 100^{\\circ},\\ 120^{\\circ},\\ 140^{\\circ},\\ 160^{\\circ}] \\\\ \\end{aligned} } ∠Θ=[0∘, 20∘, 40∘, 60∘, 80∘, 100∘, 120∘, 140∘, 160∘] 而有符号梯度则可分为 181818 组: ∠Θ=[∠Θlt∠Θrb]=[0∘,20∘,40∘,60∘,80∘,100∘,120∘,140∘,160∘180∘,200∘,220∘,240∘,260∘,280∘,300∘,320∘,340∘] {\\displaystyle \\begin{aligned} \\angle \\Theta = \\begin{bmatrix} &\\angle \\Theta_{lt} \\\\ &\\angle \\Theta_{rb} \\end{bmatrix} = \\begin{bmatrix} 0^{\\circ},& 20^{\\circ},& 40^{\\circ},& 60^{\\circ},& 80^{\\circ},& 100^{\\circ},& 120^{\\circ},& 140^{\\circ},& 160^{\\circ} \\\\ 180^{\\circ},& 200^{\\circ},& 220^{\\circ},& 240^{\\circ},& 260^{\\circ},& 280^{\\circ},& 300^{\\circ},& 320^{\\circ},& 340^{\\circ} \\end{bmatrix} \\end{aligned} } ∠Θ=[∠Θlt∠Θrb]=[0∘,180∘,20∘,200∘,40∘,220∘,60∘,240∘,80∘,260∘,100∘,280∘,120∘,300∘,140∘,320∘,160∘340∘] 以无符号梯度的 999 组分组为例,统计只需累计入组即可: 图 3-5 核大小 8x8 的无符号梯度(Unsigned Gradient)分组示意图 随后依次统计分组的采样核内数据。上图数据统计结果如下(概略图): 图 3-6 无符号梯度分组的单组采样核内统计结果示意直方图 统计完毕时,特征向量 Cell⃗\\vec{Cell}Cell⃗ 随即生成完毕。我们以 WθW_{\\theta}Wθ 表示分组的特征向量,在方向 θ\\thetaθ 上的强度大小(即此方向矢量的秩),则对于无符号梯度(Unsigned Gradient)分组: Cell⃗=∑∠G⃗(xc⃗)=Θ⃗(W0∘, ... ,W160∘)∈R9×1 {\\displaystyle \\begin{aligned} \\vec{Cell} &= \\sum \\angle \\vec{G}(\\vec{x_c}) = \\vec{\\Theta}(W_{0^{\\circ}}, \\ ...\\ , W_{160^{\\circ}}) \\in \\mathbb{R}^{9 \\times 1} \\\\ \\end{aligned} } Cell⃗=∑∠G⃗(xc⃗)=Θ⃗(W0∘, ... ,W160∘)∈R9×1 同样,对有符号梯度(Signed Gradient)分组: Cell⃗=∑∠G⃗(xc⃗)=Θ⃗(W0∘, ... ,W160∘, ... ,W340∘)∈R18×1 {\\displaystyle \\begin{aligned} \\vec{Cell} &= \\sum \\angle \\vec{G}(\\vec{x_c}) = \\vec{\\Theta}(W_{0^{\\circ}}, \\ ...\\ , W_{160^{\\circ}}, \\ ...\\ , W_{340^{\\circ}}) \\in \\mathbb{R}^{18 \\times 1} \\\\ \\end{aligned} } Cell⃗=∑∠G⃗(xc⃗)=Θ⃗(W0∘, ... ,W160∘, ... ,W340∘)∈R18×1 至此,完成分组提炼。 这种对数据梯度的蒸馏手段非常重要,因为它不只可以运用于物体识别等情况的中间步骤,也可以被运用于粗糙的运动特征检测。 而从分组的数据得来的分组特征,还需要归一化才能被有效使用。 块归一化 由于分组内梯度矢量的分解叠加有可能会使某个方向上的梯度强度 远超其他方向,因而造成该方向上的灰度(光亮度)变化会极大的影响结果。 这样的影响当然是有利的,但无法相对统一的权重,也会给处理带来大量的不确定性。 如图例: 图 3-7 块归一化说明图例(数据源) 取绿色框中以 n×n=8×8n \\times n = 8 \\times 8n×n=8×8 采样核,经过前几步以无符号梯度(Unsigned Gradient)方式处理,会得到的四个分组: 图 3-8 图例(数据源)绿色框中四个分组特征向量直方图表示 如果能够将这种变化趋势原封不动的保存下来,并缩小尺度到统一标准,就可以实现即保证特征不被不必要的削减,也有足够一致的度量衡。 因此,归一化就是解决办法。 归一化(Normalization) 是将目标数据集,按照总体权重等比放缩到指定区间范围的一种数学工具。通常我们选取当前采样分组包含的数据,即为归一化的目标数据集。组与组间独立归一化。但 块归一化(Block Normalization) 和一般情况下不完全一样,是以 块(Block) 为样本源而非 组(Cell) 样本源本身,来进行归一化处理的。 什么是块(Block)呢? 
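在回答这个问题之前,先把上文"矢量合并"一步落到代码上:下面这段 JavaScript 示意草图,把单个像素的梯度 (magnitude, theta) 按与相邻分桶角度的距离线性加权,累加进无符号梯度的 9 维分组直方图。分桶与插值写法按常见 HOG 实现给出,命名均为演示用假设:
// 示意草图:bins 为长度 9 的分组直方图(依次对应 0°, 20°, ..., 160°)
function accumulate_cell_bin_sketch(bins, magnitude, theta) {
    const BIN_GAP = Math.PI / 9.0;             // 20°
    let angle = theta % Math.PI;               // 无符号梯度:方向折叠到 [0°, 180°)
    if (angle < 0.0) angle += Math.PI;
    let pos = angle / BIN_GAP;                 // 连续的桶坐标
    let low = Math.floor(pos);                 // 较小角度的相邻桶
    let high = (low + 1) % 9;                  // 较大角度的相邻桶,160° 与 0° 环回
    let w_high = pos - low;                    // 离 low 越远,high 桶分到的权重越大
    bins[low % 9] += magnitude * (1.0 - w_high);
    bins[high]    += magnitude * w_high;
}
把分组内所有像素依次累加进 bins,即得到该分组的特征向量 Cell。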
块(Block)是对于由一系列分组(Cell)按照一定规则(例如四叉树、标准单元等)组合构成的分组并集单元的称谓。 是组的集合。对块的分法有各种形式,但在方向梯度直方图中,使用的是一种直接切入的固定设置。记块大小为 N×NN \\times NN×N ,块的最小单位为组,则取 N×N=2×2N \\times N = 2 \\times 2N×N=2×2 的固定大小组采样,构成 HOG 的分块。即图例中的绿色方块: 图 3-9 图例(数据源)块划分单一块示意图 同分组一样,分块的目的也是为了更好的将特征数据进行汇总。只不过分块时的基础单元,从分组时的像素梯度矢量,变为了分组特征向量。记分块为 BlockBlockBlock ,分块特征向量为 Block⃗\\vec{Block}Block⃗ 。仍以 ∣target∣1\\vert target \\vert_1∣target∣1 表示归一化操作,有: Block⃗=∣[Cell⃗1, Cell⃗2, Cell⃗3, Cell⃗4]∣1∈R(N×N)⋅θ×1 {\\displaystyle \\begin{aligned} \\vec{Block} &= \\vert [\\vec{Cell}_1,\\ \\vec{Cell}_2,\\ \\vec{Cell}_3,\\ \\vec{Cell}_4] \\vert_1 \\in \\mathbb{R}^{(N \\times N) \\cdot \\theta \\times 1} \\\\ \\end{aligned} } Block⃗=∣[Cell⃗1, Cell⃗2, Cell⃗3, Cell⃗4]∣1∈R(N×N)⋅θ×1 可见,在 2×22 \\times 22×2 大小的固定分块下,分块特征向量 的维度即为分组特征向量方向的 444 倍,即 (N×N)⋅θ(N \\times N) \\cdot \\theta(N×N)⋅θ 。如果我们采用 L-2 归一化(即 L2范数)处理,记归一化因子为 L2L_2L2 ,则: L2=∣Cell⃗1∣2+ ∣Cell⃗2∣2+ ∣Cell⃗3∣2+ ∣Cell⃗4∣2=∑(∣∠G⃗1∣2+ ∣∠G⃗2∣2+ ∣∠G⃗3∣2+ ∣∠G⃗4∣2)Block⃗=1L2[Cell⃗1, Cell⃗2, Cell⃗3, Cell⃗4]∈R(N×N)⋅θ×1 {\\displaystyle \\begin{aligned} L_2 &= \\sqrt{|\\vec{Cell}_1 |^2+\\ |\\vec{Cell}_2 |^2+\\ |\\vec{Cell}_3 |^2+\\ |\\vec{Cell}_4 |^2} \\\\ &= \\sqrt{\\sum (| \\angle \\vec{G}_1|^2 +\\ | \\angle \\vec{G}_2|^2 +\\ | \\angle \\vec{G}_3|^2 +\\ | \\angle \\vec{G}_4|^2 )} \\\\ \\vec{Block} &= \\frac{1}{L_2}[\\vec{Cell}_1,\\ \\vec{Cell}_2,\\ \\vec{Cell}_3,\\ \\vec{Cell}_4] \\in \\mathbb{R}^{(N \\times N) \\cdot \\theta \\times 1} \\end{aligned} } L2Block⃗=√∣Cell⃗1∣2+ ∣Cell⃗2∣2+ ∣Cell⃗3∣2+ ∣Cell⃗4∣2=√∑(∣∠G⃗1∣2+ ∣∠G⃗2∣2+ ∣∠G⃗3∣2+ ∣∠G⃗4∣2)=L21[Cell⃗1, Cell⃗2, Cell⃗3, Cell⃗4]∈R(N×N)⋅θ×1 那么,对图例中的分组进行块归一化到 [0, 1][0,\\ 1][0, 1] 区间,所得如下: 图 3-10 图例(数据源)绿色框对应块的块归一化特征向量结果 之后,按照块大小为步长,对全图分块计算即可得到输入图片的方向梯度直方图运算结果。达成对图片整体和分块区域的运动检测目的。 那么,在具体实践中是怎么做的呢? 同前文中对滤波的处理方法类似,对于此类存在核操作流的方法论,为了充分利用 GPU 并行计算能力,通用思路仍然是抽象为可执行的渲染程序片来交由 GPU 加速。 以索贝尔梯度计算 HOG 的 GLSL 渲染程序片 现在,我们可以依据理论来做 GPU 的动态管线程序片封装。 首先,我们需要定义 顶点程序片(Vertex Shader)。通过该程序片指定 GPU 的绘制区域,以及纹理与物体的点位映射。由于我们是对整个视窗界面进行处理,所以可以采用对传入的顶点数据进行坐标变换的方式,来求得顶点映射的纹理坐标,减少少量数据通信: attribute vec3 position; varying vec4 fs_position; varying vec2 fs_texcoord; void main() { fs_position = vec4(position.x, position.y, position.z, 1.0); fs_texcoord = (position.xy + vec2(1.0, 1.0)) / 2.0; gl_Position = fs_position; } 程序化 HOG 的关键处理部分,依旧在 像素程序片(Pixel Shader/Fragment Shader) 上。相比之前对于滤波算法的实现,这里 显然复杂得多 : precision mediump float; const float PI = 3.1415927; const int n = 8; const int N = 2; const int SIZE_CV = (n + 1); const int SIZE_BV = /*N * N **/ SIZE_CV; // for orientation weight sum const float ANGLE_GAP = 20.0 * PI / 180.0; const vec3 ANGLE_0 = vec3(cos(ANGLE_GAP * 0.0), sin(ANGLE_GAP * 0.0), 100); // x=cos y=sin z=cot const vec3 ANGLE_20 = vec3(cos(ANGLE_GAP * 1.0), sin(ANGLE_GAP * 1.0), 2.74747742); const vec3 ANGLE_40 = vec3(cos(ANGLE_GAP * 2.0), sin(ANGLE_GAP * 2.0), 1.19175359); const vec3 ANGLE_60 = vec3(cos(ANGLE_GAP * 3.0), sin(ANGLE_GAP * 3.0), 0.57735027); const vec3 ANGLE_80 = vec3(cos(ANGLE_GAP * 4.0), sin(ANGLE_GAP * 4.0), 0.17632698); const vec3 ANGLE_100 = vec3(cos(ANGLE_GAP * 5.0), sin(ANGLE_GAP * 5.0), -0.17632698); const vec3 ANGLE_120 = vec3(cos(ANGLE_GAP * 6.0), sin(ANGLE_GAP * 6.0), -0.57735027); const vec3 ANGLE_140 = vec3(cos(ANGLE_GAP * 7.0), sin(ANGLE_GAP * 7.0), -1.19175359); const vec3 ANGLE_160 = vec3(cos(ANGLE_GAP * 8.0), sin(ANGLE_GAP * 8.0), -2.74747742); const vec3 ANGLE_180 = vec3(cos(ANGLE_GAP * 9.0), sin(ANGLE_GAP * 9.0), -100); const float CELL_TILE_SIZE = 8.0; //pixels const float BLOCK_TILE_SIZE = 2.0; //cells const float HOG_TILE_SIZE = 16.0; //pixels(n*N) 
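// 补充注释:HOG_SHAFT_* / HOG_COLOR / HOG_MIN_MAGNITUDE 等常量只服务于结果的图形化展示
// (在每个区块上叠加绘制带方向的权重柱形轴),并不参与 HOG 特征本身的计算;
// 其中 HOG_MIN_MAGNITUDE 为过滤过小能量密度的显示阈值,数值为演示用经验值。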
const float HOG_SHAFT_LENGTH = 14.0; const float HOG_SHAFT_THICKNESS = 0.5; const float HOG_SHAFT_HEAD_RATE = 64.0; const vec3 HOG_COLOR = vec3(1.0, 1.0, 0.0); const float HOG_MIN_MAGNITUDE = 0.1; varying vec4 fs_position; varying vec2 fs_texcoord; uniform bool only_edge; uniform vec2 pixel_bias; uniform mat3 sobel_matrix_x; uniform mat3 sobel_matrix_y; uniform float hog_magnitude_limit; uniform sampler2D target_texture; /* Simple Grey */ float grey(vec3 c) { return 0.299 * c[0] + 0.587 * c[1] + 0.114 * c[2]; } /* Calucate HOG Orient-hog Density (pixel by pixel) */ float hog_density(vec2 target_coord, vec3 field_vector) { vec2 ori_pos = target_coord.xy / pixel_bias; vec2 tile_center = (floor(ori_pos / HOG_TILE_SIZE) + 0.5) * HOG_TILE_SIZE; float magnitude = abs(field_vector.z); if (magnitude > max(HOG_MIN_MAGNITUDE, hog_magnitude_limit)) { float distance = clamp(magnitude * HOG_SHAFT_LENGTH, 0.1, HOG_SHAFT_LENGTH); vec2 normalizer = normalize(field_vector.xy); vec2 tile_offset = ori_pos - tile_center; float density = HOG_SHAFT_THICKNESS / HOG_SHAFT_HEAD_RATE - max( abs(dot(tile_offset, vec2(+normalizer.y, -normalizer.x))), abs(dot(tile_offset, vec2(+normalizer.x, +normalizer.y))) - distance ); return clamp(1.0 + density, 0.0, 1.0); } return 0.0; } /* Calucate Sobel Field at target center */ vec3 sobel_edge_detection(vec2 target_coord) { float gradient_center_x; float gradient_center_y; for (int i = 0; i = seek_to && seek_to >= ANGLE_20.z){ wight_as = abs((seek_to - ANGLE_0.z)/(ANGLE_20.z - ANGLE_0.z)); result[0][0] += field_vector[2] *wight_as; result[0][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_20.z>= seek_to && seek_to >= ANGLE_40.z){ wight_as = abs((seek_to - ANGLE_20.z)/(ANGLE_40.z - ANGLE_20.z)); result[0][1] += field_vector[2] * wight_as; result[0][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_40.z>= seek_to && seek_to >= ANGLE_60.z){ wight_as = abs((seek_to - ANGLE_40.z)/(ANGLE_60.z - ANGLE_40.z)); result[0][2] += field_vector[2] * wight_as; result[1][0] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_60.z>= seek_to && seek_to >= ANGLE_80.z){ wight_as = abs((seek_to - ANGLE_60.z)/(ANGLE_80.z - ANGLE_60.z)); result[1][0] += field_vector[2] * wight_as; result[1][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_80.z>= seek_to && seek_to >= ANGLE_100.z){ wight_as = abs((seek_to - ANGLE_80.z)/(ANGLE_100.z - ANGLE_80.z)); result[1][1] += field_vector[2] * wight_as; result[1][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_100.z>= seek_to && seek_to >= ANGLE_120.z){ wight_as = abs((seek_to - ANGLE_100.z)/(ANGLE_120.z - ANGLE_100.z)); result[1][2] += field_vector[2] * wight_as; result[2][0] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_120.z>= seek_to && seek_to >= ANGLE_140.z){ wight_as = abs((seek_to - ANGLE_120.z)/(ANGLE_140.z - ANGLE_120.z)); result[2][0] += field_vector[2] * wight_as; result[2][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_140.z>= seek_to && seek_to >= ANGLE_160.z){ wight_as = abs((seek_to - ANGLE_140.z)/(ANGLE_160.z - ANGLE_140.z)); result[2][1] += field_vector[2] * wight_as; result[2][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_160.z>= seek_to && seek_to >= ANGLE_180.z){ wight_as = abs((seek_to - ANGLE_160.z)/(ANGLE_180.z - ANGLE_160.z)); result[2][2] += field_vector[2] * wight_as; result[0][0] += field_vector[2] * (1.0 - wight_as); } } } } return result; } /* Calucate Block Feature at target center */ float block_feature_extraction(vec2 target_coord) { float 
orient_hog_density = 0.0; float block_feature_vector[SIZE_BV]; vec2 cell_bias = vec2(n, n) * pixel_bias; mat3 cell_lt = cell_feature_extraction(target_coord); mat3 cell_rt = cell_feature_extraction(target_coord + vec2(cell_bias.x, 0.0)); mat3 cell_lb = cell_feature_extraction(target_coord + vec2(0.0, cell_bias.y)); mat3 cell_rb = cell_feature_extraction(target_coord + cell_bias); float normalization_factor = 0.0; for (int i = 0; i 样例采用 单一流水线过程,我们将几个关键流程节点封装为方法,实现了 HOG 的处理。相对于顶点程序片,像素程序片不太容易理解,还需分步拆开解读。 HOG 片元着色器(Fragment Shader)的细节拆解 首先需要在处理前,进行一部分方法和常量准备。这些 前置工作包含两个部分。 第一部分由纯常量构成。用于辅助实现 方向梯度直方图(HOG)算法 中,各个步骤所使用到的关键恒定参数,有: const float PI = 3.1415927; const int n = 8; const int N = 2; const int SIZE_CV = (n + 1); const int SIZE_BV = /*N * N **/ SIZE_CV; // for orientation weight sum const float ANGLE_GAP = 20.0 * PI / 180.0; const vec3 ANGLE_0 = vec3(cos(ANGLE_GAP * 0.0), sin(ANGLE_GAP * 0.0), 100); // x=cos y=sin z=cot const vec3 ANGLE_20 = vec3(cos(ANGLE_GAP * 1.0), sin(ANGLE_GAP * 1.0), 2.74747742); const vec3 ANGLE_40 = vec3(cos(ANGLE_GAP * 2.0), sin(ANGLE_GAP * 2.0), 1.19175359); const vec3 ANGLE_60 = vec3(cos(ANGLE_GAP * 3.0), sin(ANGLE_GAP * 3.0), 0.57735027); const vec3 ANGLE_80 = vec3(cos(ANGLE_GAP * 4.0), sin(ANGLE_GAP * 4.0), 0.17632698); const vec3 ANGLE_100 = vec3(cos(ANGLE_GAP * 5.0), sin(ANGLE_GAP * 5.0), -0.17632698); const vec3 ANGLE_120 = vec3(cos(ANGLE_GAP * 6.0), sin(ANGLE_GAP * 6.0), -0.57735027); const vec3 ANGLE_140 = vec3(cos(ANGLE_GAP * 7.0), sin(ANGLE_GAP * 7.0), -1.19175359); const vec3 ANGLE_160 = vec3(cos(ANGLE_GAP * 8.0), sin(ANGLE_GAP * 8.0), -2.74747742); const vec3 ANGLE_180 = vec3(cos(ANGLE_GAP * 9.0), sin(ANGLE_GAP * 9.0), -100); const float CELL_TILE_SIZE = 8.0; //pixels const float BLOCK_TILE_SIZE = 2.0; //cells 第二部分则包含常量和辅助方法。用于辅助 HOG 最终结果的图像化显示,有: const float CELL_TILE_SIZE = 8.0; //pixels const float BLOCK_TILE_SIZE = 2.0; //cells const float HOG_TILE_SIZE = 16.0; //pixels(n*N) const float HOG_SHAFT_LENGTH = 14.0; const float HOG_SHAFT_THICKNESS = 0.5; const float HOG_SHAFT_HEAD_RATE = 64.0; const vec3 HOG_COLOR = vec3(1.0, 1.0, 0.0); const float HOG_MIN_MAGNITUDE = 0.1; /* Simple Grey */ float grey(vec3 c) { return 0.299 * c[0] + 0.587 * c[1] + 0.114 * c[2]; } /* Calucate HOG Orient-hog Density (pixel by pixel) */ float hog_density(vec2 target_coord, vec3 field_vector) { vec2 ori_pos = target_coord.xy / pixel_bias; vec2 tile_center = (floor(ori_pos / HOG_TILE_SIZE) + 0.5) * HOG_TILE_SIZE; float magnitude = abs(field_vector.z); if (magnitude > max(HOG_MIN_MAGNITUDE, hog_magnitude_limit)) { float distance = clamp(magnitude * HOG_SHAFT_LENGTH, 0.1, HOG_SHAFT_LENGTH); vec2 normalizer = normalize(field_vector.xy); vec2 tile_offset = ori_pos - tile_center; float density = HOG_SHAFT_THICKNESS / HOG_SHAFT_HEAD_RATE - max( abs(dot(tile_offset, vec2(+normalizer.y, -normalizer.x))), abs(dot(tile_offset, vec2(+normalizer.x, +normalizer.y))) - distance ); return clamp(1.0 + density, 0.0, 1.0); } return 0.0; } 灰度(光亮度)值采用 BT.601 的狭隘区间(Narrow Range) 标准快速计算,运用中也可以替换为均值(部分场景)或根据情况更换其他标准( 如 RGB数据 非采样得原始数据的标准原色格式而来,则因根据转换前的传输格式来选择配套的规格,见上一章)。 注意以 HOG_[xx] 为格式的常量。这些常量被用于计算,上屏显示的无符号梯度(Unsigned Gradient)对应方向上的权重柱形轴。 柱形轴过分块中心,轴的长度和颜色的深浅(即能量密度)代表归一化后的权重大小。而方法计算所得 density 则为当前像素点对应块内位置的能量密度值。显然,密度值只有在轴方向上才存在有效值。另一方面,较小的能量密度也不具有代表性,需要通过 阈值限定进行过滤,此处采用 max(HOG_MIN_MAGNITUDE, hog_magnitude_limit) 进行设置。 准备完成后,就该正式流程的处理了。这里的封装思路,是以 生成的最小结果单元为分割依据 进行的。所以,将 HOG 步骤方法封为一下三个: sobel_edge_detection 针对 像素点(Pixel)梯度矢量 的 索贝尔边界检测 /* Calucate Sobel Field at target center */ vec3 
sobel_edge_detection(vec2 target_coord) { float gradient_center_x; float gradient_center_y; for (int i = 0; i cell_feature_extraction 针对 分组(Cell)特征提取 为结果的 矢量统计合并 /* Calucate Cell Feature at target center */ mat3 cell_feature_extraction(vec2 target_coord) { mat3 result; float bias_unit = float(n-1)/2.0; vec2 ori_pos = target_coord.xy / pixel_bias; vec2 cell_center = (floor(ori_pos / CELL_TILE_SIZE) + 0.5) * CELL_TILE_SIZE; float normalization_factor = 0.0; for (int i = 0; i = seek_to && seek_to >= ANGLE_20.z){ wight_as = abs((seek_to - ANGLE_0.z)/(ANGLE_20.z - ANGLE_0.z)); result[0][0] += field_vector[2] *wight_as; result[0][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_20.z>= seek_to && seek_to >= ANGLE_40.z){ wight_as = abs((seek_to - ANGLE_20.z)/(ANGLE_40.z - ANGLE_20.z)); result[0][1] += field_vector[2] * wight_as; result[0][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_40.z>= seek_to && seek_to >= ANGLE_60.z){ wight_as = abs((seek_to - ANGLE_40.z)/(ANGLE_60.z - ANGLE_40.z)); result[0][2] += field_vector[2] * wight_as; result[1][0] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_60.z>= seek_to && seek_to >= ANGLE_80.z){ wight_as = abs((seek_to - ANGLE_60.z)/(ANGLE_80.z - ANGLE_60.z)); result[1][0] += field_vector[2] * wight_as; result[1][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_80.z>= seek_to && seek_to >= ANGLE_100.z){ wight_as = abs((seek_to - ANGLE_80.z)/(ANGLE_100.z - ANGLE_80.z)); result[1][1] += field_vector[2] * wight_as; result[1][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_100.z>= seek_to && seek_to >= ANGLE_120.z){ wight_as = abs((seek_to - ANGLE_100.z)/(ANGLE_120.z - ANGLE_100.z)); result[1][2] += field_vector[2] * wight_as; result[2][0] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_120.z>= seek_to && seek_to >= ANGLE_140.z){ wight_as = abs((seek_to - ANGLE_120.z)/(ANGLE_140.z - ANGLE_120.z)); result[2][0] += field_vector[2] * wight_as; result[2][1] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_140.z>= seek_to && seek_to >= ANGLE_160.z){ wight_as = abs((seek_to - ANGLE_140.z)/(ANGLE_160.z - ANGLE_140.z)); result[2][1] += field_vector[2] * wight_as; result[2][2] += field_vector[2] * (1.0 - wight_as); } else if (ANGLE_160.z>= seek_to && seek_to >= ANGLE_180.z){ wight_as = abs((seek_to - ANGLE_160.z)/(ANGLE_180.z - ANGLE_160.z)); result[2][2] += field_vector[2] * wight_as; result[0][0] += field_vector[2] * (1.0 - wight_as); } } } } return result; } block_feature_extraction 针对 分块(Block)特征提取 为结果的 块归一化 /* Calucate Block Feature at target center */ float block_feature_extraction(vec2 target_coord) { float orient_hog_density = 0.0; float block_feature_vector[SIZE_BV]; vec2 cell_bias = vec2(n, n) * pixel_bias; mat3 cell_lt = cell_feature_extraction(target_coord); mat3 cell_rt = cell_feature_extraction(target_coord + vec2(cell_bias.x, 0.0)); mat3 cell_lb = cell_feature_extraction(target_coord + vec2(0.0, cell_bias.y)); mat3 cell_rb = cell_feature_extraction(target_coord + cell_bias); float normalization_factor = 0.0; for (int i = 0; i 考虑到思路连贯性,样例中的实现将所有步骤放在一张纹理过程中处理,且没有对核计算做优化。这会导致每个像素都存在一次 HOG 计算金字塔,而按理来说 一个块内并不需要重复计算。样例中相当于将块内运算重复了 16×1616 \\times 1616×16 次,极大的增加了消耗。 因此,在实际应用中,需要对上文的实现进行改造。 把文中程序片内的各个步骤的方法,分配到不同阶的程序片中,并优化纹理过程。 之后才能被更为高效的予以运用。介于骨干并无不同,此处就不再展开赘述。 经过处理后的最终结果,以能量密度的形式附加到当前像素点的色彩值上,实现最终的图形化展示: void main() { vec3 output_ = only_edge? 
vec3(0) : texture2D(target_texture, fs_texcoord.xy).rgb; float orient_hog_density = block_feature_extraction(fs_texcoord.xy); vec3 hogs_ = orient_hog_density * HOG_COLOR; gl_FragColor = vec4(output_ + hogs_, 1.0); } 现在,整个 HOG 的简易程序片就完成了。 到此为止,方向梯度直方图技术可以初步应用于音视频当中了。 虽然在上文样例的渲染程序片实现过程中,但从普遍意义上来讲,HOG 仍然属于相对高消耗的算法, HOG 提供的方法论更多被应用在 编解码规格制定的时域冗余处理 上。其本身具有一定的 硬件门槛。 HOG 最终产物的用处 假设输入帧长宽为 W×H=256×256W \\times H = 256 \\times 256W×H=256×256 。按照前文采用块大小 2×22 \\times 22×2 ,分组大小 8×88 \\times 88×8 进行处理,则得到方向梯度直方图最终输出结果为包含 16×16=25616 \\times 16 = 25616×16=256 个块特征向量的数据集合。每一个块特征向量由 (2×2)⋅9=36(2 \\times 2) \\cdot 9 = 36(2×2)⋅9=36 维(参数)构成。为了方便描述,我们将输出数据集称为 HOG 数据帧。 HOG 数据帧(HOG Frame)更多被作为经过特征提取后的预处理输入数据,传入目标物体检测等人工智能计算机视觉方向的算法模型。 通过模型获取的物体识别结果后,再利用训练好的目标跟踪模型,或传统目标跟踪算法(诸如:核卷积滤波(KCF [Kernelized Correlation Filter])[18] 、MOSSE 算法等)等,来获取视频流中运动物体在时序上的关联性。 那么,用于判断目标检测结果是否准确的方法,也就是目标检测模型的 损失函数(Loss Function) 是什么呢? Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_3_2.html":{"url":"Chapter_3/Language/cn/Docs_3_3_2.html","title":"3.3.2 朴素目标检测结果度量 - IoU & GIoU","keywords":"","body":"3.3.2 朴素目标检测结果度量 - IoU & GIoU 考虑到算法本身需要作为目标检测结果准确性的衡量标准,并用于模型的计算过程。所以不能采用较高复杂程度的算法。而 交并比(IoU [Intersection over Union]) 计算作为相对简单的区域检测算法,则是可被采用的不错方案。 交并比顾名思义,即为交集和并集的比值。 只不过这里的交集和并集,指的是 预测结果(Prediction)对应的预测框(Anchor Box)和标注框(Ground Truth)的交集与并集,记为 I=IntersectionI = IntersectionI=Intersection 和 U=UnionU = UnionU=Union 。 图 3-11 原论文中交并比示意图[19] 如图所示,交并比公式非常简洁(注意 并非 IoU Loss ),可记为: IoU=Intersection(Anchor, Truth)Union(Anchor, Truth)=IU {\\displaystyle \\begin{aligned} IoU &= \\frac{Intersection(Anchor,\\ Truth)}{Union(Anchor,\\ Truth)} = \\frac{I}{U} \\\\ \\end{aligned} } IoU=Union(Anchor, Truth)Intersection(Anchor, Truth)=UI 而根据交并比设计的损失函数,就是交并比损失函数(IoU Loss)。 同其他有关深度学习领域,针对损失函数提出的算法理论一致。IoU Loss 在模型中同样存在两项应用,分别为 前向预测(Forward Prediction) 和 反向传播(Backward Propagation)。 即标准损失的计算和模型梯度迭代的加速。 交并比损失函数(IoU Loss) 根据原论文的设计,IoU 前向扩散作用在 ReLU 激活函数层(ReLU Layer)后,以代替传统物体识别模型采用的 L2L_2L2 损失函数,判断待筛选的预测框是否命中。由于始终有 IoU∈[0, 1]IoU \\in [0, \\ 1]IoU∈[0, 1] ,交并比损失函数可被认为是 p(IoU=1)=1p(IoU = 1) = 1p(IoU=1)=1 的 特殊交叉熵损失函数(cross-entropy Loss),有: IoU Loss=−p⋅ln(IoU)−(1−p)⋅ln(1−IoU)∣p(IoU=1)=1=−ln(IoU) {\\displaystyle \\begin{aligned} IoU \\ \\mathcal{L}oss &= -p \\cdot ln(IoU) - (1 - p) \\cdot ln(1-IoU) | \\quad p(IoU = 1) = 1 \\\\ &= -ln(IoU) \\\\ \\end{aligned} } IoU Loss=−p⋅ln(IoU)−(1−p)⋅ln(1−IoU)∣p(IoU=1)=1=−ln(IoU) 带入交并比实际值,有: IoU Loss=−lnIntersection(Anchor, Truth)Union(Anchor, Truth)=−lnIU {\\displaystyle \\begin{aligned} IoU \\ \\mathcal{L}oss &= -ln \\frac{Intersection(Anchor,\\ Truth)}{Union(Anchor,\\ Truth)} = -ln \\frac{I}{U} \\\\ \\end{aligned} } IoU Loss=−lnUnion(Anchor, Truth)Intersection(Anchor, Truth)=−lnUI 此即为 交并比损失函数。由于 IoU∈[0, 1]IoU \\in [0, \\ 1]IoU∈[0, 1] 有 −ln(IoU)≈1−IoU-ln(IoU) \\approx 1-IoU−ln(IoU)≈1−IoU ,考虑到计算便利性,在条件范围内常用差值代替对数计算。即: IoU Loss≈1−IoUIoU∈[0, 1] {\\displaystyle \\begin{aligned} IoU \\ \\mathcal{L}oss &\\approx 1-IoU \\quad IoU \\in [0, \\ 1] \\\\ \\end{aligned} } IoU Loss≈1−IoUIoU∈[0, 1] 相比 L2L_2L2 损失函数的简单区域差值来衡量命中的方式, IoU 考虑到了 预测框与标准框的平面空间位置关系,并通过对位置的衡量 锁定了两者间的平面位姿独立优化,因而具有更贴合客观的代表性。且在交叉熵类型损失函数(详见下一章)的特性作用下,结果落于单位量化的百分比区间,利于阈值衡量和操作之便。 交并比损失函数(IoU Loss)的反向传播(Backward Propagation) 反向传播(Backward Propagation) 简单来说,是通过当前学习到的参数在参数空间内指定方向的运动趋势,来反相强化或衰减该方向上的参数权重,进而达到更快使模型拟合的数学方法论统称。自 杰弗里·辛顿(Geoffrey Hinton,“深度学习之父”,当代人工智能领域三巨头之一) 教授提出并汇总这一概念以来,持续的被作为深度学习根基理论之一,应用在各类算法的学习过程中。 如果从物理学角度来看,把参与训练的相关模型参数的权重向量比作速度,那么,损失函数的反向传播,就相当于 
速度在各个方向上的某一时刻的加速度。所以,其影响的是权重在方向上的迭代步长变化,即为优化算法的输出。 交并比损失函数的反向传播,为便于称呼,简称 反向交并比(Backward IoU/ IoU Back)。取图 3.3.2-1 说明,记预测框为 x=(xl,xt,xr,xb)x = (x_l, x_t, x_r, x_b)x=(xl,xt,xr,xb) 面积为 XXX ,标注框为 x~=(x~l,x~t,x~r,x~b)\\tilde{x} = (\\tilde{x}_l, \\tilde{x}_t, \\tilde{x}_r, \\tilde{x}_b)x~=(x~l,x~t,x~r,x~b) 面积为 X~\\tilde{X}X~ ,则反向交并比可表示为: IoU Back=∂L∂x=I⋅(∇xX−∇xI)−U⋅∇xIU2⋅IoU=1U⋅∇xX − U+IUI⋅∇xI {\\displaystyle \\begin{aligned} IoU\\ \\mathcal{B}ack &= \\frac{\\partial \\mathcal{L}}{\\partial x} = \\frac{I \\cdot (\\nabla_xX - \\nabla_xI) - U \\cdot \\nabla_xI}{U^2 \\cdot IoU} \\\\ &= \\tfrac{1}{U} \\cdot \\nabla_xX \\ - \\ \\tfrac{U+I}{UI} \\cdot \\nabla_xI \\\\ \\end{aligned} } IoU Back=∂x∂L=U2⋅IoUI⋅(∇xX−∇xI)−U⋅∇xI=U1⋅∇xX − UIU+I⋅∇xI 其中, ∇xX\\nabla_xX∇xX 是 预测框面积关于位置的偏导数(Partial Derivative), ∇xI\\nabla_xI∇xI 是 交集区域面积关于位置的偏导数,有: Iw=min(xl, x~l)+min(xr, x~r)Ih=min(xt, x~t)+min(xb, x~b)∇xX={∂X∂xt(or ∂xb)=xl+xr∂X∂xl(or ∂xr)=xt+xb∇xI={∂I∂xt(or ∂xb)={Iw, if(xtx~t or xbx~b)0,otherwise∂I∂xl(or ∂xr)={Ih, if(xlx~l or xrx~r)0,otherwise {\\displaystyle \\begin{aligned} I_w &= min(x_l,\\ \\tilde{x}_l) + min(x_r,\\ \\tilde{x}_r) \\\\ I_h &= min(x_t,\\ \\tilde{x}_t) + min(x_b,\\ \\tilde{x}_b) \\\\ \\nabla_xX &= { \\begin{cases} \\frac{\\partial X}{\\partial x_t( \\mathbf{or}\\ \\partial x_b)} = x_l + x_r \\\\ \\frac{\\partial X}{\\partial x_l( \\mathbf{or}\\ \\partial x_r)} = x_t + x_b \\end{cases} } \\\\ \\nabla_xI &= { \\begin{cases} \\frac{\\partial I}{\\partial x_t( \\mathbf{or}\\ \\partial x_b)} = { \\begin{cases} I_w &, \\ if ( x_t IwIh∇xX∇xI=min(xl, x~l)+min(xr, x~r)=min(xt, x~t)+min(xb, x~b)=⎩⎪⎨⎪⎧∂xt(or ∂xb)∂X=xl+xr∂xl(or ∂xr)∂X=xt+xb=⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧∂xt(or ∂xb)∂I={Iw0, if(xtx~t or xbx~b),otherwise∂xl(or ∂xr)∂I={Ih0, if(xlx~l or xrx~r),otherwise 带入求得 IoU BackIoU\\ \\mathcal{B}ackIoU Back 值,作用于 优化算法的梯度变换,如 自适应动量算法(Adam) 等。来发挥相应作用。 交并比损失函数(IoU Loss)的简单 C++ 语言实现 到这里,我们就可以根据基本情况来做一下交并比的代码实现了。由于需要进行一些基本的矩阵运算,我们选择采用引入 轻量级的 GLM(GL Mathematics) 开源库,来协助完成基本工作。 我们选择 GLM 库的原因,是因为其可以通过纯粹的包含头文件的方式,简便轻巧的启动包含基本图形矩阵数据结构和方法的完整库功能。在其开源协议保证下,非常适合运用于大部分工程项目。如果需要也可以自己分装部分算法和操作。例如在某些场景下,我们需要计算物体体积方块区域,到视窗平面上的投影位置: #include #include \"stdio.h\" #include \"math.h\" typedef glm::vec2 Vector_2f; typedef glm::vec3 Vector_3f; typedef glm::vec4 Vector_4f; typedef glm::mat2 Matrix_2x2f; typedef glm::mat3 Matrix_3x3f; typedef glm::mat4 Matrix_4x4f; #define XC_PI 3.14159265358979323846 #define XC_RADIAN(d_) (XC_PI * d_ / 180.0f) #define XC_VECTOR_NORMALIZE(v_) glm::normalize(v_) #define XC_VECTOR_CROSS(vl_, vr_) glm::cross(vl_, vr_) #define XC_VECTOR_DOT(vl_, vr_) glm::dot(vl_, vr_) #define XC_MATRIX_INVERSE(m_) glm::inverse(m_) #define XC_MATRIX_TRANSPOSE(m_) glm::transpose(m_) #define XC_MATRIX_DOT(ml_, mr_) dot_m4x4(ml_, mr_) #define XC_V4_M44_DOT(vl_, mr_) dot_v4_m4x4(vl_, mr_) Vector_4f dot_v4_m4x4(Vector_4f v4_, Matrix_4x4f m4x4_) { return m4x4_[0] * v4_[0] + m4x4_[1] * v4_[1] + m4x4_[2] * v4_[2] + m4x4_[3] * v4_[3]; } Matrix_4x4f dot_m4x4(Matrix_4x4f ml_, Matrix_4x4f mr_) { Matrix_4x4f result_; result_[0] = mr_[0] * ml_[0][0] + mr_[1] * ml_[0][1] + mr_[2] * ml_[0][2] + mr_[3] * ml_[0][3]; result_[1] = mr_[0] * ml_[1][0] + mr_[1] * ml_[1][1] + mr_[2] * ml_[1][2] + mr_[3] * ml_[1][3]; result_[2] = mr_[0] * ml_[2][0] + mr_[1] * ml_[2][1] + mr_[2] * ml_[2][2] + mr_[3] * ml_[2][3]; result_[3] = mr_[0] * ml_[3][0] + mr_[1] * ml_[3][1] + mr_[2] * ml_[3][2] + mr_[3] * ml_[3][3]; return result_; } 此处我们简单的实现了两个快速算法,用于协助我们完成目标 4×14 \\times 14×1 向量与 4×44 \\times 44×4 矩阵的点乘,和两个 4×44 \\times 44×4 矩阵的点乘。 其实类似的快速算法已在库内有封装,此处仅是用于说明 GLM 的一些基本用法。 
不过,对于交并比的代码工程化来说,并不需要这么复杂: #include #include \"stdio.h\" #include \"math.h\" typedef glm::vec2 Vector_2f; typedef glm::vec4 Vector_4f; bool static IoU_simple(Vector_4f anchor_box_, Vector_4f ground_box_, float threshold_ = 0.8f) { float M_area_, T_area_, I_area_, U_area_; float IoU_mark_; { Vector_2f I_lt = { MAX(anchor_box_[0], ground_box_[0]), MAX(anchor_box_[1], ground_box_[1]) }; Vector_2f I_rb = { MIN(anchor_box_[2], ground_box_[2]), MIN(anchor_box_[3], ground_box_[3]) }; if (I_rb.x threshold_); } 上面的简短过程,就是整个交并比的 C++ 语言封装了。可见易于迁移。 IoU 的缺点与 GIoU 的改进 交并比损失函数并非是没有缺陷的。 一个显而易见的问题就是 IoU 无法评估预测框和标注框无交集区域时,预测框的优劣程度(梯度消失)。 这所造成的直接问题就是,当 无交集情况出现,我们将无法只通过 IoU 损失函数,来使预测框快速的向标注框方向运动。从而导致数据浪费并产生不准确的结果,且有可能使模型陷入局部解而导致停滞。 2019 年的 CVPR 上,来自斯坦福大学的研究团队以交并比为基础,提出了 IoU 的改进版 通用交并比(GIoU [Generalized Intersection over Union])算法 [20] 。解决了无交集的判断问题。 GIoU 采用的处理办法为,在原有 IoU 计算的基础上,引入预测框与标注框区域所构成的最小外接矩形,即 两者的最小外接闭包(smallest enclosing convex) 参与损失函数计算,来辅助量化两者之间的远近到权重迭代中, 记为 C=ConvexC = ConvexC=Convex 。 图 3-12 红框即为 IoU 图例中,I 和 U 的最小外接矩形 改进后的通用交并比公式 同样非常简洁 (注意 并非 GIoU Loss ),可记为: GIoU=IoU−∣C−(A∪B)∣∣C∣=IoU−∣C−U∣∣C∣ {\\displaystyle \\begin{aligned} GIoU &= IoU - \\frac{|C - (A \\cup B)|}{|C|} = IoU - \\frac{|C - U|}{|C|} \\\\ \\end{aligned} } GIoU=IoU−∣C∣∣C−(A∪B)∣=IoU−∣C∣∣C−U∣ 从公式可知,当 预测框与标注框不存在交集时, U=∣A∪B∣=0→IoU=0U = |A \\cup B| = 0 \\rightarrow IoU = 0U=∣A∪B∣=0→IoU=0 有: GIoU=IoU−C−0C=−1 {\\displaystyle \\begin{aligned} GIoU &= IoU - \\frac{C-0}{C} = -1 \\\\ \\end{aligned} } GIoU=IoU−CC−0=−1 当 预测框与标注框完全重合时, I=∣A∩B∣=∣A∪B∣=U→IoU=1I = |A \\cap B| = |A \\cup B| = U \\rightarrow IoU = 1I=∣A∩B∣=∣A∪B∣=U→IoU=1 有: GIoU=IoU−C−UC=IoU−0C=1 {\\displaystyle \\begin{aligned} GIoU &= IoU - \\frac{C-U}{C} = IoU - \\frac{0}{C} = 1 \\\\ \\end{aligned} } GIoU=IoU−CC−U=IoU−C0=1 基于此,GIoU 的取值范围为 GIoU∈[−1, +1]GIoU \\in [-1, \\ +1]GIoU∈[−1, +1] 。 通用交并比损失函数(GIoU Loss) GIoU 本质是一种对 IoU 算法的 泛化补充,所以在损失函数 GIoU LossGIoU \\ \\mathcal{L}ossGIoU Loss 的表达上,直接采用 GIoU 代替 IoU 作为影响因子即可。有: GIoU Loss=−ln(GIoU)≈1−GIoUGIoU∈[−1, 1] {\\displaystyle \\begin{aligned} GIoU \\ \\mathcal{L}oss & = -ln(GIoU) \\approx 1-GIoU \\quad GIoU \\in [-1, \\ 1] \\\\ \\end{aligned} } GIoU Loss=−ln(GIoU)≈1−GIoUGIoU∈[−1, 1] 同理,记 ∇xX\\nabla_xX∇xX 是预测框面积关于位置的偏导数(Partial Derivative), ∇xX~\\nabla_x\\tilde{X}∇xX~ 是标注框面积关于位置的偏导数(Partial Derivative), ∇xI\\nabla_xI∇xI 是交集区域面积关于位置的偏导数,有: GIoU Back=∂L∂x=∂LIoU∂x+∂LUoC∂x=I⋅(∇xX−∇xI)−U⋅∇xIU2⋅IoU+U⋅(∇xX+∇xX~)−C⋅(∇xX−∇xI)C⋅U=1U⋅∇xX − U+IUI⋅∇xI + 1U⋅∇xI +1C⋅∇xX~ − C−UCU⋅∇xX=1C⋅∇xX − 1I⋅∇xI + 1C⋅∇xX~ {\\displaystyle \\begin{aligned} GIoU\\ \\mathcal{B}ack &= \\frac{\\partial \\mathcal{L}}{\\partial x} = \\frac{\\partial \\mathcal{L}_{IoU}}{\\partial x} + \\frac{\\partial \\mathcal{L}_{UoC}}{\\partial x} \\\\ &= \\frac{I \\cdot (\\nabla_xX - \\nabla_xI) - U \\cdot \\nabla_xI}{U^2 \\cdot IoU} + \\frac{U \\cdot (\\nabla_xX + \\nabla_x\\tilde{X}) - C \\cdot (\\nabla_xX - \\nabla_xI)}{C \\cdot U} \\\\ &= \\tfrac{1}{U} \\cdot \\nabla_xX \\ - \\ \\tfrac{U+I}{UI} \\cdot \\nabla_xI \\ + \\ \\tfrac{1}{U} \\cdot \\nabla_xI \\ + \\tfrac{1}{C} \\cdot \\nabla_x\\tilde{X} \\ - \\ \\tfrac{C-U}{CU} \\cdot \\nabla_xX \\\\ &= \\tfrac{1}{C} \\cdot \\nabla_xX \\ - \\ \\tfrac{1}{I} \\cdot \\nabla_xI \\ + \\ \\tfrac{1}{C} \\cdot \\nabla_x\\tilde{X} \\\\ \\end{aligned} } GIoU Back=∂x∂L=∂x∂LIoU+∂x∂LUoC=U2⋅IoUI⋅(∇xX−∇xI)−U⋅∇xI+C⋅UU⋅(∇xX+∇xX~)−C⋅(∇xX−∇xI)=U1⋅∇xX − UIU+I⋅∇xI + U1⋅∇xI +C1⋅∇xX~ − CUC−U⋅∇xX=C1⋅∇xX − I1⋅∇xI + C1⋅∇xX~ 而 标注框在单次迭代中是常量值,即 ∇xX~=0\\nabla_x\\tilde{X} = 0∇xX~=0 代入: GIoU Back=1C⋅∇xX − 1I⋅∇xI {\\displaystyle \\begin{aligned} GIoU\\ \\mathcal{B}ack &= 
\\tfrac{1}{C} \\cdot \\nabla_xX \\ - \\ \\tfrac{1}{I} \\cdot \\nabla_xI \\end{aligned} } GIoU Back=C1⋅∇xX − I1⋅∇xI 显然 GIoU 的反向传播计算相比 IoU 更为快捷有效。这也是其 通用性 的体现之一。 通用交并比损失函数(GIoU Loss)的简单 C++ 语言实现 万事具备,现在只需要代码实现 GIoU 算法即可,仍然非常便捷。只需在原 IoU 算法上补充改进部分即可: #include #include \"stdio.h\" #include \"math.h\" typedef glm::vec2 Vector_2f; typedef glm::vec4 Vector_4f; bool static GIoU_simple(Vector_4f anchor_box_, Vector_4f ground_box_, float threshold_ = 0.8f) { float M_area_, T_area_, I_area_, U_area_, C_area_; float IoU_mark_, GIoU_mark_; { Vector_2f I_lt = { MAX(anchor_box_[0], ground_box_[0]), MAX(anchor_box_[1], ground_box_[1]) }; Vector_2f I_rb = { MIN(anchor_box_[2], ground_box_[2]), MIN(anchor_box_[3], ground_box_[3]) }; if (I_rb.x threshold_); } 完成 GIoU 算法的程序化封装。 GIoU 的缺点与 IoU 算法族的发展 那么,GIoU 算法是否依旧存在缺陷呢? 虽然 GIoU 可以适度的缓解无交集情况的梯度消失问题,但 并不能加速当预测框完整包含标注框时的梯度迭代。此时 GIoU 算法,会因为最小外接矩形等同于并集 的缘故,退化为 IoU 算法。从而无法起到有向加速梯度趋向更贴合标注大小的目的。 图 3-13 预测框(绿)包含标注框时 GIoU 退化为 IoU 示意图[20] 针对这种情形,后续的一些研究试图通过引入 框中心点(DIoU [Distance-IoU]) [21] ,结合 长宽一致性(CIoU [Complete-IoU]) [21] ,并在中心点基础上 进一步优化损失函数的设计(EIoU [Efficient-IoU]) [22] 来解决此问题。虽然取得了不错的效果,但算法复杂度也有较大变化,考虑到实际工程情况取舍可以酌情选用,本书不再展开讲解。 几种算法的对比结果如表 《当前主流 IoU 算法族基于 COCO val-2017 数据集的对比结果》 所示 [22],仅供参考: 进行到这里,在一些耗时训练之后,我们就能够得到一个静态的物体识别算法模型了。 由于静态模型不需要持续迭代,通过直接取模型参数或者接入其他成型的推理引擎,即可完成对指定关注物体的识别操作。 需要注意的是,目前训练所得的 简易模型,还不能在 不经过辅助方法 的情况下,自主完成锁定需要检测的物体。模型只能用于判断某一个给定检测范围(检测框)内的数据,是否属于被用于训练录入的标签物体,并给出命中率。 因此,依旧需要人为提供用于辅助锁定检测目标的方法。 配合检测所得命中率经过阈值筛选最终结果,得到其所处像素位置。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_3_3.html":{"url":"Chapter_3/Language/cn/Docs_3_3_3.html","title":"3.3.3 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window)","keywords":"","body":"3.3.2 朴素目标检测物体锁定 - 分步滑动窗口(Simple Sliding Window) 分步滑动窗口(Simple Sliding Window) 是一种常用的辅助锁定检测目标的手段。其优势在于,简单易行且精度可控。 作为一个经典的工具范式,分步滑动窗口被广泛应用于深度学习相关的特征提取、语义分割、物体检测、物体识别等各种场合。前面的章节中使用 HOG 提取特征向量时的卷积核操作,其中卷积核就可以被认为是一个步长等于窗口大小的滑动窗口。本质上,滑动窗口和卷积核只是不同视角下对同种数学工具的不同描述而已。 滑动窗口 实则为一个泛化的概念,而称谓上的差异,主要体现在狭义的分步滑动窗口更注重强调概念上的步长选择。例如中科院就从变步长角度出发,提出了一种基于滑动窗口捕获图像信息的分批量化整合空间结构局部相关性的视觉 Transformer 基础模型 SimViT [23] 。滑动窗口之名,仅用于区分关注点的差异,可见一斑。 图 3-14 滑动窗口在 SimViT 中的运用[23] 我们日常工程中,在已经有可以被部署的物体检测模型阶段之后,可用滑动窗口锁定随时间轴变化而发生运动的目标。 方法本身有三个关键概念,分别是:窗口大小(Window Size) 、滑动步长(Sliding Step) 、采样层级(Sample Level)。 窗口大小(Window Size) 即 滑动窗口的空间属性,等价于卷积核大小的意义。在二维情况下通常指由 宽(Width)和高(Height)组成的矩形所围成的闭包内区域,记为 Size=(W,H)Size = (W,H)Size=(W,H) 。 滑动步长(Sliding Step) 即 滑动窗口的运动属性,代表窗口在维度空间内的移动状态。在二维情况下则分为 横向(Horizontal) 和 纵向(Vertical) 两个方向。一般在大多数工程场景下,都会选择 速度为常量取值的匀速步长(Uniform Step),且 优先横向扫描(Transverse Scaning),记为 Step=(u,v)Step = (u,v)Step=(u,v) 。 采样层级(Sample Level) 即 原数据的缩放(提取)层级,如 SimViT 的图例中,就可以被认为在窗口大小恒定情况下,利用 MCSA 注意力激励算法向上采样,构建了双层(2-Level)的变步长滑动窗口单元,记为 Level=(l)Level = (l)Level=(l) ,有: Level=(l)=Subsampling+Upsampling+1 Level = (l) = Subsampling + Upsampling + 1 Level=(l)=Subsampling+Upsampling+1 由于本身是通过设定大小的窗口 滑动筛选过滤,因此窗口的大小是否 贴合被检测目标的大小,会较大程度上影响最终判定结果。但也需要均衡算力消耗。假设当前用于检测的图像大小为 (Img_W, Img_H)(Img\\_W,\\ Img\\_H)(Img_W, Img_H) ,一套工程上的经验方法计算方式如下: WParams={Size=(W,H)=(⌊Img_W2⌋+1, ⌊Img_H2⌋+1)Step=(u,v)=(Img_Wlv⋅W, Img_Hlv⋅H)Level=(lv),lv∈[1, 3] {\\displaystyle \\begin{aligned} {WParams} = { \\begin{cases} Size &= (W,H) = ( \\lfloor \\tfrac{Img\\_W}{2} \\rfloor + 1,\\ \\lfloor \\tfrac{Img\\_H}{2} \\rfloor + 1) \\\\ Step &= (u,v) = ( \\tfrac{Img\\_W}{lv \\cdot W},\\ \\tfrac{Img\\_H}{lv \\cdot H}) \\\\ Level &= (lv) ,\\quad lv \\in [1,\\ 3 ] \\end{cases} } \\\\ \\end{aligned} } 
WParams=⎩⎪⎨⎪⎧SizeStepLevel=(W,H)=(⌊2Img_W⌋+1, ⌊2Img_H⌋+1)=(u,v)=(lv⋅WImg_W, lv⋅HImg_H)=(lv),lv∈[1, 3] 代入图像大小获得配置,来快速获取包含完整被检测物体的闭包,方便模型处理得到目标实际区域,并工程缩减模型的输入。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_4.html":{"url":"Chapter_3/Language/cn/Docs_3_4.html","title":"3.4 空域冗余控制 - 基础光流算法与色度压缩","keywords":"","body":"3.4 空域冗余控制 - 基础光流算法与色度压缩 介于上一节分析的时域冗余性质可以得知,时空本身就是紧密相联的。时域冗余的压缩,主要体现于从覆盖整个数据过程的更广视角,来处理宏观上的实际物理物体运动所产生的信息。所以这里的 时域(Time Domain)冗余,指的是 广时空域(Full Spatiotemporal Domain)物体冗余(Objects Redundancy)。而我们 这里所指的空域(Spacial Domain)冗余,可以认为是 相较于时域(Time Domain)的整个数据过程的广度,在单一极短(如前后几帧)的范围内,更细节的像素运动情况的处理,即 狭时空域(Narrow Spatiotemporal Domain)像素冗余(Pixels Redundancy)。 依赖新兴的人工智能方面的运用。广时空域冗余的处理当下虽处于起步阶段,但在标准工程层面探索,如新一代的编解码规格(VVC、MPAI 等)制定获得时续具有关联性的运动区域信息中,已有提案。虽然目前还无法确定最终是否会被采用。其所代表的新一代编解码规格对时域冗余的处理思路,仍然可被有效的借鉴于后续标准确立。这也意味着,传统编解码手段的未来发展方向,需要与人工智能领域在更为基础的方面相结合。必然不可避免需要多级模型的联动。 显而易见,为了保证多级模型的效率,大多数诸如 HOG 在内的一二维信号数据的前处理工作,就需要在模型外解决。而以往这些处理,仅被用于在应用层的具体某些功能过程(比如人脸识别、特征点蒙皮等)的数据准备工作,并未触及到编解码工程的核心区域(不过现在已有一些编解码框架,在利用了这些特性来做相关实践了),因此总是以单元化的单个功能的形式出现。在利用模型针对时域(广)压缩的可能性出现后,部分模型处理结果的简单重复判断过程,可以结合空域频域(如光流运动检测、频域动态分析等)的其他手段,转为由量化的传统算法单元达成。届时整体前后向反馈的系统化工作,会需要提升到音视频工程层面来协助解决。直至模型的推理引擎或算法对应算子的工程标准能够一定程度的统一,从而作为基础功能的一部分,下沉至整体编解码器的规格配置。而这将是一个漫长的过程。 所以,当下必不可少的, 会要求音视频工程师对深度学习(DL [Deep Learning])为代表的机器学习,有一定程度的基础了解和认知。 本书会在第四章节,对这部分的基础知识进行阐述。而现在,让我们回到剩余的域中冗余处理。 空域(指狭时空域,之后若无特别说明则统一按此简化表述) 和频域冗余,在编解码中已有更为成熟的方法论积累。 空域冗余目前的主流处理思路,是在传统块矢量预测、运动补偿的基础上,从更精细的尺度,基于对近似像素前后相邻时间段内的漂移情况分析来进行一定程度的预估。通过块内运动矢量来测算一段时间内,指定空间范围像素亮度值(灰度值)变化。从而使之只需要保存矢量信息,即可适当完成空域信息的还原。 在分块上基于运动矢量推导,而像素则常采用光流法完成。分块处理和规格强相关,我们将在后续编解码规格分析中再行展开。现在让我们只关注细部。 那么什么是 光流(Optical Flow) 和 光流法(Methods of Optical Flow) 呢? 
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_4_1.html":{"url":"Chapter_3/Language/cn/Docs_3_4_1.html","title":"3.4.1 传统光流法(Classic Optical Flow Methods)","keywords":"","body":"3.4.1 传统光流法(Classic Optical Flow Methods) 在 计算机视觉(Computer Vision) 体系中,光流(Optical Flow) 指的是场景内像素点在观察者所观察场景空间视角下的瞬时相对运动速度。 光流法(Methods of Optical Flow) 即是利用场景序列间的像素时域运动与相邻像素相关性变化,构建前后场景间像素对应关系的数学模型,完成像素运动信息推算的方法。 光流是一个基于观察者的相对概念,并不能完全覆盖真实的物体运动情况。在由二维图像按时序组成的视频中,采样自原三维空间的抽象像素,其三维运动矢量会被投影到观察者的视窗平面上,转为运动矢量在视窗平面上的二维投影矢量。因此,为了便于区分,往往将原三维空间三维运动矢量全体组成的矢量空间称为 三维光流场(3D Optical Flow Field),简称 光动场(OMF [Optical Momentum Field])。而把视窗平面全体投影矢量构成的矢量平面称为 二维光流场(2D Optical Flow Field),简称 光流场(OFF [Optical Flow Field])。 观察者、光动场、光流场三者的关系如下图所示: 图 3-15 观察者、光动场、光流场投射变化视图[24] 在使用光流法前,首先需要量化光流的表达。 工程上通常选用生物光学的 梯度光流公式族(Gradient-Based Optical Flow Method) 来作为衡量光流的基本数学描述。因此,梯度光流法也被称为 基本光流法(Baseline Methods of Optical Flow) [25] 。 基本光流公式(Basic Gradient-Based Optical Formula) 基本光流公式(Basic Gradient-Based Optical Formula) 也称为 基本光流约束(Basic Optical Constraint),是所有传统梯度光流法的根基,提供了光流与光流场在时序上的基础关系,并构建了通用的基本假设。分别是: 灰度不变假设,即时域稳定,每一个像素点,灰度值不随时间发生改变; 光流场内可导,即空域稳定,每一个像素与其相邻区域,像素的光流场变化是连续的; 这两个假设决定了在此条件下,每个光动场内抽象像素和其投影光流场内像素,在光流运动上的时空稳定性。 记在 ttt 时刻的某位于 p=(x, y)p = (x,\\ y)p=(x, y) 的像素点,存在平面瞬时速度 v⃗=(u, v)\\vec{v} = (u,\\ v)v⃗=(u, v) 即光流。取 I(p, t)I(p,\\ t)I(p, t) 代表对应像素点的灰度值,则根据条件,单位时间变化有: I(p, t)=I(p+v⃗, t+1) {\\displaystyle \\begin{aligned} I(p,\\ t) = I(p + \\vec{v},\\ t+1) \\\\ \\end{aligned} } I(p, t)=I(p+v⃗, t+1) 当 不限制时间流向,自 ttt 时刻经历 Δt\\Delta tΔt 到 t1=t+Δtt_1 = t + \\Delta tt1=t+Δt ,存在 I(p, t)=I(p+v⃗, t+Δt)I(p,\\ t) = I(p + \\vec{v},\\ t + \\Delta t) I(p, t)=I(p+v⃗, t+Δt) 有: {I(x, y, t)=I(x+Δx,y+Δy,t+Δt)v⃗(u, v)=(ΔxΔt, ΔyΔt) {\\displaystyle \\begin{aligned} { \\begin{cases} I(x,\\ y,\\ t) &= I (x + \\Delta x,y + \\Delta y,t + \\Delta t) \\\\ \\vec{v}(u,\\ v) & = (\\tfrac{\\Delta x}{\\Delta t}, \\ \\tfrac{\\Delta y}{\\Delta t}) \\end{cases} } \\\\ \\end{aligned} } {I(x, y, t)v⃗(u, v)=I(x+Δx,y+Δy,t+Δt)=(ΔtΔx, ΔtΔy) 则根据 泰勒级数(Taylor series) 展开,有: I(x+Δx,y+Δy,t+Δt)=I(x, y, t) + ∂I∂x⋅Δx + ∂I∂y⋅Δy + ∂I∂t⋅Δt + ε=I(x, y, t) + ∂I∂x⋅u⋅Δt + ∂I∂y⋅v⋅Δt + ∂I∂t⋅Δt + ε {\\displaystyle \\begin{aligned} I (x + \\Delta x,y + \\Delta y,t + \\Delta t) &= I(x,\\ y,\\ t) \\ +\\ \\tfrac{ \\partial I}{\\partial x} \\cdot \\Delta x \\ +\\ \\tfrac{ \\partial I}{\\partial y} \\cdot \\Delta y \\ +\\ \\tfrac{ \\partial I}{\\partial t} \\cdot \\Delta t \\ +\\ \\varepsilon \\\\ &= I(x,\\ y,\\ t) \\ +\\ \\tfrac{ \\partial I}{\\partial x} \\cdot u \\cdot \\Delta t \\ +\\ \\tfrac{ \\partial I}{\\partial y} \\cdot v \\cdot \\Delta t \\ +\\ \\tfrac{ \\partial I}{\\partial t} \\cdot \\Delta t \\ +\\ \\varepsilon \\\\ \\end{aligned} } I(x+Δx,y+Δy,t+Δt)=I(x, y, t) + ∂x∂I⋅Δx + ∂y∂I⋅Δy + ∂t∂I⋅Δt + ε=I(x, y, t) + ∂x∂I⋅u⋅Δt + ∂y∂I⋅v⋅Δt + ∂t∂I⋅Δt + ε 其中 ε\\varepsilonε 为泰勒展式的高阶无穷小项,它代表了影响灰度不变假设中灰度值的实际样本噪音和量化引入误差,纳入负号,显然我们期望: ∂I∂x⋅u⋅Δt + ∂I∂y⋅v⋅Δt + ∂I∂t⋅Δt + ε=0⇒ε=∂I∂x⋅u + ∂I∂y⋅v + ∂I∂t→0 {\\displaystyle \\begin{aligned} \\tfrac{ \\partial I}{\\partial x} \\cdot u \\cdot \\Delta t \\ +\\ &\\tfrac{ \\partial I}{\\partial y} \\cdot v \\cdot \\Delta t \\ +\\ \\tfrac{ \\partial I}{\\partial t} \\cdot \\Delta t \\ +\\ \\varepsilon = 0 \\\\ &\\Rightarrow \\\\ \\varepsilon =\\tfrac{ \\partial I}{\\partial x} \\cdot u \\ &+\\ \\tfrac{ \\partial I}{\\partial y} \\cdot v \\ +\\ \\tfrac{ \\partial I}{\\partial t} \\rightarrow 0 \\\\ \\end{aligned} } ∂x∂I⋅u⋅Δt + ε=∂x∂I⋅u ∂y∂I⋅v⋅Δt + ∂t∂I⋅Δt + ε=0⇒+ 
∂y∂I⋅v + ∂t∂I→0 上式中 ∂I∂x\\tfrac{ \\partial I}{\\partial x}∂x∂I 、 ∂I∂y\\tfrac{ \\partial I}{\\partial y}∂y∂I 、 ∂I∂t\\tfrac{ \\partial I}{\\partial t}∂t∂I 是 I(p, t)=I(x, y, t)I(p,\\ t) = I(x,\\ y,\\ t)I(p, t)=I(x, y, t) 分别在三个参数方向的偏导数,记 ∇xI=∂I∂x\\nabla_xI = \\tfrac{ \\partial I}{\\partial x}∇xI=∂x∂I 、 ∇yI=∂I∂y\\nabla_yI = \\tfrac{ \\partial I}{\\partial y}∇yI=∂y∂I 、 ∇tI=∂I∂t\\nabla_t I = \\tfrac{ \\partial I}{\\partial t}∇tI=∂t∂I 。则原等式就相当于: ε=I′(x, y)⋅v⃗ + ∇tI=∇pI⋅v⃗ + ∇tI→0 {\\displaystyle \\begin{aligned} \\varepsilon = I{'}(x,\\ y) \\cdot \\vec{v} \\ +\\ \\nabla_t I = \\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I \\rightarrow 0 \\\\ \\end{aligned} } ε=I′(x, y)⋅v⃗ + ∇tI=∇pI⋅v⃗ + ∇tI→0 这就是 基本光流公式 了。 可见当 ttt 确定时,想要求得指定像素点 ppp 的光流 v⃗=(u, v)\\vec{v} = (u,\\ v)v⃗=(u, v) ,单凭基本约束是不够的。因此,必须通过其他的方式引入新的约束条件来进行光流的求解。最容易联想到的,就是通过已有的空域图像信息来进行限制。由此,根据采用空域信息量的方法,传统梯度光流法被分为了 稠密光流法(Dense Optical Flow Methods) 和 稀疏光流法(Sparse Optical Flow Methods)。 稠密光流法(Dense Optical Flow Methods),即 全局光流法(Global Optical Flow Methods),指引入的补充约束需要计算场内所有像素点情况。 稀疏光流法(Sparse Optical Flow Methods),指引入的补充约束只需要计算部分像素区域的光流信息,即可达成约束要求的光流法。 经典稠密光流法的代表是 Horn–Schunck 光流算法,经典稀疏光流法的代表是 Lucas-Kanade 光流算法。 Horn–Schunck 梯度光流法(Horn–Schunck Method) 1981 年,麻省理工计算机实验室的 贝尔特霍尔德·霍恩(Berthold K.P. Horn,1943~Present) 和 布莱恩·舒克(Brian G. Schunck),在基本光流约束的前提下,提出了单帧光流场内光流全局光滑变化的假设 [26] 。 该假设认为,若光流场内 任意一点的光流 与 临近点的光流 变化都是光滑的,则存在能够 描述全场能量的单帧光流场能量函数,使得该时间段的场内能量变化 小值稳定。即对原光流场内可导假设进行了补充,使其建立了范围覆盖到整个场内像素的宏微观光流变化,与全抽象能量场能量强度间的关系。 这一补充假设也被称为 光流平滑约束(Optical Flow Smoothness Constraint),或 Horn–Schunck 约束。由于需要对整个场内的所有像素点光流进行计算,从而获取能量函数求最小值,方法被归类为稠密光流法。 数学上可以通过对 v⃗\\vec{v}v⃗ 求 p=(x, y)p = (x,\\ y)p=(x, y) 的二阶偏导数趋向无穷小来逼近无突变情况,构建平滑程度表示,有: {∇p2u=∂2u∂x2 + ∂2u∂y2∇p2v=∂2v∂x2 + ∂2v∂y2∇p2v⃗=∇p2u + ∇p2v→0 {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\nabla^2_p u &= \\tfrac{ \\partial^2 u}{\\partial x^2} \\ +\\ \\tfrac{ \\partial^2 u}{\\partial y^2} \\\\ \\nabla^2_p v &= \\tfrac{ \\partial^2 v}{\\partial x^2} \\ +\\ \\tfrac{ \\partial^2 v}{\\partial y^2} \\end{cases} } \\\\ &\\nabla^2_p \\vec{v} = \\nabla^2_p u \\ +\\ \\nabla^2_p v \\rightarrow 0 \\\\ \\end{aligned} } {∇p2u∇p2v=∂x2∂2u + ∂y2∂2u=∂x2∂2v + ∂y2∂2v∇p2v⃗=∇p2u + ∇p2v→0 而 ∇p2u\\nabla^2_p u∇p2u 、 ∇p2v\\nabla^2_p v∇p2v 则可以通过 拉普拉斯展式,利用周边像素点光流求逼近值的方式获取 [27] 。 图 3-16 Horn–Schunck 法采用的中心光流平滑度逼近卷积核[27] 有: ∇p2v⃗=∇p2u + ∇p2v=∑xyv⃗xy⋅[112, 16, 11216,−1, 16112, 16, 112]=(u¯ − u)2 + (v¯ − v)2 {\\displaystyle \\begin{aligned} \\nabla^2_p \\vec{v} &= \\nabla^2_p u \\ +\\ \\nabla^2_p v \\\\ &= \\sum_{xy}\\vec{v}_{xy} \\cdot { \\begin{bmatrix} \\tfrac{1}{12} ,& \\quad \\ \\ \\tfrac{1}{6} ,& \\quad \\ \\ \\tfrac{1}{12} \\\\ \\tfrac{1}{6} ,& \\quad -1 ,& \\quad \\ \\ \\tfrac{1}{6} \\\\ \\tfrac{1}{12} ,& \\quad \\ \\ \\tfrac{1}{6} ,& \\quad \\ \\ \\tfrac{1}{12} \\end{bmatrix} } \\\\ &= (\\bar{u} \\ -\\ u)^2 \\ +\\ (\\bar{v} \\ -\\ v)^2 \\end{aligned} } ∇p2v⃗=∇p2u + ∇p2v=xy∑v⃗xy⋅⎣⎡121,61,121, 61,−1, 61, 121 61 121⎦⎤=(u¯ − u)2 + (v¯ − v)2 那么,指定 εc2\\varepsilon_c^2εc2 为光流平滑约束的 L2L_2L2 误差代表值,则: εc2=(u¯ − u)2 + (v¯ − v)2→0 {\\displaystyle \\begin{aligned} \\varepsilon_c^2 = (\\bar{u} \\ -\\ u)^2 \\ +\\ (\\bar{v} \\ -\\ v)^2 \\rightarrow 0 \\\\ \\end{aligned} } εc2=(u¯ − u)2 + (v¯ − v)2→0 结合基本约束条件,针对像素点 ppp 的光流 v⃗=(u, v)\\vec{v} = (u,\\ v)v⃗=(u, v) 求解,就有两个约束条件了: {ε=∇pI⋅v⃗ + ∇tI→0εc2=(u¯ − u)2 + (v¯ − v)2→0 {\\displaystyle \\begin{aligned} { \\begin{cases} \\varepsilon = \\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I \\rightarrow 0 \\\\ \\varepsilon_c^2 = (\\bar{u} \\ -\\ u)^2 \\ +\\ (\\bar{v} \\ -\\ v)^2 \\rightarrow 0 \\end{cases} } \\\\ 
\\end{aligned} } {ε=∇pI⋅v⃗ + ∇tI→0εc2=(u¯ − u)2 + (v¯ − v)2→0 至此,假设当前时间 ttt 有全光流场能量 EEE ,引入光滑因子 α\\alphaα 构建能量函数。问题随即转换为,求满足约束的 (u, v)(u,\\ v)(u, v) 值,使得 EEE 最小: E=∫∫(ε2 + α2εc2) dxdy=∫∫[∇pI⋅v⃗ + ∇tI + α2∇p2u + α2∇p2v] dxdy→min {\\displaystyle \\begin{aligned} E &= \\int\\int (\\varepsilon^2 \\ +\\ \\alpha^2 \\varepsilon_c^2) \\ dxdy \\\\ &= \\int\\int [\\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I \\ +\\ \\alpha^2 \\nabla^2_p u \\ +\\ \\alpha^2 \\nabla^2_p v] \\ dxdy \\\\ &\\rightarrow min \\end{aligned} } E=∫∫(ε2 + α2εc2) dxdy=∫∫[∇pI⋅v⃗ + ∇tI + α2∇p2u + α2∇p2v] dxdy→min 显然,当 EEE 取得最小时: {∂E∂u=2⋅(∇pI⋅v⃗ + ∇tI)⋅∇xI − 2α2(u¯ − u)=0∂E∂v=2⋅(∇pI⋅v⃗ + ∇tI)⋅∇yI − 2α2(v¯ − v)=0 {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\tfrac{ \\partial E}{\\partial u} = 2 \\cdot (\\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I) \\cdot \\nabla_xI \\ -\\ 2\\alpha^2 (\\bar{u} \\ -\\ u) = 0\\\\ \\tfrac{ \\partial E}{\\partial v} = 2 \\cdot (\\nabla_p I \\cdot \\vec{v} \\ +\\ \\nabla_t I) \\cdot \\nabla_yI \\ -\\ 2\\alpha^2 (\\bar{v} \\ -\\ v) = 0 \\end{cases} } \\\\ \\end{aligned} } {∂u∂E=2⋅(∇pI⋅v⃗ + ∇tI)⋅∇xI − 2α2(u¯ − u)=0∂v∂E=2⋅(∇pI⋅v⃗ + ∇tI)⋅∇yI − 2α2(v¯ − v)=0 进一步对两侧同求 ppp 的二阶导可化为: {(α2 + ∇xI2 + ∇yI2)⋅(u¯ − u)=∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)(α2 + ∇xI2 + ∇yI2)⋅(v¯ − v)=∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI) {\\displaystyle \\begin{aligned} &{ \\begin{cases} (\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2) \\cdot (\\bar{u} \\ -\\ u) = \\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u} \\ +\\ \\nabla_yI \\cdot \\bar{v} \\ +\\ \\nabla_t I) \\\\ (\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2) \\cdot (\\bar{v} \\ -\\ v) = \\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u} \\ +\\ \\nabla_yI \\cdot \\bar{v} \\ +\\ \\nabla_t I) \\end{cases} } \\\\ \\end{aligned} } {(α2 + ∇xI2 + ∇yI2)⋅(u¯ − u)=∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)(α2 + ∇xI2 + ∇yI2)⋅(v¯ − v)=∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI) 即: {(u − u¯)=−∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)α2 + ∇xI2 + ∇yI2(v − v¯)=−∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)α2 + ∇xI2 + ∇yI2 {\\displaystyle \\begin{aligned} &{ \\begin{cases} (u \\ -\\ \\bar{u}) = - \\frac{\\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u} \\ +\\ \\nabla_yI \\cdot \\bar{v} \\ +\\ \\nabla_t I)}{\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2} \\\\ (v \\ -\\ \\bar{v}) = - \\frac{\\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u} \\ +\\ \\nabla_yI \\cdot \\bar{v} \\ +\\ \\nabla_t I)}{\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2} \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎨⎪⎪⎧(u − u¯)=−α2 + ∇xI2 + ∇yI2∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI)(v − v¯)=−α2 + ∇xI2 + ∇yI2∇xI⋅(∇xI⋅u¯ + ∇yI⋅v¯ + ∇tI) 但由于启动时 v⃗p=(u, v)\\vec{v}_p = (u,\\ v)v⃗p=(u, v) 实际是未知的,而 avg(v⃗p)=(u¯, v¯)avg(\\vec{v}_p) = (\\bar{u},\\ \\bar{v})avg(v⃗p)=(u¯, v¯) 也是未知的。因此,我们需要将计算转换为由前一次结果驱动的向后迭代运算进行。 通过 克拉默法则(Cramer's Rule) 可知,位于第 n+1 次迭代的像素点 p = (x,\\ y) 光流 v⃗n+1\\vec{v}_{n+1}v⃗n+1 取值,与第 nnn 次迭代时,对应相同像素点 p=(x, y)p = (x,\\ y)p=(x, y) 所处卷积核的光流均值 avg(v⃗n)=(u¯n, v¯n)avg(\\vec{v}_n) = (\\bar{u}_n,\\ \\bar{v}_n)avg(v⃗n)=(u¯n, v¯n) 存在关系: {un+1=u¯n−∇xI⋅(∇xI⋅u¯n + ∇yI⋅v¯n + ∇tI)α2 + ∇xI2 + ∇yI2vn+1=v¯n−∇xI⋅(∇xI⋅u¯n + ∇yI⋅v¯n + ∇tI)α2 + ∇xI2 + ∇yI2 {\\displaystyle \\begin{aligned} &{ \\begin{cases} u_{n+1} = \\bar{u}_n - \\frac{\\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u}_n \\ +\\ \\nabla_yI \\cdot \\bar{v}_n \\ +\\ \\nabla_t I)}{\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2} \\\\ v_{n+1} = \\bar{v}_n - \\frac{\\nabla_xI \\cdot (\\nabla_xI \\cdot \\bar{u}_n \\ +\\ \\nabla_yI \\cdot \\bar{v}_n \\ +\\ \\nabla_t I)}{\\alpha^2 \\ +\\ \\nabla_xI^2 \\ +\\ \\nabla_yI^2} \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎨⎪⎪⎧un+1=u¯n−α2 + 
∇xI2 + ∇yI2∇xI⋅(∇xI⋅u¯n + ∇yI⋅v¯n + ∇tI)vn+1=v¯n−α2 + ∇xI2 + ∇yI2∇xI⋅(∇xI⋅u¯n + ∇yI⋅v¯n + ∇tI) 上式即是 HS 法的核心光流推到公式 了。 当设置好启动时的 avg(v⃗0)=(u¯0, v¯0)avg(\\vec{v}_0) = (\\bar{u}_0,\\ \\bar{v}_0)avg(v⃗0)=(u¯0, v¯0) 初始值,就可以迭代获取后续帧内的像素光流场情况了。 一般取启动帧所有像素点 avg(v⃗0)=(0, 0)avg(\\vec{v}_0) = (0,\\ 0)avg(v⃗0)=(0, 0) 。 可见 Horn–Schunck 算法是需要逐个像素参与核运算,且保存完整前值的历史算法。 Lucas-Kanade 梯度光流法(Lucas-Kanade Method) 1981 年同年,在 HS 法提出的近乎相同时间,当时还在 卡内基梅隆大学(Carnegie-Mellon University) 计算机学院的 布鲁斯·卢卡斯(Bruce D. Lucas) 和 金出武雄(Takeo Kanade,1945~Present) 教授,共同提出了 Lucas-Kanade 光流法,同样试图借此完成对基础光流约束的补充,使得能够预测光流场情况 [26] 。 和 HS 法纯粹对空域的关注不同,LK 法细化基础光流约束中的时空稳定条件 [28] : 时域上,LK 法提出了 像素微位移假设。假设认为图像像素位置随时间变化是连续的,进而才能够求的像素光流和时间之间的偏导关系; 空域上,LK 法提出了 空间趋同性假设。假设认为场景中相同表面的相邻像素点运动模式是趋同的,且由光动场到光流场投影后,其光流情况也是保持了这一性质。 这两个补充条件,让 LK 法定义的整个场景时空,任意一点和其相邻空间都是时空连续的。 这使我们可以将有关全图逐个像素点光流时空关系的推导,通过分割整体图像的像素点集合,转换为不同像素点子集构成的对应分块(卷积核),以核内区域为单元的光流时空关系推导。从点对点,变为了区域对区域。 基于此,在核心位置 c=(x, y)c = (x,\\ y)c=(x, y) 和所处时刻 ttt 已知的情况下,核内区域光流场内所有像素的光流可以被认为是一个相同值 v⃗=(u, v)\\vec{v} = (u,\\ v)v⃗=(u, v) 。且必然有区域内,基础约束条件 ε=∇cI⋅v⃗ + ∇tI\\varepsilon = \\nabla_c I \\cdot \\vec{v} \\ +\\ \\nabla_t Iε=∇cI⋅v⃗ + ∇tI 的高阶无穷小 ε=0\\varepsilon = 0ε=0 成立。 记当前图像大小为 W×HW \\times HW×H ,有 n×nn \\times nn×n 大小分块(卷积核),全图光流场面临的计算量会降为对 N=W/n×H/nN = W/n \\times H/nN=W/n×H/n 个窗口核心光流的推算。记 m=n2m = n^2m=n2 ,则存在核内方程组: {∇cI11⋅v⃗ + ∇tI11=0∇cI12⋅v⃗ + ∇tI12=0⋯∇cInn⋅v⃗ + ∇tInn=0⇒{∇xI1⋅u + ∇yI1⋅v = −∇tI1∇xI2⋅u + ∇yI2⋅v = −∇tI2⋯∇xIm⋅u + ∇yIm⋅v = −∇tIm {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\nabla_c I_{11} \\cdot \\vec{v} \\ +\\ \\nabla_t I_{11} = 0 \\\\ \\nabla_c I_{12} \\cdot \\vec{v} \\ +\\ \\nabla_t I_{12} = 0 \\\\ \\cdots \\\\ \\nabla_c I_{nn} \\cdot \\vec{v} \\ +\\ \\nabla_t I_{nn} = 0 \\end{cases} \\quad \\Rightarrow \\quad \\begin{cases} \\nabla_x I_1 \\cdot u \\ +\\ \\nabla_y I_1 \\cdot v \\ =\\ -\\nabla_t I_1 \\\\ \\nabla_x I_2 \\cdot u \\ +\\ \\nabla_y I_2 \\cdot v \\ =\\ -\\nabla_t I_2 \\\\ \\cdots \\\\ \\nabla_x I_m \\cdot u \\ +\\ \\nabla_y I_m \\cdot v \\ =\\ -\\nabla_t I_m \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎪⎨⎪⎪⎪⎧∇cI11⋅v⃗ + ∇tI11=0∇cI12⋅v⃗ + ∇tI12=0⋯∇cInn⋅v⃗ + ∇tInn=0⇒⎩⎪⎪⎪⎨⎪⎪⎪⎧∇xI1⋅u + ∇yI1⋅v = −∇tI1∇xI2⋅u + ∇yI2⋅v = −∇tI2⋯∇xIm⋅u + ∇yIm⋅v = −∇tIm 即: [∑∇xIm, ∑∇yIm][uv]=[∑−∇tIm] {\\displaystyle \\begin{aligned} \\begin{bmatrix} \\sum \\nabla_x I_m , \\ \\sum \\nabla_y I_m \\end{bmatrix} \\begin{bmatrix} u \\\\ v \\end{bmatrix} = \\begin{bmatrix} \\sum -\\nabla_t I_m \\end{bmatrix} \\\\ \\end{aligned} } [∑∇xIm, ∑∇yIm][uv]=[∑−∇tIm] 记 Mc=[∑∇xIm, ∑∇yIm]M_c =\\begin{bmatrix} \\sum \\nabla_x I_m , \\ \\sum \\nabla_y I_m \\end{bmatrix}Mc=[∑∇xIm, ∑∇yIm] , Mt=[∑−∇tIm]M_t =\\begin{bmatrix} \\sum -\\nabla_t I_m \\end{bmatrix}Mt=[∑−∇tIm] ,则: v⃗=[uv]=(McT⋅Mc)−1⋅McT⋅Mt=[∑(∇xIm)2, ∑∇xIm⋅∇yIm∑∇xIm⋅∇yIm, ∑(∇yIm)2]−1[∑∇xIm⋅∇tIm∑∇xIm⋅∇tIm] {\\displaystyle \\begin{aligned} \\vec{v} &= \\begin{bmatrix} u \\\\ v \\end{bmatrix} = ({M_c}^T \\cdot M_c)^{-1} \\cdot {M_c}^T \\cdot M_t \\\\ &= \\begin{bmatrix} &\\sum (\\nabla_x I_m)^2 &, \\ \\sum \\nabla_x I_m \\cdot \\nabla_y I_m \\\\ &\\sum \\nabla_x I_m \\cdot \\nabla_y I_m &, \\ \\sum (\\nabla_y I_m)^2 \\end{bmatrix}^{-1} \\begin{bmatrix} \\sum \\nabla_x I_m \\cdot \\nabla_t I_m \\\\ \\sum \\nabla_x I_m \\cdot \\nabla_t I_m \\end{bmatrix} \\end{aligned} } v⃗=[uv]=(McT⋅Mc)−1⋅McT⋅Mt=[∑(∇xIm)2∑∇xIm⋅∇yIm, ∑∇xIm⋅∇yIm, ∑(∇yIm)2]−1[∑∇xIm⋅∇tIm∑∇xIm⋅∇tIm] 上式即是 LK 法的核心光流推到公式 了。 可见 Lucas-Kanade 算法,属于只需要启动(且不用初始化),就能够在分块(卷积核)内自行完成核心光流保存的自适应循环算法。 从物理角度理解,式子中的 ∇xIm\\nabla_x I_m∇xIm 、 ∇yIm\\nabla_y I_m∇yIm 、 ∇tIm\\nabla_t I_m∇tIm ,是分块 mmm 内像素 ppp 的灰度值 III ,对其所处全图像素位置 
p=(x, y)p = (x,\\ y)p=(x, y) 和时间参数 ttt 方向的变化趋势,即 灰度加速度。鉴于完备的灰度数据,加速度可以利用动量算法结合牛顿法等方式逼近,快速的从帧变化中取得。那么对光流 v⃗\\vec{v}v⃗ 的求解就成为了 简单的数值计算问题。 对比 HS 稠密光流和 LK 稀疏光流经典算法,显然 LK 在工程场景中更具优势。 同样,以 LK 算法为代表的稀疏光流法,由于其本身占用数据量和算力远远小于稠密光流法的缘故,得到了更为广泛的工程运用。尤其是 LK 算法本身,凭借高可控和简单的特性,被大量使用在如今的编解码器技术上。例如空域冗余压缩所采用的双向光流等算法,就可以被认为是从 LK 算法衍生出的实际运用产物。而稠密光流法,目前还停留在单帧分析等场景,不过考虑到深度学习带来的变革,利用稠密光流的思想来训练光流约束模型,并引入新一代音视频编解码过程,也从另一个角度开始发挥稠密光流法的工程价值。 但不论是哪一种类型的光流法,基于学术需求和面向工程要求的精度还是有极大的差异的。传统音视频工程对于效率要求高,而精度要求相对较低,我们需要 更快速 的处理方式。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_4_2.html":{"url":"Chapter_3/Language/cn/Docs_3_4_2.html","title":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow])","keywords":"","body":"3.4.2 双向光流预测(BDOF [Bi-Directional Optical Flow]) 双向光流预测值修正,简称 双向光流预测(BDOF [Bi-Directional Optical Flow]),最早在 H.265 的二版规格,由三星工程师以编码压缩补充手段的方式提出 [29] 。在 VVC 的初版制定过程中,贡献者们通过对算法层面的优化,提升了 BDOF 处理单元的性能。随 VVC 被采纳为 H.266 规格一起,作为标准的一部分被收录其中。 双向光流预测是以 LK 光流法的约束条件为基础,提出的一种亮度值推理算法。方法在编解码过程中以 LK 微位移假设为基,限制所有前后向预测帧(B帧)的选取,必须保持 当前帧(Current Frame) 与前后两帧在相同位置处的光流成 等大反向关系(Reverse Equality)。 通过这一联系,BDOF 在已知时间流向(即视频向前、向后)时,可以通过前向帧和期望预测方向的下一个关联帧,推导出当前帧的实际光流场变化情况。进而在无保存当前帧数据的前提下,求得当前帧的实际灰度值(亮度参考值)。 对于采用具有线性色彩空间映射关系的规格,依赖线性转换保证了关于灰度的推理,这时 BDOF 也可以适用在各自的原色格式(RGB)的数据通道上。但由于视频传输中,一般不直接采用会造成大量数据浪费的原色格式,所以,BDOF 只被用来对传输格式(YUV)代表亮度值的 Y 通道数据,进行冗余控制。 本质上,双向光流预测是个类似二次牛顿法的逼近求解过程。根据镜像的特性,推导可转为线性求中值(对应的交点最小值)。如下图所示: 图 3-17 BDOF 构建参考对称光流示意图[29] 假设,当前临近三帧有需要推算分块 mmm 范围内像素点 p=(x, y)p = (x,\\ y)p=(x, y) 的灰度。 按时序方向(视屏正常播放方向,图中由下而上) 的前向帧(过去帧)为 R0R_0R0 有块灰度值 I0I_0I0 集、当前帧为 RcR_cRc 有块灰度值 IcI_cIc 集、后向帧(未来帧)为 R1R_1R1 有块灰度值 I1I_1I1 集。根据 LK 的局部光流趋同性,分块 mmm 范围内像素点的光流相等,可记 R0R_0R0 光流 v⃗A\\vec{v}_Av⃗A , R1R_1R1 光流 v⃗B\\vec{v}_Bv⃗B 。 由于人为的有 R0R_0R0 、 R1R_1R1 的光流在 RcR_cRc 镜像对称,如果记 R0R_0R0 光流 v⃗A=(Vx, Vy)\\vec{v}_A =(V_x,\\ V_y)v⃗A=(Vx, Vy) ,则 R1R_1R1 光流 v⃗B=(−Vx, −Vy)\\vec{v}_B =(-V_x,\\ -V_y)v⃗B=(−Vx, −Vy) ,即 v⃗B=−v⃗A\\vec{v}_B = -\\vec{v}_Av⃗B=−v⃗A 。 那么,将关系代入 LK 条件下的基础光流公式,存在块间光流满足: {+∇xI0⋅Vx + ∇yI0⋅Vy + ε = −∇tI0−∇xI1⋅Vx − ∇yI1⋅Vy + ε = −∇tI1 {\\displaystyle \\begin{aligned} &{ \\begin{cases} +\\nabla_x I_0 \\cdot V_x \\ +\\ \\nabla_y I_0 \\cdot V_y \\ +\\ \\varepsilon \\ =\\ -\\nabla_t I_0 \\\\ -\\nabla_x I_1 \\cdot V_x \\ -\\ \\nabla_y I_1 \\cdot V_y \\ +\\ \\varepsilon \\ =\\ -\\nabla_t I_1 \\end{cases} } \\\\ \\end{aligned} } {+∇xI0⋅Vx + ∇yI0⋅Vy + ε = −∇tI0−∇xI1⋅Vx − ∇yI1⋅Vy + ε = −∇tI1 因为从 R0→Rc→R1R_0 \\rightarrow R_c \\rightarrow R_1R0→Rc→R1 只 推移单位时间,所以有关时间单位导数近似: {∇tI0 = I0 − Ic∇tI1 = I1 − Ic⇒∇tI0−∇tI1 = ΔI {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\nabla_t I_0 \\ =\\ I_0 \\ -\\ I_c \\\\ \\nabla_t I_1 \\ =\\ I_1 \\ -\\ I_c \\end{cases} } \\Rightarrow \\nabla_t I_0 - \\nabla_t I_1 \\ =\\ \\Delta I \\\\ \\end{aligned} } {∇tI0 = I0 − Ic∇tI1 = I1 − Ic⇒∇tI0−∇tI1 = ΔI 则三者间的光流关系可化为: {I0 − Ic + ∇xI0⋅Vx + ∇yI0⋅Vy + ε = 0I1 − Ic − ∇xI1⋅Vx − ∇yI1⋅Vy + ε = 0 {\\displaystyle \\begin{aligned} &{ \\begin{cases} I_0 \\ -\\ I_c \\ +\\ \\nabla_x I_0 \\cdot V_x \\ +\\ \\nabla_y I_0 \\cdot V_y\\ +\\ \\varepsilon \\ =\\ 0 \\\\ I_1 \\ -\\ I_c \\ -\\ \\nabla_x I_1 \\cdot V_x \\ -\\ \\nabla_y I_1 \\cdot V_y \\ +\\ \\varepsilon \\ =\\ 0 \\end{cases} } \\\\ \\end{aligned} } {I0 − Ic + ∇xI0⋅Vx + ∇yI0⋅Vy + ε = 0I1 − Ic − ∇xI1⋅Vx − ∇yI1⋅Vy + ε = 0 未知量有 IcI_cIc 和 (Vx, Vy)(V_x,\\ V_y)(Vx, Vy) 三个,是无法单独依赖上方的方程组,只通过两个约束获取的。 不过,块的光流 仍然 是满足 LK 约束,而 LK 法提供了对光流相对独立的预估,配合背景有: {v⃗A=[+Vx+Vy]=(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0v⃗B=[−Vx−Vy]=(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1 {\\displaystyle \\begin{aligned} 
&{ \\begin{cases} \\vec{v}_A = \\begin{bmatrix} +V_x \\\\ +V_y \\end{bmatrix} = ({M_{c0}}^T \\cdot M_{c0})^{-1} \\cdot {M_{c0}}^T \\cdot M_{t0} \\\\ \\vec{v}_B = \\begin{bmatrix} -V_x \\\\ -V_y \\end{bmatrix} = ({M_{c1}}^T \\cdot M_{c1})^{-1} \\cdot {M_{c1}}^T \\cdot M_{t1} \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎨⎪⎪⎧v⃗A=[+Vx+Vy]=(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0v⃗B=[−Vx−Vy]=(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1 即: [VxVy]=12[(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0+(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1]=[(Mc0T⋅Mc0)−1⋅Mc0T2⋅Mt0+(Mc1T⋅Mc1)−1⋅Mc1T2⋅Mt1] {\\displaystyle \\begin{aligned} \\begin{bmatrix} V_x \\\\ V_y \\end{bmatrix} &= \\tfrac{1}{2}[({M_{c0}}^T \\cdot M_{c0})^{-1} \\cdot {M_{c0}}^T \\cdot M_{t0} + ({M_{c1}}^T \\cdot M_{c1})^{-1} \\cdot {M_{c1}}^T \\cdot M_{t1}] \\\\ &= [\\tfrac{({M_{c0}}^T \\cdot M_{c0})^{-1} \\cdot {M_{c0}}^T}{2} \\cdot M_{t0} + \\tfrac{({M_{c1}}^T \\cdot M_{c1})^{-1} \\cdot {M_{c1}}^T}{2} \\cdot M_{t1}] \\\\ \\end{aligned} } [VxVy]=21[(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0+(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1]=[2(Mc0T⋅Mc0)−1⋅Mc0T⋅Mt0+2(Mc1T⋅Mc1)−1⋅Mc1T⋅Mt1] 而同理于时域梯度的差值近似。对于分块 mmm 范围内像素点 p=(x, y)p = (x,\\ y)p=(x, y) 的空域灰度梯度,也可近似换算为: {∇xI0 = I0(x+1) − I0(x−1)2∇yI0 = I0(y+1) − I0(y−1)2∇xI1 = I1(x+1) − I1(x−1)2∇yI1 = I1(y+1) − I1(y−1)2⇒{∇xI0+∇xI1 = Δavg(Ix)=ΔIx¯∇yI0+∇yI1 = Δavg(Iy)=ΔIy¯∇xI0−∇xI1 = avg(ΔIx)=ΔIx¯∇xI0−∇xI1 = avg(ΔIy)=ΔIy¯ {\\displaystyle \\begin{aligned} &{ \\begin{cases} \\nabla_x I_0 \\ =\\ \\frac{I_0(x+1) \\ -\\ I_0(x-1)}{2} \\\\ \\nabla_y I_0 \\ =\\ \\frac{I_0(y+1) \\ -\\ I_0(y-1)}{2} \\\\ \\nabla_x I_1 \\ =\\ \\frac{I_1(x+1) \\ -\\ I_1(x-1)}{2} \\\\ \\nabla_y I_1 \\ =\\ \\frac{I_1(y+1) \\ -\\ I_1(y-1)}{2} \\end{cases} } \\Rightarrow { \\begin{cases} \\nabla_x I_0 + \\nabla_x I_1 \\ =\\ \\Delta avg(I_x) = \\Delta \\bar{I_x} \\\\ \\nabla_y I_0 + \\nabla_y I_1 \\ =\\ \\Delta avg(I_y) = \\Delta \\bar{I_y} \\\\ \\nabla_x I_0 - \\nabla_x I_1 \\ =\\ avg(\\Delta I_x) = \\bar{\\Delta I_x} \\\\ \\nabla_x I_0 - \\nabla_x I_1 \\ =\\ avg(\\Delta I_y) = \\bar{\\Delta I_y} \\end{cases} } \\\\ \\end{aligned} } ⎩⎪⎪⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎪⎪⎧∇xI0 = 2I0(x+1) − I0(x−1)∇yI0 = 2I0(y+1) − I0(y−1)∇xI1 = 2I1(x+1) − I1(x−1)∇yI1 = 2I1(y+1) − I1(y−1)⇒⎩⎪⎪⎪⎨⎪⎪⎪⎧∇xI0+∇xI1 = Δavg(Ix)=ΔIx¯∇yI0+∇yI1 = Δavg(Iy)=ΔIy¯∇xI0−∇xI1 = avg(ΔIx)=ΔIx¯∇xI0−∇xI1 = avg(ΔIy)=ΔIy¯ 代入样本梯度到 Mc=[∑∇xIm, ∑∇yIm]M_c =\\begin{bmatrix} \\sum \\nabla_x I_m , \\ \\sum \\nabla_y I_m \\end{bmatrix}Mc=[∑∇xIm, ∑∇yIm] ,Mt=[∑−∇tIm]M_t =\\begin{bmatrix} \\sum -\\nabla_t I_m \\end{bmatrix}Mt=[∑−∇tIm] ,展开可得 (Vx, Vy)(V_x,\\ V_y)(Vx, Vy) 取值: [VxVy]=[∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔI)−∑(ΔIx¯ΔI)⋅∑ΔIy¯2∑ΔIx¯2⋅∑ΔIy¯2−∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔIx¯)∑(ΔIx¯ΔIy¯)⋅∑(ΔIx¯ΔI)−∑(ΔIy¯ΔI)⋅∑ΔIx¯2∑ΔIx¯2⋅∑ΔIy¯2−∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔIx¯)] {\\displaystyle \\begin{aligned} \\begin{bmatrix} V_x \\\\ V_y \\end{bmatrix} &= \\begin{bmatrix} \\frac{\\sum (\\Delta \\bar{I_x} \\Delta \\bar{I_y} ) \\cdot \\sum (\\Delta \\bar{I_y} \\Delta I) - \\sum (\\Delta \\bar{I_x} \\Delta I ) \\cdot \\sum \\Delta \\bar{I_y}^2} {\\sum \\Delta \\bar{I_x}^2 \\cdot \\sum \\Delta \\bar{I_y}^2 - \\sum (\\Delta \\bar{I_x} \\Delta \\bar{I_y}) \\cdot \\sum (\\Delta \\bar{I_y} \\Delta \\bar{I_x}) } \\\\ \\frac{\\sum (\\Delta \\bar{I_x} \\Delta \\bar{I_y} ) \\cdot \\sum (\\Delta \\bar{I_x} \\Delta I) - \\sum (\\Delta \\bar{I_y} \\Delta I ) \\cdot \\sum \\Delta \\bar{I_x}^2} {\\sum \\Delta \\bar{I_x}^2 \\cdot \\sum \\Delta \\bar{I_y}^2 - \\sum (\\Delta \\bar{I_x} \\Delta \\bar{I_y}) \\cdot \\sum (\\Delta \\bar{I_y} \\Delta \\bar{I_x}) } \\end{bmatrix} \\\\ \\end{aligned} } 
[VxVy]=⎣⎢⎢⎡∑ΔIx¯2⋅∑ΔIy¯2−∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔIx¯)∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔI)−∑(ΔIx¯ΔI)⋅∑ΔIy¯2∑ΔIx¯2⋅∑ΔIy¯2−∑(ΔIx¯ΔIy¯)⋅∑(ΔIy¯ΔIx¯)∑(ΔIx¯ΔIy¯)⋅∑(ΔIx¯ΔI)−∑(ΔIy¯ΔI)⋅∑ΔIx¯2⎦⎥⎥⎤ 现在,只有 IcI_cIc 是未知的了,而可取范围在分块 mmm 之内时,对于任意块内点 Ic=IpI_c = I_pIc=Ip 。 代入原方程组即可,有: Ic = I0 + I1 + (∇xI0−∇xI1)⋅Vx + (∇yI0−∇yI1)⋅Vy2 + ε= I0 + I1 + ΔIx¯⋅Vx + ΔIy¯⋅Vy2 + εIc =Ipp(x, y)∈m {\\displaystyle \\begin{aligned} I_c \\ &=\\frac{\\ I_0 \\ +\\ I_1 \\ +\\ (\\nabla_x I_0 - \\nabla_x I_1) \\cdot V_x \\ +\\ (\\nabla_y I_0 - \\nabla_y I_1) \\cdot V_y}{2} \\ +\\ \\varepsilon \\\\ &=\\frac{\\ I_0 \\ +\\ I_1 \\ +\\ \\bar{\\Delta I_x} \\cdot V_x \\ +\\ \\bar{\\Delta I_y} \\cdot V_y}{2} \\ +\\ \\varepsilon \\\\ I_c \\ &=I_p \\quad \\quad p(x,\\ y) \\in m \\end{aligned} } Ic Ic =2 I0 + I1 + (∇xI0−∇xI1)⋅Vx + (∇yI0−∇yI1)⋅Vy + ε=2 I0 + I1 + ΔIx¯⋅Vx + ΔIy¯⋅Vy + ε=Ipp(x, y)∈m 式子中的 ε\\varepsilonε 为误差修正值,一般取 ε=0.5\\varepsilon = 0.5ε=0.5 。 如是,双向光流预测的基本原理,数理推导佐证完毕。 可见,BDOF 的算力消耗重点是在有关 (Vx, Vy)(V_x,\\ V_y)(Vx, Vy) 的求解上。所以,工程化会采用小于当前分块的子块大小做卷积核,使用近似求解快速计算。当然也可以在满足精度要求下,通过模型化解决,思路类似于光流补帧的数据预处理。而由于涉及到规格中的不少工程处理技巧,有关 BDOF 标准化的部分,我们留到 H.266 规格详解时再行展开。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_4_3.html":{"url":"Chapter_3/Language/cn/Docs_3_4_3.html","title":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow])","keywords":"","body":"3.4.3 光流仿射修正(PROF [Affine Prediction Refinement With Optical Flow]) BDOF 技术的引入,让音视频编解码工程能够进一步提高传输过程的数据压缩比。但由于仍然依托于分块和分块内小块(也是前文的梯度卷积核),当出现块的偏移、扭转、错切等情况时,像素位置的微小变动则会被此类变化成倍的放大误差。所以,还需要 适当的修正。 我们知道,音视频编解码规格(如 H.264、H.265、H.266)中,分块的子块也是存在类似的情况的。我们为了处理问题,采用的是 基于控制点运动矢量(CMVP [Control Point Motion Vector])的子块仿射运动补偿(AMC [Affine Motion Compensation]),并在 H.266 中根据目标子块大小衍生出了 高级运动矢量预测(AMVP [Advanced Motion Vector Prediction])的仿射模式,和 混合预测(Merge)的仿射模式。通俗理解,即通过相邻帧的相同块内子块的仿射变换,来映射原子块区域的对应关系。 但子块控制点的运动是远大于像素运动的,那么同样的情况发生在更小的尺度上,是否还能达到效果呢? 
答案是可以的。 在 LK 条件下局部光流趋同性,决定了像素光流的差分补偿对分块只需要单次计算即可。那么对于子块来说,只用在原有仿射运动补偿(AMC)的基础上,对块内像素额外附加 光流补偿值(OFC [Optical Flow Compensation]) 即可。 记分块 mmm 有,中心点 KxyK_{xy}Kxy 在全图的绝对像素位置 Kxy=(Kx, Ky)K_{xy} = (K_x,\\ K_y)Kxy=(Kx, Ky) 的子块 kkk 。存在子块内相对位置为 pij=(i, j)p_{ij} = (i,\\ j)pij=(i, j) 的像素点 pijp_{ij}pij 。由于子块内是不存在时差的,即时间残差 ∇tI=0\\nabla_t I = 0∇tI=0 存在,则记 pijp_{ij}pij 的子块内光流补偿值(OFC)是 ΔIp\\Delta I_pΔIp ,根据基础光流公式就有: ΔIp=∇pI⋅Δv⃗p + ∇tI=∇pI⋅Δv⃗p {\\displaystyle \\begin{aligned} \\Delta I_p = \\nabla_p I \\cdot \\Delta \\vec{v}_p \\ +\\ \\nabla_t I = \\nabla_p I \\cdot \\Delta \\vec{v}_p \\\\ \\end{aligned} } ΔIp=∇pI⋅Δv⃗p + ∇tI=∇pI⋅Δv⃗p 其中, Δv⃗p=(ΔVi, ΔVj)\\Delta \\vec{v}_p = (\\Delta V_i,\\ \\Delta V_j)Δv⃗p=(ΔVi, ΔVj) 即是点 pijp_{ij}pij 在子块 kkk 内的光流偏移,这个值相对子块内部中心 KijK_{ij}Kij ,在分块 mmm 内子块无相对变化情况时,是个恒定值,有: ΔKp=pij−Kij=(Δi, Δj)=Δij {\\displaystyle \\begin{aligned} \\Delta K_p = p_{ij}-K_{ij} = (\\Delta i,\\ \\Delta j) = \\Delta_{ij} \\\\ \\end{aligned} } ΔKp=pij−Kij=(Δi, Δj)=Δij 而根据仿射变换特点,当分块 mmm 发生仿射变换,其每个子块 kkk 的像素点内部光流偏移矢量,也会发生 等效于块中心运动补偿 的仿射变换。 因此,假设分块 mmm 块运动采用左上、右上、左下的三点定位(即标准三控制点),记帧 R0R_0R0 到帧 R1R_1R1 有块三点定位运动矢量分别为 MV⃗0\\vec{MV}_0MV⃗0 、 MV⃗1\\vec{MV}_1MV⃗1 、 MV⃗2\\vec{MV}_2MV⃗2 如下: 图 3-18 PROF 子块光流与块运动矢量示意图 [30] 假设分块 mmm 大小为 Mw×MhM_w \\times M_hMw×Mh ,则有块从帧 R0R_0R0 到帧 R1R_1R1 的位姿仿射变换矩阵 AAA 使得: Δv⃗p=A⋅ΔKp=A⋅Δij=[MV1,x−MV0,xMw,MV2,x−MV0,xMhMV1,y−MV0,yMw,MV2,y−MV0,yMh]⋅[ΔiΔj] {\\displaystyle \\begin{aligned} \\Delta \\vec{v}_p &= A \\cdot \\Delta K_p = A \\cdot \\Delta_{ij} \\\\ &= \\begin{bmatrix} &\\frac{MV_{1,x} - MV_{0,x}}{M_w} &, \\quad \\frac{MV_{2,x} - MV_{0,x}}{M_h} \\\\ &\\frac{MV_{1,y} - MV_{0,y}}{M_w} &, \\quad \\frac{MV_{2,y} - MV_{0,y}}{M_h} \\end{bmatrix} \\cdot \\begin{bmatrix} \\Delta i \\\\ \\Delta j \\end{bmatrix} \\end{aligned} } Δv⃗p=A⋅ΔKp=A⋅Δij=⎣⎢⎡MwMV1,x−MV0,xMwMV1,y−MV0,y,MhMV2,x−MV0,x,MhMV2,y−MV0,y⎦⎥⎤⋅[ΔiΔj] 而 ∇pI\\nabla_p I∇pI 可由子块 LK 计算等效获取,有: Ip(i, j)=Ip(x+Δi, y+Δj)∇pI(i, j)=(∇iIp, ∇jIp)={∇iIp = Ip(i+1) − Ip(i−1)2∇jIp = Ip(j+1) − Ip(j−1)2 {\\displaystyle \\begin{aligned} I_p(i,\\ j) &= I_p(x+\\Delta i,\\ y+ \\Delta j) \\\\ \\nabla_p I(i,\\ j) &= (\\nabla_iI_p,\\ \\nabla_jI_p) = { \\begin{cases} \\nabla_i I_p \\ =\\ \\frac{I_p(i+1) \\ -\\ I_p(i-1)}{2} \\\\ \\nabla_j I_p \\ =\\ \\frac{I_p(j+1) \\ -\\ I_p(j-1)}{2} \\end{cases} } \\\\ \\end{aligned} } Ip(i, j)∇pI(i, j)=Ip(x+Δi, y+Δj)=(∇iIp, ∇jIp)=⎩⎪⎨⎪⎧∇iIp = 2Ip(i+1) − Ip(i−1)∇jIp = 2Ip(j+1) − Ip(j−1) 所以,子块内像素的最终亮度 I^p\\hat{I}_pI^p 取值为: I^p=Ip(x, y) + ΔIp(i, j)=∇pI(i, j)⋅Δv⃗p≈Ip(x, y) + ∇iIp⋅ΔVi + ∇jIp⋅ΔVj {\\displaystyle \\begin{aligned} \\hat{I}_p &= I_p (x,\\ y) \\ +\\ \\Delta I_p (i,\\ j) = \\nabla_p I (i,\\ j) \\cdot \\Delta \\vec{v}_p \\\\ &\\approx I_p (x,\\ y) \\ +\\ \\nabla_i I_p \\cdot \\Delta V_i \\ +\\ \\nabla_j I_p \\cdot \\Delta V_j \\\\ \\end{aligned} } I^p=Ip(x, y) + ΔIp(i, j)=∇pI(i, j)⋅Δv⃗p≈Ip(x, y) + ∇iIp⋅ΔVi + ∇jIp⋅ΔVj 上式中的 IpI_pIp 即像素点 pij=Kxy+Δij=(x+Δi, y+Δj)p_{ij} = K_{xy} + \\Delta_{ij} = (x+\\Delta i,\\ y+ \\Delta j)pij=Kxy+Δij=(x+Δi, y+Δj) 的分块 mmm 内实际亮度预测值,可通过 BDOF 求得,也可以采用其他传统块推理方式获取。根据 PROF 的修正,BDOF 推算所得像素点的亮度将更为准确,进而在 提高压缩程度(以子块为最小压缩单位的块内冗余压缩)的同时,保证了灰度(亮度值)数据还原效果。 以上我们介绍的,就是光流法在音视频编解码过程中较为粗浅的基本应用了。这些数学工具已经通过标准化,被嵌入到了 H.266/VVC 规格中,并在同期其他竞争规格(如 AV1)的最新标准里逐步推广。而光流法的引入,无疑进一步缩减了传统音视频和机器学习之间的工程鸿沟。在可预见的未来,人工智能模型流水线和编解码器必然会有更深入的融合,在技术层面形成一套全新的顶层设计。这种趋势,作为音视频开发者,是不应该忽视的。 回到当前话题,在依靠光流法处理了传输格式的亮度狭时空域冗余数据后,如果能够在纯空域上,同时对随亮度传输的色度信息进行一定程度的压缩,就能更好的降低数据成本,并提升色彩还原程度,支撑更广的色域选择了。 这就是色度缩放亮度映射技术的由来。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 
12:11:10 "},"Chapter_3/Language/cn/Docs_3_4_4.html":{"url":"Chapter_3/Language/cn/Docs_3_4_4.html","title":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling])","keywords":"","body":"3.4.4 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling]) 色度缩放亮度映射(LMCS [Luma Mapping with Chroma Scaling]) 技术,是一类纯粹的空域数据处理技术,即本身不涉及时域相关性,直接针对像素原值的冗余分离手段。传统音视频编解码中,包括大部分帧内预测工具、帧内编码条带分块、色度重建等,严格来说都属于这种类型。 LMCS 最早引入自 H.266/VVC 标准中,用于编解码环路滤波阶段 [31] 。通过建立从传输格式对应存储位深(Bit Depth),到色度和亮度实际可取值范围间的线性转换放缩,来提高针对 标准动态范围(SDR [Standard Dynamic Range]) 和 高动态范围(HDR [High Dynamic Range]) 视频的支持,提升编解码性能。 这是一种基于物理存储方式和实际规格约束的差异,以直接操作空域到数据的映射关系,来间接降低信息熵的一种技术(有别于熵编码技术族对存储的信息熵直接衰减)。 LMCS 由两个组件构成,分别是:分段线性(Piecewise Linear)亮度映射(LM [Luma Mapping]) 和 依赖亮度(Luma-Dependent )色度残差缩放(CRS [Chroma Residue Scaling]),简称为 亮度映射(LM) 和 色度缩放(CS)。前者精简格式,后者压缩数据。 分段线性亮度映射 分段线性亮度映射,即 亮度映射(LM) 的基本目的,是为了方便规格支持的传输格式,在数据存储格式(Data Format)和格式空间(Format Space)原值之间的非对称相互转换。 例如,H.266 中采用 ITU-R BT.2100 的色彩转换规格标准,并兼容 ITU-R BT.709 等其他的的历史转换规格。其中,ITU-R BT.2100 提供了 10-bit YUV 存储格式,而老规格中亦然也有一系列 8-bit YUV 存储格式。同时,YUV 本身亦是具有两种基本有效范围,即 狭隘区间(Narrow Range) 和 完整区间(Full Range)。这些不同的 YUV 格式和区间,虽然各自的原色空间色域表示存在范围差异,但由于传输格式都采用同一套 CIE YUV 色彩空间衡量,因此在颜色的传输格式取值上是互通的,差异只在于存储格式的存储范围上,即两个线性区间的映射。 所以,理论上可以 只采用一个转换标准的传输格式,就能通过数据存储范围的线性转换,实现对所有规格下标准的兼容。 假设目标 YUV 规格亮度存储值为 IoutI_{out}Iout 存储取值范围为 Iout∈[Minout, Maxout]I_{out} \\in [Min_{out},\\ Max_{out}]Iout∈[Minout, Maxout] ,当前输入 YUV 规格亮度存储值为 IinI_{in}Iin 存储取值范围为 Iin∈[Minin, Maxin]I_{in} \\in [Min_{in},\\ Max_{in}]Iin∈[Minin, Maxin] ,则: Iout=Maxout−MinoutMaxin−Minin⋅(Iin−Minin)+Minin {\\displaystyle \\begin{aligned} I_{out} &= \\frac{Max_{out} - Min_{out}}{Max_{in} - Min_{in}} \\cdot (I_{in} - Min_{in}) + Min_{in} \\\\ \\end{aligned} } Iout=Maxin−MininMaxout−Minout⋅(Iin−Minin)+Minin 不过在实际使用过程中,因为均色问题(详见第二章)仍在 CIE YUV 标准空间上存在,亮度值本身在整个色域范围并不均匀,使得亮度值(灰度值)转换到存储值后,存储值也保留了这种性质。这在存储格式和格式空间一致的情况下,由于互为逆变换的缘故,并不存在转换误差。但当两者不一致时,非对称转换非互逆,则会产生误差,并随传输格式的原色格式还原而扩大。如果我们为了保证完美映射,则需要引入复杂的计算,不利于像素通道级别的处理过程。 这个问题,亮度映射提出以 分段牛顿法对亮度存储值取值范围处理,即采用分段线性映射来减小误差水平到可接受的范围,并降低算力消耗。 我们一般将原有亮度值对应的可取范围称为原区域,而在此之上分割得到的每个子段,被称为 子区域。记 原区域码字长(Code Words) 为 CWtotalCW_{total}CWtotal 个,而位于索引 i∈Z[0, N−1]i \\in \\mathbb{Z}[0 ,\\ N - 1]i∈Z[0, N−1] 位置的子区域 CWiCW_{i}CWi 的码字长为 CWbin[i]CW_{bin}[i]CWbin[i] 个,均值为 avg(CWbin)avg(CW_{bin})avg(CWbin) 。 码字(Code Word) 即来自哈夫曼编码数据传输中,所指代的有意义代表值,此处则相当于一个范围内有效的灰度值。则: CWbin[i]=round(CWtotaliend−istart+1)=round(CWtotalindexavail) {\\displaystyle \\begin{aligned} CW_{bin}[i] &= round \\begin{pmatrix} \\frac{CW_{total}}{i_{end} - i_{start} + 1} \\end{pmatrix} = round \\begin{pmatrix} \\frac{CW_{total}}{index_{avail}} \\end{pmatrix} \\\\ \\end{aligned} } CWbin[i]=round(iend−istart+1CWtotal)=round(indexavailCWtotal) 其中, iendi_{end}iend 、 istarti_{start}istart 是实际可用于存放数据子区域上下限的索引的,而 indexavailindex_{avail}indexavail 即为有效索引的数目。 注意分段码字长和存储格式位深(Bit Depth)并无强相关。若非要建立联系,则两者的关联只相关于取值范围。取 存储格式位长 为 DFbitsDF_{bits}DFbits 位(bit),保护位等效(非整)占用 indexsafeindex_{safe}indexsafe 个索引数目,有: avg(CWbin)=2DFbitsN=2DFbitsindexavail+indexsafe {\\displaystyle \\begin{aligned} avg(CW_{bin}) &= \\frac{2^{DF_{bits}}}{N} = \\frac{2^{DF_{bits}}}{index_{avail} +index_{safe}}\\\\ \\end{aligned} } avg(CWbin)=N2DFbits=indexavail+indexsafe2DFbits 例如,当采用 狭隘区间的 10-bit YUV 存储格式时,由于高低电平保护区域的存在,亮度值能够取值的范围其实是 I∈[64, 940]I \\in [64,\\ 940]I∈[64, 940] ,而等效到亮度可用的子区域索引上就相当于只有 Z[1, 14]\\mathbb{Z}[1,\\ 14]Z[1, 14] 可用。那么,就有 avg(CWbin)=64avg(CW_{bin}) = 64avg(CWbin)=64 ,子区域划分如图: 图 3-19 位深 10-bit 亮度映射码字子区域分段示意图(无修正) 则原线性转换就有分段表示: Iout=Maxout[i]−Minout[i]Maxin[i]−Minin[i]⋅(Iin−Minin[i])+Minin[i],i∈Z[istart, 
iend] {\\displaystyle \\begin{aligned} I_{out} &= \\frac{Max_{out}[i] - Min_{out}[i]}{Max_{in}[i] - Min_{in}[i]} \\cdot (I_{in} - Min_{in}[i]) + Min_{in}[i] \\quad , i \\in \\mathbb{Z}[i_{start} ,\\ i_{end}]\\\\ \\end{aligned} } Iout=Maxin[i]−Minin[i]Maxout[i]−Minout[i]⋅(Iin−Minin[i])+Minin[i],i∈Z[istart, iend] 即,输入和输出的一一对应分段映射。 现在,基本的分段构建完毕,在数据还原程度上有了可行的保证。但是,这一系列操作除了提供兼容性便利外,在数据量上却是无衰减的,所以 对空域冗余的压缩没有太大的帮助。 因此,具体采用过程中还要根据情况,从码字方面进行数据优化。 依赖亮度色度残差缩放 依赖亮度色度残差缩放,即 色度缩放(CS),顾名思义需要依靠亮度码字子区域划分后的分片进行放缩。不过这种放缩和亮度映射不太一样的一点在于, 它甚至并不和物理意义浅关联,而是存粹作为数据上的处理,来进行的数量级上的放缩。当然,色度本身是有意义的,这点不能混淆。 色度缩放依旧采用了码字分段处理,为了匹配亮度值对应码字区域的变化强度,分段即与亮度取值范围子区域 CWiCW_{i}CWi 码字的划分一致。以此计算分段内常量的 色度缩放因子(Chrome Scale Factor),来对 CWiCW_{i}CWi 内色度进行统一处理。 记 CWiCW_{i}CWi 子区域,编码阶段 色度缩放因子(Chrome Scaling Factor)为 Senc[i]S_{enc}[i]Senc[i] ,解码阶段 色度缩放因子为 Sdec[i]S_{dec}[i]Sdec[i] ,显然 Sdec[i]=Senc[i]−1S_{dec}[i] = {S_{enc}[i]}^{-1}Sdec[i]=Senc[i]−1 。若记区域内对应某采样(像素点)亮度 IinI_{in}Iin 的色度值(如采用 YUV 则是其 UV 分量,独立计算)为 CinC_{in}Cin ,而输出存储值(传输值)亮度 IoutI_{out}Iout 的色度值为 CoutC_{out}Cout ,则: {Cout=Cin ⋅Senc[i]Cin=Cout⋅Sdec[i] {\\displaystyle \\begin{aligned} \\begin{cases} C_{out} &= C_{in} \\ \\cdot S_{enc}[i] \\\\ C_{in} &= C_{out} \\cdot S_{dec}[i] \\end{cases} \\\\ \\end{aligned} } {CoutCin=Cin ⋅Senc[i]=Cout⋅Sdec[i] 而 Senc[i]S_{enc}[i]Senc[i] 和亮度保证相同的放缩比,有: Senc[i]=Maxout[i]−Minout[i]+ΔCRSMaxin[i]−Minin[i]=Sdec[i]−1 {\\displaystyle \\begin{aligned} S_{enc}[i] &= \\frac{Max_{out}[i] - Min_{out}[i] + \\Delta CRS}{Max_{in}[i] - Min_{in}[i]} = {S_{dec}[i]}^{-1} \\\\ \\end{aligned} } Senc[i]=Maxin[i]−Minin[i]Maxout[i]−Minout[i]+ΔCRS=Sdec[i]−1 其中, ΔCSR\\Delta CSRΔCSR 即为色度残差修正值,这个量为一个查表或其他方式处理的外部传参。虽然理论上, ΔCSR\\Delta CSRΔCSR 可以通过在 LMCS 过程中,以计算当前帧分块局部色度残差,或全局残差均值来代替,但这种做法消耗太多不必要算力而不太可取。另外,考虑到 ΔCSR\\Delta CSRΔCSR 在编解码中是个相对常用的概念,可以通过其他模块或方法解决,因此一般 不会在 LMCS 里进行处理。 此处我们认为 ΔCSR\\Delta CSRΔCSR 为一个色度放缩修正常量即可。可见色度缩放因子在子区域 CWiCW_{i}CWi 确认的情况下,是一个 固定值。 现在,LMCS 的理论准备就绪了。我们来看这种纯粹的规格技术是怎么运用的。即,子区域码字修正过程。 LMCS 技术在 SDR 和 HDR-HLG 格式中的应用 我们在对图片进行信息分离和提取时了解到,从频域来看,光亮度(灰度值)变化较大,且对亮度精度要求高的部分,一般在低频轮廓区域出现,占用整体数据量比例较小。而光亮度差异较小,变化平滑,且精度要求低的部分,往往是高频区域,占有大量的数据。此时,如果从光亮度数据,即空域角度出发,低频区域内的 局部亮度方差(Local Spatial Variance) 和高频区域相比,与 全局平均空域亮度方差(Global Average Spatial Variance) 的平均平方误差(MSE [Mean-Square Error])则会更大。 通过这一点,我们能够可以在一定程度上,只通过空域亮度数据,就确认是否是低频或高频区域,从而为其分配更少或更多的码字。使得对精度要求高的低频分割更精细,码字分片信息密度更高。而高频则更粗粒度,码字分片信息密度更低。提高精度并减少不必要的数据占用。 那么用于统计局部方差的样本区域该怎么选择呢?在 H.266/VVC 标准的执行委员会联合视频探索小组(JVET [Joint Video Exploration Team]) 推荐的 VVC 验证模型(VTM [VVC Test Model])官方工程实践里,仍然采用了基本卷积核(此处即代指正方形的无权重采样窗口),这种便于 GPU 加速改造的方式来进行中心点周边一定区域的关联性采样。 记 局部方差采样核(Local Variance Kernel) 为 LVKpLVK_{p}LVKp ,简称 方差核,中心为 p=(x, y)p = (x,\\ y)p=(x, y) ,窗口为 K×KK \\times KK×K 大小。取当前帧画面大小为 W×HW \\times HW×H ,有经验取值: K=floor(min(W, H)240)⋅2+1 {\\displaystyle \\begin{aligned} K &= floor \\begin{pmatrix} \\frac{min(W,\\ H)}{240} \\end{pmatrix} \\cdot 2 + 1 \\\\ \\end{aligned} } K=floor(240min(W, H))⋅2+1 则, LVKpLVK_{p}LVKp 对应核心点 ppp 的局部亮度方差 VarpVar_{p}Varp 为: Varp=1K2∑(Ik−Ip)2 {\\displaystyle \\begin{aligned} Var_p &= \\frac{1}{K^2} \\sum (I_k - I_p)^2 \\\\ \\end{aligned} } Varp=K21∑(Ik−Ip)2 于是,只要 确定当前各个分片的平均样本均值情况,就可以进行修正了。 另一个耗时位置在于亮度均方误(MSE)与全局差值比的计算,一个比较鲁棒的实现是,通过求取落于当前码字分段内,包含样本的 平均对数方差(Average Log Variance) 来代替处理,记为 Varavg[i]Var_{avg}[i]Varavg[i] ,有: Varavg[i]=∑log(Varp+1.0)Count[i] {\\displaystyle \\begin{aligned} Var_{avg}[i] &= \\frac{\\sum log(Var_p + 1.0)}{Count[i]} \\\\ \\end{aligned} } Varavg[i]=Count[i]∑log(Varp+1.0) 其中, Count[i]Count[i]Count[i] 为当前码字分段所包含的样本(即亮度落于区段内的像素点)总数。 而我们需要统一衡量所有码字分片的情况,因此需要归一化处理。记归一化后对应分片的平均对数方差为 
Norm[i]Norm[i]Norm[i] ,则: Norm[i]=Varavg[i]⋅N∑Varavg[i] {\\displaystyle \\begin{aligned} Norm[i] &= Var_{avg}[i] \\cdot \\frac{N}{\\sum Var_{avg}[i]} \\\\ \\end{aligned} } Norm[i]=Varavg[i]⋅∑Varavg[i]N 至此,我们即可根据归一化的 Norm[i]Norm[i]Norm[i] 取值,开展对当前帧的码字分片进行修正的工作了。取修正补偿为 Δ1[i]\\Delta_1[i]Δ1[i] 和 Δ2[i]\\Delta_2[i]Δ2[i] ,记码字分段子区域 CWiCW_{i}CWi 的包含的样本,占总样本比例为 Hist[i]Hist[i]Hist[i] ,且强制 Hist[i]∈[0, 0.4]Hist[i] \\in [0,\\ 0.4]Hist[i]∈[0, 0.4] 经验范围( 避免失衡 ),有: Hist[i]=max(min(0.0, Count[i]∑Count[i]), 0.4)Δ={Δ1[i]=round(10⋅Hist[i])Δ2[i]=round(20⋅Hist[i])∈Z {\\displaystyle \\begin{aligned} Hist[i] &= max(min(0.0,\\ \\frac{Count[i]}{\\sum Count[i]}),\\ 0.4) \\\\ \\Delta = &\\begin{cases} \\Delta_1[i] &= round(10 \\cdot Hist[i]) \\\\ \\Delta_2[i] &= round(20 \\cdot Hist[i]) \\end{cases} \\quad \\in \\mathbb{Z} \\\\ \\end{aligned} } Hist[i]Δ==max(min(0.0, ∑Count[i]Count[i]), 0.4){Δ1[i]Δ2[i]=round(10⋅Hist[i])=round(20⋅Hist[i])∈Z 则最终修正后的码字长 CWbin^[i]\\hat{CW_{bin}}[i]CWbin^[i] 与原长 CWbin[i]CW_{bin}[i]CWbin[i] 的关系为: CWbin^[i]={CWbin[i],Norm[i]=1.0CWbin[i]+Δ1[i], 0.8≤Norm[i]0.9CWbin[i]+Δ2[i], 0.0≤Norm[i]0.8CWbin[i]−Δ1[i], 1.1≤Norm[i]1.2CWbin[i]−Δ2[i], 1.2≤Norm[i] {\\displaystyle \\begin{aligned} \\hat{CW_{bin}}[i] & = { \\begin{cases} CW_{bin}[i] \\quad &, Norm[i] = 1.0 \\\\ CW_{bin}[i] + \\Delta_1[i] \\quad &,\\ 0.8 \\le Norm[i] CWbin^[i]=⎩⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎧CWbin[i]CWbin[i]+Δ1[i]CWbin[i]+Δ2[i]CWbin[i]−Δ1[i]CWbin[i]−Δ2[i],Norm[i]=1.0, 0.8≤Norm[i]0.9, 0.0≤Norm[i]0.8, 1.1≤Norm[i]1.2, 1.2≤Norm[i] 以新分片码字长度 CWbin^[i]\\hat{CW_{bin}}[i]CWbin^[i] 更新子区域 CWiCW_{i}CWi 后,在将修正后的码字范围,代入色度自适应处理,就组成了最终修正标准(注意只有输出码字子区域需要修正),只展示编码阶段,解码取逆运算: CWin[i]∈[Minin[i], Maxin[i]]=[Maxin[i−1]+1, Minin[i]+CWbin[i]]CWout[i]∈[Minout[i], Maxout[i]]=[Maxout[i−1]+1, Minout[i]+CWbin^[i]]Colorout={Iout=Maxout[i]−Minout[i]Maxin[i]−Minin[i]⋅(Iin−Minin[i])+Minin[i]Cout=Cin ⋅Maxout[i]−Minout[i]+ΔCRSMaxin[i]−Minin[i],i∈Z[istart, iend] {\\displaystyle \\begin{aligned} CW_{in}[i] &\\in [Min_{in}[i],\\ Max_{in}[i]] = [Max_{in}[i-1]+1,\\ Min_{in}[i]+ CW_{bin}[i]] \\\\ CW_{out}[i] &\\in [Min_{out}[i],\\ Max_{out}[i]] = [Max_{out}[i-1]+1,\\ Min_{out}[i]+\\hat{CW_{bin}}[i]] \\\\ Color_{out} & = { \\begin{cases} I_{out} &= \\frac{Max_{out}[i] - Min_{out}[i]}{Max_{in}[i] - Min_{in}[i]} \\cdot (I_{in} - Min_{in}[i]) + Min_{in}[i] \\\\ C_{out} &= C_{in} \\ \\cdot \\frac{Max_{out}[i] - Min_{out}[i] + \\Delta CRS}{Max_{in}[i] - Min_{in}[i]} \\end{cases} } \\quad , i \\in \\mathbb{Z}[i_{start} ,\\ i_{end}] \\\\ \\end{aligned} } CWin[i]CWout[i]Colorout∈[Minin[i], Maxin[i]]=[Maxin[i−1]+1, Minin[i]+CWbin[i]]∈[Minout[i], Maxout[i]]=[Maxout[i−1]+1, Minout[i]+CWbin^[i]]=⎩⎪⎪⎨⎪⎪⎧IoutCout=Maxin[i]−Minin[i]Maxout[i]−Minout[i]⋅(Iin−Minin[i])+Minin[i]=Cin ⋅Maxin[i]−Minin[i]Maxout[i]−Minout[i]+ΔCRS,i∈Z[istart, iend] 两式结合,即是 LMCS 关于 SDR 和 HDR-HLG 格式的修正公式。 依旧选 狭隘区间的 10-bit YUV 存储格式 取均匀样本为例,修正后的结果如下: 图 3-20 位深 10-bit 亮度映射码字子区域分段示意图(修正后) 当然,这一套修正方式,是针对 SDR 和 HDR-HLG 格式采用的 峰值信噪比(PSNR [Peak Signal-to-Noise Ratio]) 指标考核方式进行的。对于采用 加权峰值信噪比(wPSNR [weighted Peak Signal-to-Noise Ratio]) 指标考核的 HDR-PQ 格式,则需要另外的处理流程。具体本书不再行展开,感兴趣可参阅原 H.266/VVC 的 LMCS 补充意见稿 [31] 。 可见偏重于工程规格依赖的技术,和基于现实观察的理论进行迁移的技术,在实践上还是有较大处理细节关注点上的差异的。前者更注重和具体规格设置的匹配(如 LMCS 等),因此相对局限。而后者则更在意规律性质的还原(如 HOG、BDOF 等),对比之下更为通用。同时,前者理论约束较多会比较繁琐,但实现起来的复杂程度和最终效果,却会有较大的波动,即可以非常简单,也可以充满策略。 毕竟对于规格而言,重要的在于规定与限制,以便统一实现。但具体实现的过程,就因设计和目标而异了。 相对于空域两者皆有的情况,频域冗余处理则更偏重依赖传统数学工具,来达成压缩效果。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 
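To tie the codeword-adjustment steps of this section together, below is a small C++ sketch organised around the empirical formulas above. It is only an illustrative draft under stated assumptions: the function name lmcs_adjust_codewords, the equal-width 16-bin split of a 10-bit luma range, and treating the unspecified 0.9 to 1.1 range of Norm[i] as "no change" are the author's assumptions for illustration, not the standard VTM implementation:

#include <vector>
#include <cmath>
#include <algorithm>
#include <cstdint>

// Estimate adjusted codeword lengths per luma bin from a single luma plane (illustrative sketch).
// luma       : row-major luma samples, assumed 10-bit, i.e. values in [0, 1023]
// num_bins   : number of codeword sub-ranges (equal-width split assumed here)
// cw_per_bin : unadjusted codewords per bin, e.g. avg(CW_bin) = 64 for 10-bit
std::vector<int> lmcs_adjust_codewords(const std::vector<uint16_t>& luma,
                                       int width, int height,
                                       int num_bins = 16, int cw_per_bin = 64) {
    // variance kernel size K = floor(min(W, H) / 240) * 2 + 1
    int K = (std::min(width, height) / 240) * 2 + 1;
    int half = K / 2;

    std::vector<double> log_var_sum(num_bins, 0.0);
    std::vector<int>    count(num_bins, 0);

    // local luma variance per pixel, accumulated into the bin its luma value falls in
    for (int y = half; y < height - half; ++y) {
        for (int x = half; x < width - half; ++x) {
            double center = luma[y * width + x];
            double var = 0.0;
            for (int dy = -half; dy <= half; ++dy)
                for (int dx = -half; dx <= half; ++dx) {
                    double d = luma[(y + dy) * width + (x + dx)] - center;
                    var += d * d;
                }
            var /= double(K) * K;

            int bin = std::min(int(center) / cw_per_bin, num_bins - 1);
            log_var_sum[bin] += std::log(var + 1.0);
            count[bin] += 1;
        }
    }

    // average log variance per bin, then normalise to Norm[i]
    std::vector<double> avg(num_bins, 0.0), norm(num_bins, 1.0);
    double avg_total = 0.0;
    int    cnt_total = 0;
    for (int i = 0; i < num_bins; ++i) {
        if (count[i] > 0) avg[i] = log_var_sum[i] / count[i];
        avg_total += avg[i];
        cnt_total += count[i];
    }
    for (int i = 0; i < num_bins; ++i)
        if (avg_total > 0.0) norm[i] = avg[i] * num_bins / avg_total;

    // Hist[i] clamped to [0, 0.4], then Delta_1 / Delta_2, then the banded adjustment table above
    std::vector<int> cw_hat(num_bins, cw_per_bin);
    for (int i = 0; i < num_bins; ++i) {
        double hist = cnt_total > 0 ? double(count[i]) / cnt_total : 0.0;
        hist = std::min(std::max(hist, 0.0), 0.4);
        int d1 = int(std::lround(10.0 * hist));
        int d2 = int(std::lround(20.0 * hist));

        if      (norm[i] < 0.8) cw_hat[i] = cw_per_bin + d2;
        else if (norm[i] < 0.9) cw_hat[i] = cw_per_bin + d1;
        else if (norm[i] < 1.1) cw_hat[i] = cw_per_bin;       // Norm close to 1.0: assumed unchanged
        else if (norm[i] < 1.2) cw_hat[i] = cw_per_bin - d1;
        else                    cw_hat[i] = cw_per_bin - d2;
    }
    return cw_hat;
}

The returned CW_bin_hat[i] values would then feed the piecewise luma-mapping and chroma-scaling formulas of this section.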
"},"Chapter_3/Language/cn/Docs_3_5.html":{"url":"Chapter_3/Language/cn/Docs_3_5.html","title":"3.5 频域冗余控制 - 基础变换编码","keywords":"","body":"3.4 空域冗余控制 - 基础光流算法与色度压缩 频域冗余目前仍然采用的是一些传统方式,近些年还没有太大的突破。而工程中对频域冗余的控制,确切的来说,是指从频域角度,对 残差信号(Residual Singnal) 进行频域分离后再 压缩所得数据,以富集变换信息,减小存储空间由于波动数据的不集中分布,而产生存储冗余的过程。 不过需要注意的是,频域冗余并不产生自被采样物理对象客观真实世界下的 原始信息(Original Infomation),而是来自不规律的数字信号的分散占用,导致的高熵存储。 因此,分离规律归类,提纯存储数据,并适当滤掉部分高频数据,才是降低频域冗余的关键。我们选择从帧数据的频域进行切入,即是利用空频分离(SFS)后,从频域能够直观体现数据密度的特点,来更好辅助应用中对数据进行的压缩处理。配合量化、熵编码等其他手段,降低原信息量级。 而这,便需要使用到傅立叶变换,及其衍生自同体系下的信息分离手段了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_5_1.html":{"url":"Chapter_3/Language/cn/Docs_3_5_1.html","title":"3.5.1 整数离散正余弦变换(DST/DCT)","keywords":"","body":"3.5.1 整数离散正余弦变换(IDST/IDCT) 整数离散正余弦变换(IDST/IDCT),顾名思义,就是将原本作用于浮点域的离散正余弦变换(DST/DCT),通过适当放缩量化到整数域进行。 在本章开始时,我们曾花了大量篇幅讲解信号分析的核心算法, 傅立叶变换(Fourier Transform),并简短的辨析了一维/二维离散傅立叶变换(1D/2D-DFT)。 回顾前文。有提到,如果取任意点 P⃗(x,y)\\vec{P}(x,y)P⃗(x,y) 可取 x∈[0, 1, ⋯, W]x \\in [0, \\ 1, \\ \\cdots , \\ W]x∈[0, 1, ⋯, W] , y∈[0, 1, ⋯, H]y \\in [0, \\ 1, \\ \\cdots , \\ H]y∈[0, 1, ⋯, H] ,只取整数位置。同时, u∈[−U2, ⋯, +U2]u \\in [-\\tfrac{U}{2}, \\ \\cdots , \\ +\\tfrac{U}{2}]u∈[−2U, ⋯, +2U] 、 v∈[−V2, ⋯, +V2]v \\in [-\\tfrac{V}{2}, \\ \\cdots , \\ +\\tfrac{V}{2}]v∈[−2V, ⋯, +2V] ,有离散 k⃗∈[k0⃗, k1⃗, ⋯, kn⃗]\\vec{k} \\in [\\vec{k_0}, \\ \\vec{k_1}, \\ \\cdots, \\ \\vec{k_{n}}]k⃗∈[k0⃗, k1⃗, ⋯, kn⃗] , n=UV=HWn = UV = HWn=UV=HW ,则: SDD: f^(u,v)=∑x=0W∑y=0Hf(x,y)⋅e−i(ux+vy)FDD: f(x,y)=1U⋅V∑u=−U/2+U/2∑v=−V/2+V/2f^(u,v)⋅Fω(x,y) {\\displaystyle \\begin{aligned} SDD: \\ \\ \\hat{f}(u,v) &= \\sum_{x = 0}^{W} \\sum_{y = 0}^{H} f(x,y) \\cdot e^{-i (ux+vy)} \\\\ FDD: \\ \\ f(x,y) &= \\frac{1}{U\\cdot V} \\sum_{u=-U/2}^{+U/2} \\sum_{v= -V/2}^{+V/2} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\\\ \\end{aligned} } SDD: f^(u,v)FDD: f(x,y)=x=0∑Wy=0∑Hf(x,y)⋅e−i(ux+vy)=U⋅V1u=−U/2∑+U/2v=−V/2∑+V/2f^(u,v)⋅Fω(x,y) 即由空域离散化(SDD)与频域离散化(FDD)共同构成空频离散化(SFD [Spacial Frequency Discrete])表达的 二维离散傅立叶(2D-DFT),如下所示: Fω=[Fk0⃗,Fk1⃗,⋯,Fkn⃗]f^(u,v)=∑x=0W∑y=0Hf(x,y)⋅e−i(ux+vy) ⇔ f(x,y)=1U⋅V∑u=−U/2+U/2∑v=−V/2+V/2f^(u,v)⋅Fω(x,y) {\\displaystyle \\begin{aligned} {\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\vec{k_0}},&{\\mathcal {F}}_{\\vec{k_1}},\\cdots,{\\mathcal {F}}_{\\vec{k_n}}] \\\\ \\hat{f}(u,v) = \\sum_{x = 0}^{W} \\sum_{y = 0}^{H} f(x,y) \\cdot e^{-i (ux+vy)} \\ \\ \\ \\ \\ \\Leftrightarrow & \\ \\ \\ \\ \\ f(x,y) = \\frac{1}{U\\cdot V} \\sum_{u=-U/2}^{+U/2} \\sum_{v= -V/2}^{+V/2} \\hat{f}(u,v) \\cdot {\\mathcal {F}}_{\\omega}(x, y) \\\\ \\end{aligned} } Fω=[Fk0⃗,f^(u,v)=x=0∑Wy=0∑Hf(x,y)⋅e−i(ux+vy) ⇔Fk1⃗,⋯,Fkn⃗] f(x,y)=U⋅V1u=−U/2∑+U/2v=−V/2∑+V/2f^(u,v)⋅Fω(x,y) 虽然当时,并没有约束复平面波 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 波矢 k⃗{\\vec{k}}k⃗ 的方向,即方向可以是平面内任意角度与大小。但对于周期(范围)确定情况下,构成傅立叶变换的基底函数族 Fω=[Fk0⃗, Fk1⃗,⋯,Fkn⃗]{\\mathcal {F}}_{\\omega} = [{\\mathcal {F}}_{\\vec{k_0}},\\ {\\mathcal {F}}_{\\vec{k_1}},\\cdots,{\\mathcal {F}}_{\\vec{k_n}}]Fω=[Fk0⃗, Fk1⃗,⋯,Fkn⃗] ,基底函数(即原函数拆解的目标平面波组)的选取,却是可以被 一定程度约束的。 如果我们约束,取周期 T=2πnT = 2 \\pi nT=2πn 的标准正余弦函数(Sine/Cosine),按照 四分之一周期 的步长 Step=π2Step = \\tfrac{\\pi}2{}Step=2π 偏移得到的 Fξ(x){\\mathcal {F}}_{\\xi}(x)Fξ(x) 和 Fη(y){\\mathcal {F}}_{\\eta}(y)Fη(y) 构成波矢 k⃗{\\vec{k}}k⃗ 。选取沿着 xxx 轴方向的一维波 Fξ(x){\\mathcal {F}}_{\\xi}(x)Fξ(x) 和沿着 yyy 轴方向的一维波 Fη(y){\\mathcal {F}}_{\\eta}(y)Fη(y) 组成的 16n16^n16n 个定向复平面波 Fω(x,y){\\mathcal {F}}_{\\omega}(x,y)Fω(x,y) 集合,为当前函数的基底函数族。 那么,我们就能够在 补齐周期数据 后,使用 快速傅立叶变换(FFT) 来求解了。 
但这样的做法,适用于分析,却并不适合冗余处理场景。 即使运用快速傅立叶变换,也仍然会有较大的算力消耗。且由于完整作用于任意数据源信号,所以不能保证基底函数族整体层面的规律性,从而无法提炼出统一的矩阵化算子。这让直接使用传统分析算法的方式,在 GPU 加速方面尽显劣势。 考虑到冗余压缩,并不要求保证数据帧完整不可分的输入,且精度也相对分析场景要求较低。如果能够适当的利用指数函数三角函数化,其本身的周期规律和标准化约束,建立基底整体的规律性,来契合傅立叶变换的性质。就能够在消减不必要参数(常量固定)并限定生效范围后,实现对离散傅立叶变化的常量化矩阵运算。建立卷积核,加速压缩过程。 因此,首选的出发点,就是 泛化离散正余弦变换(DST/DCT)到任何已知周期(范围)的数据信号源。 离散正余弦变换(DST/DCT)的泛化 沿用前文设定,记构成原信号函数 s(t)s(t)s(t) 的复指数函数 Sω(t){\\mathcal {S}}_{\\omega}(t)Sω(t) 有角频率(角速度)为 ωn=2πnT{\\omega_n} = \\tfrac{2\\pi n}{T}ωn=T2πn 。有傅立叶函数: s(t)=1N∑n=0Na^ω⋅cos(ωt)+i⋅b^ω⋅sin(ωt)a^ω=s^(−ω)+s^(ω) b^ω=1i⋅(s^(−ω)−s^(ω)) {\\displaystyle \\begin{aligned} s(t) &= \\frac{1}{N}\\sum_{n = 0}^{N} \\hat{a}_{\\omega} \\cdot cos(\\omega t) + i \\cdot \\hat{b}_{\\omega} \\cdot sin(\\omega t)\\\\ \\hat{a}_{\\omega} &= \\hat{s}(-\\omega) + \\hat{s}(\\omega) \\ \\ \\ \\ \\ \\hat{b}_{\\omega} = \\tfrac{1}{i} \\cdot (\\hat{s}(-\\omega)-\\hat{s}(\\omega)) \\\\ \\end{aligned} } s(t)a^ω=N1n=0∑Na^ω⋅cos(ωt)+i⋅b^ω⋅sin(ωt)=s^(−ω)+s^(ω) b^ω=i1⋅(s^(−ω)−s^(ω)) 按约束条件,信号函数波长 T=2πT = 2 \\piT=2π 做步长 Step=π2Step = \\tfrac{\\pi}{2}Step=2π 的可变 n∈[0, N−1]n \\in [0, \\ N - 1]n∈[0, N−1] 等分,使复指数函数 Sω(t)=Sω(n){\\mathcal {S}}_{\\omega}(t) = {\\mathcal {S}}_{\\omega}(n)Sω(t)=Sω(n) 。则存在 k∈[0, N−1]k \\in [0, \\ N-1]k∈[0, N−1] 有 ωn=2πnT=2πkN=ωk{\\omega_n} = \\tfrac{2\\pi n}{T} = \\tfrac{2\\pi k}{N} = {\\omega_k}ωn=T2πn=N2πk=ωk 简化表示为 ω{\\omega}ω ,可对原式做三角函数离散化处理(详细推导回顾本章首节)。 当输入信号满足奇函数特性时,可得 标准正弦的离散正弦变换(DST)的傅立叶展式 为: s(n)=1N∑k=0N−1s^(k)⋅sin(2πnNk)s^(k)=∑n=0N−1s(n)⋅sin(−2πnNk) {\\displaystyle \\begin{aligned} s(n) &= \\frac{1}{N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot sin(\\tfrac{2 \\pi n}{N} k) \\\\ \\hat{s}(k) &= \\sum_{n = 0}^{N-1} s(n) \\cdot sin(-\\tfrac{2 \\pi n}{N} k ) \\\\ \\end{aligned} } s(n)s^(k)=N1k=0∑N−1s^(k)⋅sin(N2πnk)=n=0∑N−1s(n)⋅sin(−N2πnk) 当输入信号满足偶函数特性时,有 标准余弦的离散余弦变换(DCT)的傅立叶展式 为: s(n)=1N∑k=0N−1s^(k)⋅cos(2πnNk)s^(k)=∑n=0N−1s(n)⋅cos(−2πnNk) {\\displaystyle \\begin{aligned} s(n) &= \\frac{1}{N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot cos(\\tfrac{2 \\pi n}{N} k) \\\\ \\hat{s}(k) &= \\sum_{n = 0}^{N-1} s(n) \\cdot cos(-\\tfrac{2 \\pi n}{N} k ) \\\\ \\end{aligned} } s(n)s^(k)=N1k=0∑N−1s^(k)⋅cos(N2πnk)=n=0∑N−1s(n)⋅cos(−N2πnk) 但是,自然信号是不分奇偶的,想要将公式适用范围扩大,就需要根据正余弦傅立叶变换要求,对输入信号进行不改变原始数据的扩充调整。根据选择作为基底的标准函数正余弦的差异,人为构造 满足条件输入的方法论,被分为 离散正弦变换(DST)分解 和 离散余弦变换(DCT)分解,两套实现。 假设原信号函数 s(t)=s(n)s(t) = s(n)s(t)=s(n) 在 n∈Z[0, N−1]n \\in \\mathbb{Z} [0, \\ N - 1]n∈Z[0, N−1] 的各节点位置,有样本采样 S∈[S0, SN−1]S \\in [S_0, \\ S_{N - 1}]S∈[S0, SN−1] ,取 N=4N = 4N=4 模拟最小子块(即实际技术被使用时的通用情况)。如图: 图 3-21 事例样本取值与切片索引关系图示 当目标分解为 DST 时,我们需要平移原数据 +32Step+\\tfrac{3}{2} Step+23Step 个步长,并补充中心原点 O0O_0O0 后,再做基于中心原点 O0=(0, 0)O_0 = (0,\\ 0)O0=(0, 0) 的映射。如此才能保证,补充的映射数据和旧数据,能够组成新的等步长数据组,满足离散化的处理条件。得到如下新集合(蓝色为补充数据,红色为原数据): 图 3-22 事例样本目标 DST 补充后与切片索引关系图示 新的样本集,数据量较原有数据翻了一倍多。但只有 轴正向的取值有意义。所以,采用 DST 类型分解,在扩充后,周期跨度都变为了 T=2N+1T= 2N + 1T=2N+1 ,且原离散展式 只有 n∈[1, N]n \\in [1, \\ N]n∈[1, N] 的部分是有效的。我们可以将偏移的 +1×Step+1 \\times Step+1×Step 划到式中处理,则 nnn 的取值范围就仍然可以保持为 n∈Z[0, N−1]n \\in \\mathbb{Z} [0, \\ N - 1]n∈Z[0, N−1] 。 不过考虑到 DST 目标是为了处理奇数阶信号源分解,为避免 sin(0)=0sin(0)=0sin(0)=0 值无意义的问题,会取 k∈[1, N]k \\in [1, \\ N]k∈[1, N] 的范围,并选用标准正弦向左移动 −12π-\\tfrac{1}{2} \\pi−21π 的偏移作为 基底正弦族。因此,为了统一,对 nnn 采用直接包含偏移 +1×Step+1 \\times Step+1×Step 的取值,使得 nnn 有 n∈Z[1, N]n \\in \\mathbb{Z} [1, \\ N]n∈Z[1, N] 。需要注意这个细节差异。 当目标分解为 DCT 时,需要在基于 y=s(n)y=s(n)y=s(n) 轴对称前,先行平移元数据 +12Step+\\tfrac{1}{2} Step+21Step 个步长。得到如下新集合(蓝色为补充数据,红色为原数据): 图 3-23 事例样本目标 DCT 补充后与切片索引关系图示 新的样本集,数据量较原有数据翻了一倍。同样只有 xxx 轴正向的取值有意义。所以,采用 DCT 类型分解,在扩充后,周期跨度都变为了 T=2NT= 2NT=2N ,且原离散展式 只有 n∈[12, 
N−12]n \\in [\\tfrac{1}{2}, \\ N - \\tfrac{1}{2}]n∈[21, N−21] 的部分是有效的。而由于非整数索引 nnn 不利于匹配原值,我们将偏移的 +12Step+\\tfrac{1}{2} Step+21Step 划到式中处理,则 nnn 的取值范围就仍然可以保持为 n∈Z[0, N−1]n \\in \\mathbb{Z} [0, \\ N - 1]n∈Z[0, N−1] 。 于是,结合两种分解,有: DST:{s(n)=12N+1∑k=1Ns^(k)⋅sin(2π(k−12)2N+1n)=12N+1∑k=1N−(−12N+1⋅s^(k))⋅sin(πn(2k−1)2N+1)s^(k)=2⋅∑n=1Ns(n)⋅sin(−2π(k−12)2N+1n)=2⋅∑n=1Ns(n)⋅sin(−πn(2k−1)2N+1)DCT:{s(n)=12N∑k=0N−1s^(k)⋅cos(2π(n+12)2Nk)=12N∑k=0N−1(12N⋅s^(k))⋅cos(π(2n+1)k2N)s^(k)=2⋅∑n=0N−1s(n+12)⋅cos(−2π(n+12)2Nk)=2⋅∑n=0N−1s(2n+12)⋅cos(π(2n+1)k2N) {\\displaystyle \\begin{aligned} DST:& { \\begin{cases} s(n) &= \\frac{1}{2N+1}\\sum_{k = 1}^{N} \\hat{s}(k) \\cdot sin(\\tfrac{2 \\pi (k-\\tfrac{1}{2})}{2N+1} n) = \\sqrt{\\frac{1}{2N+1}} \\sum_{k = 1}^{N} -(-\\sqrt{\\frac{1}{2N+1}} \\cdot \\hat{s}(k)) \\cdot sin( \\tfrac{\\pi n (2k-1)}{2N+1} ) \\\\ \\hat{s}(k) &= 2 \\cdot \\sum_{n = 1}^{N} s(n) \\cdot sin(-\\tfrac{2 \\pi (k-\\tfrac{1}{2})}{2N+1} n ) = 2 \\cdot \\sum_{n = 1}^{N} s(n) \\cdot sin(-\\tfrac{\\pi n (2k-1)}{2N+1} ) \\end{cases} } \\\\ DCT:& { \\begin{cases} s(n) &= \\frac{1}{2N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot cos(\\tfrac{2 \\pi (n+\\tfrac{1}{2})}{2N} k) = \\sqrt{\\frac{1}{2N}} \\sum_{k = 0}^{N-1} (\\sqrt{\\frac{1}{2N}} \\cdot \\hat{s}(k)) \\cdot cos( \\tfrac{\\pi (2n+1) k}{2N} ) \\\\ \\hat{s}(k) &= 2 \\cdot \\sum_{n = 0}^{N-1} s(n+\\tfrac{1}{2}) \\cdot cos(-\\tfrac{2 \\pi (n+\\tfrac{1}{2})}{2N} k ) = 2 \\cdot \\sum_{n = 0}^{N-1} s(\\tfrac{2n+1}{2}) \\cdot cos(\\tfrac{\\pi (2n+1) k}{2N} ) \\end{cases} } \\\\ \\end{aligned} } DST:DCT:⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧s(n)s^(k)=2N+11k=1∑Ns^(k)⋅sin(2N+12π(k−21)n)=√2N+11k=1∑N−(−√2N+11⋅s^(k))⋅sin(2N+1πn(2k−1))=2⋅n=1∑Ns(n)⋅sin(−2N+12π(k−21)n)=2⋅n=1∑Ns(n)⋅sin(−2N+1πn(2k−1))⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧s(n)s^(k)=2N1k=0∑N−1s^(k)⋅cos(2N2π(n+21)k)=√2N1k=0∑N−1(√2N1⋅s^(k))⋅cos(2Nπ(2n+1)k)=2⋅n=0∑N−1s(n+21)⋅cos(−2N2π(n+21)k)=2⋅n=0∑N−1s(22n+1)⋅cos(2Nπ(2n+1)k) 不过,由于 DCT 采用了 非整数步长,当 k=0k=0k=0 时并不一定有拟合的曲线使得 s^(0)=0\\hat{s}(0) = 0s^(0)=0 ,且 偶函数特点使 s^(0)\\hat{s}(0)s^(0) 在上式中被重复计算,因此需要针对变换后的 s(n)s(n)s(n) 剔除一次的 s^(0)\\hat{s}(0)s^(0) 均值累积,所以: DCT∣k=0:{s(n)=1N⋅s^(0)+12N∑k=1N−1(12N⋅s^(k))⋅cos(π(2n+1)k2N)=12N(22N⋅s^(0))+∑k=1N−1(12N⋅s^(k))⋅cos(π(2n+1)k2N))s^(k)=2⋅∑n=0N−1s(2n+12)⋅cos(π(2n+1)k2N) {\\displaystyle \\begin{aligned} DCT|_{k = 0}:& { \\begin{cases} s(n) &= \\frac{1}{N}\\cdot \\hat{s}(0)+ \\sqrt{\\frac{1}{2N}} \\sum_{k = 1}^{N-1} (\\sqrt{\\frac{1}{2N}} \\cdot \\hat{s}(k)) \\cdot cos( \\tfrac{\\pi (2n+1) k}{2N} ) \\\\ &= \\sqrt{\\frac{1}{2N}} ( \\frac{2}{\\sqrt{2N}} \\cdot \\hat{s}(0)) + \\sum_{k = 1}^{N-1} (\\sqrt{\\frac{1}{2N}} \\cdot \\hat{s}(k)) \\cdot cos( \\tfrac{\\pi (2n+1) k}{2N} ) ) \\\\ \\hat{s}(k) &= 2 \\cdot \\sum_{n = 0}^{N-1} s(\\tfrac{2n+1}{2}) \\cdot cos(\\tfrac{\\pi (2n+1) k}{2N} ) \\end{cases} } \\\\ \\end{aligned} } DCT∣k=0:⎩⎪⎪⎪⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎪⎪⎪⎧s(n)s^(k)=N1⋅s^(0)+√2N1k=1∑N−1(√2N1⋅s^(k))⋅cos(2Nπ(2n+1)k)=√2N1(√2N2⋅s^(0))+k=1∑N−1(√2N1⋅s^(k))⋅cos(2Nπ(2n+1)k))=2⋅n=0∑N−1s(22n+1)⋅cos(2Nπ(2n+1)k) 上式中,对原信号函数 s(n)s(n)s(n) 的 DST 均值常量 12N+1\\frac{1}{2N+1}2N+11 拆解为 (12N+1)2\\begin{pmatrix} \\sqrt{\\frac{1}{2N+1}} \\end{pmatrix} ^2(√2N+11)2 两部分,而 DCT 均值常量 12N\\frac{1}{2N}2N1 拆解为 (12N)2\\begin{pmatrix} \\sqrt{\\frac{1}{2N}} \\end{pmatrix} ^2(√2N1)2 两部分。其目的是为了,通过分别分配到各自展开式和傅立叶解上,来保证工程化后的算子,在 正逆运算上的统一。 因此,我们取: DST:Xk=−12N+1⋅s^(k)=12N+1⋅s^(−k)DCT:Xk=12N⋅s^(k)&X0=22N⋅s^(k) {\\displaystyle \\begin{aligned} DST:& X_k = -\\sqrt{\\frac{1}{2N+1}} \\cdot \\hat{s}(k) = \\sqrt{\\frac{1}{2N+1}} \\cdot \\hat{s}(-k) \\\\ DCT:& X_k = \\frac{1}{\\sqrt{2N}} \\cdot \\hat{s}(k) \\quad \\& \\quad X_0 = 
\\frac{2}{\\sqrt{2N}} \\cdot \\hat{s}(k) \\\\ \\end{aligned} } DST:DCT:Xk=−√2N+11⋅s^(k)=√2N+11⋅s^(−k)Xk=√2N1⋅s^(k)&X0=√2N2⋅s^(k) 代入即可得到,原 离散正弦变换(DST)的工程表达式 : k∈[1, N]n∈[1, N]DST:{Sn=12N+1∑k=1NXk⋅sin(πn(2k−1)2N+1)Xk=22N+1⋅∑n=1NSn⋅sin(πn(2k−1)2N+1) {\\displaystyle \\begin{aligned} &k \\in [1,\\ N] \\quad \\quad n \\in [1,\\ N] \\\\ DST:& { \\begin{cases} S_n &= \\frac{1}{\\sqrt{2N+1}} \\sum_{k = 1}^{N} X_k \\cdot sin( \\tfrac{\\pi n(2k-1) }{2N+1} ) \\\\ X_k &= \\frac{2}{\\sqrt{2N+1}} \\cdot \\sum_{n = 1}^{N} S_n \\cdot sin(\\tfrac{\\pi n(2k-1)}{2N+1} ) \\end{cases} } \\\\ \\end{aligned} } DST:k∈[1, N]n∈[1, N]⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧SnXk=√2N+11k=1∑NXk⋅sin(2N+1πn(2k−1))=√2N+12⋅n=1∑NSn⋅sin(2N+1πn(2k−1)) 和,原 离散余弦变换(DCT)的工程表达式 为: k∈[0, N−1]n∈[0, N−1]DCT:{Sn=12N∑k=0N−1Xk⋅cos(π(2n+1)k2N)Xk=22N⋅∑n=0N−1Sn⋅cos(π(2n+1)k2N) ,k≥1Xk=2⋅22N⋅∑n=0N−1Sn⋅cos(π(2n+1)k2N) ,k=0 {\\displaystyle \\begin{aligned} &k \\in [0,\\ N-1] \\quad \\quad n \\in [0,\\ N - 1] \\\\ DCT:& { \\begin{cases} S_n &= \\frac{1}{\\sqrt{2N}} \\sum_{k = 0}^{N-1} X_k \\cdot cos( \\tfrac{\\pi (2n+1) k}{2N} ) \\\\ X_k &= \\frac{2}{\\sqrt{2N}} \\cdot \\sum_{n = 0}^{N-1} S_n \\cdot cos(\\tfrac{\\pi (2n+1) k}{2N} )\\ , k \\ge 1 \\\\ X_k &= \\frac{2 \\cdot 2}{\\sqrt{2N}} \\cdot \\sum_{n = 0}^{N-1} S_n \\cdot cos(\\tfrac{\\pi (2n+1) k}{2N} )\\ , k = 0 \\end{cases} } \\\\ \\end{aligned} } DCT:k∈[0, N−1]n∈[0, N−1]⎩⎪⎪⎪⎪⎪⎪⎪⎪⎪⎨⎪⎪⎪⎪⎪⎪⎪⎪⎪⎧SnXkXk=√2N1k=0∑N−1Xk⋅cos(2Nπ(2n+1)k)=√2N2⋅n=0∑N−1Sn⋅cos(2Nπ(2n+1)k) ,k≥1=√2N2⋅2⋅n=0∑N−1Sn⋅cos(2Nπ(2n+1)k) ,k=0 这就是信号处理上经常使用的,泛化离散正余弦变换公式组。 从上面的过程中可以发现,我们在傅立叶基底函数族的选取上,实际限定了函数的相位、周期,并约束了原信号的特性。如果在初始相位和原信号特性上做调整,最终的结果也会有所差异。从数学工具角度来看,这种变化 最终会产生 8 种 DST 和 8 种 DCT 的变体,以分别应对实虚部奇偶阶数和初始相位不同时的快速计算。但由于工程化上需要力求简洁和相似(形似)的表达。因此,相对于其他几种的组合,我们最终采用的公式组中的两类,来用于各自条件输入的统一处理。 现在,GPU 加速的理论已准备就绪,我们来看算子是怎么获取的。 整数离散正弦变换(IDST)的 GPU 矩阵算子 首先,将离散正弦变换扩展到二维情况,有: k(u,v)&p(x,y)∈[(1, 1), (N, N)]DST:Xk(u,v)=(22N+1)2⋅∑p=(1,1)(N,N)Sp(x,y)⋅sin(2u−12N+1πx)⋅sin(2v−12N+1πy) {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(1,\\ 1),\\ (N,\\ N)] \\\\ DST: X_k(u,v) &= \\begin{pmatrix} \\frac{2}{\\sqrt{2N+1}} \\end{pmatrix} ^2 \\cdot \\sum_{p = (1,1)}^{(N,N)}S_p(x,y) \\cdot sin(\\tfrac{2u-1}{2N+1} \\pi x) \\cdot sin(\\tfrac{2v-1}{2N+1} \\pi y) \\\\ \\end{aligned} } DST:Xk(u,v)k(u,v)&p(x,y)∈[(1, 1), (N, N)]=(√2N+12)2⋅p=(1,1)∑(N,N)Sp(x,y)⋅sin(2N+12u−1πx)⋅sin(2N+12v−1πy) 考虑可构成卷积核的子块最小大小为 4×44 \\times 44×4 ,则有 N=4N=4N=4 使上式变为: k(u,v)&p(x,y)∈[(1, 1), (4, 4)]DST:Xk(u,v)=49⋅∑p=(1,1)(4,4)Sp(x,y)⋅sin(2u−19πx)⋅sin(2v−19πy) {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(1,\\ 1),\\ (4,\\ 4)] \\\\ DST: X_k(u,v) &= \\frac{4}{9} \\cdot \\sum_{p = (1,1)}^{(4,4)}S_p(x,y) \\cdot sin(\\tfrac{2u-1}{9} \\pi x) \\cdot sin(\\tfrac{2v-1}{9} \\pi y) \\\\ \\end{aligned} } DST:Xk(u,v)k(u,v)&p(x,y)∈[(1, 1), (4, 4)]=94⋅p=(1,1)∑(4,4)Sp(x,y)⋅sin(92u−1πx)⋅sin(92v−1πy) 如此,就可以矩阵表示 4×44 \\times 44×4 的 DST 变化为: DST4×4:Xk(u,v)∣v=KDST⋅Sp(x,y)=[23⋅∑v=14(23⋅∑u=14sin(2u−19πx))⋅sin(2v−19πy)]⋅Sp(x,y) {\\displaystyle \\begin{aligned} DST_{4 \\times 4}: \\\\ X_k(u,v)|_v &= K_{DST} \\cdot S_p(x, y) \\\\ &= \\begin{bmatrix} &\\frac{2}{3} \\cdot \\sum_{v=1}^4 \\begin{pmatrix} \\frac{2}{3} \\cdot \\sum_{u=1}^4 sin(\\tfrac{2u-1}{9} \\pi x) \\end{pmatrix} \\cdot sin(\\tfrac{2v-1}{9} \\pi y) \\end{bmatrix} \\cdot S_p(x, y) \\\\ \\end{aligned} } DST4×4:Xk(u,v)∣v=KDST⋅Sp(x,y)=[32⋅v=1∑4(32⋅u=1∑4sin(92u−1πx))⋅sin(92v−1πy)]⋅Sp(x,y) 即有: 
KDST=23[sin(19π),sin(29π),sin(39π),sin(49π)sin(39π),sin(69π),sin(99π),sin(129π)sin(59π),sin(109π),sin(159π),sin(209π)sin(79π),sin(149π),sin(219π),sin(289π)]=23[sin(19π),sin(29π),sin(39π),sin(49π)sin(39π),sin(39π),0,−sin(39π)sin(49π),−sin(19π),−sin(39π),sin(29π)sin(29π),−sin(49π),sin(39π),−sin(19π)] {\\displaystyle \\begin{aligned} K_{DST}&= \\frac{2}{3} \\begin{bmatrix} &sin(\\tfrac{1}{9}\\pi) &, \\quad sin(\\tfrac{2}{9}\\pi) &, \\quad sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{4}{9}\\pi) \\\\ &sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{6}{9}\\pi) &, \\quad sin(\\tfrac{9}{9}\\pi) &, \\quad sin(\\tfrac{12}{9}\\pi) \\\\ &sin(\\tfrac{5}{9}\\pi) &, \\quad sin(\\tfrac{10}{9}\\pi) &, \\quad sin(\\tfrac{15}{9}\\pi) &, \\quad sin(\\tfrac{20}{9}\\pi) \\\\ &sin(\\tfrac{7}{9}\\pi) &, \\quad sin(\\tfrac{14}{9}\\pi) &, \\quad sin(\\tfrac{21}{9}\\pi) &, \\quad sin(\\tfrac{28}{9}\\pi) \\end{bmatrix} \\\\ &= \\frac{2}{3} \\begin{bmatrix} &sin(\\tfrac{1}{9}\\pi) &, \\quad sin(\\tfrac{2}{9}\\pi) &, \\quad sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{4}{9}\\pi) \\\\ &sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{3}{9}\\pi) &, \\quad \\quad 0 &, -sin(\\tfrac{3}{9}\\pi) \\\\ &sin(\\tfrac{4}{9}\\pi) &, -sin(\\tfrac{1}{9}\\pi) &, -sin(\\tfrac{3}{9}\\pi) &, \\quad sin(\\tfrac{2}{9}\\pi) \\\\ &sin(\\tfrac{2}{9}\\pi) &, -sin(\\tfrac{4}{9}\\pi) &, \\quad sin(\\tfrac{3}{9}\\pi) &, -sin(\\tfrac{1}{9}\\pi) \\end{bmatrix} \\end{aligned} } KDST=32⎣⎢⎢⎡sin(91π)sin(93π)sin(95π)sin(97π),sin(92π),sin(96π),sin(910π),sin(914π),sin(93π),sin(99π),sin(915π),sin(921π),sin(94π),sin(912π),sin(920π),sin(928π)⎦⎥⎥⎤=32⎣⎢⎢⎡sin(91π)sin(93π)sin(94π)sin(92π),sin(92π),sin(93π),−sin(91π),−sin(94π),sin(93π),0,−sin(93π),sin(93π),sin(94π),−sin(93π),sin(92π),−sin(91π)⎦⎥⎥⎤ 其中, KDSTK_{DST}KDST 就是 DST 的卷积核算子,但目前还是 浮点数的形式。浮点数矩阵不利于 GPU 算力的节省,因此还需要整数化。考虑 KDSTK_{DST}KDST 本身作用在实际像素取值上,而像素值的数据格式是以整数形式离散化存储的,具有位深数据范围中值记为常量 DDD 。 比如,8-bit 位深格式可取范围为 [0, 255][0,\\ 255][0, 255] ,就有 D=128D=128D=128 取值。我们可以利用这一特点来对原数据进行放缩,并四舍五入取整。 记整数化后的 KDSTK_{DST}KDST 为 K^DST\\hat{K}_{DST}K^DST 则: K^DST≈[29,55,74,8474,74,0,−7484,−29,−74,5555,−84,74,−29]=D⋅KDST {\\displaystyle \\begin{aligned} \\hat{K}_{DST}&\\approx \\begin{bmatrix} &29 &, \\quad 55 &, \\quad 74 &, \\quad 84 \\\\ &74 &, \\quad 74 &, \\quad 0 &, -74 \\\\ &84 &, -29 &, -74 &, \\quad 55 \\\\ &55 &, -84 &, \\quad 74 &, -29 \\end{bmatrix} = D \\cdot K_{DST} \\\\ \\end{aligned} } K^DST≈⎣⎢⎢⎡29748455,55,74,−29,−84,74,0,−74,74,84,−74,55,−29⎦⎥⎥⎤=D⋅KDST 原 DST 的算子,即可以转化为如下表示: Xk(u,v)∣v=1D⋅D⋅KDST⋅Sp(x,y)=1D⋅K^DST⋅Sp(x,y) {\\displaystyle \\begin{aligned} X_k(u,v)|_v &= \\frac{1}{D} \\cdot D \\cdot K_{DST} \\cdot S_p(x, y) \\\\ &= \\frac{1}{D} \\cdot \\hat{K}_{DST} \\cdot S_p(x, y) \\\\ \\end{aligned} } Xk(u,v)∣v=D1⋅D⋅KDST⋅Sp(x,y)=D1⋅K^DST⋅Sp(x,y) 当然,这里单独计算了分离后波矢 k⃗=(u,v){\\vec{k}}=(u,v)k⃗=(u,v) 对应平面波的权重 Xk(u,v)X_k(u,v)Xk(u,v) ,那么对于整个 4×44 \\times 44×4 区域所有的平面波权重(即傅立叶解)就有 等价矩阵 : Xk∣4×4=(1D)2⋅K^DST⋅Sp∣4×4⋅K^DSTT {\\displaystyle \\begin{aligned} X_k|_{4 \\times 4} &= \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot \\hat{K}_{DST} \\cdot S_p|_{4 \\times 4} \\cdot {\\hat{K}_{DST}}^T \\\\ \\end{aligned} } Xk∣4×4=(D1)2⋅K^DST⋅Sp∣4×4⋅K^DSTT 精简一下,即可写为: Xk=(1D)2⋅K^DST⋅Sp⋅K^DSTT {\\displaystyle \\begin{aligned} X_k &= \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot \\hat{K}_{DST} \\cdot S_p\\cdot {\\hat{K}_{DST}}^T \\\\ \\end{aligned} } Xk=(D1)2⋅K^DST⋅Sp⋅K^DSTT 这个即为 整数正弦变化(IDST)核心公式,而 K^DST\\hat{K}_{DST}K^DST 则被称为 整数正弦变化的基本算子(IDST Opt)。显然,在已知 SpS_pSp 和存储范围 DDD 的情况下,还是非常容易求得 XkX_kXk 的。而对应的 GPU 程序片也很简单,基本可当作滑动窗口移动步长 K=4K = 
4K=4 的固定算子乘法运算,就不再复写了。 整数离散正弦变换(IDST)的 GPU 矩阵算子 同理于 IDST,虽然 整数离散余弦变换(IDCT) 的切入理论,和 IDST 有一些不同。但最终的算子区别仅在于取值上。 仍然需要,将离散正弦变换扩展到二维情况。有: k(u,v)&p(x,y)∈[(0, 0), (N−1, N−1)]εk∣k=(0,0)=12&εk∣k≠(0,0)=1DCT:Xk(u,v)=(2⋅εk2N)2⋅∑p=(0,0)(N−1,N−1)Sp(x,y)⋅cos(2x+12Nπu)⋅cos(2y+12Nπv) {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(0,\\ 0),\\ (N-1,\\ N-1)] \\\\ &\\varepsilon_k|_{k=(0,0)} = \\frac{1}{\\sqrt{2}} \\quad \\& \\quad \\varepsilon_k|_{k \\ne (0,0)}=1 \\\\ DCT: X_k(u,v) &= \\begin{pmatrix} \\frac{2 \\cdot \\varepsilon_k}{\\sqrt{2N}} \\end{pmatrix} ^2 \\cdot \\sum_{p = (0,0)}^{(N-1,N-1)}S_p(x,y) \\cdot cos(\\tfrac{2x+1}{2N} \\pi u) \\cdot cos(\\tfrac{2y+1}{2N} \\pi v) \\\\ \\end{aligned} } DCT:Xk(u,v)k(u,v)&p(x,y)∈[(0, 0), (N−1, N−1)]εk∣k=(0,0)=√21&εk∣k≠(0,0)=1=(√2N2⋅εk)2⋅p=(0,0)∑(N−1,N−1)Sp(x,y)⋅cos(2N2x+1πu)⋅cos(2N2y+1πv) 依然,考虑可构成卷积核的子块最小大小为 4×44 \\times 44×4 ,则有 N=4N=4N=4 使上式变为: k(u,v)&p(x,y)∈[(0, 0), (3, 3)]εk∣k=(0,0)=12&εk∣k≠(0,0)=1DCT:Xk(u,v)=(εk2)2⋅∑p=(0,0)(3,3)Sp(x,y)⋅cos(2x+18πu)⋅cos(2y+12Nπv) {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(0,\\ 0),\\ (3,\\ 3)] \\\\ &\\varepsilon_k|_{k=(0,0)} = \\frac{1}{\\sqrt{2}} \\quad \\& \\quad \\varepsilon_k|_{k \\ne (0,0)}=1 \\\\ DCT: X_k(u,v) &= \\begin{pmatrix} \\frac{\\varepsilon_k}{\\sqrt{2}} \\end{pmatrix} ^2 \\cdot \\sum_{p = (0,0)}^{(3,3)}S_p(x,y) \\cdot cos(\\tfrac{2x+1}{8} \\pi u) \\cdot cos(\\tfrac{2y+1}{2N} \\pi v) \\\\ \\end{aligned} } DCT:Xk(u,v)k(u,v)&p(x,y)∈[(0, 0), (3, 3)]εk∣k=(0,0)=√21&εk∣k≠(0,0)=1=(√2εk)2⋅p=(0,0)∑(3,3)Sp(x,y)⋅cos(82x+1πu)⋅cos(2N2y+1πv) 如此,就可以矩阵表示 4×44 \\times 44×4 的 DCT 变化为: DCT4×4:Xk(u,v)∣v=KDCT⋅Sp(x,y)=[εv2⋅∑y=03(εu2⋅∑x=03cos(2x+18πu))⋅cos(2y+18πv)]⋅Sp(x,y)=12⋅[εv⋅∑y=0312⋅(εu⋅∑x=03cos(2x+18πu))⋅cos(2y+18πv)]⋅Sp(x,y)εk∣k=(0,0)=12&εk∣k≠(0,0)=1 {\\displaystyle \\begin{aligned} DCT_{4 \\times 4}: \\\\ X_k(u,v)|_v &= K_{DCT} \\cdot S_p(x, y) \\\\ &= \\begin{bmatrix} &\\frac{\\varepsilon_v}{\\sqrt{2}} \\cdot \\sum_{y=0}^3 \\begin{pmatrix} \\frac{\\varepsilon_u}{\\sqrt{2}} \\cdot \\sum_{x=0}^3 cos(\\tfrac{2x+1}{8} \\pi u) \\end{pmatrix} \\cdot cos(\\tfrac{2y+1}{8} \\pi v) \\end{bmatrix} \\cdot S_p(x, y) \\\\ &= \\frac{1}{\\sqrt{2}} \\cdot \\begin{bmatrix} &\\varepsilon_v \\cdot \\sum_{y=0}^3 \\frac{1}{\\sqrt{2}} \\cdot \\begin{pmatrix} \\varepsilon_u \\cdot \\sum_{x=0}^3 cos(\\tfrac{2x+1}{8} \\pi u) \\end{pmatrix} \\cdot cos(\\tfrac{2y+1}{8} \\pi v) \\end{bmatrix} \\cdot S_p(x, y) \\\\ \\varepsilon_k|_{k=(0,0)} &= \\frac{1}{\\sqrt{2}} \\quad \\& \\quad \\varepsilon_k|_{k \\ne (0,0)}=1 \\\\ \\end{aligned} } DCT4×4:Xk(u,v)∣vεk∣k=(0,0)=KDCT⋅Sp(x,y)=[√2εv⋅y=0∑3(√2εu⋅x=0∑3cos(82x+1πu))⋅cos(82y+1πv)]⋅Sp(x,y)=√21⋅[εv⋅y=0∑3√21⋅(εu⋅x=0∑3cos(82x+1πu))⋅cos(82y+1πv)]⋅Sp(x,y)=√21&εk∣k≠(0,0)=1 即有: KDCT=12[12cos(08π),12cos(08π),12cos(08π),12cos(08π)cos(18π),cos(38π),cos(58π),cos(78π)cos(28π),cos(68π),cos(108π),cos(148π)cos(38π),cos(98π),cos(158π),cos(218π)]=12[12,12,12,12cos(18π),cos(38π),cos(38π),−cos(18π)cos(28π),−cos(28π),−cos(28π),cos(28π)cos(38π),−cos(18π),cos(18π),−cos(38π)] {\\displaystyle \\begin{aligned} K_{DCT}&= \\frac{1}{\\sqrt{2}} \\begin{bmatrix} &\\frac{1}{\\sqrt{2}} cos(\\tfrac{0}{8}\\pi) &, \\quad \\frac{1}{\\sqrt{2}} cos(\\tfrac{0}{8}\\pi) &, \\quad \\frac{1}{\\sqrt{2}} cos(\\tfrac{0}{8}\\pi) &, \\quad \\frac{1}{\\sqrt{2}} cos(\\tfrac{0}{8}\\pi) \\\\ &cos(\\tfrac{1}{8}\\pi) &, \\quad cos(\\tfrac{3}{8}\\pi) &, \\quad cos(\\tfrac{5}{8}\\pi) &, \\quad cos(\\tfrac{7}{8}\\pi) \\\\ &cos(\\tfrac{2}{8}\\pi) &, \\quad cos(\\tfrac{6}{8}\\pi) &, \\quad cos(\\tfrac{10}{8}\\pi) &, \\quad 
cos(\\tfrac{14}{8}\\pi) \\\\ &cos(\\tfrac{3}{8}\\pi) &, \\quad cos(\\tfrac{9}{8}\\pi) &, \\quad cos(\\tfrac{15}{8}\\pi) &, \\quad cos(\\tfrac{21}{8}\\pi) \\end{bmatrix} \\\\ &= \\frac{1}{\\sqrt{2}} \\begin{bmatrix} &\\frac{1}{\\sqrt{2}} &, \\quad \\frac{1}{\\sqrt{2}} &, \\quad \\frac{1}{\\sqrt{2}} &, \\quad \\frac{1}{\\sqrt{2}} \\\\ &cos(\\tfrac{1}{8}\\pi) &, \\quad cos(\\tfrac{3}{8}\\pi) &, \\quad cos(\\tfrac{3}{8}\\pi) &, -cos(\\tfrac{1}{8}\\pi) \\\\ &cos(\\tfrac{2}{8}\\pi) &, -cos(\\tfrac{2}{8}\\pi) &, -cos(\\tfrac{2}{8}\\pi) &, \\quad cos(\\tfrac{2}{8}\\pi) \\\\ &cos(\\tfrac{3}{8}\\pi) &, -cos(\\tfrac{1}{8}\\pi) &, \\quad cos(\\tfrac{1}{8}\\pi) &, -cos(\\tfrac{3}{8}\\pi) \\end{bmatrix} \\end{aligned} } KDCT=√21⎣⎢⎢⎢⎡√21cos(80π)cos(81π)cos(82π)cos(83π),√21cos(80π),cos(83π),cos(86π),cos(89π),√21cos(80π),cos(85π),cos(810π),cos(815π),√21cos(80π),cos(87π),cos(814π),cos(821π)⎦⎥⎥⎥⎤=√21⎣⎢⎢⎢⎡√21cos(81π)cos(82π)cos(83π),√21,cos(83π),−cos(82π),−cos(81π),√21,cos(83π),−cos(82π),cos(81π),√21,−cos(81π),cos(82π),−cos(83π)⎦⎥⎥⎥⎤ 依然取位深数据范围中值记为常量 DDD 。有 D=128D=128D=128 对应 8-bit 位深格式 [0, 255][0,\\ 255][0, 255] 的可取范围,使得我们能够将结果矩阵整数化处理。记整数化后的 KDCTK_{DCT}KDCT 为 K^DCT\\hat{K}_{DCT}K^DCT 则: K^DCT≈[64,64,64,6483,36,−36,−8364,−64,−64,6436,−83,83,−36]=D⋅KDCT {\\displaystyle \\begin{aligned} \\hat{K}_{DCT}&\\approx \\begin{bmatrix} &64 &, \\quad 64 &, \\quad 64 &, \\quad 64 \\\\ &83 &, \\quad 36 &, -36 &, -83 \\\\ &64 &, -64 &, -64 &, \\quad 64 \\\\ &36 &, -83 &, \\quad 83 &, -36 \\end{bmatrix} = D \\cdot K_{DCT} \\\\ \\end{aligned} } K^DCT≈⎣⎢⎢⎡64836436,64,36,−64,−83,64,−36,−64,83,64,−83,64,−36⎦⎥⎥⎤=D⋅KDCT 原 DCT 的算子,即可以转化为如下表示: Xk(u,v)∣v=1D⋅D⋅KDCT⋅Sp(x,y)=1D⋅K^DCT⋅Sp(x,y) {\\displaystyle \\begin{aligned} X_k(u,v)|_v &= \\frac{1}{D} \\cdot D \\cdot K_{DCT} \\cdot S_p(x, y) \\\\ &= \\frac{1}{D} \\cdot \\hat{K}_{DCT} \\cdot S_p(x, y) \\\\ \\end{aligned} } Xk(u,v)∣v=D1⋅D⋅KDCT⋅Sp(x,y)=D1⋅K^DCT⋅Sp(x,y) 当然,这里单独计算了分离后波矢 k⃗=(u,v){\\vec{k}}=(u,v)k⃗=(u,v) 对应平面波的权重 Xk(u,v)X_k(u,v)Xk(u,v) ,那么对于整个 4×44 \\times 44×4 区域所有的平面波权重(即傅立叶解)就有 等价矩阵 : Xk∣4×4=(1D)2⋅K^DCT⋅Sp∣4×4⋅K^DCTT {\\displaystyle \\begin{aligned} X_k|_{4 \\times 4} &= \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot \\hat{K}_{DCT} \\cdot S_p|_{4 \\times 4} \\cdot {\\hat{K}_{DCT}}^T \\\\ \\end{aligned} } Xk∣4×4=(D1)2⋅K^DCT⋅Sp∣4×4⋅K^DCTT 精简一下,即可写为: Xk=(1D)2⋅K^DCT⋅Sp⋅K^DCTT {\\displaystyle \\begin{aligned} X_k &= \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot \\hat{K}_{DCT} \\cdot S_p\\cdot {\\hat{K}_{DCT}}^T \\\\ \\end{aligned} } Xk=(D1)2⋅K^DCT⋅Sp⋅K^DCTT 这个即为 整数余弦变化(IDCT)核心公式,而 K^DCT\\hat{K}_{DCT}K^DCT 则被称为 整数余弦变化的基本算子(IDCT Opt)。同样,在已知 SpS_pSp 和存储范围 DDD 的情况下,还是非常容易求得 XkX_kXk 的。而对应的 GPU 程序片也很简单,基本可当作滑动窗口移动步长 StepK=4Step_K = 4StepK=4 的固定算子乘法运算,就不再复写了。 现在汇总两者所述,对于整数离散正余弦变换(IDST/IDCT)的同理性,我们将 K^DST\\hat{K}_{DST}K^DST 与 K^DCT\\hat{K}_{DCT}K^DCT 统一称为 K^\\hat{K}K^ 矩阵,即 整数离散正余弦变换算子(IDST/IDCT Opt)。而 K^\\hat{K}K^ 的取值,显然和位深(Bit Depth)是强相关的。只有确定位深情况,才有固定的 K^\\hat{K}K^ 矩阵。 因此,当存储格式(Data Format)位深为 8-bit 时目标 4×44 \\times 44×4 大小,整合后的公式如下 : K^DST≈[29,55,74,8474,74,0,−7484,−29,−74,5555,−84,74,−29],K^DCT≈[64,64,64,6483,36,−36,−8364,−64,−64,6436,−83,83,−36]Xk=(1D)2⋅K^⋅Sp⋅K^T {\\displaystyle \\begin{aligned} \\hat{K}_{DST} \\approx \\begin{bmatrix} &29 &, \\quad 55 &, \\quad 74 &, \\quad 84 \\\\ &74 &, \\quad 74 &, \\quad 0 &, -74 \\\\ &84 &, -29 &, -74 &, \\quad 55 \\\\ &55 &, -84 &, \\quad 74 &, -29 \\end{bmatrix} , \\quad & \\hat{K}_{DCT} \\approx \\begin{bmatrix} &64 &, \\quad 64 &, \\quad 64 &, \\quad 64 \\\\ &83 &, \\quad 36 &, -36 &, -83 \\\\ &64 &, -64 &, -64 &, \\quad 
64 \\\\ &36 &, -83 &, \\quad 83 &, -36 \\end{bmatrix} \\\\ X_k = \\begin{pmatrix} \\frac{1}{D} \\end{pmatrix} ^2 \\cdot & \\hat{K} \\cdot S_p\\cdot \\hat{K}^T \\\\ \\end{aligned} } K^DST≈⎣⎢⎢⎡29748455,55,74,−29,−84,74,0,−74,74,84,−74,55,−29⎦⎥⎥⎤,Xk=(D1)2⋅K^DCT≈⎣⎢⎢⎡64836436,64,36,−64,−83,64,−36,−64,83,64,−83,64,−36⎦⎥⎥⎤K^⋅Sp⋅K^T 整合后的两种变化中, K^DCT\\hat{K}_{DCT}K^DCT 会将卷积核范围内大部分 低频信息 对应基底的 分离权重,富集到结果矩阵 XkX_kXk 的 左上角 ;而 K^DST\\hat{K}_{DST}K^DST 会将卷积核范围内大部分 低频信息 对应基底的 分离权重,富集到结果矩阵 XkX_kXk 的 右上角。而低频权重所对应的高残差区域,才是原始图像最关键的轮廓数据。因此,对于压缩场景,考虑到数据存储惯性,采用 K^DCT\\hat{K}_{DCT}K^DCT 得到关键权重值 Xk(0,0)X_k(0,0)Xk(0,0) 的方式更为合适。而 K^DST\\hat{K}_{DST}K^DST 则由于取用的基底函数类型,决定了其更适合平滑波动区域的数据处理,例如轮廓内的相对均匀填充部分。 我们通常将 K^DCT\\hat{K}_{DCT}K^DCT 得到的 Xk(0,0)X_k(0,0)Xk(0,0) 称为 直流系数(DC [Direct Coefficient]),而把 XkX_kXk 其余位置的基底函数权重值,称为 交流系数(AC [Alternating Coefficient)。 数据还原时,通过矩阵逆运算求得常量矩阵 K^−1\\hat{K}^{-1}K^−1 ,随后代入 Sp=D2⋅K^−1⋅Xk⋅K^−1TS_p = D^2 \\cdot \\hat{K}^{-1} \\cdot X_k\\cdot {\\hat{K}^{-1}}^TSp=D2⋅K^−1⋅Xk⋅K^−1T 式中还原原值。而对于其它类型的三角基底函数,和不同的目标窗口大小(常用为 2n2^n2n , 取 n=2,3,4,5n=2,3,4,5n=2,3,4,5 ),使用基本公式代入,并按照上述推导类比处理,即可获取对应算子。 这就是最终主流的,整数离散正余弦变换。之于其它的 DST/DCT 共计 16 种类型,皆在特殊条件下起相关作用,被运用到针对子块的数据分离过程中。当然,推理过程依旧一致,只不过部分性质存在不同,如 DCT-8 就无法利用周期性来根据已知算子直接类推,每个不同的大小,都需要重新计算,这里不另作展开。 而对于整数离散正余弦变换本身来说,我们常用它来初步完成对子块内高低频数据的分离汇总,即对数据的分离归类。借此,方便后续在频域上,根据提纯结果进行压缩处理。对于其它位深取值,则根据 K^=D⋅K\\hat{K}=D \\cdot KK^=D⋅K 计算即可,而 KKK 在窗口大小不变(即基底函数族固定)情况下,不会发生变化,可认为是一个常数矩阵。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_5_2.html":{"url":"Chapter_3/Language/cn/Docs_3_5_2.html","title":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform])","keywords":"","body":"3.5.2 哈达玛变换(WHT [Walsh-Hadamard Transform]) 除了整数离散正余弦变换(IDST/IDCT)外,在早期的规格中(如 H.264)还会使用一种,被称为 沃尔什-哈达玛变换(WHT [Walsh-Hadamard Transform]) 的离散傅立叶的变换变体算法,来代为进一步的富集频域信息。 在之前的傅立叶变换运用中,我们大都选择三角函数或近似拟合,来作为基底函数进行分解。考虑到傅立叶函数,只从周期性上,对基底函数族和目标函数进行了约束。我们是不是可以选择一种,类似于自然排序线性组合的周期性函数,代替正余弦处理,从而获取更符合数据物理介质存储(媒介传输)状态(0/1 双模态)的变换过程呢? 
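As an aside before continuing: the 8-bit, 4×4 integer-DCT core formula from the preceding section, Xk = (1/D)² · K̂_DCT · Sp · K̂_DCTᵀ, can be exercised with a short C sketch like the one below. The function and variable names and the flat test block are illustrative assumptions rather than anything from the text, and the 1/D² scaling is done in floating point purely for readability; in practice it would typically be folded into the later quantization step instead.

```c
#include <stdio.h>

#define D 128  /* mid value of the 8-bit sample range, as in the text */

/* 8-bit 4x4 integer DCT operator taken from the derivation above */
static const int K_DCT[4][4] = {
    { 64,  64,  64,  64 },
    { 83,  36, -36, -83 },
    { 64, -64, -64,  64 },
    { 36, -83,  83, -36 },
};

/* Xk = (1/D)^2 * K * Sp * K^T, evaluated naively on one 4x4 block Sp. */
static void idct4x4_forward(const int Sp[4][4], double Xk[4][4]) {
    double tmp[4][4];

    /* tmp = K * Sp */
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++) {
            tmp[i][j] = 0.0;
            for (int k = 0; k < 4; k++)
                tmp[i][j] += (double)K_DCT[i][k] * Sp[k][j];
        }

    /* Xk = (tmp * K^T) / D^2 */
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++) {
            double acc = 0.0;
            for (int k = 0; k < 4; k++)
                acc += tmp[i][k] * K_DCT[j][k];  /* K^T indexing */
            Xk[i][j] = acc / ((double)D * D);
        }
}

int main(void) {
    /* a flat block: every frequency weight except the DC term should be ~0 */
    int Sp[4][4] = { {16,16,16,16}, {16,16,16,16}, {16,16,16,16}, {16,16,16,16} };
    double Xk[4][4];

    idct4x4_forward(Sp, Xk);
    printf("DC weight Xk(0,0) = %f\n", Xk[0][0]);
    return 0;
}
```

For the flat input above the sketch prints a single non-zero DC weight, matching the expectation that K̂_DCT concentrates the low-frequency energy at the top-left of Xk.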
答案是可以的。虽然,从某种意义上,哈达玛变换相当于取用了只拟合正余弦函数极值的特殊函数。但哈达玛变换(WHT)依旧被认为是此类 非三角函数离散傅立叶变换,简称 非三角函数变换(或非正/余弦变换),的经典代表之一。 考虑周期 T=2nT=2^nT=2n 分段函数: f(x)=(−1)⌊xT⌋=(−1)⌊x2n⌋ {\\displaystyle \\begin{aligned} f(x)= & (-1)^{ \\lfloor \\tfrac{x}{T} \\rfloor } = (-1)^{ \\lfloor \\tfrac{x}{2^n} \\rfloor } \\\\ \\end{aligned} } f(x)=(−1)⌊Tx⌋=(−1)⌊2nx⌋ 记周期 T=2n=NT=2^n =NT=2n=N 的原信号函数 s(t)s(t)s(t) 以 f(x)f(x)f(x) 函数族构成基底。根据 傅立叶级数 有: s(n)=1N∑k=0N−1s^(k)⋅(−1)nks^(k)=∑n=0N−1s(n)⋅(−1)nk {\\displaystyle \\begin{aligned} s(n) &= \\frac{1}{N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot (-1)^{nk} \\\\ \\hat{s}(k) &= \\sum_{n = 0}^{N-1} s(n) \\cdot (-1)^{nk} \\\\ \\end{aligned} } s(n)s^(k)=N1k=0∑N−1s^(k)⋅(−1)nk=n=0∑N−1s(n)⋅(−1)nk 这即是 哈达玛变换的基础公式。 不同于 DST/DCT 需要进行泛化后,才能运用到工程之中的情况。哈达玛变换由于 f(x)f(x)f(x) 本身为偶函数,且始终只有实部的特性,可以直接在原有无扩充的数据集上使用。因此,假设原信号函数 s(t)=s(n)s(t) = s(n)s(t)=s(n) 在 n∈Z[0, N−1]n \\in \\mathbb{Z} [0, \\ N - 1]n∈Z[0, N−1] 的各节点位置,有样本采样 S∈[S0, SN−1]S \\in [S_0, \\ S_{N - 1}]S∈[S0, SN−1] 。则: WHT:{s(n)=1N∑k=0N−1s^(k)⋅(−1)nk=1N⋅∑k=0N−1(1N⋅s^(k))⋅(−1)nks^(k)=∑n=0N−1s(n+12)⋅(−1)nk=∑n=0N−1s(n)⋅(−1)nk {\\displaystyle \\begin{aligned} WHT:& { \\begin{cases} s(n) &= \\frac{1}{N}\\sum_{k = 0}^{N-1} \\hat{s}(k) \\cdot (-1)^{nk} = \\sqrt{\\frac{1}{N}} \\cdot \\sum_{k = 0}^{N-1} \\begin{pmatrix} \\sqrt{\\frac{1}{N}} \\cdot \\hat{s}(k) \\end{pmatrix} \\cdot (-1)^{nk} \\\\ \\hat{s}(k) &= \\sum_{n = 0}^{N-1} s(n+\\tfrac{1}{2}) \\cdot (-1)^{nk} = \\sum_{n = 0}^{N-1} s(n) \\cdot (-1)^{nk} \\end{cases} } \\\\ \\end{aligned} } WHT:⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧s(n)s^(k)=N1k=0∑N−1s^(k)⋅(−1)nk=√N1⋅k=0∑N−1(√N1⋅s^(k))⋅(−1)nk=n=0∑N−1s(n+21)⋅(−1)nk=n=0∑N−1s(n)⋅(−1)nk 即: k∈[0, N−1]n∈[0, N−1]WHT:{Sn=1N∑k=0N−1Xk⋅(−1)nkXk=1N⋅∑n=0N−1Sn⋅(−1)nk {\\displaystyle \\begin{aligned} &k \\in [0,\\ N-1] \\quad \\quad n \\in [0,\\ N - 1] \\\\ WHT:& { \\begin{cases} S_n &= \\frac{1}{\\sqrt{N}} \\sum_{k = 0}^{N-1} X_k \\cdot (-1)^{nk} \\\\ X_k &= \\frac{1}{\\sqrt{N}} \\cdot \\sum_{n = 0}^{N-1} S_n \\cdot (-1)^{nk} \\end{cases} } \\\\ \\end{aligned} } WHT:k∈[0, N−1]n∈[0, N−1]⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧SnXk=√N1k=0∑N−1Xk⋅(−1)nk=√N1⋅n=0∑N−1Sn⋅(−1)nk 扩展到 二维 情况,有: k(u,v)&p(x,y)∈[(0, 0), (N−1, N−1)]WHT:Xk(u,v)=(1N)2⋅∑p=(0,0)(N−1,N−1)Sp(x,y)⋅(−1)xu⋅(−1)yv {\\displaystyle \\begin{aligned} &k(u,v) \\& p(x,y) \\in [(0,\\ 0),\\ (N-1,\\ N-1)] \\\\ WHT: & X_k(u,v) = \\begin{pmatrix} \\frac{1}{\\sqrt{N}} \\end{pmatrix} ^2 \\cdot \\sum_{p = (0,0)}^{(N-1,N-1)}S_p(x,y) \\cdot (-1)^{xu} \\cdot (-1)^{yv} \\\\ \\end{aligned} } WHT:k(u,v)&p(x,y)∈[(0, 0), (N−1, N−1)]Xk(u,v)=(√N1)2⋅p=(0,0)∑(N−1,N−1)Sp(x,y)⋅(−1)xu⋅(−1)yv 如此,就可以矩阵表示 WHT 变化为: Xk=KWHT⋅Sp⋅KWHTT {\\displaystyle \\begin{aligned} X_k &= K_{WHT} \\cdot S_p \\cdot {K_{WHT}}^{T} \\\\ \\end{aligned} } Xk=KWHT⋅Sp⋅KWHTT 其中: KWHT=[1N⋅∑y=0N−1(1N⋅∑x=0N−1(−1)xu)⋅(−1)yv]=1N⋅[(−1)i⋅j]N×N=KWHTT=KWHT−1 {\\displaystyle \\begin{aligned} K_{WHT} &= \\begin{bmatrix} \\frac{1}{\\sqrt{N}} \\cdot \\sum_{y = 0}^{N-1} \\begin{pmatrix} \\frac{1}{\\sqrt{N}} \\cdot \\sum_{x = 0}^{N-1} (-1)^{xu} \\end{pmatrix} \\cdot (-1)^{yv} \\end{bmatrix} \\\\ &= \\frac{1}{\\sqrt{N}} \\cdot \\begin{bmatrix} & (-1)^{i \\cdot j} \\end{bmatrix} _{N \\times N} \\\\ &= {K_{WHT}}^{T} = {K_{WHT}}^{-1} \\\\ \\end{aligned} } KWHT=[√N1⋅y=0∑N−1(√N1⋅x=0∑N−1(−1)xu)⋅(−1)yv]=√N1⋅[(−1)i⋅j]N×N=KWHTT=KWHT−1 所以,我们通常记 NNN 阶哈达玛矩阵为 HN=[(−1)i⋅j]N×N=N⋅KWHTH_N = \\begin{bmatrix} (-1)^{i \\cdot j} \\end{bmatrix} _{N \\times N} = \\sqrt{N} \\cdot K_{WHT}HN=[(−1)i⋅j]N×N=√N⋅KWHT ,原式即可简化为: Xk=1N⋅H⋅Sp⋅H {\\displaystyle \\begin{aligned} X_k &= \\frac{1}{N} \\cdot H \\cdot S_p \\cdot H \\\\ \\end{aligned} } Xk=N1⋅H⋅Sp⋅H 显然,对于 H2NH_{2N}H2N 与 
HNH_NHN 有关系: H2N=[HN ,HNHN ,−HN] {\\displaystyle \\begin{aligned} H_{2N} &= \\begin{bmatrix} & H_N \\ , & H_N \\\\ & H_N \\ , -& H_N \\end{bmatrix}\\\\ \\end{aligned} } H2N=[HN ,HN ,−HNHN] 常用的哈达玛变换算子主要有 3 种,分别是: H2=[1 ,11 ,−1]H4=[H2 ,H2H2 ,−H2]=[1,1 ,1 ,11, −1 ,1 , −11,1 , −1 , −11, −1 , −1 ,1]H8=[H4 ,H4H4 ,−H4]=[H2,H2 ,H2 ,H2H2, −H2 ,H2 , −H2H2,H2 , −H2 , −H2H2, −H2 , −H2 ,H2] {\\displaystyle \\begin{aligned} H_2 &= \\begin{bmatrix} & 1 \\ , & 1 \\\\ & 1 \\ , -& 1 \\end{bmatrix} \\\\ H_4 &= \\begin{bmatrix} & H_2 \\ , & H_2 \\\\ & H_2 \\ , -& H_2 \\end{bmatrix}= \\begin{bmatrix} & 1 , & \\quad 1 \\ , & \\quad 1 \\ , & \\quad 1 \\\\ & 1 , & \\ -1 \\ , & \\quad 1 \\ , & \\ -1 \\\\ & 1 , & \\quad 1 \\ , & \\ -1 \\ , & \\ -1 \\\\ & 1 , & \\ -1 \\ , & \\ -1 \\ , & \\quad 1 \\end{bmatrix}\\\\ H_8 &= \\begin{bmatrix} & H_4 \\ , & H_4 \\\\ & H_4 \\ , -& H_4 \\end{bmatrix} = \\begin{bmatrix} & H_2 , & \\quad H_2 \\ , & \\quad H_2 \\ , & \\quad H_2 \\\\ & H_2 , & \\ -H_2 \\ , & \\quad H_2 \\ , & \\ -H_2 \\\\ & H_2 , & \\quad H_2 \\ , & \\ -H_2 \\ , & \\ -H_2 \\\\ & H_2 , & \\ -H_2 \\ , & \\ -H_2 \\ , & \\quad H_2 \\end{bmatrix}\\\\ \\end{aligned} } H2H4H8=[1 ,1 ,−11]=[H2 ,H2 ,−H2H2]=⎣⎢⎢⎡1,1,1,1,1 , −1 ,1 , −1 ,1 ,1 , −1 , −1 ,1 −1 −11⎦⎥⎥⎤=[H4 ,H4 ,−H4H4]=⎣⎢⎢⎡H2,H2,H2,H2,H2 , −H2 ,H2 , −H2 ,H2 ,H2 , −H2 , −H2 ,H2 −H2 −H2H2⎦⎥⎥⎤ 从上面的推导过程可知,采用哈达玛变换,同样能够将频域中的高低频信息,进行分区汇集。理论上 WHT 也可以代替 IDST/IDCT 来做频域压缩(降低信息熵)前的归类处理。 哈达玛变换的常见应用 考虑到 WHT 是 DST/DCT 的特殊拟合,而基底函数有限。其本身在选取较大的窗口尺寸,且被使用在取值范围差异较大的原信号时,会导致一定程度的误差。工程中除非量化到门电路的粒度,其余大多时间还是用它来求解指定窗口范围,残差信号(Residual Singnal) 经哈达玛变换后 绝对误差和(SATD [Sum of Absolute Transformed Difference])。 而哈达玛变换后绝对误差和(SATD)取值,即是变换求得 的所有元素绝对值之和,有: SATD=∑i∑j∣Xk(i,j)∣ {\\displaystyle \\begin{aligned} SATD = \\sum_i \\sum_j |X_k(i,j)| \\\\ \\end{aligned} } SATD=i∑j∑∣Xk(i,j)∣ 以 SATD 来代替传统绝对误差和(SAD [Sum of Absolute Difference])。利用 WHT 的加和快速运算特征计算残差趋势,协助时空域运动估计和数据量化的压缩处理。 哈达玛变换的常见应用 除此之外,如果我们换一种视角,将经过 IDST/IDCT 处理后的一系列子块所得结果,整合各子块得到的直流系数(DC)为一次输入给哈达玛变换。那么根据傅立叶变换特性,WHT 将对已经分离的低频权重信息,再次进行一次基于基底函数的分离。 而哈达玛变换仍属于傅立叶变换,这样的处理会使参与运算的 直流系数(DC) 所处子块,再进行一次变化程度的筛选,从而完成进一步细分并降低区域内的取值量级,更便于随后配合其它量化手段,减少信息熵。而小于 4×44 \\times 44×4 大小的哈达玛变换算子,并不会造成太大损失。 这个做法在 H.264 中得到了较为充分的体现。 H.264 中,对 YUV420 传输格式的亮度值 YkY_kYk 数据,取用了 16×1616 \\times 1616×16 点区域构成包含 4×44 \\times 44×4 个子块的范围,进行了两次特殊的哈达玛变换。得到 二次直流系数矩阵 Y^k\\hat{Y}_kY^k 作为传输值 : HY1=[1,1 ,1 ,12, −1 ,1 , −21,1 , −1 , −11, −2 , −2 ,1]HY2=[1,1 ,1 ,11, −1 ,1 , −11,1 , −1 , −11, −1 , −1 ,1]Y^k=HY2⋅(HY1⋅Yk∣DC⋅HY1)⋅HY2 {\\displaystyle \\begin{aligned} H_{Y_1} = \\begin{bmatrix} & 1 , & \\quad 1 \\ , & \\quad 1 \\ , & \\quad 1 \\\\ & 2 , & \\ -1 \\ , & \\quad 1 \\ , & \\ -2 \\\\ & 1 , & \\quad 1 \\ , & \\ -1 \\ , & \\ -1 \\\\ & 1 , & \\ -2 \\ , & \\ -2 \\ , & \\quad 1 \\end{bmatrix} \\quad &H_{Y_2} = \\begin{bmatrix} & 1 , & \\quad 1 \\ , & \\quad 1 \\ , & \\quad 1 \\\\ & 1 , & \\ -1 \\ , & \\quad 1 \\ , & \\ -1 \\\\ & 1 , & \\quad 1 \\ , & \\ -1 \\ , & \\ -1 \\\\ & 1 , & \\ -1 \\ , & \\ -1 \\ , & \\quad 1 \\end{bmatrix} \\\\ \\hat{Y}_k = H_{Y_2}\\cdot (H_{Y_1} &\\cdot Y_k|_{DC} \\cdot H_{Y_1}) \\cdot H_{Y_2} \\end{aligned} } HY1=⎣⎢⎢⎡1,2,1,1,1 , −1 ,1 , −2 ,1 ,1 , −1 , −2 ,1 −2 −11⎦⎥⎥⎤Y^k=HY2⋅(HY1HY2=⎣⎢⎢⎡1,1,1,1,1 , −1 ,1 , −1 ,1 ,1 , −1 , −1 ,1 −1 −11⎦⎥⎥⎤⋅Yk∣DC⋅HY1)⋅HY2 而对色度分量 CbCrC_bC_rCbCr 数据,则根据格式的数据组成和排布,取用了 8×88 \\times 88×8 点区域构成包含 2×22 \\times 22×2 个子块的范围,进行了单次标准哈达玛变换。得到 二次直流系数矩阵 C^bC^r\\hat{C}_b\\hat{C}_rC^bC^r 作为传输值 : HCbCr=[1 ,11 ,−1]C^b=HCbCr⋅Cb∣DC⋅HCbCrC^r=HCbCr⋅Cr∣DC⋅HCbCr {\\displaystyle \\begin{aligned} &H_{C_bC_r} = \\begin{bmatrix} & 1 \\ , & 1 \\\\ & 1 \\ , 
-& 1 \\end{bmatrix} \\\\ \\hat{C}_b &= H_{C_bC_r} \\cdot C_b|_{DC} \\cdot H_{C_bC_r} \\\\ \\hat{C}_r &= H_{C_bC_r} \\cdot C_r|_{DC} \\cdot H_{C_bC_r} \\\\ \\end{aligned} } C^bC^rHCbCr=[1 ,1 ,−11]=HCbCr⋅Cb∣DC⋅HCbCr=HCbCr⋅Cr∣DC⋅HCbCr 不过,随着小模型介入了二次变换压缩直流系数矩阵的过程,这套基于哈达玛变换(WHT)的压缩手段,最终还是被压缩比和还原程度更高的,以 低频不可分变换(LFNST)为代表的高频凋零技术,替代了原有的作用。 因为如上的缘故,在现行最新的规格中,以压缩冗余为目的频域数据分离,大都仍然采用整数离散正余弦变换(IDST/IDCT) 为主要入口技术。哈达玛变换(WHT)则相对局限的,被使用在 SATD 上。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Docs_3_5_3.html":{"url":"Chapter_3/Language/cn/Docs_3_5_3.html","title":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform])","keywords":"","body":"3.5.3 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform]) 低频不可分变换(LFNST [Low-Frequency Non-Separable Transform]) 是高频凋零技术的代表,通过一个 经过离线训练 所得的不可分变换矩阵,来进一步压缩指定范围内的前一次变换结果 [32] 。我们通常将首次变换称为 一次变换(First Transform) 或 主要变换(Primary Transform)。将以一次变换为输入的第二次变换,称为 二次变换(Secondary Transform)。 低频不可分变换(LFNST),与 H.264 中的哈达玛变换(WHT),都属于作用于二次变换的一种处理技术。其本身复杂的部分,在于如何得到 不可分变换矩阵组(NSMG [Non-Separable Matrix Group])。矩阵组通过特殊的离线小模型计算所得,是一个常量矩阵组。 那么,什么是不可分变换(Non-Separable Transform)? 不可分变换与 LFNST 原理 不可分变换,被用来指代一类,无法分解为独立的行变换与列变换的组合形式表示, 只能 用单一矩阵作统一处理的变换类型。 与之相对的就是可分变换(Separable Transform)。 前文中的离散正弦变换和哈达玛变换,就属于可分变换类型。 可见,对于 可分变换,如果记分离后的行列变化矩阵分别为 M∣rowM|_{row}M∣row 、 M∣colM|_{col}M∣col ,那么始终有: Out=M∣row⋅In⋅M∣col {\\displaystyle \\begin{aligned} Out &= M|_{row} \\cdot In \\cdot M|_{col} \\\\ \\end{aligned} } Out=M∣row⋅In⋅M∣col 而对于 不可分变换,则有变换矩阵 M∣uniM|_{uni}M∣uni 使得: Out=M∣uni⋅In {\\displaystyle \\begin{aligned} Out &= M|_{uni} \\cdot In \\\\ \\end{aligned} } Out=M∣uni⋅In 低频不可分所使用的矩阵组,就属于后一种。 如果记采用一次变换所得 M×NM \\times NM×N 大小的输出 作为 LFNST 输入。则 LFNST 需要根据技术执行前置环节中的一些求得信息(如角度预测模式、帧内预测模式)的参数值,来从矩阵组中选取满足条件的常量矩阵 TTT 。以 TTT 作为当前 LFNST 的算子参与再次变换。 算法要求,目标算子 TTT 矩阵的大小为 输入大小的平方,即有 TTT 取 MN×MNMN \\times MNMN×MN 尺寸。而输入 XkX_kXk 则需要以一维向量形式展开,有: Xk=[X11 ,X12 ,⋯, X1NX21 ,X22 ,⋯, X2N⋮,⋮ ,⋯, ⋮XM1 ,XM2 ,⋯, XMN]M×NXk′=[X11 ,X12 ,⋯, XM1 ,⋯, XMN]MN×1T {\\displaystyle \\begin{aligned} &X_k = \\begin{bmatrix} & X_{11} \\ , & X_{12} \\ , \\cdots,\\ & X_{1N} \\\\ & X_{21} \\ , & X_{22} \\ , \\cdots,\\ & X_{2N} \\\\ & \\vdots , & \\vdots \\ , \\cdots,\\ & \\vdots \\\\ & X_{M1} \\ , & X_{M2} \\ , \\cdots,\\ & X_{MN} \\end{bmatrix}_{M \\times N} \\\\ &X_k\\prime = \\begin{bmatrix} & X_{11} \\ , & X_{12} \\ , \\cdots,\\ & X_{M1} \\ , \\cdots,\\ & X_{MN} \\end{bmatrix}_{MN \\times 1}^{T} \\\\ \\end{aligned} } Xk=⎣⎢⎢⎡X11 ,X21 ,⋮,XM1 ,X12 ,⋯, X22 ,⋯, ⋮ ,⋯, XM2 ,⋯, X1NX2N⋮XMN⎦⎥⎥⎤M×NXk′=[X11 ,X12 ,⋯, XM1 ,⋯, XMN]MN×1T 将 Xk′X_k\\primeXk′ 代入标准公式,得到输出结果 X^k′\\hat{X}_k\\primeX^k′ 为: X^k′=T⋅Xk′ {\\displaystyle \\begin{aligned} \\hat{X}_k\\prime &= T \\cdot X_k\\prime \\\\ \\end{aligned} } X^k′=T⋅Xk′ 上式即是 低频不可分变换,被应用在二次变换时的基本公式 了。所得 X^k′\\hat{X}_k\\primeX^k′ 是长度为 MN×1MN \\times 1MN×1 的一维向量,我们需要按照 Xk→Xk′X_k \\rightarrow X_k\\primeXk→Xk′ 的逆过程,反向将其展开到 M×NM \\times NM×N 的二维区域,得到变换后的 X^k\\hat{X}_kX^k 结果矩阵。 LFNST 变换集矩阵组的获取 在说明原理中我们提到,低频不可分变换中,根据前置参数的差异,会从持有矩阵组里选择合适的算子 TTT 来代入运算,称之为 变换集(Transform Set)。但因为 H.266 的专利权问题,这一传统机器学习(注意,并未使用深度学习手段)聚类分析模型的训练,所采用的基本数据集和部分过程细节,并没有完全开源。 从 LFNST 原论文中可获知的信息是,针对 不可分二次变换(NSST [Non-Separable Secondary Transform])的变换集,采用的是基于 K均值(K-Means)聚类分析法 的变体算法。因此,基本训练过程属于标准的双步(2-Stages)无监督学习(Unsupervised Learning),存在两个阶段,分别是:初始阶段(Initialization)和 迭代阶段(Iteration) [33] 。当然,数据也需要分为 准备数据(Preparing Data) 和 训练数据(Training Data),两者皆来自未开放的黑盒数据集。 初始阶段中,主要处理准备数据,进行了两个工作: 首先,进行 特征(Feature)的选择(Selection)和提取(Extraction)。为每一个从 
编码过程(Encoding Process) 获取的 变换系数块(Transform Coefficient Block) 随机的分配一个取值范围为 label∈Z[0, 3]label \\in \\mathbb{Z} [0, \\ 3]label∈Z[0, 3] 的标签。并将分配好标签的变换系数块的 M×NM \\times NM×N 个低频系数,加入到 对应标签聚类(Cluster)的训练用数据集中。 M×NM \\times NM×N 为目标输入输出的大小,例如前文采用核心为 N×NN \\times NN×N ,那么这里企图训练所得核心的参数 M=NM = NM=N 相等。而选出对应标签的 M×NM \\times NM×N 个输入,每个都被认为是独立的一个数据,即每个聚类有 MNMNMN 个准备数据,共 4 组 4MN4MN4MN 个准备数据。 其次, 选择聚类算法(Clustering Algorithm Selection) 和 约束条件设计(Constraint Design)。这里采用 K均值算法,通过利用前一步中,标签范围在 label∈Z[1, 3]label \\in \\mathbb{Z} [1, \\ 3]label∈Z[1, 3] 的聚类(Cluster)分配好的训练用数据集,以 奇异值分解(SVD [Singular Value Decom-position]) 等解释性较弱但快速的方法,来求解各自聚类的协方差矩阵特征向量的最佳不可分离变换矩阵(采用 SVD 所得即是奇异值矩阵)。进而获得 label∈Z[1, 3]label \\in \\mathbb{Z} [1, \\ 3]label∈Z[1, 3] 聚类的 质心(Centroid),与各数据集一起构造了算法 启动输入。而 label=0label = 0label=0 的聚类,则被选做为 对照组(Validation Group),因此该矩阵的质心被设置为单位矩阵 E=[1]E = [1]E=[1] ,以输入输出恒等的形式,不再参与迭代阶段的更新。 那么约束条件是怎么设置的呢?这里采用的是,在单次训练过程后,从 3 个聚类的质心与 label=0label = 0label=0 聚类的质心中,选取最小(或下降最明显)的 率失真优化指数(RDO [Rate-Distortion Optimization]) 作为评判标准。筛选出 4 个聚类中 RDO 最优的一个聚类,用新参与训练的对应聚类数据集,替换原有被选最优聚类的数据集,参与下一次迭代。以此,作为 K-均值聚类分析的标准约束。 随后进入迭代阶段。 迭代阶段中,主要处理训练数据,训练同样也分为两步: 首先,进行 聚类验证(Cluster Validation)。聚类验证的过程就和一般的 K均值算法一致,将分批的训练数据交付到 4 个聚类,分别计率失真优化指数(RDO)。之后,用计算结果,按照设置的约束条件进行处理, 更新聚类标定的数据集。 其次,完成 结果解析(Results Interpretation)。当更新聚类当前数据集后,需要重新计算聚类的质心,方法同初始阶段一致。通过求解协方差矩阵,获取最佳不可分离变换矩阵,替代原聚类的质心。显然, label=0label = 0label=0 聚类的质心 E=[1]E = [1]E=[1] 并不需要更新。 而下一次迭代是否继续,则根据是否到达模型的 最大迭代次数(该参数未提供经验值,也可根据自身训练情况自行设定),或 RDO 没有进一步降低 来决定。两者命中其一,则停止迭代训练,获取 label∈Z[1, 3]label \\in \\mathbb{Z} [1, \\ 3]label∈Z[1, 3] 聚类此时的质心,作为结果构成 MN×MNMN \\times MNMN×MN 尺寸的变化集: TMN×MN∈[Cluster1 ,Cluster2 ,Cluster3 ] {\\displaystyle \\begin{aligned} &T_{MN \\times MN} \\in \\begin{bmatrix} & Cluster_{1} \\ , & Clust&er_{2} \\ , & Cluster_{3} \\ \\end{bmatrix}\\\\ \\end{aligned} } TMN×MN∈[Cluster1 ,Cluster2 ,Cluster3 ] 一般 TMN×MNT_{MN \\times MN}TMN×MN 会比较难记,通常简化为根据输入标记,写做 TM×NT_{M \\times N}TM×N 简记。变换集简写为: TM×N∈[T1∣M×N ,T2∣M×N ,T3∣M×N ] {\\displaystyle \\begin{aligned} &T_{M \\times N} \\in \\begin{bmatrix} & {T_1}|_{M \\times N} \\ , & T_2|_{M \\times N} \\ , & T_3|_{M \\times N} \\ \\end{bmatrix}\\\\ \\end{aligned} } TM×N∈[T1∣M×N ,T2∣M×N ,T3∣M×N ] 此时的 TTT 即是 M×NM \\times NM×N 输入尺寸的 低频不可分变换算子(LFNST Opt)。理论上,矩阵 TTT 会 保留输入源的分布形式,并将之密度梯度化。 若输入前置主变换采用 DCT-2 型,那么二次变换的输入 Xk′X_k\\primeXk′ ,在经过 LFNST 变换后,算子会将低频波密度参数富集到自身靠上方的行信息中,将高频波密度参数富集到靠下方的行信息中。从而实现,变换后输出的相对训练结束时质心位置的相对均匀分布。即维持输出的高低频权重二次变换结果矩阵 X^k\\hat{X}_kX^k ,在一维展开式 X^k′\\hat{X}_k\\primeX^k′ 情况下的类算子高低频分离布局,左侧低频右侧高频。 因此,当还原输出权重矩阵 X^k′\\hat{X}_k\\primeX^k′ 到 M×NM \\times NM×N 大小后,前置 DCT-2 型的低频权重仍然会位于矩阵的左上角。相应,高频则会接近右下角。这样的因素,让主变换采用 DCT-2 型,经过 LFNST 变换后的左上角首个参数值,仍可被当作直流系数(DC)。而结合 H.266/VVC 规格下的包括平面(Planar)模式、直流(DC)模式、65 种角度(Angle)预测模式在内,共计 67 种帧内预测模式本身就需要多组变化集的情况下,对于不同的主变换类型,又要单独再训练一系列变换集。处理代价会高到无法接受。 所以,目前 只将 LFNST 运用在 DCT-2 输入的情况。 至此,在经过多次不同尺寸和模式输入下的模型训练过程后,得到了数个 M×NM \\times NM×N 取值不等的矩阵算子 [T1,⋯, Tq][T_1, \\cdots ,\\ T_q][T1,⋯, Tq] 。共同组成了 LFNST 的基础变换集组 T=[T1,⋯, Tq]T = [T_1, \\cdots ,\\ T_q]T=[T1,⋯, Tq] ,亦被称为 基础多变换集(MTS [Multiple Transform Set]),应对目标主变换。 LFNST 有关不可分二次变换(NSST)的化简 经过上述的推理,我们可以察觉到即便是取一个较小的尺寸,整个 LFNST 的运算也会呈指数的增加算力消耗。例如输入的 M=N=8M = N = 8M=N=8 时,就需要一个尺寸为 64×6464 \\times 6464×64 大小的 LFNST 运算核。但如此大小对于计算机本身的硬件来说,会是一个 巨大的负担。 于是,在 VTM5 有关 LFNST 工程实践的 JVET-K0099 提案中,对 LFNST 的主要应用场景,即二次不可分变换(NSST),做了算法上的调整 [34] 。利用复合基,降低计算成本。 假设当前输入尺寸为 M×NM \\times NM×N 大小,有与输入预测模式对应的尺寸为 MN×MNMN \\times MNMN×MN 的低频不可分变换算子 TTT 。 NSST 规定,对于 min(M, N)=4min(M ,\\ N) = 4min(M, N)=4 的输入,统一取用 4×44 \\times 44×4 输入的算子 TTT 。对于 min(M, N)=8min(M ,\\ 
N) = 8min(M, N)=8 的输入,统一取用 8×88 \\times 88×8 输入的算子 TTT 。那么需要保存的算子就只分为 16×1616 \\times 1616×16 和 64×6464 \\times 6464×64 大小的共计 6 个变换核,即有 T=[T4×4, T8×8]T = [T_{4 \\times 4},\\ T_{8 \\times 8}]T=[T4×4, T8×8] 变换集。 对于小于输入尺寸的块,补 0 到可以进行计算的大小。 而对于两类变换集,NSST 只需要分离所得的低频权重部分。因此反推算子情况,亦只需要保留所有 MTS 中的算子 TTT 上方一定行即可。提案中,NSST 在经过多次大批量数据的模拟实验后,确定了最终方案。 取尺寸为 RN×RNRN \\times RNRN×RN 的 NSST 低频不可分变换算子 T′T\\primeT′ ,代替原有 MN×MNMN \\times MNMN×MN 大小算子 TTT 。 对于 T4×4′T_{4 \\times 4}\\primeT4×4′ 时的 4×44 \\times 44×4 输入,由于已经被划分的不可再分的量级,因而对于算子没有办法进行压缩。 4×44 \\times 44×4 相当于对输入的 再排列,只有右下角的最高频权重有去掉的可能。此类强制过滤的处理都是有损的,不需要做不必要的工作。 而如果强行构造 2×22 \\times 22×2 输入的算子 T2×2′T_{2 \\times 2}\\primeT2×2′ ,则会因为算子训练特性没有分离的空间,使结果反倒太过平均。因此,对于 2×22 \\times 22×2 大小的输入,无法采用 LFNST 处理。这也阻断了我们通过选用 T2×2′T_{2 \\times 2}\\primeT2×2′ 的局部解构建复合基,等效替代更大尺寸基底,来降低变化成本的途径。 所以,此处仍选择取用原 4×44 \\times 44×4 输入对应的算子 T4×4T_{4 \\times 4}T4×4 ,有 R=4R = 4R=4 即: T4×4′=T4×4 {\\displaystyle \\begin{aligned} T_{4 \\times 4}\\prime = T_{4 \\times 4} \\\\ \\end{aligned} } T4×4′=T4×4 对于 T8×8′T_{8 \\times 8}\\primeT8×8′ 时的 8×88 \\times 88×8 输入,因为存在 T4×4′T_{4 \\times 4}\\primeT4×4′ 作为基础,就能够使用分离复合基的方式了。我们可以将输出的 X^k\\hat{X}_kX^k 分割为 4 个等大的 4×44 \\times 44×4 区域。以 4×44 \\times 44×4 区域为一组 复合解基。采用经过训练的 T4×4′T_{4 \\times 4}\\primeT4×4′ 作为基底函数族,来求得 8×88 \\times 88×8 输入情况下,针对 T4×4′T_{4 \\times 4}\\primeT4×4′ 的解集,构成输出 X^k\\hat{X}_kX^k 。即期望有: W^k=T4×4′⋅WkX^k′′=∑i=14(T4×4′⋅W4×4⋅Xk′)i {\\displaystyle \\begin{aligned} \\hat{W}_k &= T_{4 \\times 4}\\prime \\cdot W_k \\\\ \\hat{X}_k\\prime\\prime &= \\sum_{i=1}^4 ( T_{4 \\times 4}\\prime \\cdot W_{4 \\times 4} \\cdot X_k \\prime )_i\\\\ \\end{aligned} } W^kX^k′′=T4×4′⋅Wk=i=1∑4(T4×4′⋅W4×4⋅Xk′)i 其中, WkW_kWk 是基于 T4×4′T_{4 \\times 4}\\primeT4×4′ 训练的 LFNST 核,它和输出 W^k\\hat{W}_kW^k 都为 4×44 \\times 44×4 大小训练 8×88 \\times 88×8 的 LFNST 基础分解基,训练完毕后是个 固定值。 而 X^k′′\\hat{X}_{k}\\prime\\primeX^k′′ 则是输入 Xk′X_k\\primeXk′ 关于 T4×4′⋅W4×4T_{4 \\times 4}\\prime \\cdot W_{4 \\times 4}T4×4′⋅W4×4 的变换结果。但一组选定尺寸的 LFNST 变换集,只有 3 个矩阵可作为基底。因此,变换的覆盖范围也是有限的。若将输入 8×88 \\times 88×8 大小的 XkX_kXk 也分为 4 个等大的 4×44 \\times 44×4 区域,写作如下形式: Xk=[Xk∣4×4 ,Xk∣4×4Xk∣4×4 ,Xk∣4×4]=[Xk1 ,Xk2Xk3 ,Xk4] {\\displaystyle \\begin{aligned} &X_k = \\begin{bmatrix} & X_k|_{4 \\times 4} \\ , & X_k|_{4 \\times 4} \\\\ & X_k|_{4 \\times 4} \\ , & X_k|_{4 \\times 4} \\end{bmatrix} = \\begin{bmatrix} & X_{k1} \\ , & X_{k2} \\\\ & X_{k3} \\ , & X_{k4} \\end{bmatrix} \\end{aligned} } Xk=[Xk∣4×4 ,Xk∣4×4 ,Xk∣4×4Xk∣4×4]=[Xk1 ,Xk3 ,Xk2Xk4] 那么原 X^k′′\\hat{X}_{k}\\prime\\primeX^k′′ 分离式即变为: X^k′′=[T1∣4×4′ ,T2∣4×4′T1∣4×4′ , [0]4×4]⋅[Wk1 ,Wk2Wk3 ,Wk4]⋅[Xk1 ,Xk2Xk3 ,Xk4]=∑T4×4′⋅[Wk1 ,Wk2Wk3 ,[0]4×4]⋅[Xk1 ,Xk2Xk3 ,[0]4×4] {\\displaystyle \\begin{aligned} \\hat{X}_k\\prime\\prime &= \\begin{bmatrix} & T_1|_{4 \\times 4}\\prime \\ , & T_2|_{4 \\times 4}\\prime \\\\ & T_1|_{4 \\times 4}\\prime \\ , & \\ [0]_{4 \\times 4} \\end{bmatrix} \\cdot \\begin{bmatrix} & W_{k1} \\ , & W_{k2} \\\\ & W_{k3} \\ , & W_{k4} \\end{bmatrix} \\cdot \\begin{bmatrix} & X_{k1} \\ , & X_{k2} \\\\ & X_{k3} \\ , & X_{k4} \\end{bmatrix} \\\\ &= \\sum T_{4 \\times 4}\\prime \\cdot \\begin{bmatrix} & W_{k1} \\ , & W_{k2} \\\\ & W_{k3} \\ , & [0]_{4 \\times 4} \\end{bmatrix} \\cdot \\begin{bmatrix} & X_{k1} \\ , & X_{k2} \\\\ & X_{k3} \\ , & [0]_{4 \\times 4} \\end{bmatrix} \\\\ \\end{aligned} } X^k′′=[T1∣4×4′ ,T1∣4×4′ ,T2∣4×4′ [0]4×4]⋅[Wk1 ,Wk3 ,Wk2Wk4]⋅[Xk1 ,Xk3 ,Xk2Xk4]=∑T4×4′⋅[Wk1 ,Wk3 ,Wk2[0]4×4]⋅[Xk1 ,Xk3 ,Xk2[0]4×4] 存在 Xk4X_{k4}Xk4 区域,乘 000 丢解的问题,因此 X^k′′\\hat{X}_k\\prime\\primeX^k′′ 与 X^k′\\hat{X}_k\\primeX^k′ 的关系,还需要补充 Xk4X_{k4}Xk4 
的 LFNST 独立解,记为 X^k4′\\hat{X}_{k4}\\primeX^k4′ ,有: T8×8−1⋅X^k′=[T1∣4×4′−1 ,T2∣4×4′−1T3∣4×4′−1 , [0]4×4]⋅W4×4−1⋅X^k′′+T4×4−1⋅X^k4′=[W^k1−1 ,W^k2−1W^k3−1 ,T4×4−1]⋅[X^k′′ ,[0]4×4[0]4×4 ,X^k4′]=Xk {\\displaystyle \\begin{aligned} {T_{8 \\times 8}}^{-1} \\cdot \\hat{X}_k\\prime &= \\begin{bmatrix} & {T_1|_{4 \\times 4}\\prime}^{-1} \\ , & {T_2|_{4 \\times 4}\\prime}^{-1} \\\\ & {T_3|_{4 \\times 4}\\prime}^{-1} \\ , & \\ [0]_{4 \\times 4} \\end{bmatrix} \\cdot {W_{4 \\times 4}}^{-1} \\cdot \\hat{X}_k\\prime\\prime + {T_{4 \\times 4}}^{-1} \\cdot \\hat{X}_{k4}\\prime \\\\ &= \\begin{bmatrix} & {\\hat{W}_{k1}}^{-1} \\ , & {\\hat{W}_{k2}}^{-1} \\\\ & {\\hat{W}_{k3}}^{-1} \\ , & {T_{4 \\times 4}}^{-1} \\end{bmatrix} \\cdot \\begin{bmatrix} & \\hat{X}_k\\prime\\prime \\ , & [0]_{4 \\times 4} \\\\ & [0]_{4 \\times 4} \\ , & \\quad \\hat{X}_{k4}\\prime \\end{bmatrix} \\\\ &= X_k \\end{aligned} } T8×8−1⋅X^k′=[T1∣4×4′−1 ,T3∣4×4′−1 ,T2∣4×4′−1 [0]4×4]⋅W4×4−1⋅X^k′′+T4×4−1⋅X^k4′=[W^k1−1 ,W^k3−1 ,W^k2−1T4×4−1]⋅[X^k′′ ,[0]4×4 ,[0]4×4X^k4′]=Xk 即: X^k′=[X^k′′ ,[0]4×4[0]4×4 ,X^k4′]T8×8=[W^k1 ,W^k2W^k3 ,T4×4]=[T4×4′⋅Wk ,T4×4′⋅WkT4×4′⋅Wk ,T4×4] {\\displaystyle \\begin{aligned} \\hat{X}_k\\prime &= \\begin{bmatrix} & \\hat{X}_k\\prime\\prime \\ , & [0]_{4 \\times 4} \\\\ & [0]_{4 \\times 4} \\ , & \\quad \\hat{X}_{k4}\\prime \\end{bmatrix} \\\\ T_{8 \\times 8} &= \\begin{bmatrix} & \\hat{W}_{k1} \\ , & \\hat{W}_{k2} \\\\ & \\hat{W}_{k3} \\ , & T_{4 \\times 4} \\end{bmatrix} = \\begin{bmatrix} & T_{4 \\times 4}\\prime \\cdot W_k \\ , & T_{4 \\times 4}\\prime \\cdot W_k \\\\ & T_{4 \\times 4}\\prime \\cdot W_k \\ , & T_{4 \\times 4} \\end{bmatrix} \\end{aligned} } X^k′T8×8=[X^k′′ ,[0]4×4 ,[0]4×4X^k4′]=[W^k1 ,W^k3 ,W^k2T4×4]=[T4×4′⋅Wk ,T4×4′⋅Wk ,T4×4′⋅WkT4×4] 取用: T8×8′=[T4×4′⋅Wk ,T4×4′⋅WkT4×4′⋅Wk ,0] {\\displaystyle \\begin{aligned} T_{8 \\times 8}\\prime &= \\begin{bmatrix} & T_{4 \\times 4}\\prime \\cdot W_k \\ , & T_{4 \\times 4}\\prime &\\cdot W_k \\\\ & T_{4 \\times 4}\\prime \\cdot W_k \\ , & &0 \\end{bmatrix} \\end{aligned} } T8×8′=[T4×4′⋅Wk ,T4×4′⋅Wk ,T4×4′⋅Wk0] 那么原 8×88 \\times 88×8 输入 XkX_kXk 经过 LFNST 变换的输出 X^k′\\hat{X}_k\\primeX^k′ 就有: X^k′=(T8×8′+T4×4′)⋅Xk {\\displaystyle \\begin{aligned} \\hat{X}_k\\prime &= (T_{8 \\times 8}\\prime + T_{4 \\times 4}\\prime )\\cdot X_k \\\\ \\end{aligned} } X^k′=(T8×8′+T4×4′)⋅Xk 而 X^k′\\hat{X}_k\\primeX^k′ 的右上和左下角,皆为 [0]4×4[0]_{4 \\times 4}[0]4×4 值。 T8×8′T_{8 \\times 8}\\primeT8×8′ 算子展开去零后,只有 16×4816 \\times 4816×48 的运算大小 因为固定了基底 T4×4′T_{4 \\times 4}\\primeT4×4′ 的位置,同样也只有 3 个聚类,即 3 个矩阵算子。 最终: NSST:{T=[ T4×4′, T8×8′ ]X^k′=(T8×8′+T4×4′)⋅Xk,min(M, N)=8X^k′=T4×4′⋅Xk,min(M, N)=4 {\\displaystyle \\begin{aligned} NSST:& \\begin{cases} { \\begin{aligned} T &= [\\ T_{4 \\times 4}\\prime,\\ T_{8 \\times 8}\\prime \\ ] \\\\ \\hat{X}_k\\prime &= (T_{8 \\times 8}\\prime + T_{4 \\times 4}\\prime )\\cdot X_k &, min(M ,\\ N) = 8 \\\\ \\hat{X}_k\\prime &= T_{4 \\times 4}\\prime \\cdot X_k &, min(M ,\\ N) = 4 \\end{aligned} } \\end{cases} \\\\ \\end{aligned} } NSST:⎩⎨⎧TX^k′X^k′=[ T4×4′, T8×8′ ]=(T8×8′+T4×4′)⋅Xk=T4×4′⋅Xk,min(M, N)=8,min(M, N)=4 由 T4×4′T_{4 \\times 4}\\primeT4×4′ 和 T8×8′T_{8 \\times 8}\\primeT8×8′ 构造新的基础多变换集(MTS)。结合上述变换过程,构成了 NSST 的完整理论基础。 不过,即使 NSST 已经极大的缩减了 LFNST 变换集的大小,并能在参与熵编码后,能更为有效的降低信息熵。但在以 H.265/HEVC 为目标应用时,就需要 35 组 2 类 3 算子的变换集 [34] 。延伸到 H.266/VVC 规格,则会至少需要 67 组 2 类 3 算子变换集。不论是 H.265 还是 H.266 ,都不可能采纳,属于无法工程化的技术。 那么,如何精简基础多变换集呢? 
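To make the NSST formulation above concrete, the following is a minimal C sketch of the basic secondary-transform step X̂k′ = T · Xk′ for a 4×4 primary-transform input: flatten the block into a 16×1 vector, apply a single MN×MN non-separable operator, and reshape the result back to 4×4. Everything specific here (row-major flattening order, the identity stand-in for T, the function names) is an illustrative assumption; the real trained H.266 operators and their selection by prediction mode are as described in the text.

```c
#include <stdio.h>

#define M  4
#define N  4
#define MN (M * N)

/* Basic LFNST/NSST secondary-transform step: flatten the 4x4 primary
 * coefficients Xk into a 16x1 vector, multiply by the (here: placeholder)
 * 16x16 non-separable operator T, then reshape back to a 4x4 block. */
static void lfnst_4x4(const int Xk[M][N], const int T[MN][MN], int Xk_hat[M][N]) {
    int x[MN], y[MN];

    /* Xk -> Xk' : row-major flatten to a one-dimensional vector */
    for (int i = 0; i < M; i++)
        for (int j = 0; j < N; j++)
            x[i * N + j] = Xk[i][j];

    /* Xk_hat' = T * Xk' : one non-separable matrix-vector product */
    for (int r = 0; r < MN; r++) {
        y[r] = 0;
        for (int c = 0; c < MN; c++)
            y[r] += T[r][c] * x[c];
    }

    /* Xk_hat' -> Xk_hat : reshape back to the 4x4 block */
    for (int i = 0; i < M; i++)
        for (int j = 0; j < N; j++)
            Xk_hat[i][j] = y[i * N + j];
}

int main(void) {
    int T[MN][MN] = {0};            /* identity operator as a stand-in for a trained T */
    for (int i = 0; i < MN; i++) T[i][i] = 1;

    int Xk[M][N] = { {10, 3, 0, 0}, {2, 1, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} };
    int Xk_hat[M][N];

    lfnst_4x4(Xk, T, Xk_hat);
    printf("Xk_hat(0,0) = %d\n", Xk_hat[0][0]);
    return 0;
}
```

The single matrix-vector product is exactly what makes the transform "non-separable": unlike the IDST/IDCT case, it cannot be split into independent row and column passes, which is also why the operator size grows to MN×MN and motivates the reductions discussed next.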
LFNST 在 H.266 应用的工程 RST 与常值 MTS 在 VTM5 的有关 JVET-N0193 提案的提交中,H.266/VVC 采用了 缩减低频不可分变换(R-LFNST [Reduced LFNST]),处理此问题 [33] 。因为是针对 LFNST 的 二次不可分变换(NSST)的逼近算法,R-LFNST 也被称为 缩减二次变换(RST [Reduced Secondary Transform]) [35] 。 缩减二次变换对 LFNST 的 NSST 应用所得 基础多变换集(MTS),进行 整体变换集算子数量 和 算子生效范围,两方面的裁剪。其理论根基仍来源自 NSST 。 RST 在生效范围的调整,主要集中于控制 NSST 在工程实现中的有效计算区域。根据 NSST 的基本公式可以发现,实际上对于尺寸大于 8×88 \\times 88×8 的 M×NM \\times NM×N 大小主变换 XkX_kXk 输入,NSST 能起作用的部分仅局限于左上角和与其相邻的,共计 3 个 4×44 \\times 44×4 大小的范围。 如此一来,介于参与 NSST 的输入已不可再分,对于这三个区域外的的其余 XkX_kXk 值, 根本不需要再次进行二次变换处理。而在 T4×4′T_{4 \\times 4}\\primeT4×4′ 时和 NSST 一致。 所以,原 NSST 公式可调整为: RST:{T=[ T4×4′, T8×8′ ]X^k′=T8×8′⋅Xk′∣48×1,min(M, N)=8X^k′=T4×4′⋅Xk,min(M, N)=4 {\\displaystyle \\begin{aligned} RST:& \\begin{cases} { \\begin{aligned} T &= [\\ T_{4 \\times 4}\\prime,\\ T_{8 \\times 8}\\prime \\ ] \\\\ \\hat{X}_k\\prime &= T_{8 \\times 8}\\prime \\cdot X_k\\prime|_{48 \\times 1} &, min(M ,\\ N) = 8 \\\\ \\hat{X}_k\\prime &= T_{4 \\times 4}\\prime \\cdot X_k &, min(M ,\\ N) = 4 \\end{aligned} } \\end{cases} \\\\ \\end{aligned} } RST:⎩⎨⎧TX^k′X^k′=[ T4×4′, T8×8′ ]=T8×8′⋅Xk′∣48×1=T4×4′⋅Xk,min(M, N)=8,min(M, N)=4 即,对于 M×N≥8×8M \\times N \\ge 8 \\times 8M×N≥8×8 的情况,就如下图所示: 图 3-24 RST 的 8x8 输入理示意图[32] 有 T8×8′T_{8 \\times 8}\\primeT8×8′ 时,只需处理图中蓝色部分的 XkX_kXk 数据。 经 T8×8′T_{8 \\times 8}\\primeT8×8′ 计算后的原输出结果 X^k′\\hat{X}_k\\primeX^k′ ,安全起见会需要对非左上角部分扫描归零: 图 3-25 RST 的 8x8 输入 NSST 处理结果示意图(蓝线扫描顺序归零)[35] 之后,叠加至原主变化 XkX_kXk 位于计算范围外的部分,构成最终输出 X^k\\hat{X}_kX^k 。 经过此番调整后,单次算子计算所需要的算力消耗,较 NSST 相比就非常之小了。 而在 MTS 的算子数量方面,通过整合 K均值聚类机器学习 label∈Z[1, 3]label \\in \\mathbb{Z} [1, \\ 3]label∈Z[1, 3] 中,所得 率失真优化指数(RDO)较大的两个聚类的变换矩阵,将原有输入固定预测模式和尺寸时的 NSST 变换集,从 3 个矩阵精简到了 2 个,成为双算子形式: TM×N∈[T1∣M×N ,T2∣M×N ] {\\displaystyle \\begin{aligned} &T_{M \\times N} \\in \\begin{bmatrix} & {T_1}|_{M \\times N} \\ , & T_2|_{M \\times N} \\ \\end{bmatrix}\\\\ \\end{aligned} } TM×N∈[T1∣M×N ,T2∣M×N ] 同时,RST 对需要处理的 H.266 规格下的各类帧内预测模式进行了分类。将原本需要单独生成变换集的平面(Planar)模式、直流(DC)模式、角度(Angle)预测模式进行了拆解。把临近相似方向的角度预测模式进行了分类。之后归类于 4 个主流变换集到如下索引 [32] : 凭借这样的处理,使得原本大于 67×2×367 \\times 2 \\times 367×2×3 个 MTS 矩阵,缩减到了 4×2×24 \\times 2 \\times 24×2×2 共计 8 个(详见【附表一】)的可接受范围。 至此,根据输入尺寸大小、预测模式所处归类、输入率失真优化指数(RDO)这 3 个参数,就能够选定具体的算子进行相关处理了。完成 RST 的工程化。 到这里,信息频域分离和部分冗余处理,就已经完成了。随后再配合传统音视频的量化和熵编码,即可完成对信息剩余存储空间冗余的压缩。此处不再赘言。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/Playground_3.html":{"url":"Chapter_3/Language/cn/Playground_3.html","title":"【在线展示】","keywords":"","body":" 在线演示 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_3/Language/cn/References_3.html":{"url":"Chapter_3/Language/cn/References_3.html","title":"【参考文献】","keywords":"","body":"三、【参考文献】 [1] Fourier, J.B. Joseph (1878) [1822], The Analytical Theory of Heat, translated by Alexander Freeman, The University Press (translated from French). [2] Champeney, D.C. (1987), A Handbook of Fourier Theorems, Cambridge University Press. [3] Clozel, Laurent; Delorme, Patrice (1985), \"Sur le théorème de Paley-Wiener invariant pour les groupes de Lie réductifs réels\", Comptes Rendus de l'Académie des Sciences, Série I, 300: 331–333. [4] Rahman, Matiur (2011), Applications of Fourier Transforms to Generalized Functions, WIT Press, ISBN 978-1-84564-564-9. 
[5] Stein, Elias; Weiss, Guido (1971), Introduction to Fourier Analysis on Euclidean Spaces, Princeton, N.J.: Princeton University Press, ISBN 978-0-691-08078-9. [6] Wolf, Kurt B. (1979), Integral Transforms in Science and Engineering, Springer, doi:10.1007/978-1-4757-0872-1, ISBN 978-1-4757-0874-5. [7] Grafakos, Loukas (2004), Classical and Modern Fourier Analysis, Prentice-Hall, ISBN 978-0-13-035399-3. [8] Gauss, Carl Friedrich (1876). Theoria Interpolationis Methodo Nova Tractata. Band 3. Göttingen: Königliche Gesellschaft der Wissenschaften. pp. 265–327. [9] Heideman, M. T., D. H. Johnson, and C. S. Burrus, \"Gauss and the history of the fast Fourier transform,\" IEEE ASSP Magazine, 1, (4), 14–21 (1984). [10] James W. Cooley, John W. Tukey, (1965). \"An algorithm for the machine calculation of complex Fourier series\". Math. Comput. 19 (90): 297–301. doi:10.2307/2003354. [11] James W. Cooley, Peter A. W. Lewis, and Peter W. Welch, \"Historical notes on the fast Fourier transform,\" Proc. IEEE, vol. 55 (no. 10), p. 1675–1677 (1967). [12] Ghissoni, S. , Costa, E. , Lazzari, C. , Monteiro, J. , & Reis, R. . (2011). Radix-2 Decimation in Time (DIT) FFT implementation based on a Matrix-Multiple Constant multiplication approach. IEEE International Conference on Electronics. IEEE. [13] C. Tomasi and R. Manduchi, \"Bilateral filtering for gray and color images,\" Sixth International Conference on Computer Vision (IEEE Cat. No.98CH36271), Bombay, India, 1998, pp. 839-846, doi: 10.1109/ICCV.1998.710815. [14] R. Haralick and L. Shapiro Computer and Robot Vision, Vol. 1, Addison-Wesley Publishing Company, 1992, pp 346 - 351. [15] Irwin Sobel, 2014, History and Definition of the Sobel Operator [16] William T. Freeman, Michal Roth, \"Orientation Histograms for Hand Gesture Recognition\", Tech. Rep. TR94-03, Mitsubishi Electric Research Laboratories, Cambridge, MA, December 1994. [17] Dalal, N. , and B. Triggs . \"Histograms of Oriented Gradients for Human Detection.\" IEEE Computer Society Conference on Computer Vision & Pattern Recognition IEEE, 2005. [18] J. F. Henriques, R. Caseiro, P. Martins and J. Batista, \"High-Speed Tracking with Kernelized Correlation Filters,\" in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 37, no. 3, pp. 583-596, 1 March 2015, doi: 10.1109/TPAMI.2014.2345390. [19] Yu J, Jiang Y, Wang Z, et al. Unitbox: An advanced object detection network[C]//Proceedings of the 24th ACM international conference on Multimedia. 2016: 516-520. [20] Rezatofighi, Hamid , et al. \"Generalized Intersection Over Union: A Metric and a Loss for Bounding Box Regression.\" 2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) IEEE, 2019. [21] Zheng Z, Wang P, Liu W, et al. Distance-IoU loss: Faster and better learning for bounding box regression[C]//Proceedings of the AAAI conference on artificial intelligence. 2020, 34(07): 12993-13000. [22] Zhang Y F, Ren W, Zhang Z, et al. Focal and efficient IOU loss for accurate bounding box regression[J]. Neurocomputing, 2022, 506: 146-157. [23] Li G, Xu D, Cheng X, et al. Simvit: Exploring a simple vision transformer with sliding windows[C]//2022 IEEE International Conference on Multimedia and Expo (ICME). IEEE, 2022: 1-6. [24] Huston SJ, Krapp HG (2008) Visuomotor Transformation in the Fly Gaze Stabilization System. PLoS Biol 6(7): e173. https://doi.org/10.1371/journal.pbio.0060173. [25] Fleet, David J.; Weiss, Yair (2006). \"Optical Flow Estimation\" (PDF). In Paragios, Nikos; Chen, Yunmei; Faugeras, Olivier D. 
(eds.). Handbook of Mathematical Models in Computer Vision. Springer. pp. 237–257. ISBN 978-0-387-26371-7. [26] Barron, John L.; Fleet, David J. & Beauchemin, Steven (1994). \"Performance of optical flow techniques\" (PDF). International Journal of Computer Vision. 12: 43–77. CiteSeerX 10.1.1.173.481. doi:10.1007/bf01420984. S2CID 1290100. [27] Berthold.K.P. Horn and Brian.G. Schunck, \"Determining optical flow.\" Artificial Intelligence, vol 17, pp 185–203, 1981. [28] Lucas B D and T. Kanade, An iterative image registration technique with an application to stereo vision[C]//Proc. of the 7th International Conference on Artificial Intelligence, pp 121-130, 1981. [29] A. Alshin, E. Alshina and T. Lee, \"Bi-directional optical flow for improving motion compensation,\" 28th Picture Coding Symposium, Nagoya, Japan, 2010, pp. 422-425, doi: 10.1109/PCS.2010.5702525. [30] J. Luo, Y. He and W. Chen, \"Prediction Refinement with Optical Flow for Affine Motion Compensation,\" 2019 IEEE Visual Communications and Image Processing (VCIP), Sydney, NSW, Australia, 2019, pp. 1-4, doi: 10.1109/VCIP47243.2019.8965942. [31] T. Lu et al., \"Luma Mapping with Chroma Scaling in Versatile Video Coding,\" 2020 Data Compression Conference (DCC), Snowbird, UT, USA, 2020, pp. 193-202, doi: 10.1109/DCC47342.2020.00027. [32] M. Koo, M. Salehifar, J. Lim and S. -H. Kim, \"Low Frequency Non-Separable Transform (LFNST),\" 2019 Picture Coding Symposium (PCS), Ningbo, China, 2019, pp. 1-5, doi: 10.1109/PCS48520.2019.8954507. [33] X. Zhao, J. Chen, M. Karczewicz, A. Said and V. Seregin, \"Joint Separable and Non-Separable Transforms for Next-Generation Video Coding,\" in IEEE Transactions on Image Processing, vol. 27, no. 5, pp. 2514-2525, May 2018, doi: 10.1109/TIP.2018.2802202. [34] Salehifar M, Koo M, Lim J, et al. CE 6.2. 6: Reduced Secondary Transform (RST)[J]. Joint Video Experts Team (JVET) of ITU-T SG, 2018, 16: 10-18. [35] Koo M, Salehifar M, Lim J, et al. CE6: reduced secondary transform (RST)(CE6-3.1)[J]. Joint Video Experts Team (JVET) of ITU-T SG, 2019, 16: 19-27. 
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Apex_4_Introduce.html":{"url":"Chapter_4/Language/cn/Apex_4_Introduce.html","title":"四、音视频机器学习基础","keywords":"","body":"四、音视频机器学习基础 引言 在前一章中,我们对基础音视频的关键技术工具,进行了详细介绍。其中,不少地方需要用到机器学习相关的处理手段。可见结合机器学习尤其是深度学习模型的优秀能力,来强化现有音视频工程的各方面,已逐步成为主流趋势。 因此,需要我们对机器学习这个大类技术族,有初步的认知。 整个机器学习(ML)的发展历程中,总有不一样的想法和更先进(或特色)的方法论被各路探索者们提出来。而深度学习(DL [Deep Learning])作为机器学习(ML [Machine Learning])的实现手段之一,最初的概念早在上个世纪就已经被 Hinton、Bengio、LeCun 等学者提出。受到近年来快速增长的计算机算力和大数据云建设,而得以真正落地。 如果回顾机器学习的发展会发现,过程中通常是多条路线方法论并行的。在历史上(现认为 2019 至今属于第三次高峰),前两次小高峰都是伴随着计算机硬件技术的突破,而带来的飞跃性变革。从单层感知器模型(Single-Perception)到多层感知器模型(Multi-Perception)再到深度信念网络(Deep Belief Network),直至今天百花齐放的 DL。整个历史中的每一次迭代,更像是多次多维度的技术积累准备齐全后,才应运而生的。 本章节主要整理说明了,当下机器学习至 2019 年前的发展简史,并阐明了部分算法的必要基础概念。只给出核心原理,不包含理论证明和推导过程。 关键字:机器学习分类、深度学习、激活函数、损失函数、最优化算法、模型结构速览 目录 4.1 发展概览 4.2 模型工程基础 4.2.1 算子(Operator)& 层(Layer) 4.2.2 神经元(Neuron) 4.2.3 神经网络(NN [Neural Network]) 4.2.4 特征选择(Feature Selection) 4.3 经典激活函数(Classic Activation Function) 4.3.1 Sigmoid 4.3.2 Tanh 4.3.3 Softplus 4.3.4 ReLU 族 4.3.5 ELU & SELU 4.3.6 Mish 4.3.7 Swish 族 4.4 连接函数/衰减函数(Connection/Attenuation Function) 4.4.1 Dropout 4.4.2 Maxout 4.4.3 SoftMax 4.5 损失函数(Loss Function) 4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error]) 4.5.2 回归项-均方误差(MSE [Mean Squared Error]) 4.5.3 回归项-休伯损失(Huber Loss) 4.5.4 回归项-分位数损失(Quantile Loss) 4.5.5 分类项-对数损失(Log Loss) 4.5.6 分类项-交叉熵损失(Cross Entropy Loss) 4.5.7 分类项-合页损失(Hinge Loss) 4.5.8 分类项-对比损失(Contrastive Loss) 4.5.9 分类项-三元损失(Triplet Loss) 4.5.10 分类项-对组排异损失(N-Pair Loss) 4.5.11 正则项-L1 惩罚 4.5.12 正则项-L2 惩罚 4.6 优化算法/优化器(Optimizer) 4.6.1 经典优化算法(Classic Optimize Function) 4.6.2 优化算法的优化-应对震荡 4.6.3 优化算法的优化-应对重点强(弱)化更新 4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation]) 4.6.5 优化算法对比与使用建议 4.7 模型结构速览 4.7.1 卷积神经网络(CNN [Convolutional Neural Network]) 4.7.2 循环神经网络(RNN [Recurrent Neural Network]) 4.7.3 自注意力网络(Transformer) 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_1.html":{"url":"Chapter_4/Language/cn/Docs_4_1.html","title":"4.1 发展概览","keywords":"","body":"4.1 发展概览 机器学习(ML) 传统意义上的方法,可以大致分为两类:有监督学习(Supervised Learning) 和 无监督学习(Unsupervised learning)。 在 1946~2006 年期间,两种类型因为各自侧重领域存在区分,基本是同步并行发展的。有监督学习(Supervised Learning)经常被用来做一些拥有较为充足数据和标注的样本集的分类、预测工作。而无监督学习(Unsupervised learning)则更多的被用于数据重建,或简单二元分类应用上。直到 2006 年 杰弗里·辛顿(Geoffrey Hinton,1947~Present) 和 拉斯·萨拉胡迪诺夫(Russ Salakhutdinov) 提出了RBM 的快速学习算法 [1],并在 2008 年由 雨果·拉罗谢尔(Hugo Larochelle) 和 约书亚·本吉奥(Yoshua Bengio,1964~Present) 实现多层 RBM 节点组成的深度信念网络(DBN)半监督分类 [2] 后,无监督学习才逐渐被更多人所知。 有监督学习(Supervised Learning) 有监督学习(Supervised Learning) 指的是,在迭代中需要人为调参裁剪的 机器学习(ML)过程。即,从标签化训练数据集中推断出函数的机器学习任务。每一个样本集中的训练数据,都是由输入参数和预期结果组合而成。这其中,预期结果也被称为监督信号。常见的有监督学习如:支持向量机(SVM)、线性回归(Linear Regression)、逻辑回归(Logistic Regression)、朴素贝叶斯(NBM)、决策树(DT)、K-临近(K-Nearest)、深度信念网络(DBN)。 图 4-1 经典有监督学习关联图谱 无监督学习(Unsupervised Learning) 无监督学习(Unsupervised Learning) 指的是,在迭代中不需要人为干预的 机器学习(ML)过程。即,根据未标签的训练数据进行函数推断的机器学习任务。每一个样本集中的训练数据,仅由 输入的样本参数组成。无人工标注和对应输入的预期结果标记。 无监督学习主要分为两大类:确定型 和 概率型,不过也有 将概率型做为无监督学习,而 确定型归类为半监督(SSL [Semi-Supervised Learning])的分类方式。这种分类方式的原因主要是因为,确定型的代表主要是 自编码器(Auto Encoder),而自编码器在实际运行过程中,并不是完全不需要人为调参的。自编码器虽然不需要启动时进行样本标记,但是需要对解码后的结果进行对比筛选,因此也被学术界认为并不是完全的无监督。 确定型无监督学习,主要进行数据的复原、重现、重构等操作,无法进行数据分类。因此这种类型目前主要指代自编码器(Auto Encoder)及其改进算法,其目标主要是能够从抽象后的数据中尽量无损地恢复原有数据。自编码器(Auto Encoder)类型最大的特点就是: 
数据相关(Data-Specific),只能用于训练集类似数据的编解码。 数据有损(Data-Degradation),解码后的输出与编码前的输入相比是退化的。 定制自动化(Customized Automation),根据指定类型输入,训练特定的自编码器,而不需要完成任何新工作。 概率型无监督学习,主要根据概率情况推算分类、结果状态。这种类型代表就是受限波尔兹曼机(RBM)及其改进算法(rRBM等)或延伸(DBN等),其目标主要是使受限玻尔兹曼机达到稳定状态时原数据出现的概率最大。从基础上来讲,属于贝叶斯学派的观点。 图 4-2 经典无监督学习关联图谱 深度学习(Deep Learning)的崛起 前文中我们提到了 2008 年基于 DBN 的半监督分类带给了业界极大的启发。从这一刻开始,深度学习的前置科技已经准备就绪。而传统的分类方式,显然已经 不足以描述 未来可能的发展趋势了。 2011 年 吴恩达(Andrew Ng,1976~Present) 等学者发表了《有关单层神经网络的无监督特征学习》[3] ,首次将受限波尔兹曼机(RBM)应用于无监督特征学习(Unsupervised Feature Learning)。论文通过简单的算法,实现了当时在 CIFAR-10 数据集(Acc: 79.6%) 和 NORB 数据集(Acc: 97.2%) 上最高的准确度,引起了剧烈反响。大家开始思考,是否能够通过更深层的网络结构,配合强大算力与过去积累的算法,来构造一种能够自主特征提取(Features self-extracting)的人工神经网络(ANNs [Artificial Neural Networks])模型。从而实现从单一模式识别到高层抽象的过渡 [4] ( 2013 年前,此类多数还停留在,用于做复杂多模式识别的应用领域),进一步推进人工智能(AI [Artificial Intelligence])发展。受此启发,大家开始尝试与其他领域概念结合,从而解决过去备受困扰的难题。 2012 年由 Hinton 学生 埃里克斯·克里热夫斯基(Alex Krizhevsky) 发表的 AlexNet [5] 无疑为人们的信心打上了有力的兴奋剂。AlexNet 在 ImageNet LSVRC-2012 训练集上以 top-5 error 15.3% 和高达 78.1% 的识别率展示了深度学习在目标分类领域强大的潜力。要知道当年第二名的 top-5 error 仅为 26.2%,差距高达 10.9%。 AlexNet 的关键之处,就在于它将 LeNet [6] 逐步完善的 卷积神经网络(CNN [Convolutional Neural Network]) 的基本框架和操作单元概念,与深度信念网络(DBF)中由RBM单元构成的计算单元设计理念进行了结合,并引入了由生物学侧抑制概念衍生的 局部响应归一化(LRN [Local Response Normalization]) 来构建了整个网络模型。证明了深度学习的正确性,和手段的多样性。这为随后深度学习概念的分类及发展,有着 承上启下 的作用。 AlexNet 的出现,将深度学习送上了高速发展的快车道。深度学习开始做为一种有效的训练方法而逐渐登上历史舞台,而与之相关的各种其他领域方向也被送上了副驾驶。 综合以往技术与深度学习近年来的发展过程,我们有了如下的脉络: 图 4-3 深度学习与传统及相关进展关联图谱 从图不难看出。时至今日,在深度学习方向上的工业化,逐渐形成以已由 神经网络框架(backbone),配合 逐层预训练(layer-wise pre-training) 与 裁剪(fine-tunning),来构筑一类问题的 批处理解决方案。当前模型发展也呈现了多元化的态势,在不同领域分支里也出现了更多的针对于领域内问题处理的细分。我们将由此发散而出的一系列模式分析方法统一归类为深度学习的手段,就具体研究内容而言,目前主要涉及如下处理理念: 多层自编码神经网络,包括:自编码(Auto Encoder,注意其在实现上区别于 Transformer 的自编码器类型)、稀疏编码(Sparse Coding)、降噪编码(Stacked Denoising Autoencoders)等单元处理手段; 深度信念网络(DBN),由单层或多层RBM构成的神经网络系统; 卷积神经网络(CNN),卷积运算的神经网络系统; 循环神经网络(RNN),共参循环单元链式递归的神经网络系统; 生成对抗网络(GAN),生成 & 判别模型互相博弈的神经网络系统; 自注意力网络(Transformer),一种基于自注意力(Self-Attention)和多头注意力(Multi-head Attention)机制的序列到序列(Sequence to Sequence)深度神经网络模型; 深度神经网络(DNN [Deep Neural Network]) 可以认为是这一系列方法所包含的神经网络类型的统称。这几种处理方式经常 交叉混用,或 多级组合互相协作。例如:通过 CNN+GAN 来完成视觉风格迁移,通过多层Transformer 自编码器(Auto Encoder) 实现的用于 NLP 的 BERT 模型。 而随着近年来的进一步发展,传统机器学习几大领域和新兴深度学习之间逐步交叉覆盖,出现了类似于 深度强化学习(DRL [Deep Reforcement Learning]) 这样的概念。例如:AlphaGo 和 DQN(Deep Q Networks)就是这一交叉方向的产物。同时,由于研究中发现,日常人所处理的信息是有限的,如果想要达到更贴近日常情况的 ML,那么必须考虑样本量不足的情况。为了解决这部分日益增长的问题,结合现有的DL手段,人们从 2019 年开始逐渐重视小样本学习(Few-Shot Learning)、大语言模型(LLM [Large Language Model])等领域的发展和探索。未来与之相关领域(如:元学习 Meta-Learning)可以预见将会有更多的注意力倾注。 图 4-4 深度学习与传统及相关领域关系图(图小圆有重叠部分) 在这一过程中,一些传统的机器学习技术展现出了新的活力。在部分问题的处理上,通过新老技术结合构建的新型网络,如:ArcFace、DQN 等。相对来说,诸如聚类分析、降维、模式分析、自编码器等,在当下往往都以单元、组件、方法论的方式在新网络中发挥传统的作用。而新一代技术的发展,更多的是在原有的研究基础上演变而来的。这就是我们总能够在新发布的 SOTA 中,看到过去的理念和前人的影子的原因。 即 事物的发展,总是螺旋上升的。深度学习是机器学习的手段,最终实现人工智能,或有限度的人工智能,才是目的。 传统机器学习对音视频方面的帮助,并不算太大。但是深度神经网络却极大的契合了音视频工程特征。 由于传统音视频,尤其是图像处理,和深度神经网络的技术栈关联性。音视频工程不可避免会大量使用到深度学习技术。想要简单了解深度学习模型是怎么起到作用的,就需要对一些基本概念有清晰的认知。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_2.html":{"url":"Chapter_4/Language/cn/Docs_4_2.html","title":"4.2 模型工程基础","keywords":"","body":"4.2 模型工程基础 在深度学习(DL)中,我们通过计算损失函数(Loss Function),来衡量当次迭代结果对应各个关键参数权重,在实际描述问题上的有效程度。通过损失函数的变化方向,来获取对应关键参数权重,更趋近于实际结果的梯度方向。从而被我们使用来更新当前参数权重配置,以降低损失函数值,逼近最优解。这一过程被称为一次迭代(Iteration)过程。而通过多次迭代来获取最优解的过程,被称为一次训练(Training)。 在一次迭代(Iteration)中,一般需要对参与训练的所有样本进行分组,我们将这些数据子集称为 批(Batch)。每一批所包含的数据量是有可能有差异的,所以,对不同批次的样本量,我们采用 批大小(Batch 
Size) 进行衡量。 而训练中,基本不可能通过单次迭代就能达到想要的结果。所以,在工程中,我们把一次迭代所包含的相关数据和处理的周期过程,称为一个 时期(Epoch)。用以区分深度学习学术概念的迭代,和工程执行层面的差异。因此,时期(Epoch)也可以代表数量级,即指代当前一次迭代过程中的所有批的输入样本个数。 两者本质是一个概念的不同角度称呼。 简单来说: sampleinput≤sampletotal1 epochsize=sampleinput≥batchsize⋅batchnum1 batchsize=sampleinputbatchnum {\\displaystyle \\begin{aligned} {sample}_{input} &\\le {sample}_{total} \\\\ 1\\ epoch_{size} &= {sample}_{input} \\\\ &\\ge batch_{size} \\cdot batch_{num} \\\\ 1\\ batch_{size} &= \\frac{ {sample}_{input} } {batch_{num}} \\\\ \\end{aligned} } sampleinput1 epochsize1 batchsize≤sampletotal=sampleinput≥batchsize⋅batchnum=batchnumsampleinput 皆为训练过程中的 样本量级参数。 那么,除去这部分变量,实际进行运算的基本单元是什么呢? Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_2_1.html":{"url":"Chapter_4/Language/cn/Docs_4_2_1.html","title":"4.2.1 算子(Operator)& 层(Layer)","keywords":"","body":"4.2.1 算子(Operator)& 层(Layer) 算子(Operator) 和 层(Layer) 是当前各种通用神经网络模型中,最基础的组成元件。一般来说,算子用于表示模型中的数学运算,而层用于组织模型中的算子。我们通常将单一的数学函数,抽象为一个算子。 需要注意的是,两者皆为 工程概念。 算子(Operator) 算子(Operator) 本身仅代表基础运算。因此,既可以是一对一的输入输出,也可以是多对多的输入输出,可以是有状态的或无状态的,也可以是可微的或不可微的。而在使用中,类似 ReLU 等激活函数,或 Dropout 之类的损失函数,都可以被定义为算子,以方便过程中直接使用。 有状态算子在计算输出时,会对前次计算的结果进行 一定程度的 抽象记录,从而 保存以前的状态。循环神经网络 (RNN) 中的循环单元(Recurrent Unit)就属于有状态算子。无状态算子在计算输出时不需要记住以前的状态,卷积神经网络 (CNN) 中的卷积算子就属于无状态算子。 可微算子的导数可以计算,这使得它们可以用于训练神经网络。例如,线性算子和非线性算子都是可微的。不可微算子的导数不能计算,这使得它们不能用于训练神经网络,但能够做最终汇总所用。例如,Maxout 算子就是一个不可微算子。 可见,算子本身是灵活的,基本作用等同于一次单一的数学运算,而不在意具体类型。由它构成了 整个神经网络中最基础的 “加减乘除” 功能。 层(Layer) 层(Layer) 是由一组算子组成的,神经网络基本组成部分。这些算子共同执行一个特定的任务。例如,卷积层(Convolution Layer) 由一组卷积算子组成,这些算子共同执行卷积操作。池化层(Pooling Layer) 由一组池化算子组成,这些算子共同执行池化操作。 根据不同的出发点,层可以进行 非单一化 的分类。 按照 功能特性,可以分为 卷积层(Convolutional Layer) 、 全连接层(Fully Connected Layer) 、 池化/下采样层(Pooling Layer/Subsampling Layer) 、 上采样层(Upsampling Layer)。 顾名思义,卷积层即卷积算子参与运算的层级,全链接层即采用连接函数精简参数的层级。同理,池化/下采样层即采用 传统/非传统 的下采样算法(Subsampling Function),进行输入数据精简的层级,而上采样即是采用 传统/非传统 的上采样算法(Upsampling Function)对数据进行扩充的层级。这种命名法的好处是 直指功能,缺点是不太好区分流程中位置。需要根据对模型的熟悉程度和经验,来确定实际生效的阶段。 按照 数学特性,可以分为 线性层(Linear Layer) 或 非线性层(Nonlinear Layer),两种类型。线性层由一组线性算子组成,这些算子共同执行线性变换。例如,全连接层就是一个线性层。非线性层由一组非线性算子组成,这些算子共同执行非线性变换。例如,卷积层就是一个非线性层。 按照 网络特性,可以分为 前馈层(Feed Forward Layer) 或 循环层(Recurrent Layer)。前馈层中的信息只从输入流向输出。循环层中的信息可以从输入流向输出,也可以从输出流向输入。这种分类方式常被使用在 自注意力网络(Transformer) 的层单元中,也可以适当的用来描述其他类型深度神经网络中的层划分。不过,由于如 CNN、RNN 相较于 Transformer 的层级特点相对单一,所以一般不会这么使用。例如,卷积神经网络 (CNN) 中的卷积层就是一个前馈层,循环神经网络 (RNN) 中的循环单元就是一个循环层,不如直接以数学特性表述的准确。 不过,最常见的分类方式,还是直接以层所处神经网络(Neural Network)位置进行划分,称为 经典基础层类型(Classic Base Layer Type)。 经典层分类(Classic Base Layer Type) 经典基础层类型,将层分为三类,分别是:输入层(Input Layer) 、 隐藏层(Hidden Layer) 、 输出层(Output Layer)。这种分类非常直观: 图 4-5 经典层分类在简单神经网络中位置示意图(切片) 输入层(Input Layer) 是一个神经网络的 输入节点集合(Input Nodes Set),负责接收外部传入的数据。显然输入数据的维度,决定了输入层节点的数量。如图,假设我们传入的训练用样本中,每一个样本数据皆为 4×14 \\times 14×1 向量的话,那么输入层的节点就同样有 4×14 \\times 14×1 个。 隐藏层(Hidden Layer) 是一个神经网络的 特征提取节点集合(Feature Extract Nodes Set),负责将输入层经过激活函数处理后的数据,交付权重运算,得到抽象后的 特征向量(Feature Vector) 输出。如图,这里我们指定抽取的特征为 3×13 \\times 13×1 向量,因此需要 3×13 \\times 13×1 个隐藏层节点。由于本身处于神经网络内部,所以被称为隐藏层。 该层也是反向传播(BP)算法,起到主要作用的层级。 输出层(Output Layer) 则是神经网络的预测结果 输出节点集合(Prediction Output Nodes Set),负责将临近的隐藏层输入,通过连接函数(Connection Function)转换为最终的预测结果输出。也就是将抽象的特征向量,转化为实际当次时期(epoch)预测结果的关键层。 通常情况下,一个神经网络只会有一个经过专门设计的输出层。输出层的结果将会与样本集中该样本的标注结果,一同作为损失函数的输入做损失计算,并以此迭代权重变化。 图中,我们期望的预测输出是个 2×12 \\times 12×1 的结果向量,向量的维度依赖于对比集的标注。此时,输出层就需要采用 2×12 \\times 12×1 个节点,来接收前一级隐藏层的输入(例子只有一层隐藏层)。 所以综合而言,在工程上,算子常常是以最小的 方法单元(Method Unit) 
而存在,层中节点相当于最小 执行单元(Operation Unit)。层则相当于由一系列算子按照一定的处理顺序,组成的 任务单元(Task Unit)。而模型(Model)则是由一系列层按照既定目标排列组合,形成的 作业流水线(Process Pipeline)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_2_2.html":{"url":"Chapter_4/Language/cn/Docs_4_2_2.html","title":"4.2.2 神经元(Neuron)","keywords":"","body":"4.2.2 神经元(Neuron) 神经元(Neuron) 是对神经网络中的 节点(Node) 的一种,来自于仿生学的概念。上一节中我们提到的 输入/特征/输出节点集合 中的节点,都可以被称为神经元。 图 4-6 生物神经元示意图 生物学神经元之间的信号传递,是通过突触进行的。突触是神经元之间连接的部位。当一个神经元接收到信号时,它会释放神经递质,神经递质会穿过突触间隙,并与另一个神经元的受体结合。这种结合会引发一系列化学反应,最终导致另一个神经元产生动作电位。 这即是神经网络雏形,多层感知器(MLP [Multi-Layer Perceptron]) 的灵感来源。 在深度学习中,我们继续沿用了这一称谓,将所有设计相关层内数据计算的节点,统称为神经元。 神经元的组成 作为神经网络中最小的执行单位,神经元的成分可以统一用一个函数来说明: zi=wi⋅δ(xi)+bi z_i = w_i \\cdot \\delta(x_i) +b_i zi=wi⋅δ(xi)+bi 上式中, 角标 [i][_i][i] 表示当前神经元在层内的标号为 iii ,是一种 固定的表示 ; 以 xxx 表示一个输入的数值信号; 以 www 表示当前输入的 附加权重(wight),既可作为 参与训练 的层级特征权重,也可为常数 ; 以 bbb 表示当前输入的 附加偏移(bias),既可作为 参与训练 的层级特征偏移,也可为常数 ; 以 zzz 表示当前神经元的输出数值; 以 δ(x)\\delta(x)δ(x) 为当前神经元的激活函数; 可见,激活函数(Activation Function)是直接作用于神经元输入上的。 一般情况下,不论是 输入层、隐藏层,还是输出层的神经元,它们的 权重 www 和 偏移 bbb ,理论上都可以参与到反向传播(BP)的参数迭代中。 但是,输出层(Output Layer) 由于本身主要作用,是 接收连接函数(Connection Function)计算预测值,不会使用到 权重 www 和 偏移 bbb 。我们一般为了方便起见,会将作用于输出层与前一级隐藏层之间的链接函数,整合到输出层神经元中来便于代码实现。取链接函数为 f(x)f(x)f(x) 表示,有: z=f(x) z = f(x) z=f(x) 而 输入层(Input Layer) 一般为了工程和说明方便,会单独 只做激活,或 只传递(pass)数据并入下一级隐藏层。这使得对于输入层,神经元函数就变为了简单的: z=x z = x z=x 所以,真正 完整使用 到公式的,只有 隐藏层(Hidden Layer) 中的神经元。公式: zi=wi⋅δ(xi)+bi z_i = w_i \\cdot \\delta(x_i) +b_i zi=wi⋅δ(xi)+bi 也由此,可以被称为 隐藏层函数(Hidden Layer Function)。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_2_3.html":{"url":"Chapter_4/Language/cn/Docs_4_2_3.html","title":"4.2.3 神经网络(NN [Neural Network])","keywords":"","body":"4.2.3 神经网络(NN [Neural Network]) 本书中的 神经网络(NN [Neural Network]) 主要是对深度神经网络(DNN,这是个大类别,见第一节)中,采用 反向传播(Back Propagation) 技术的一类深度神经网络的统称。也被称为 反向传播神经网络(Back Propagation Neural Network),是个广义分类。 所谓反向传播(BP)算法,是一种 有监督学习(Supervised Learning)算法。它需要一个标记好判定结果的数据集,来进行隐藏层特征权重和偏移的迭代。BP 在当前神经网络的损失函数,计算输出预测值与正确值误差之后,以导数驱动调整神经元的权重和偏移(依赖是否参与运算),以期望下次迭代跟贴近预测结果,减少误差 [1] 。 而这涵盖了,包括 CNN、RNN、GAN、Transformer 在内的这些经典 DNN 模式。 图 4-7 完整的 Alexnet 示意图(工程版) 如上图所示,我们以经典 CNN 图像分类模型 AlexNet 为例。 由图可以看出,一个神经网络(NN)的构成,通常由一个输入层、多个隐藏层、一个输出层组成。而隐藏层中,根据具体作用的不同,按照之前提到的层级功能性划分,又可以分为 卷积层、池化层等多种子类型。 不同类型的网络,差异体现在层级的设计上。而层级的排列和执行方式,共同组成了工程流水线(Pipeline)。这一整体,被称为神经网络结构(Nerual Network Structure)。我们在实际工作中,常以 神经网络(NN)、模型(Model)来等价指代 神经网络结构。 当然,我们这里展示的只是最简单的深度神经网络。除了单独使用一个模型外,NN 之间也可以根据各种情况进行组合串联 或 联合训练,共同构成更大的神经网络,这种方式被称为 神经网络聚合(NNE [Neural Network Ensemble])。 除此之外,当下包括 大模型(Large Model) 在内的多种模型融合技术,简称 多模态(Multi Model),皆开始采用多模型混合的实现。 例如,由 杨立昆(Yann LeCun) 提出的,基于 短期预测(Short Term Prediction) 和 长期预测交叉融合(Joint Embedding) 实现完整连续时效预测,的 自监督大模型(Self-Supervised Large Model) 理论中,通过将传统深度学习(指带单一功能深度学习模型)的各个功能层或层组合,拆分为包含:损失模型(Cost Module,类似于一个复杂的,非单一点生效的损失函数替代模型)、感知模型(Perception Module)、规则模型(Policy Module)、动作模型(Action Model)、世界模型(World Model)在内的多种特定任务模型(Specific Model),组合为复杂的连续网络,以期实现模型自学习处理体系。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_2_4.html":{"url":"Chapter_4/Language/cn/Docs_4_2_4.html","title":"4.2.4 特征选择(Feature Selection)","keywords":"","body":"4.2.4 特征选择(Feature Selection) 特征选择(Feature Selection) 是每个模型启动前最为重要的一环,也是 特征工程(Feature Engineering) 
方法论所靶向的关键问题之一。 在传统机器学习(ML)中,特征选择是对影响结果较不明显的 因变量(Independent Variables),以一系列处理手段,转换为较为明显的 关系参数(Related Parameters)表示,从而发掘出潜藏影响因素。这一过程所产生的单次影响因子参数,构成的一组标量数组,就是 特征向量(Feature Vector)。而对全体样本进行相同流程的抽象,得到的特征向量集合,即是 训练特征集(Training Feature Set)。 工程上,训练特征集 通常以 一批次(1 Batch) 样本计算后,由神经网络输出的当前权重下,输入样本的抽象非零解集构成。这个输出的抽象特征向量数据集,才是正真被我们用来衡量当前迭代结果情况的决定数据。即,损失函数(Loss Function)作用的部分。 而特征选择,正是对如何获取满足模型目标的特征和训练特征集的方法论。 常用的特征选择方式,可以分为三大类别: 过滤法(Filtered),以相关性和扩散性对特征评分,采用阈限法或策略来筛选; 包裹法(Wrapped),以评分函数或预测效果校验评分,筛选满足条件特征; 嵌入法(Embedded),以影响权重加强/衰减影响过程,用权重变换决定特征; 采用不同方法获取的训练集,也根据方法的选择情况,被分别称为 过滤集(Filterings) 、 包裹集(Wrappings) 、 嵌入集(Embeddings)。 显然,在深度学习中,被批量使用的特征选择方法,就是嵌入法。 嵌入集(Embeddings) 经由神经网络抽象高维特征的输出向量数据集,被我们称为嵌入特征向量组(Embeddings of Low-Dimesional Features),简称嵌入集(Embeddings)。与特征工程的相关称谓同名,并不矛盾。 它既可以是一组由 n×mn \\times mn×m 的向量构成的数组,如下 n×m=8×1n \\times m = 8 \\times 1n×m=8×1 有: double embeddings[BATCH_SIZE][VECTOR_SIZE] = { {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, {4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0}, /* ... */ }; 也可以是单纯的评估数据,相当于 n×m=1n \\times m = 1n×m=1 的向量组成: double predictions[BATCH_SIZE] = { 0.1, 0.8, 0.2, 0.3, 0.5, 0.7, 1.0, 0.9, /* ... */ }; 即,组成嵌入集的特征向量形式,并没有特殊的要求。但往往需要根据采用的损失函数来决定最终的格式。这一点在实践中非常重要。由于评估数据常用于线性回归,区别起见被称为 预测集(Predictions)。 现在,我们基本掌握了深度学习的入门概念。让我们分步来看,一个神经网络的具体细节。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_3.html":{"url":"Chapter_4/Language/cn/Docs_4_3.html","title":"4.3 经典激活函数(Classic Activation Function)","keywords":"","body":"4.3 经典激活函数(Classic Activation Function) 激活函数(Activation Function) 是一种被设计用来,在模型训练的每个单元数据输入位置,为输入引入非对称性特征 的特殊辅助函数。 图 4-8 激活函数作用阶段(图中蓝色线条)示意图 从图上可以看出,激活函数主要作用于隐藏层的输入。示例中只有一层隐藏层,因此激活函数作用位置在输入层接收输入数据后,交付到隐藏层的过程中。而对于多个隐藏层情况,前一级的输入也会经激活后才交付给后一级。 如果不采用激活函数,那么我们经过每层神经网络计算后,得到的最终输出都将为线性结果。线性输出实际就是最原始的感知器(Perceptron)。而单纯使用线性函数计算,在实际的处理过程中,对于大多是场景将不能很好的描述其特征。常见的算法问题常常需要引入非线性特性,才能更好的拟合样本。通常,我们通过引入激活函数来给我们设计、使用的神经网络,提供逼近任何非线性场景的能力。 激活函数,基本满足:单一输入输出、单一层处理、可参与训练参数 ,的一类激活函数。其中常用的几类,被称为 经典激活函数(Classic Activation Function)。 一般的: 当一个激活函数 f(x)f(x)f(x) 满足 x→+∞f′(x)=0x \\rightarrow +\\infty \\quad f\\prime(x)=0x→+∞f′(x)=0 时,我们称之为 右饱和。 当一个激活函数 f(x)f(x)f(x) 满足 x→−∞f′(x)=0x \\rightarrow -\\infty \\quad f\\prime(x)=0x→−∞f′(x)=0 时,我们称之为 左饱和。 当一个激活函数,既满足左饱和又满足又饱和时,我们称之为 饱和。 对任意的 xxx ,如果存在常数 ccc ,当 x>cx > cx>c 时恒有 f′(x)=0f\\prime(x)=0f′(x)=0 取值,则称其为 右硬饱和。 对任意的 xxx ,如果存在常数 ccc ,当 xcx xc 时恒有 f′(x)=0f\\prime(x)=0f′(x)=0 取值,则称其为 左硬饱和。 若既满足左硬饱和,又满足右硬饱和,则称这种激活函数为 硬饱和。 如果只有在 极限 状态下偏导数 f′(x)=0f\\prime(x)=0f′(x)=0 的函数,称之为 软饱和。 由于激活函数的作用,大多基于同向对比实验的统计结果来进行说明(目前,部分有相关的数理研究佐证,如 ReLU,但仍有争议)。因此,这里仅列出算子的公认已证明特性,和 C 语言实现。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_3_1.html":{"url":"Chapter_4/Language/cn/Docs_4_3_1.html","title":"4.3.1 Sigmoid","keywords":"","body":"4.3.1 Sigmoid 迭代公式: δ(x)=11+e−x {\\displaystyle \\begin{aligned} \\delta(x) = \\frac{1}{1+e^{-x}} \\\\ \\end{aligned} } δ(x)=1+e−x1 图像: 图 4-9 Sigmoid 函数图 特性: 非 0 为中心(non-zero-centered) 输出范围在 [0, 1][0,\\ 1][0, 1] 之间,导数 0.250.25 输出 >0> 0>0 ,反向传播(BP)权值正向堆积(梯度始终 >0> 0>0) 输入 (−∞, −5](-\\infty,\\ -5](−∞, −5] 或 [+5, +∞)[+5,\\ +\\infty)[+5, +∞) 时,输出近乎无变化,逐层梯度趋 ,更易导致梯度消失 指数计算,较为消耗资源 Sigmoid 激活函数梯度趋近于 0,即软饱和。这会导致BP在该区域部分的导数,无法有效的传递误差至上层(趋 0 失效),导致前层权值无更新,从而无法收敛。且因为 非 0 为中心,使得我们在使用它做激活函数时,需要考虑数据对称(zero-mean data)。 Sigmoid 也可以根据情况,使用其他算法代替,例如(swish、h-swish)。通常在二分问题上采用 Sigmoid 是不错的选择,诸如:是否是某一类、问题对错,即古典逻辑回归(Classical Logical 
Regression)。 Sigmoid 算子化 利用 C 语言实现对算子的封装,有: #include #include double sigmoid(double x) { return 1 / (1 + exp(-x)); } int main() { double x = 0.5; double y = sigmoid(x); printf(\"The sigmoid of %f is %f\\n\", x, y); return 0; } 运行验证可得到结果: The sigmoid of 0.500000 is 0.622459 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_3_2.html":{"url":"Chapter_4/Language/cn/Docs_4_3_2.html","title":"4.3.2 Tanh","keywords":"","body":"4.3.2 Tanh 迭代公式: sinh(x)=ex−e−x2cosh(x)=ex+e−x2δ(x)=tanh(x)=sinh(x)cosh(x)=ex−e−xex+e−x {\\displaystyle \\begin{aligned} sinh(x) &= \\frac{e^x-e^{-x}}{2} \\\\ cosh(x) &= \\frac{e^x+e^{-x}}{2} \\\\ \\delta(x) = tanh(x) &= \\frac{sinh(x)} {cosh(x)} = \\frac{e^x-e^{-x}}{e^x+e^{-x}} \\\\ \\end{aligned} } sinh(x)cosh(x)δ(x)=tanh(x)=2ex−e−x=2ex+e−x=cosh(x)sinh(x)=ex+e−xex−e−x 图像: 图 4-10 Tanh 函数图 特性: 0 为中心(zero-centered) 输出范围在 [−1, +1][-1,\\ +1][−1, +1] 之间,输出值域对称 当输入在 (−∞, −2.5](-\\infty,\\ -2.5](−∞, −2.5] 或 (−∞, −2.5](-\\infty,\\ -2.5](−∞, −2.5] 时,Tanh也会面临梯度趋 000 问题(过饱和问题) 指数计算,较为消耗资源 不难看出 Tanh(x)=2⋅Sigmoid(2x)−1Tanh( x ) = 2 \\cdot Sigmoid( 2x ) - 1Tanh(x)=2⋅Sigmoid(2x)−1 。本质上来讲 Tanh 属于Sigmoid 的一种变体,尝试通过平移拉伸变换,来解决 Sigmoid 的非原点对称问题。虽然能够处理梯度堆积带来的影响,但是 tanh 同样不能处理相较于堆积更为严重的梯度消失问题。这也是饱和类激活函数的通病。 Tanh 算子化 利用 C 语言实现对算子的封装,有: #include #include double tanh(double x) { return (exp(x) - exp(-x)) / (exp(x) + exp(-x)); } int main() { double x = 0.5; double y = tanh(x); printf(\"The tanh of %f is %f\\n\", x, y); return 0; } 运行验证可得到结果: The tanh of 0.500000 is 0.462117 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_3_3.html":{"url":"Chapter_4/Language/cn/Docs_4_3_3.html","title":"4.3.3 Softplus","keywords":"","body":"4.3.3 Softplus 迭代公式: δ(x)=log(1+ex) {\\displaystyle \\begin{aligned} \\delta(x) = log(1+e^x) \\\\ \\end{aligned} } δ(x)=log(1+ex) 图像: 图 4-11 Softplus 函数图 特性: 非 0 为中心(non-zero-centered) 输出范围在 [0, +∞)[0,\\ +\\infty)[0, +∞) 之间,导数正好为 Sigmoid 输出 ≥0\\ge 0≥0 ,反向传播(BP)权值正向堆积(梯度始终 ≥0\\ge 0≥0 ) 当输入在 [+5, +∞)[+5,\\ +\\infty)[+5, +∞) 时,梯度趋近常量 111 ,极大避免梯度消失问题 及 梯度爆炸问题 当输入在 (−∞, −5](-\\infty,\\ -5](−∞, −5] 时,输出近乎无变化,逐层梯度趋 000 ,更易导致梯度消失 指数计算,较为消耗资源 Softplus 可以看作是 ReLU 的平滑版,即无穷阶连续可导。但是因为采用了指数运算,且特性在计算机处理可近似相同。因此,常常使用 ReLU 而不是 Softplus。并且实验验证,Softplus 也并不优于 ReLU。 Softplus 算子化 利用 C 语言实现对算子的封装,有: #include #include double softplus(double x) { return log(1 + exp(x)); } int main() { double x = 0.5; double y = softplus(x); printf(\"The softplus of %f is %f\\n\", x, y); return 0; } 运行验证可得到结果: The softplus of 0.500000 is 0.648721 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_3_4.html":{"url":"Chapter_4/Language/cn/Docs_4_3_4.html","title":"4.3.4 ReLU 族 ","keywords":"","body":"4.3.4 ReLU 族 矫正线性单元(ReLU [Rectified Linear Unit]) 是整个经典激活函数中,被使用最广泛的经典中的经典。经过多年探索,已经形成了一系列以 ReLU 为基础的多种变体,用于各种突出场景。 ReLU(Rectified Linear Unit) 迭代公式: δ(x)=Max(0, x) {\\displaystyle \\begin{aligned} \\delta(x) = Max(0,\\ x) \\\\ \\end{aligned} } δ(x)=Max(0, x) 图像: 图 4-12 ReLU 函数图 特性: 非 0 为中心(non-zero-centered) 输出范围在 [0, +∞)[0,\\ +\\infty)[0, +∞) 之间 输出 ≥0\\ge 0≥0 ,反向传播(BP)权值正向堆积(梯度始终 ≥0\\ge 0≥0 ) 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度为常量 111 ,完美解决梯度消失问题 及 梯度爆炸问题 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,梯度为 000 ,面临梯度归零问题 线性处理便于计算 ReLU(2013)被称为 线性整流函数,又称为线性修正单元。ReLU 
因其简洁的特性,极低的运算量,成为了当前最常用的激活函数。业界各位炼丹师在不清楚或不确定具体使用什么激活函数时,常常选择 ReLU 或 其变体 来作为默认激活函数。 不过,纯粹的 ReLU 因为对于 神经元死亡(Dead Neuron)。已经不是梯度消失,而是直接没有了哪怕细微的迭代变化可能,即完全失活梯度归零。 但即便如此,ReLU 仍是目前最好用的激活函数。 PReLU & LReLU & RReLU 迭代公式: PReLU: δ(x)=Max(0, x)+α⋅Min(0, x)(α=0.1)LReLU: δ(x)=Max(τx, x)(τ=0.1)RReLU: δ(x)=Max(αx,x)withα=Random(lower, upper) ) {\\displaystyle \\begin{aligned} PReLU: \\ \\delta(x) &= Max(0,\\ x) + \\alpha \\cdot Min(0,\\ x) \\quad (\\alpha=0.1) \\\\ LReLU: \\ \\delta(x) &= Max(\\tau x,\\ x) \\quad (\\tau=0.1) \\\\ RReLU: \\ \\delta(x) &= Max(\\alpha x,x) \\quad with \\\\ \\alpha &= Random(lower,\\ upper) \\ ) \\\\ \\end{aligned} } PReLU: δ(x)LReLU: δ(x)RReLU: δ(x)α=Max(0, x)+α⋅Min(0, x)(α=0.1)=Max(τx, x)(τ=0.1)=Max(αx,x)with=Random(lower, upper) ) 图像: 图 4-13 PReLU & LReLU & RReLU 函数图 特性: 0 为中心(zero-centered) 输出范围在 (−∞, +∞)(-\\infty,\\ +\\infty)(−∞, +∞) 之间 输出值域对称,降低在正向堆积风险 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度为常量 111 ,完美解决梯度消失问题 及 梯度爆炸问题 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,PReLU 梯度为训练参数 τ\\tauτ (参与训练,启动值为 τ=0.1\\tau=0.1τ=0.1 ) 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,LReLU 梯度为 α=0.1\\alpha=0.1α=0.1 (常量) 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,RReLU 梯度为范围内参数(随机值) 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,三类梯度度 0.50.5 (大部分情况),还是存在 梯度消失问题 线性处理便于计算 PReLU(2016) 、 LReLU(2015 Russakovsky ImageNet 分类)、RReLU(2017 Kaggle 全美数据科学大赛 即 NDSB) 三者间的差别主要就在于 ( 0, +∞) 时的梯度是常数、参与训练、随机限定范围内取值。三者的目的都是试图通过引入 ReLU 灵活方案:NReLU(Noisy ReLU)& ReLU-N 除了上述的 ReLU 变体外,我们还可以根据实际需要选择在使用上述变体的时候,引入辅助处理,常见的辅助处理有两种: 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,引入噪音偏置常量(梯度非 111 ),或参与训练参数(类 PReLU) 当输入在 (c, +∞)(c,\\ +\\infty)(c, +∞) 时,限定最大上限常量(右饱和),或类比 LReLU 处理 这样的操作常用在一些需要限定约束激活层输出的地方使用,属于 小技巧(Tricks)。是需要 谨慎使用 的一种处理手段。 ReLU 族算子化 利用 C 语言实现对算子的封装,有: #include #include #include #include double relu(double x) { return fmax(0, x); } double prelu(double x, double tau) { return fmax(0, x) + tau * fmin(0, x); } double lrelu(double x, double alpha) { return fmax(alpha * x, x); } double rrelu(double x, double alpha, double lower, double upper) { double r = (double)rand() / (double)RAND_MAX; double alpha_rand = lower + r * (upper - lower); return fmax(alpha_rand * x, x); } int main() { // ReLU { double x = -0.5; double y = relu(x); printf(\"The ReLU of %f is %f\\n\", x, y); } { double x = +0.5; double y = relu(x); printf(\"The ReLU of %f is %f\\n\", x, y); } // PReLU { double x = -0.5; double tau = 0.1; double y = prelu(x, tau); printf(\"The PReLU of %f with alpha=%f is %f\\n\", x, tau, y); } // LReLU { double x = -0.5; double alpha = 0.1; double y = lrelu(x, alpha); printf(\"The LReLU of %f with alpha=%f is %f\\n\", x, alpha, y); } // RReLU { // Set the random seed srand(time(NULL)); double x = -0.5; double alpha = 0.1; double lower = 0.0; double upper = 1.0; double y = rrelu(x, alpha, lower, upper); printf(\"The RReLU of %f with alpha=%f, lower=%f, and upper=%f is %f\\n\", x, alpha, lower, upper, y); } return 0; } 运行验证可得到结果: The ReLU of -0.500000 is 0.000000 The ReLU of +0.500000 is 0.500000 The PReLU of -0.500000 with alpha=0.100000 is -0.050000 The LReLU of -0.500000 with alpha=0.100000 is -0.050000 The RReLU of -0.500000 with alpha=0.100000, lower=0.000000, and upper=1.000000 is -0.019595 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_3_5.html":{"url":"Chapter_4/Language/cn/Docs_4_3_5.html","title":"4.3.5 ELU & SELU","keywords":"","body":"4.3.5 ELU & SELU 迭代公式: ELU: δ(x)={xx≥0α(ex−1)x0SELU: δ(x)=λ⋅ELU(x, α) 
{\\displaystyle \\begin{aligned} ELU: \\ \\delta(x) &= \\begin{cases} x & x \\geq 0 \\\\ \\alpha (e^x-1) & xELU: δ(x)SELU: δ(x)={xα(ex−1)x≥0x0=λ⋅ELU(x, α) 图像: 图 4-14 ELU & SELU 函数图 特性: 0 为中心(zero-centered) 输出范围在 (−c, +∞)(-c,\\ +\\infty)(−c, +∞) 之间,称 ccc 为常量乘数 输出值域对称,降低在正向堆积风险 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度为常量 111 ,完美解决梯度消失问题 及 梯度爆炸问题 当输入在 (−∞, 0 ](-\\infty,\\ 0\\ ](−∞, 0 ] 时,梯度以 f(x)+cf(x)+cf(x)+c 形式变化,仍然存在梯度消失风险 公式中的 α\\alphaα 可取经验值,也可参与迭代 指数计算,较为消耗资源 ELU(2016)被称为 指数线性单元。也是一种为了处理 ReLU 梯度消失问题而提出的激活函数。 ELU 比之 ReLU 其他几种变体,最大的特点就是曲线平滑。而 SELU 则是在原有 ELU 激活函数的基础上,再乘以一个系数(通常取固定常量),即 SELU(x)=λ⋅ELU(x)SELU( x ) = \\lambda \\cdot ELU( x )SELU(x)=λ⋅ELU(x) 。根据原作者 京特·克兰鲍尔(Günter Klambauer) 在论文《Self-Normalizing Neural Networks》中的描述 [8] ,推荐取 λ=1.0507009873554804934193349650124\\lambda = 1.0507009873554804934193349650124λ=1.0507009873554804934193349650124 的经验值。 SELU 可使输入经过一定层数处理后,变为固定分布。 ELU & SELU 算子化 利用 C 语言实现对算子的封装,有: #include #include double elu(double x, double alpha) { return x >= 0 ? x : alpha * (exp(x) - 1); } double selu(double x, double alpha, double lambda) { return lambda * (x >= 0 ? x : alpha * (exp(x) - 1)); } int main() { // ELU { double x = -0.5; double alpha = 1.0; double y = elu(x, alpha); printf(\"The ELU of %f with alpha=%f is %f\\n\", x, alpha, y); } // SELU { double x = -0.5; double alpha = 1.6732632423543772848170429916717; double lambda = 1.0507009873554804934193349650124; double y = selu(x, alpha, lambda); printf(\"The SELU of %f with alpha=%f and lambda=%f is %f\\n\", x, alpha, lambda, y); } return 0; } 运行验证可得到结果: The ELU of -0.500000 with alpha=1.000000 is -0.393469 The SELU of -0.500000 with alpha=1.673263 and lambda=1.050701 is -0.428348 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_3_6.html":{"url":"Chapter_4/Language/cn/Docs_4_3_6.html","title":"4.3.6 Mish","keywords":"","body":"4.3.6 Mish 迭代公式: δ(x)i=x⋅tanh(softplus(x)) {\\displaystyle \\begin{aligned} \\delta(x)_i &= x \\cdot tanh(softplus(x)) \\\\ \\end{aligned} } δ(x)i=x⋅tanh(softplus(x)) 即: δ(x)i=x⋅eln(1+ex)−e−ln(1+ex)eln(1+ex)+e−ln(1+ex)=x⋅(1+ex)2−1(1+ex)2+1=x⋅2ex+e2x2+2ex+e2x=x1+12ex+e2x {\\displaystyle \\begin{aligned} \\delta(x)_i &= x \\cdot \\frac{e^{ln(1+e^x)}-e^{-ln(1+e^x)}}{e^{ln(1+e^x)}+e^{-ln(1+e^x)}} \\\\ &=x \\cdot \\frac{(1+e^x)^2-1}{(1+e^x)^2+1} \\quad \\\\ &=x \\cdot \\frac{2e^x+e^{2x}}{2+2e^x+e^{2x}} \\\\ &= \\frac{x}{1+\\frac{1}{2e^x+e^{2x}}} \\qquad \\quad \\\\ \\end{aligned} } δ(x)i=x⋅eln(1+ex)+e−ln(1+ex)eln(1+ex)−e−ln(1+ex)=x⋅(1+ex)2+1(1+ex)2−1=x⋅2+2ex+e2x2ex+e2x=1+2ex+e2x1x 图像: 图 4-15 Mish 函数图 特性: 0 为中心(zero-centered) 输出范围在 [≈0.278, +∞)[\\approx 0.278,\\ +\\infty)[≈0.278, +∞) 之间,导数近似 Switch(x)Switch(x)Switch(x) 但过于复杂 输出值域对称,降低在正向堆积风险,但负向变化慢 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度 ≥0.5\\ge 0.5≥0.5 当输入趋近 +∞+\\infty+∞ 时,近似于 ReLU,梯度趋近 111 当输入趋近 −∞-\\infty−∞ 时,近似于 ReLU,梯度趋近 000 ,负向过输入大存在梯度消失风险 Mish 当 β→+∞\\beta \\rightarrow +\\inftyβ→+∞ 时,趋近 ReLU 平滑不单调 Mish 是由 迪甘塔·米斯拉(Diganta Misra) 在 2019 年提出的,其目的是为了在 Swish 基础上,提供一种更有效的激活函数。就目前而言,Mish 的有效性和性价比其实一直处于讨论中 [9] 。 不过,在实验检验下 Mish 并没有那么好用,其各方面特性都与 Swish 高度相似。而且采用 ImageNet 数据集 + MobileNetV2 + FPN 来做物体识别,从结果上反倒没有直接用 ReLU、或者 Swish 效果好,且 MAdds 激增。 因此,本书作者不建议使用。如果既想要利用函数平滑特性来提高优化函数效率,又不想要增加太多算力消耗的话,建议可以考虑 Swish,或 h-Swish(ReLU-N)。 Mish 算子化 利用 C 语言实现对算子的封装,有: #include #include double mish(double x) { return x * tanh(log(1 + exp(x))); } int main() { double x = 0.5; double y = mish(x); printf(\"The mish of %f is %f\\n\", x, y); return 0; } 
运行验证可得到结果: The mish of 0.500000 is 0.462117 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_3_7.html":{"url":"Chapter_4/Language/cn/Docs_4_3_7.html","title":"4.3.7 Swish 族 ","keywords":"","body":"4.3.7 Swish 在本节开始时,我们曾提到过可以用 Swish 来代替 Sigmoid 在模型中的作用,以获取平滑非线性特征。那么 Swish 具体是什么样的呢? Swish & Swish-β 迭代公式: δ(x)i=x⋅sigmoid(x)=x1+e−x {\\displaystyle \\begin{aligned} \\delta(x)_i = x \\cdot sigmoid(x)=\\frac{x}{1+e^{-x}} \\\\ \\end{aligned} } δ(x)i=x⋅sigmoid(x)=1+e−xx 迭代公式(参与训练动态参数版本,Swish-β ): δ(x)i=x⋅sigmoid(βx)=x1+e−βx {\\displaystyle \\begin{aligned} \\delta(x)_i = x \\cdot sigmoid(\\beta x)=\\frac{x}{1+e^{-\\beta x}} \\\\ \\end{aligned} } δ(x)i=x⋅sigmoid(βx)=1+e−βxx 图像: 图 4-16 Swish 函数图 特性: 0 为中心(zero-centered) 输出范围在 [≈0.278, +∞)[\\approx 0.278,\\ +\\infty)[≈0.278, +∞) 之间,导数为 swish(x)+sigmoid(x)⋅(1−swish(x))swish(x) + sigmoid(x) \\cdot ( 1-swish(x) )swish(x)+sigmoid(x)⋅(1−swish(x)) 输出值域对称,降低在正向堆积风险,但负向变化慢 当输入在 (0, +∞)(0,\\ +\\infty)(0, +∞) 时,梯度 ≥0.5\\ge 0.5≥0.5 当输入趋近 +∞+\\infty+∞ 时,近似于 ReLU,梯度趋近 111 当输入趋近 −∞-\\infty−∞ 时,近似于 ReLU,梯度趋近 000 ,负向过输入大存在梯度消失风险 Swish-β 当 β→+∞\\beta \\rightarrow +\\inftyβ→+∞ 时,趋近 ReLU 平滑不单调 Swish 是由谷歌实验室在 2017 年提出的,提出以后其实一直争议不断。Swish 被谷歌认为是一种可以完美替代 ReLU 的简单激活函数,在论文中的演示里,其使用同模型在 Mobile NASNet-A (Zoph et al., 2017) 和 Inception-ResNet-v2 (Szegedy et al., 2017) 数据集上分别带来了0.9% 和 0.6% 的准确度提升 [10] 。不过业界普遍认为这个是因为数据集完善带来的。 Swish 作为一种平滑函数,它的特性和 SoftPlus 类似,优势都体现在优化函数的连续处理上。另外不单调,也能够提供更灵活的特性变化。兼容算力消耗,Swish-β 也不失为一种良好的选择。否则还是建议使用 ReLU 处理。 h-Swish 迭代公式: δ(x)i=x⋅h-sigmoid(x)=x⋅ReLU6(x+3)6 {\\displaystyle \\begin{aligned} \\delta(x)_i = x \\cdot h\\text{-}sigmoid(x)= x \\cdot \\frac{ReLU6(x+3)}{6} \\\\ \\end{aligned} } δ(x)i=x⋅h-sigmoid(x)=x⋅6ReLU6(x+3) 图像: 图 4-17 h-Swish 函数图 特性: 非 0 为中心(non-zero-centered) 输出范围在 [−0.375, +∞)[ -0.375,\\ +\\infty)[−0.375, +∞) 之间 输出 ≥0\\ge 0≥0 ,反向传播(BP)权值正向堆积(梯度始终 ≥0\\ge 0≥0 ) 当输入在 [+3, +∞)[ +3,\\ +\\infty)[+3, +∞) 时 梯度为 111 ,完美解决梯度消失问题 及 梯度爆炸问题,等效 ReLU 当输入在 (−∞, −3](-\\infty,\\ -3](−∞, −3] 时 梯度为 000 ,面临神经元死亡问题,等效ReLU 当输入在 (−3, +3)(-3,\\ +3)(−3, +3) 时 梯度为 cx+bcx+bcx+b ,c=16c = \\tfrac{1}{6}c=61 ,b=0.5b = 0.5b=0.5 ,梯度 ≥0.5\\ge 0.5≥0.5 非指数处理便于计算 非平滑不单调 h-Swish 是由谷歌实验室在 2019 年的 MobileNetV3 中提出的,用于作为两种 MobileNet 关键优化手段中的一种 [11] 。h 表示 hard。h-Swish 与 Swish 最大的不同就在于,用近似 sigmoid 的 ReLU-6(x+3) / 6 代替了 Sigmoid,也被称为 h-Sigmoid。 h-Swish 保留了 Swish不单调的特性,能够更好的进行非线性特性的引入。但是 h-Swish 也保留了 Swish 的有效范围特性。且因为采用 h-Sigmoid 处理,在样本输入小于 -3,将会导致神经元死亡问题。但是 h-Swish 的优势也同样明显,因为单激活函数最高只用到二次幂,实际运算当中较 Swish 节约了相当的算力。因此,建议根据情况,选择特征缩放处理(或单边限定偏移)后使用。其本身还是很有潜力的新兴激活函数。 需要注意的是,考虑到计算便利性和 Tanh 与 Sigmoid 的函数趋势近似。在工程中,我们 采用 Tanh 代替原论文的 Sigmoid 进行 Swish 族的算子化。同理也适用于,采用 log 代替 h-Sigmoid。从而简化了计算过程。 Swish 族算子化 利用 C 语言实现对算子的封装,有: #include #include double swish(double x) { return x * tanh(x); } double swish_beta(double x, double beta) { return x * tanh(beta * x); } double h_swish(double x) { return x * tanh(log(1 + exp(x))); } int main() { // Swish { double x = 0.5; double y = swish(x); printf(\"The swish of %f is %f\\n\", x, y); } // Swish-β { double x = 0.5; double beta = 1.0; double y = swish_beta(x, beta); printf(\"The swish-beta of %f with beta=%f is %f\\n\", x, beta, y); } // h-Swish { double x = 0.5; double y = h_swish(x); printf(\"The h-swish of %f is %f\\n\", x, y); } return 0; } 运行验证可得到结果: The swish of 0.500000 is 0.462117 The swish-beta of 0.500000 with beta=1.000000 is 0.462117 The h-swish of 0.500000 is 0.462117 至此,常用激活函数基本梳理完毕。 
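补充说明:上面的算子化实现,为便于计算,用 Tanh 近似替代了原论文中的 Sigmoid(以及 h-Sigmoid)。若想对照本节迭代公式的原始定义,即 δ(x)=x⋅sigmoid(x) 与 h-Swish 的 ReLU6 形式,可参考下面这段按公式直接实现的 C 语言示意;其中 relu6 为示意用的辅助函数,并非某个现成库接口:

#include <stdio.h>
#include <math.h>

/* 按本节迭代公式直接实现的 Swish 族(示意版),relu6 为辅助函数 */
static double relu6(double x) { return fmin(fmax(x, 0.0), 6.0); }

double swish_sigmoid(double x)                { return x / (1.0 + exp(-x)); }       /* x * sigmoid(x) */
double swish_beta_sigmoid(double x, double b) { return x / (1.0 + exp(-b * x)); }   /* x * sigmoid(beta * x) */
double h_swish_relu6(double x)                { return x * relu6(x + 3.0) / 6.0; }  /* x * ReLU6(x + 3) / 6 */

int main(void) {
    double x = 0.5;
    printf("The swish (sigmoid form) of %f is %f\n", x, swish_sigmoid(x));
    printf("The swish-beta (sigmoid form) of %f with beta=1.0 is %f\n", x, swish_beta_sigmoid(x, 1.0));
    printf("The h-swish (ReLU6 form) of %f is %f\n", x, h_swish_relu6(x));
    return 0;
}

两种写法在同一输入下的输出会略有差异,可以据此体会近似替代带来的偏差,按需取舍。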
其实,常用于中间层的激活函数,往往都是简单激活函数,这样能够在引入非线性特征的同时,相对较小或者几乎不怎么消耗算力资源。而在最终阶段,常常使用复杂的激活函数来做分类器,或结果控制的操作。相对来说,复杂激活函数的使用往往都放在最终阶段的原因,就是因为其过于复杂的特性,能够适应更严格的情况,但也相对更耗费算力资源,无法频繁使用。 至于如何更好的使用激活函数,建议结合激活函数的特性,将输入值进行适当的放缩,例如:如果使用Sigmoid,那么我们可以先行放缩上层输入到 ( -5, 5 ) 的范围内,这样一定程度的避免梯度消失问题。所以,如何根据选用的激活函数,适当的调整上层输入,将会对结果大有裨益。 另外,个人理解 光滑(smooth) 函数类型的激活函数,其优势在于 能够更好的配合优化方法,而且能够 解离 不同分类之间的差异性(连续非离散,差异细化),使得模型具有更好的鲁棒性。但是因为算力上不占优势,建议用在 last stage 部分。 除此之外,非单调性也是近期激活函数新的关注点,业界的研究显示,适当的引入非单调性,能够很好的增强激活函数将源数据,输出为非线性数据的信息保存水平。 综合而言,建议现阶段使用激活函数,优先考虑:ReLU、LReLU、ReLU-N、h-Swish,根据是否需要配合优化算法(利用 smooth 特性),进一步选择是否采用:Softplus、Swish。结合现有硬件水平,适度的考虑含有指数计算的激活函数。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_4.html":{"url":"Chapter_4/Language/cn/Docs_4_4.html","title":"4.4 连接函数/衰减函数(Connection/Attenuation Function)","keywords":"","body":"4.4 连接函数/衰减函数(Connection/Attenuation Function) 连接函数(Connection Function) 是一种被设计用来,在模型训练的每个单元数据输入位置,为输入进行筛选收减的特殊辅助函数。常被用在诸如:全链接层(Full Connected Layer) 、 自注意力层(Self-Attention Layer) 等 输出层(Output Layer) 或 部分特殊隐藏层(Hidden Layer) 单元设计中。用来对经过激活(或纯输入)的当前层输入进行 特征提炼(Feature Extraction)。由于在当下占据主流的 Transformer 模型中,以自注意力层的注意力衰减机制存在,因而也被称为 衰减函数(Attenuation Function)。 图 4-18 连接函数作用阶段(图中蓝色线条)示意图 连接函数,基本满足:单一输入输出、多层参数处理、可参与训练参数 > 1,的特点。其中,较为经典的主要有 3 个,分别是 Dropout 、Maxout 、Softmax 。 由于链接函数作用于层中各节点的串联,因此,为了便于说明。 统一的: 多个前置常为向量形式输入,这里我们统一记输入为 x⃗\\vec{x}x⃗ ,各分量值都以 xxx 代替。 以 iii 代表对应层输入通道,输入层输入设置为 nnn ,则 iii 顺序取值 [1, n][ 1,\\ n][1, n] 。 以 jjj 代表对应层激活节点,激活层节点设置为 kkk ,则 jjj 顺序取值 [1, k][ 1,\\ k][1, k] 。 以 WWW 代表对应层激活节点权重,对应输入 iii 的激活节点 jjj 的权重就为 WijW_{ij}Wij 。 以 bbb 代表对应层激活节点偏移,对应输入 iii 的激活节点 jjj 的偏移就为 bijb_{ij}bij 。 以 zzz 代表对应层计算后值,对应激活节点 jjj 的算后值就为 zjz_jzj 。 以 hj(x)h_j(x)hj(x) 代表对应层,各节点计算值 zjz_jzj 经过函数 f(zj)f(z_j)f(zj) 处理后输出,对应下一层的输入。 则,未经过链接函数处理前后的网络情况,可以用 公式表示 为: zj=wijT⋅x+bijhj(x)=f(z) {\\displaystyle \\begin{aligned} z_j &= {w_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= f(z) \\end{aligned} } zjhj(x)=wijT⋅x+bij=f(z) 其中,函数 f(x)f(x)f(x) 的选择非常广泛,既可以是一些激活函数,如 ReLU,也可以是链接函数。而链接函数的作用位置往往有一些差异,部分作用于 zjz_jzj 的计算过程,另一些则直接作用于结果的筛选。介于作用在多个节点范围,我们用 Σ(x⃗)\\Sigma(\\vec{x})Σ(x⃗) 来代指整个 链接函数生效过程,它的输出即是下一层(即后一级,如果有)的输入向量。 则,经过链接函数处理前后的网络情况,就可以用公式表示,有: Σ(x⃗)=∑hj(x) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\end{aligned} } Σ(x⃗)=∑hj(x) 在这些前提下,我们来看这三个经典链接函数。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_4_1.html":{"url":"Chapter_4/Language/cn/Docs_4_4_1.html","title":"4.4.1 Dropout","keywords":"","body":"4.4.1 Dropout 迭代公式: Σ(x⃗)=∑hj(x)∈{f(z)∈Activation FunctionRj=0 or 1∈Bernoulli(p)zj=Rj⋅WijT⋅x+bijhj(x)=f (zij) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} f(z) &\\in Activation \\ Function\\\\ R_j &= 0 \\ \\text{or} \\ 1 \\in Bernoulli(p) \\\\ z_j &= R_j \\cdot {W_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= f\\ (z_{ij}) \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎪⎪⎪⎨⎪⎪⎪⎧f(z)Rjzjhj(x)∈Activation Function=0 or 1∈Bernoulli(p)=Rj⋅WijT⋅x+bij=f (zij) 图像: 图 4-19 Dropout 输入输出作用示意图 特性: Dropout 采用了根据采用者需要的任意设定激活函数,来作为 f(zj)f(z_j)f(zj) 功效 Dropout 对每一个激活节点输出 zjz_jzj 都赋予了根据伯努利分布的随机 000 或 111 附加筛选值 伯努利分布(Bernoulli Distribution)参数 ppp 的值,越大越容易取 111 ,越小则易取 000 被证明,当 p=0.5p=0.5p=0.5 时,能够带来最好的 类正则效果 每次触发层计算,伯努利结果 RijR_{ij}Rij 都会根据 ppp 重新获取 变相取平均,能够减少同层内,神经元间的公适性 辅助链接层处理,作用于节点选择,0 丢弃,1 通过 Dropout 是由 Hinton 于 2012 年提出的一种,针对容易过拟合小数据集训练的,过拟合防治手段 [11] 
。其本身通过阻塞当前层计算中的生效节点,来实现对当次参与计算权重的随机过滤,从而降低各个训练参数间的关联性。 这个方法随后就被用在了于同年发表的 AlexNet 上,并随着 AlexNet 飞跃式的高准确度(在发表时间点),一起被人们熟知。而随着后续多篇相关 Dropout 数学特征和统计研究的文献中,证明了 Dropout 不止可以被运用于小样本情况,更是相当有效的正则化和模型鲁棒性处理方式。 直到今日,仍然被运用于大量模型的训练中。 Dropout 算子化 利用 C 语言实现对算子的封装,有: #include #include #include double dropout(double x, double p) { if (drand48() 运行验证可得到结果: The dropout of 0.500000 with p=0.500000 is 0.000000 The dropout of 0.500000 with p=0.500000 is 1.000000 和理论表现一致。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_4_2.html":{"url":"Chapter_4/Language/cn/Docs_4_4_2.html","title":"4.4.2 Maxout","keywords":"","body":"4.4.2 Maxout 迭代公式: Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijhj(x)=maxj∈[1,k] (zj) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= \\max_{j \\in [1, k]}\\ (z_j) \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎨⎧zjhj(x)=WijT⋅x+bij=j∈[1,k]max (zj) 图像: 图 4-20 Maxout 输入输出作用示意图 特性: Maxout 对输入,走激活层进行线性处理,一个节点即一次线性拟合,参数激增 ≥ k 倍 最终是由经由激活层映射后的数据,计算 Max 取极大值 本身不面对梯度消失导致和梯度爆炸问题 适合配套 Dropout ,作为后级或位于同层一起使用 无法用于求导,Maxout 不可微 整体处理线性,且非饱和 在 Goodfellow 提出的 Maxout Networks 中指出 Dropout 在层数更多的框架中,能有更好的性能 [13] 。因此,应该有与之匹配的激活函数,来以一种通用的手段将原有模型抽象非线性特征过程,进行层化处理。 Maxout 的设计目的,就是为了更好的使用 Hinton 提出的 Dropout 的性能,提供此类操作。其需要学习的参数就是k个神经元中的权值和偏置,这就相当于常规的激活函数一层,而 Maxout 是两层,而且参数个数增加了 K 倍。 Maxout 能够有效的原理是,任何 ReLU 及其变体等激活函数都可以看成分段的线性函数,而 Maxout 加入的一层神经元正是一个可以学习参数的分段线性函数。所以,理论是可以拟合(无限分割)所有凸函数的。 如下图展示的 k 为 1、2、4 时的情况: 图 4-21 Maxout 凸函数拟合示意图[13] 但是,由于 Maxout 会导致参数激增,从而造成运算量增加,因此不常使用。且由于本身的 不可微 特性,大部分情况下 Maxout 仅能 被用于末尾层中,来对此时已经经过提炼,参数相对较少的特征,进行连接拟合。 Maxout 算子化 利用 C 语言实现对算子的封装,有: #include #include double maxout(double *x, int size) { double max_value = x[0]; for (int i = 1; i max_value) { max_value = x[i]; } } return max_value; } int main() { int size = 3; double vecx[] = {0.5, 0.75, 1.0}; double w = maxout(vecx, size); printf(\"The maxout of the input vector is %f\\n\", w); return 0; } 运行验证可得到结果: The maxout of the input vector is 1.000000 和理论表现一致。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_4_3.html":{"url":"Chapter_4/Language/cn/Docs_4_4_3.html","title":"4.4.3 SoftMax","keywords":"","body":"4.4.3 Softmax 迭代公式: Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijhj(x)=ezj∑ezj {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= \\frac{e^{z_j}}{\\sum{e^{z_j}}} \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎨⎧zjhj(x)=WijT⋅x+bij=∑ezjezj 迭代公式( log 版本,log-Softmax): Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijhj(x)=log(ezj∑ezj)=zj−log(∑j=1kezj) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ h_j(x) &= log(\\frac{e^{z_j}}{\\sum{e^{z_j}}})=z_j-log(\\sum_{j=1}^k{e^{z_j}}) \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎪⎪⎨⎪⎪⎧zjhj(x)=WijT⋅x+bij=log(∑ezjezj)=zj−log(j=1∑kezj) 迭代公式( stable 版本,stable-Softmax): Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijD=log(C)=−max(z1,z2,...,zk)hj(x)=C⋅ezjC⋅∑ezj=ezj+log(C)∑ezj+log(C)=ezj+D∑ezj+D {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ D &= log(C)=-max(z_1, z_2,...,z_k) \\\\ h_j(x) &= \\frac{C\\cdot e^{z_j}}{C\\cdot \\sum{e^{z_j}}} = \\frac{e^{z_j+log(C)}}{\\sum {e^{z_j+log(C)}}}=\\frac{e^{z_j+D}}{\\sum {e^{z_j+D}}} \\end{cases} 
\\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎪⎪⎪⎨⎪⎪⎪⎧zjDhj(x)=WijT⋅x+bij=log(C)=−max(z1,z2,...,zk)=C⋅∑ezjC⋅ezj=∑ezj+log(C)ezj+log(C)=∑ezj+Dezj+D 迭代公式( stable-log 结合版本,stable-log-Softmax): Σ(x⃗)=∑hj(x)∈{zj=WijT⋅x+bijD=log(C)=−max(z1,z2,...,zk)hj(x)=log(C⋅ezjC⋅∑ezj)=(zj−D)−log(∑j=1ke(zj−D)) {\\displaystyle \\begin{aligned} \\Sigma(\\vec{x}) =\\sum h_j(x) \\in \\begin{cases} z_j &= {W_{ij}}^T \\cdot x+b_{ij} \\\\ D &= log(C)=-max(z_1, z_2,...,z_k) \\\\ h_j(x) &= log(\\frac{C\\cdot e^{z_j}}{C\\cdot \\sum{e^{z_j}}}) = (z_j-D)-log(\\sum_{j=1}^k{e^{(z_j-D)}}) \\end{cases} \\\\ \\end{aligned} } Σ(x⃗)=∑hj(x)∈⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧zjDhj(x)=WijT⋅x+bij=log(C)=−max(z1,z2,...,zk)=log(C⋅∑ezjC⋅ezj)=(zj−D)−log(j=1∑ke(zj−D)) 图像: 图 4-22 Softmax 输入输出作用示意图 特性: Softmax 能够起到归一化作用,将输入变换到输出范围在 [ 0, 1 ] 之间 输出满足概率累和为 1 求最大值过程非稀疏化 只增加了一层用于概率映射的隐藏层,增加了 input 个参数 Softmax 存在大指数输入导致的数值稳定性问题 log-Softmax 少了除法,相对数值更加稳定 stable-Softmax 对指数做了差值限定,但因为除法,可能会导致除零问题 stable-log-Softmax 有 stable 和 log 两者的优点,且无除零问题,但略微增加消耗 Softmax 常用于多目标分类、目标预测、NLP领域。能够将数字特征映射到概率范围内。常用在全联接层后,并配合 Cross-Entropy 损失函数使用。 目前 Softmax 的多种变体中,被使用最多的还是 stable-log-Softmax ,且涵盖了 log-Softmax 的相关情况。因此,一般将 stable-log-Softmax 和 log-Softmax ,统一称为 log-Softmax。 Softmax 被广泛使用的原因,还是在于它自带归一化,且能够稳定神经元的功能。这使得用 Softmax 做链接层算子,能够在分类场景上,更快的达到期望结果。是提升训练速率的有效手段。 Softmax 算子化 利用 C 语言实现对算子的封装,有: #include #include double ori_softmax(double *x, int size) { double sum = 0; for (int i = 0; i max_value) { max_value = x[i]; } } double sum = 0; for (int i = 0; i max_value) { max_value = x[i]; } } double sum = 0; for (int i = 0; i 运行验证可得到结果: The softmax of the input vector is 0.244728 The log-softmax of the input vector is -1.401880 The stable-softmax of the input vector is 0.244728 The log-stable-softmax of the input vector is -1.401880 和理论表现一致。 当然,连接函数并不只有列出的这三种类型。每年都有大量有关此方面的研究,给出新的样式。但从上我们也能够发现,若非足够泛化,连接函数鲜有脱离模型而存在的独立类型。这在上文中列出的 Maxout 与 Dropout、Softmax 的对比中有明显体现。因此,需要在训练中注意这一点。 目前,我们已经掌握了基本的样本提炼手段, 接下来就需要考虑权重迭代了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5.html":{"url":"Chapter_4/Language/cn/Docs_4_5.html","title":"4.5 损失函数(Loss Function)","keywords":"","body":"4.5 损失函数(Loss Function) 损失函数(Loss Function) 是用来,评估当前通过模型得到的预测值和实际样本真实值之间差异的大小。通过损失函数的导数,可以得到当前迭代预测值趋近实际值的变化情况,即梯度。因此常被我们用来作为下一次迭代的依据。损失函数的计算涉及所有引入的参数,计算的尺度是从整个模型层面进行的。 图 4-23 损失函数作用阶段(图中蓝色线条)示意图 如图,一次有效的损失函数计算,通常都是发生在一次全样本遍历(epoch)之后。我们通常使用的损失函数(Loss Function),严格意义上应该被称为 成本函数(Cost Function),即一种针对 整个训练集误差进行衡量 的目标函数。 损失函数的组成 损失函数的风险来源主要有两个:来自数据的风险 和 来自结构结构。这两种风险都会导致训练模型容易过拟合,而使得泛化能力受到影响。我们可以通过降低模型的复杂度来防止过拟合,这种方法被称为 正则化(Regularization)。 以最小化损失为目标,称为 经验风险最小化 : minimize( Loss( Data∣Model ) ) minimize(\\ Loss(\\ Data|Model\\ )\\ ) minimize( Loss( Data∣Model ) ) 以最小化损失和复杂度为目标,称为 结构风险最小化 : minimize( Loss( Data∣Model ) +complexity( Model ) ) minimize(\\ Loss(\\ Data|Model\\ )\\ + complexity(\\ Model\\ )\\ ) minimize( Loss( Data∣Model ) +complexity( Model ) ) 我们通常用结构风险最小化的目标函数,作为实际损失函数。其成分广义上分为两个部分,损失项 和 正则项(在线上学习的角度上,还会引入第三项中心值项,用来约束新的迭代结果与历史记录差异性)。 损失项(Losses),用于衡量模型与数据的 拟合度(fitness) 的损失函数组成部分,也是实际需要进行选择性设计和采用的模型关键成分。这种针对性的处理,在聚类分析和人脸识别领域非常常见。根据功能的不同,又可以细分为 回归项(Regression) 和 分类项(Classification)。 正则项(Regularities),用于衡量模型 复杂度(complexity) 的损失函数组成部分。衡量模型复杂度的方法有很多。大部分是从权重对整个模型影响的层面来判断的,即从权重的大小,来衡量某个参数对整体模型的影响。 接下来,我们就分别从 回归项(Regression)、分类项(Classification)、正则项(Regularities)三种类型,来了解损失函数的使用。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 
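补充:为直观理解上文"结构风险最小化"的目标函数组成,下面给出一个把损失项与正则项相加的 C 语言最小示意。损失项这里以均方误差作为占位,正则项以 L-2 惩罚(权重平方和)为例,lambda 为假设的正则强度超参数,并非固定取值:

#include <stdio.h>

/* 结构风险(示意)= 数据损失项 + lambda * 模型复杂度(这里以 L-2 惩罚代表) */
double structural_risk(const double *y, const double *pred, int n,
                       const double *w, int n_w, double lambda) {
    double loss = 0.0, complexity = 0.0;
    for (int i = 0; i < n; i++) {                 /* 损失项:均方误差 */
        double d = y[i] - pred[i];
        loss += d * d;
    }
    for (int j = 0; j < n_w; j++) {               /* 正则项:权重平方和 */
        complexity += w[j] * w[j];
    }
    return loss / n + lambda * complexity;
}

int main(void) {
    double y[]    = {0.5, 0.75, 1.0};
    double pred[] = {0.6, 0.8, 0.9};
    double w[]    = {0.2, -0.1, 0.05};
    printf("The structural risk is %f\n", structural_risk(y, pred, 3, w, 3, 0.01));
    return 0;
}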
"},"Chapter_4/Language/cn/Docs_4_5_1.html":{"url":"Chapter_4/Language/cn/Docs_4_5_1.html","title":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error])","keywords":"","body":"4.5.1 回归项-平均绝对误差(MAE [Mean Absolute Error]) 迭代公式: Loss=1N∑i=1N∣yi−predictioni∣ {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i = 1}^{N}|y_i-prediction_i| \\\\ \\end{aligned} } Loss=N1i=1∑N∣yi−predictioni∣ 图像: 图 4-24 MAE 函数图 特性: 契合拉普拉斯分布(Laplace distribution)样本 通过样本投影平面的距离向量绝对值,来衡量预测结果 导数为常数,梯度迭代线形 非光滑(non-smooth) 线性处理便于计算 MAE 也被称为 L-1 损失(L1L_1L1 Loss)。虽然 MAE 常用于机器学习,但它既不是唯一实用的损失函数,也不是适用于所有情形的最佳损失函数。MAE 以样本分布满足拉普拉斯分布的情况为假设,因此对于样本分布满足拉普拉斯分布的样本集,会有更好的效果。MAE 的梯度变换是刚性的,但也因此不容易受到离群值的影响。相应的,MAE 的收敛速度也会更慢一些。 MAE 算子化 利用 C 语言实现对算子的封装,有: #include #include double mae(double *y_true, double *y_pred, int size) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The MAE is 0.100000 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_2.html":{"url":"Chapter_4/Language/cn/Docs_4_5_2.html","title":"4.5.2 回归项-均方误差(MSE [Mean Squared Error])","keywords":"","body":"4.5.2 回归项-均方误差(MSE [Mean Squared Error]) 迭代公式: Loss=1N∑i=1N(yi−predictioni)2 {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i = 1}^{N}(y_i-prediction_i)^2 \\\\ \\end{aligned} } Loss=N1i=1∑N(yi−predictioni)2 图像: 图 4-25 MSE 函数图 特性: 契合正态分布(Normal distribution)样本 通过投影平面上的欧式距离,来衡量预测结果 导数非常数,梯度迭代非线形 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 MSE 也被称为 L-2 损失(L2L_2L2 Loss),它相当于 MAE 的光滑版。虽然 MSE 常用于机器学习,但它既不是唯一实用的损失函数,也不是适用于所有情形的最佳损失函数。 MSE 从本质上是以极大似然估计,拟合正态分布。对于满足正态分布特性的样本数据,MSE 能相对得到满意的结果。但是对于非正态分布的问题,如:二分类,或更进一步的聚类分析,MSE 不能满足需求。MSE 常被用来做多对一正态分布样本集结果预测的损失函数使用。 MSE 和 MAE 对应差异主要是在于 鲁棒性 和 收敛速度 的权衡上,在使用条件上是类似的,根据情况选择使用。 MSE 算子化 利用 C 语言实现对算子的封装,有: #include #include double mse(double *y_true, double *y_pred, int size) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The MSE is 0.033333 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_3.html":{"url":"Chapter_4/Language/cn/Docs_4_5_3.html","title":"4.5.3 回归项-休伯损失(Huber Loss)","keywords":"","body":"4.5.3 回归项-休伯损失(Huber Loss) 迭代公式: Loss={1N∑i=1N[12⋅(yi−predictioni)2]∣yi−predictioni∣≤δ1N∑i=1N[δ⋅(∣yi−predictioni∣−12δ)]∣yi−predictioni∣>δ {\\displaystyle \\begin{aligned} Loss = \\begin{cases} \\frac{1}{N} \\sum_{i = 1}^{N} [\\frac{1}{2} \\cdot (y_i-prediction_i)^2] \\quad & |y_i-prediction_i| \\leq \\delta \\\\ \\frac{1}{N} \\sum_{i = 1}^{N}[\\delta \\cdot (|y_i-prediction_i| -\\frac{1}{2}\\delta) ] \\quad & |y_i-prediction_i| > \\delta \\end{cases} \\\\ \\end{aligned} } Loss=⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧N1i=1∑N[21⋅(yi−predictioni)2]N1i=1∑N[δ⋅(∣yi−predictioni∣−21δ)]∣yi−predictioni∣≤δ∣yi−predictioni∣>δ 图像: 图 4-26 Huber Loss 函数图 特性: 当绝对误差在 [0, δ][ 0,\\ \\delta][0, δ] 时,契合正态分布(Normal distribution) 当绝对误差在 (δ, +∞)( \\delta,\\ +\\infty)(δ, +∞) 时,契合拉普拉斯分布(Laplace distribution) 当绝对误差小于 δ\\deltaδ 时,它采用平方误差,导数非常数 当绝对误差大于 δ\\deltaδ 时,采用的线性误差,导数常数 δ2\\tfrac{\\delta}{2}2δ 。 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 休伯损失(Huber Loss) 实际上是基于 MAE 和 MSE 基础上,提出的一种兼容 MAE 与 MSE 各自优点的损失函数设计。 相比于 MSE 和 MAE,Huber Loss 的算力消耗没有太多的提升。相比于 MSE,Huber Loss 降低了 δ\\deltaδ 半径外对离群值的惩罚;相比于 MAE,Huber Loss 提高了 δ\\deltaδ 半径内回归的收敛速度。可以看出,Huber Loss 的效果首 δ\\deltaδ 的选择影响较大。因此,使用它的时候,需要注意 δ\\deltaδ 调参问题。 Huber Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double huber_loss(double *y_true, double *y_pred, int size, double delta) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The Huber loss 
is 0.033333 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_4.html":{"url":"Chapter_4/Language/cn/Docs_4_5_4.html","title":"4.5.4 回归项-分位数损失(Quantile Loss)","keywords":"","body":"4.5.4 回归项-分位数损失(Quantile Loss) 迭代公式: Loss={1N∑i=1N(1−γ)⋅∣yi−predictioni∣yipredictioni1N∑i=1Nγ⋅∣yi−predictioni∣yi≥predictioni {\\displaystyle \\begin{aligned} Loss = \\begin{cases} \\frac{1}{N} \\sum_{i = 1}^{N} (1-\\gamma) \\cdot |y_i-prediction_i| \\quad & y_i Loss=⎩⎪⎪⎪⎪⎨⎪⎪⎪⎪⎧N1i=1∑N(1−γ)⋅∣yi−predictioni∣N1i=1∑Nγ⋅∣yi−predictioni∣yipredictioniyi≥predictioni 图像: 图 4-27 Quantile Loss 函数图 图 4-28 Quantile Loss 样本拟合示意图 特性: 当预测值残差在 [0, +∞)[ 0,\\ +\\infty)[0, +∞) 时,梯度为设定值 γ\\gammaγ 当预测值残差在 (−∞, 0)(-\\infty ,\\ 0)(−∞, 0) 时,梯度为设定值 1−γ1- \\gamma1−γ 可通过 γ\\gammaγ 的设定,来有指向的调整模型结果,γ\\gammaγ 的可范围在 [0, 1][ 0,\\ 1][0, 1] 适用于区间预测,通过调整 γ\\gammaγ 范围覆盖预测区间 非光滑(non-smooth) 非指数计算,算力消耗相对较低 分位数损失(Quantile Loss) 是一种用于区间预测的损失函数。MAE、MSE、Huber 等损失函数,基于的是最小二乘法,默认预测实际值残差方差保持不变且相对独立。而以分位数损失作为损失函数的回归模型,对于具有变化方差或非正态分布的残差,也能给出合理的预测区间。 分位损失函数中,γ\\gammaγ 值代表对预测结果的预判程度:γ\\gammaγ 值 越大,对结果被低估的惩罚程度越高,即越容易被 高估 ;γ\\gammaγ 值 越小,对结果被高估的惩罚程度越高,即越容易被 低估。在区间预测过程中,通过调整 γ\\gammaγ 取值范围,来实现对样本的覆盖,得到预测区间。 因为 Quantile Loss 的这种特性,常被用来做商业评估类型的回归模型。 Quantile Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double quantile_loss(double *y_true, double *y_pred, int size, double q) { double sum = 0; for (int i = 0; i 0) { sum += q * error; } else { sum += (1 - q) * error; } } return sum / size; } int main() { int size = 3; double y_true[] = {0.5, 0.75, 1.0}; double y_pred[] = {0.6, 0.8, 0.9}; double q = 0.5; double quantile_loss_value = quantile_loss(y_true, y_pred, size, q); printf(\"The quantile loss is %f\\n\", quantile_loss_value); return 0; } 运行验证可得到结果: The quantile loss is 0.083333 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_5.html":{"url":"Chapter_4/Language/cn/Docs_4_5_5.html","title":"4.5.5 分类项-对数损失(Log Loss)","keywords":"","body":"4.5.5 分类项-对数损失(Log Loss) 迭代公式: Loss=1N∑i=1N−yi⋅log(predictioni)−(1−yi)⋅log(1−predictioni) {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i=1}^N -y_i \\cdot log(prediction_i)-(1-y_i) \\cdot log(1-prediction_i) \\\\ \\end{aligned} } Loss=N1i=1∑N−yi⋅log(predictioni)−(1−yi)⋅log(1−predictioni) 图像: 图 4-29 Log Loss 函数图 特性: 契合逻辑分布(Logistic distribution)样本,拟合 Sigmoid 模型 二分类下的交叉熵损失表现,二者本质等价 越接近目标,损失越小 越趋近两极,结果越准确 基于贝叶斯统计(Bayesian statistics),采用交叉熵估计 光滑(smooth),适合优化算法 对数计算,算力消耗相对较高 对数损失(Log Loss) 是一种利用最小化负对数似然,即交叉熵最小化,来进行逻辑回归的损失函数。实际上,Log Loss 相当于 只包含两种分类 情况下的交叉熵损失函数。其所适应逻辑分布样本集,我们认为只存在 “是/否”两种情况 的 独热向量(one-hot vector) 集合。对于此类样本集,我们一般采用 Sigmoid 将输出压缩到 [0, 1][ 0,\\ 1][0, 1] 范围内,以便于输出百分比估计结果,作为预测结果的置信水平。而从 Log Loss,我们不难看出,最小化交叉熵函数本质就是对数似然函数的最大化。 注意,对数损失只能用来区分 “是/否” 为某个物体。 这一点在初学者首次接触时,容易与交叉熵损失搞混,从而选错分类项(比如目标是多分类检测)需要小心。 Log Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double log_loss(double *y_true, double *y_pred, int size) { double sum =0; for (int i =0; i 运行验证可得到结果: The log loss is -0.056644, for object class 'apple' Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_6.html":{"url":"Chapter_4/Language/cn/Docs_4_5_6.html","title":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss)","keywords":"","body":"4.5.6 分类项-交叉熵损失(Cross Entropy Loss) 迭代公式: Loss=1N∑i=1N[∑j=1k−yj⋅log(predictionj)]i {\\displaystyle 
\\begin{aligned} Loss = \\frac{1}{N} \\sum_{i=1}^N [\\sum_{j=1}^k -y_j \\cdot log(prediction_j)]_i \\\\ \\end{aligned} } Loss=N1i=1∑N[j=1∑k−yj⋅log(predictionj)]i 图像: 图 4-30 Cross Entropy Loss 函数图 特性: 契合逻辑分布(Logistic distribution)样本,拟合 Softmax 模型 二分类下的交叉熵损失表现,二者本质等价 越接近目标,损失越小 越趋近两极,结果越准确 基于贝叶斯统计(Bayesian statistics),采用交叉熵估计 光滑(smooth),适合优化算法 对数计算,算力消耗相对较高 交叉熵损失(CEL [Cross Entropy Loss]) 是一种处理分布于高维同平面(K-Space)下独热向量(one-hot vector)样本集的聚类分析手段。交叉熵损失函数是一种为了 配合 Softmax 激活函数 的损失函数设计,输出满足概率累和为 1。这是因为交叉熵的本质,是试图用预测值来表示某个事件发生所需要的平均概率,从概念上,将事物可能发生的几率,和事物不可能发生的几率做了二元分割,即 Log Loss 实际上是 CEL 的最简表示形式。 所以,在使用交叉熵损失前,最好 先对参与交叉熵运算的所有同样本,进行一次 Softmax 处理,以求尽可能保证估计值之和为 1。 但是需要注意的是,交叉熵损失本身,并不依赖于是否对输入概率进行了归一化。也就是说,虽然可以进行估值之和大于 1 的输入处理,但本身会相对失去意义。因为,CEL 的结果越小,越说明分类估值准确性。非归一化输入只会干扰结果,从而影响模型准确。 Cross Entropy Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double cross_entropy_loss(double *y_true, double *y_pred, int size, int num_classes) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The cross entropy loss is 0.1982671, for 'cat' 'puppy' 'dog' 上面的代码中,展示了存在三类分类情况下,样本的输入分类和预测特征向量,皆未归一化会产生的结果。交叉熵损失仍然能使用,但不精确。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_7.html":{"url":"Chapter_4/Language/cn/Docs_4_5_7.html","title":"4.5.7 分类项-合页损失(Hinge Loss)","keywords":"","body":"4.5.7 分类项-合页损失(Hinge Loss) 迭代公式: Loss=1N∑i=1Nmax(0,1−yi⋅predictioni) {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i=1}^N \\max(0, 1-y_i \\cdot prediction_i) \\\\ \\end{aligned} } Loss=N1i=1∑Nmax(0,1−yi⋅predictioni) 图像: 图 4-31 Hinge Loss 函数图 特性: 缺乏统计学理论支持,没有太好的概率解释 样本值为 -1 或 1,预测值区间范围限定 [−1, +1][ -1,\\ +1][−1, +1] 之间 一般情况下,会限定排除 ∣prediction∣>1|prediction| > 1∣prediction∣>1 的取值,因此对离群值有较好的健壮性 越趋近于 0,结果越准确 依赖决策边界驱动,通过决策边界移动分割样本集 非光滑(non-smooth) 线性处理便于计算 合页损失(Hinge Loss) 通常与 L-2 正则项一起使用,这正是 SVM 支持向量机模型采用的损失函数。Hinge Loss 对非超出驱动边界的满足条件预测给予偏离度惩罚,而对于离散值则直接进行忽略。因此,Hinge Loss 的健壮性比较强。 然而 Hinge Loss 所依赖的决策边界的处理方式更类似于经验划分。对于样本量不足的情况,Hinge Loss(实际上是对应的 SVM)常常会过拟合(Over-fitting)。所以,这种边界限定的方式,在深度学习中常被衍生为一种样本集的裁剪方式的小技巧(trick)来使用。 此外,在概率,尤其是贝叶斯学派看来,Hinge Loss 并不足够合理。贝叶斯学派认为,概率应该用来量化不确定性,而 Hinge Loss 则是一种确定性的损失函数。因此,Hinge Loss 并不完全符合贝叶斯学派的观点。 不过,因果推断方面的领军人物 朱迪亚·珀尔(Judea Pearl) 在其著作《Causality》中阐述了他早期作为贝叶斯学派支持者对于 SVM 的看法。他认为,SVM 是一种经验风险最小化(ERM)方法,它并不依赖于概率模型。因此,Hinge Loss 虽然不完全符合概率,但也并不违背贝叶斯学派的基本原则。 Hinge Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double hinge_loss(double *y_true, double *y_pred, int size) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The hinge loss is 0.250000 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_8.html":{"url":"Chapter_4/Language/cn/Docs_4_5_8.html","title":"4.5.8 分类项-对比损失(Contrastive Loss)","keywords":"","body":"4.5.8 分类项-对比损失(Contrastive Loss) 迭代公式: Di=∣predictioni∣Loss=1N∑i=1N(yi⋅Di2+(1−yi)⋅max(0, m−Di)2) {\\displaystyle \\begin{aligned} D_i &= | prediction_i| \\\\ Loss &= \\frac{1}{N} \\sum_{i=1}^N(y_i \\cdot D_i^2 + (1 - y_i) \\cdot max(0,\\ m - D_i)^2) \\\\ \\end{aligned} } DiLoss=∣predictioni∣=N1i=1∑N(yi⋅Di2+(1−yi)⋅max(0, m−Di)2) 图像(蓝线 Pred,红线 True): 图 4-32 Contrastive Loss 函数图[14] 特性: 基于投影平面角度,降维分离样本类型 mmm 项代表被认为相似的确认半径 样本相似则 yi=1y_i = 1yi=1 ,样本不相似则 yi=0y_i = 0yi=0 增大类间差异并且减小类内差异,损失函数值最小时,两者达到均衡点 当样本不相似时,预测距离在 DwmD_w Dwm 的范围内,模型会试图增大不相似样本点之间的距离 越接近样本情况,损失越小 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 对比损失(Contrastive Loss) 函数是在 2006 年,由 R.Hadsell、S.Chopra、Y.LeCun 在论文《通过学习不变映射进行降维运算》[14] 中 
,提出的一种用来解决样本集中数据聚集过于密集,而导致的 退化解(Degenerate Solutions) 问题。 这种通过降维来寻找合适投影角度,来得到比较优秀的分离聚类的分类损失函数的想法,首次经过合理的论证,并进入广泛大众的视野。为后续 Triplet Loss、N-pair Loss 等,类似的通过分离特性来进行处理的损失函数,打下了基础。 对比损失中,输入的 yiy_iyi 指的是选取样本点 SiS_iSi 和某个类型标签的接近程度。 同理, predictioniprediction_ipredictioni 则是模型预测的该样本 SiS_iSi 距离指定类型标签的结果。 为什么将之前通用的样本的类型概率数据,转为距离描述呢?这是因为,对比损失是通过 确认半径(Margin) 来得到优化的。对比损失函数结果越小,越认为当前权重所对应训练结果越接近实际情况。而方法对于预测距离小于确认半径的数据,取用 max(0, m−Di)2max(0,\\ m - D_i)^2max(0, m−Di)2 拉高了损失函数的结果,达到淘汰分类的效果。 Contrastive Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include double contrastive_loss(double *y_true, double *y_pred, int size, double margin) { double sum = 0; for (int i = 0; i 运行验证可得到结果: The contrastive loss is 0.1250000 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_9.html":{"url":"Chapter_4/Language/cn/Docs_4_5_9.html","title":"4.5.9 分类项-三元损失(Triplet Loss)","keywords":"","body":"4.5.9 分类项-三元损失(Triplet Loss) 迭代公式: Dni=∣∣negativei−yi∣∣2Dpi=∣∣positivei −yi∣∣2Loss=1N∑i=1Nmax( 0, Dpi−Dni+m ) {\\displaystyle \\begin{aligned} Dn_i &= \\sqrt{|| negative_i - y_i||^2} \\\\ Dp_i &= \\sqrt{|| positive_i \\ - y_i||^2} \\\\ Loss &= \\frac{1}{N} \\sum_{i=1}^N \\max( \\ 0, \\ {Dp_i} - {Dn_i} + m \\ ) \\\\ \\end{aligned} } DniDpiLoss=√∣∣negativei−yi∣∣2=√∣∣positivei −yi∣∣2=N1i=1∑Nmax( 0, Dpi−Dni+m ) 图像: 图 4-33 Triplet Loss 函数图[15] 特性: 使具有相同标签的样本(positive)之间的距离,尽量接近 使具有不同标签的样本(negative)之间的距离,尽量远离 要求输入 3 个分类样本子集:相似正样本集、相反负样本集、原样本对照集,并行训练 以 mmm 项代表被认为相似的确认半径,Loss 最小则理论上 Dn->m, Dp->0 越接近样本情况,损失越小 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 三元损失(Triplet Loss) 函数来自于论文《FaceNet: A Unified Embedding for Face Recognition and Clustering》中 [15] ,提出的通过拆分 三元组(Triplet),选取正负样本与原样本进行差异化处理,来让预测值趋近于原样本而远离负样本的一种损失函数。 三元组(Triplet) 来自于输入分批的卷积神经网络(CNN)结果,我们需要将输入样本分为三类,在每一时代(Epoch)中都进行相同神经网络隐藏层权重(Wights)影响下的结果计算。累计 单次样本 的损失计算(Loss),以求得分批的损失函数(Cost Function)输出评分。 Triplet Loss 的使用 介于三元组损失提出最初目的,是为了进行人脸识别(FD [Face Detection]),我们因此取用人脸样本集举例。类似于人脸样本集,一般由 PPP 位不同人的 DDD 张该人不同脸的图片样本组成的,样本总量 P⋅DP \\cdot DP⋅D 大小的数据集。 以此为基础,三元组损失要求的三种样本分类子集分别是: 相似正样本集(Positives),由同人不同脸组成的 D−1D -1D−1 大小子集 相反负样本集(Negatives),由不同人不同脸组成的 (P−1)⋅(D−1)(P-1) \\cdot (D -1)(P−1)⋅(D−1) 大小子集 原样本对照集(Anchors),由不同人同脸(选一校订)组成的 PPP 大小子集 这三类子集,在数据分批后,会被分为相同批数并组合为一批数据,作为单次迭代输入数据,参与训练。我们仍然采用角标 [i][_i][i] 来表示分批,那么有: batch_size=(Di−1)+(Pi−1)(Di−1)+Pi=DiPi {batch\\_size} = (D_i-1) + (P_i-1)(D_i-1)+P_i = D_iP_i batch_size=(Di−1)+(Pi−1)(Di−1)+Pi=DiPi 则,在分批数据参与一次批计算后,最终会构成 batch_size{batch\\_size} batch_size 大小的一组 嵌入集(Embeddings),被我们用来计算损失函数(Loss)的实际处理对象。 最终,计算损失后的三元组,按照质量 来划分,可以分为三个类别: 易辨三元组(easy triplets),可以使得 loss 基本趋近于 0 的类型 难辩三元组(hard triplets),有 Dn 模糊三元组(semi-hard triplets),有 Dp 可见,如果构成的三元组一上来就是易辨三元组,那只能证明模型训练参数的启动配置,使模型陷入了过拟合。通常,我们希望每一时代(Epoch)被计算的三元组都具有一定的模糊特性,而方便权重更新。因此,模糊三元组(semi-hard triplets)才是迭代的选择。 那么怎么评估当前的三元组,是否是模糊三元组呢? 
其实很简单,通过当前正样本集所占有效样本的百分比,就能大致估算是否属于模糊类型。记正样本集百分比为 fraction_positive{fraction\\_positive}fraction_positive ,则有: fraction_positive=num_positivenum_available=count(loss>0)count(vector) {\\displaystyle \\begin{aligned} {fraction\\_positive} &= \\frac{num\\_positive}{num\\_available} \\\\ &= \\frac{count( loss > 0)}{count(vector)} \\\\ \\end{aligned} } fraction_positive=num_availablenum_positive=count(vector)count(loss>0) 我们一般取 fraction_positive>0.2{fraction\\_positive} > 0.2fraction_positive>0.2 认为是一次有效训练中的模糊三元组数据。 三元损失在对比损失的基础上更近一步,引入了正负样本概念,来使得分类预测结果更加聚集,且使分类间能够更加远离。本身计算并不算非常复杂,因此可以用在如人脸识别、车辆识别等模型的移动端迁移上。但是,三元损失只是在对比损失上引入正负概念,实际处理过程中,每次只能对比一个负样本而忽略了其他的非关联性。这样就很容易造成迭代结果陷入不稳定(在多个距离相近但实际不同的负样本间抖动),或者局部最优解。 Triplet Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include #include #include #define BATCH_SIZE 10 // Batch_size = Samples_of_Person x Data/Person #define VECTOR_SIZE 128 // Extract output layer Feature vector's dimissions #define DEVIDE_SAFE 1e-12 // protect when gridant at 0 will be to lage // Pairwise Distance Calculation void pairwise_distance(double embeddings[BATCH_SIZE][VECTOR_SIZE], double distances[BATCH_SIZE][BATCH_SIZE], bool squared) { for (int i = 0; i 0) { num_positive++; } if (current_mask > 0) { num_validate++; } } } } } // Calculate fraction of positive triplets *fraction_positives = (double)num_positive / ((double)num_validate + DEVIDE_SAFE); return triplet_cost / (double)(num_positive + DEVIDE_SAFE); } int main() { // Example input (fulfill to BATCH_SIZE x VECTOR_SIZE) // Use Random labels and embeddings for testing // Use three classes as different type, to generate labels int type = 3; int labels[BATCH_SIZE]; double embeddings[BATCH_SIZE][VECTOR_SIZE]; for (int i = 0; i 运行验证可得到结果: The triplet loss is 0.270146 with positives 0.668605 虽然看上去比较复杂,然而在实际执行过程中, 一个时代(Epoch)只会执行一次三元组损失的计算,而空间复杂度上,仅额外增加了距离矩阵和遮罩的共 O(batch_size2+batch_size3)O({batch\\_size}^2 + {batch\\_size}^3)O(batch_size2+batch_size3) 的空间大小。是完全可以接受的。 代码中,我们所使用的 遮罩(Mask)矩阵,实际上相当于将原论文中对三元组的三分类计算,用遮罩来代替了有效处理流程。这样做可行的基本原因,在于距离矩阵本身,在以整体分批不做区别输入的情况下,仍旧可以用全体分批包含样本的欧式距离,构成 batch_size×batch_size{batch\\_size} \\times {batch\\_size}batch_size×batch_size 大小的差异矩阵,记为 MdistM_{dist}Mdist 。以人脸检测为例,同人物同一张样本脸的情况,就相当于 MdistM_{dist}Mdist 的对角线位置。而对角线两侧的数据,则涵盖了同人不同脸、不同人的两种类型。 如此,计算所得 MdistM_{dist}Mdist 实际就包含和三元组的三分类计算中,不同分类的 所有距离类型。与此同时,最终损失函数的计算,是要叠加所有分类独立计算的单次损失的。进而,让我们有机会通过遮罩矩阵就能直接规划不同分类情况,应该取用哪一个距离值,来直接获取当次损失值叠加。如果记遮罩矩阵为 MmaskM_{mask}Mmask ,那么三元损失有工程公式: Loss=Mdist⋅Mmask {\\displaystyle \\begin{aligned} Loss &= M_{dist} \\cdot M_{mask} \\\\ \\end{aligned} } Loss=Mdist⋅Mmask 而既然是矩阵乘法,除了本书例子中采用的纯 C 语言实现外,也可以通过 GPU 算子来实现进一步加速。类似于 CUDA 算子,或部分成熟的推理引擎(如 Keras、py-Touch 等)就是这样处理的。 从这个例子就能看出, 有效的工程化能够极大提升算法的训练效率,减小耗时。 这即是工程师在此处的关键作用。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_10.html":{"url":"Chapter_4/Language/cn/Docs_4_5_10.html","title":"4.5.10 分类项-对组排异损失(N-Pair Loss)","keywords":"","body":"4.5.10 分类项-对组排异损失(N-Pair Loss) 迭代公式: Dni=∣∣negativei−yi∣∣2Dpi=∣∣positivei −yi∣∣2Loss=1N∑i=1Nlog[m+∑j≠iexp(Dpi−Dni)]=1N∑i=1N∑j≠ilog[m+exp(Dpi−Dni)] {\\displaystyle \\begin{aligned} Dn_i &= \\sqrt{|| negative_i - y_i||^2} \\\\ Dp_i &= \\sqrt{|| positive_i \\ - y_i||^2} \\\\ Loss &= \\frac{1}{N} \\sum_{i=1}^{N} log[m+ \\sum_{j\\neq i} exp( Dp_i - Dn_i)] \\\\ &= \\frac{1}{N} \\sum_{i=1}^{N} \\sum_{j\\neq i} log[m+ exp(Dp_i - Dn_i)] \\\\ \\end{aligned} } DniDpiLoss=√∣∣negativei−yi∣∣2=√∣∣positivei 
−yi∣∣2=N1i=1∑Nlog[m+j≠i∑exp(Dpi−Dni)]=N1i=1∑Nj≠i∑log[m+exp(Dpi−Dni)] 图像: 图 4-34 N-Pair Loss 函数图[16] 特性: 使具有相同标签的样本(positive)之间的距离,尽量接近 使具有不同标签的样本(negative)之间的距离,尽量远离 输入 N+1 个子集:1 个相似正样本集、N-1 个相反负样本集、1 个原样本对照集 同三元组损失一样,输入样本集,都在同权重下并行训练 以 mmm 项代表二维平面上多角度排斥最小力矩,一般 m=1m = 1m=1 防止样本过近重合 越接近目标,损失越小 光滑(smooth),适合优化算法 非指数计算,算力消耗相对较低 对组排异损失(N-Pair Loss) 的提出,旨在解决 对比损失(Contrastive Loss) 和 三元组损失(Triplet Loss) 在分类上的局限性问题 [16] 。这两者,从物理学受力角度分析权重促成的样本聚集,会发现都是一维运动过程。 N-Pair Loss 的使用 N-Pair Loss 在每一次计算中,采用了 同样本集(Positive Set) 和 负样本类集(Negative Classes Set) 的概念。类集中的每一个负样本,都会对预测结果产生排斥,而单一的正样本则会对预测结果产生吸引。这样就能够更好地实现同类型聚集效果。一个比较适当的例子,就像一滴油散到了水中,最终会被水排斥而聚成一个集合的油滴。 实际上,基克·索恩(Kihyuk Sohn) 在对组排异损失的推导过程中,详细描述了从 Triplet Loss -> (N+1)-Tuplet Loss -> N-Pair Loss 的完整过程。其中,(N+1)-Tuplet Loss 可认为是 N-Pair Loss 的过渡。 文中指出,当 N = 2 时,(N+1)-Tuplet Loss 可认为近似于 Triplet Loss。以此为起点,我们很快便会发现,对组排异损失 相当于将 三元组损失中 一组 相似正样本集(Positives) 、 一组 相反负样本集(Negatives) 、 一组 原样本对照集(Anchors) 总共三组之间,两两样本集间样本的距离均值计算,改换成了 一组 相似正样本集(Positives) 、 多组 相反负样本集(Negatives) 、 一组 原样本对照集(Anchors) 总共 N+1 组之间的距离计算。 即,相较于三元组损失,进一步细化了 相反负样本集(Negatives)内,不同标签的对正样本集的驱动作用。 同样以人脸识别(FD [Face Detection])为例,由 PPP 位不同人的 DDD 张该人不同脸的图片样本组成的,样本总量 P⋅DP \\cdot DP⋅D 大小的数据集。 对组排异损失要求,也是三种样本分类子集分类: 相似正样本集(Positives),由同人不同脸组成的 D−1D -1D−1 大小子集 相反负样本集(Negatives),由不同人不同脸组成的 P−1P - 1P−1 组各 D−1D - 1D−1 大小子集 原样本对照集(Anchors),由不同人同脸(选一校订)组成的 PPP 大小子集 这三类子集,在数据分批后,会被分为相同批数并组合为一批数据,作为单次迭代输入数据,参与训练。我们仍然采用角标 [i][_i][i] 来表示分批,那么有: batch_size=(Di−1)+∑Pi−1(Di−1)+Pi=DiPi {batch\\_size} = (D_i-1) + \\sum^{P_i-1}(D_i-1)+P_i = D_iP_i batch_size=(Di−1)+∑Pi−1(Di−1)+Pi=DiPi 则,在分批数据参与一次批计算后,最终还是会构成同三元组损失类似的 batch_size{batch\\_size} batch_size 大小的一组嵌入集(Embeddings),被我们用来计算损失函数(Loss)的实际处理对象。 因此,在工程上,我们 只需要更换单次损失的计算公式,就能从三元组损失的迁移至对组排异损失的计算过程。 N-Pair Loss 算子化 利用 C 语言实现对算子的封装,有: #include #include #include #include #define BATCH_SIZE 10 // Batch_size = Samples_of_Person x Data/Person #define VECTOR_SIZE 128 // Extract output layer Feature vector's dimissions #define DEVIDE_SAFE 1e-12 // protect when gridant at 0 will be to lage // Pairwise Distance Calculation void pairwise_distance(double embeddings[BATCH_SIZE][VECTOR_SIZE], double distances[BATCH_SIZE][BATCH_SIZE], bool squared) { for (int i = 0; i margin) { num_positive++; } if (current_mask > 0) { num_validate++; } } } n_pair_cost += log(margin + n_pair_loss); } } // Calculate fraction of positive n_pairs *fraction_positives = (double)num_positive / ((double)num_validate + DEVIDE_SAFE); return n_pair_cost / (double)(num_positive + DEVIDE_SAFE); } int main() { // Example input (fulfill to BATCH_SIZE x VECTOR_SIZE) // Use Random labels and embeddings for testing // Use three classes as different type, to generate labels int type = 3; int labels[BATCH_SIZE]; double embeddings[BATCH_SIZE][VECTOR_SIZE]; for (int i = 0; i 运行验证可得到结果: The n_pair loss is 0.408567 with positives 0.377907 对组排异损失从样本宏观角度,统一了正负样本概念。指明了,非当前指向类的负样本,可以被认为是指向负样本类型情况的正样本。因此,对于 N 分类处理过程,整个运算损失计算时间复杂度被化简为仅有 2N。相当的高效。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_11.html":{"url":"Chapter_4/Language/cn/Docs_4_5_11.html","title":"4.5.11 正则项-L1 惩罚","keywords":"","body":"4.5.11 正则项-L1 惩罚 迭代公式: L1=∣w1∣+∣w2∣+∣w3∣+⋯+∣wn∣ {\\displaystyle \\begin{aligned} L_1 = |w_1|+|w_2|+|w_3|+ \\cdots +|w_n| \\\\ \\end{aligned} } L1=∣w1∣+∣w2∣+∣w3∣+⋯+∣wn∣ 特性: 根据参数权重绝对值之和,来惩罚权重 当权重 > 0 时,指定权重偏导数为 1,所有权重变化线性统一,因此无法区分主次 当权重 ≤ 0 时,使用 L-1 的参数迭代在 0 处不具备连续性,即 ≤ 0 的值都会为 0 可以使不相关或几乎不相关权重归 0,从模型中移除不相关特征 线性方便计算 L-1 
惩罚项(L1L_1L1 Regularity) 由于其特性,常被用于裁剪参数数量,缩减模型宽度。从另一种角度来理解,可以认为 L-1 的思想其实和 Maxout 激活函数的思想有些类似。都是通过线性关系,来整合实际特征曲线。只不过 L-1 是从模型复杂度的角度,Maxout 是从非线性特征的角度。 L-1 惩罚项被证明,对于稀疏性模型优化非常有效。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_5_12.html":{"url":"Chapter_4/Language/cn/Docs_4_5_12.html","title":"4.5.12 正则项-L2 惩罚","keywords":"","body":"4.5.12 正则项-L2 惩罚 迭代公式: L2=w12+w22+w32+⋯+wn2 {\\displaystyle \\begin{aligned} L_2 = {w_1}^2+{w_2}^2+{w_3}^2+ \\cdots +{w_n}^2 \\\\ \\end{aligned} } L2=w12+w22+w32+⋯+wn2 特性: 根据参数权重平方和,来惩罚权重 L-2 导数为 2x,所有权重变化非线性,可以以此区分参数主次(模型层面) 无法使不相关或几乎不相关权重归 0,无法从模型中移除不相关特征 平滑连续,权重变化自然 平方计算,非指数,可接受 L-2 惩罚项(L2L_2L2 Regularity) 最大的特点就是平滑(smooth)。这决定了在实际运算过程中,L-2 惩罚项只有办法让权重趋近于 0 ,而无法彻底移出对应参数。但是这种特点也使得,L-2 惩罚项可以通过非线性权重,调整模型相关参数在模型中的重要程度。 因此,L-2 惩罚项也被称为 权重衰减(Weight Decay)。并不能消除不相关特征,但能较好的保证特征和结果的因果关系。 至此,损失函数的三类组成部分认识完毕。其实我们只做了粗浅的介绍,真正实用中,还有大量的细分和类型设计。除了少数我们介绍的经典如 MAE、MSE 等,每一个新的损失函数,都可能意味着有自己独特的配套神经网络结构。 究其原因,还是在于损失函数作用的范围,在于衡量整个网络的迭代,这决定了它不太可能会脱离而存在。使用中,需要小心。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_6.html":{"url":"Chapter_4/Language/cn/Docs_4_6.html","title":"4.6 常用最优化算法(Optimizer Operator)","keywords":"","body":"4.6 优化算法/优化器(Optimizer) 优化算法(Optimize Function) 是一种更快实现权重更新的处理办法。常用优化算法能够根据梯度方向和一些其他的关键信息,对当前权重作出更有效率的更新,降低训练所需要的迭代次数,同时提高模型鲁棒性。根据其相对固定的作用位置,在工程中,部署优化算法的 关键单一组件,被称为 优化器(Optimizer)。 图 4-35 优化器作用阶段(图中蓝色线条)示意图 优化器起作用于 隐藏层(Hidden Layer) 的训练中特征权重的加速迭代,所以生效阶段在每一 时代(Epoch) 后,完成 损失函数(Cost) 计算的 结算位置。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_6_1.html":{"url":"Chapter_4/Language/cn/Docs_4_6_1.html","title":"4.6.1 基础优化算法","keywords":"","body":"4.6.1 经典优化算法(Classic Optimize Function) 常用的经典优化算法,主要有三种,分别是:随即梯度下降法(SGD)、批量梯度下降法(BGD)、小批梯度下降法(MBGD)。 随机梯度下降法(SGD [Stochastic Gradient Descent]) 迭代公式: θt=θt−1−η˙∇θJ(θ;xi;yi) {\\displaystyle \\begin{aligned} \\theta_t = \\theta_{t-1} - \\eta \\dot{} \\nabla_\\theta J(\\theta ; x_i; y_i) \\\\ \\end{aligned} } θt=θt−1−η˙∇θJ(θ;xi;yi) 每次更新时,只对当前对应批次的被选用样本数据,进行 损失函数(Loss) 计算,一次计算一次更新。因为计算少,所以速度快,并且可以实现实时计算,支持动态的样本添加。 批量梯度下降法(BGD [Batch Gradient Descent]) 迭代公式: θt=θt−1−η˙∇θJ(θ) {\\displaystyle \\begin{aligned} \\theta_t = \\theta_{t-1} - \\eta \\dot{} \\nabla_\\theta J(\\theta) \\\\ \\end{aligned} } θt=θt−1−η˙∇θJ(θ) 每次迭代需要计算当前批次整个数据集 损失函数(Loss) ,更新梯度。所以每次计算的耗时比较高。对于大批量数据来说,比较难以处理,更新不实时。简单的说,就是粒度太大。 小批梯度下降法(MBGD [Mini-Batch Gradient Descent]) 迭代公式: θt=θt−1−η˙∇θJ(θ;x(i:i+n);y(i:i+n)) {\\displaystyle \\begin{aligned} \\theta_t = \\theta_{t-1} - \\eta \\dot{} \\nabla_\\theta J(\\theta ; x_{(i:i+n)}; y_{(i:i+n)}) \\\\ \\end{aligned} } θt=θt−1−η˙∇θJ(θ;x(i:i+n);y(i:i+n)) 针对 BGD 每次都需要对当前批次数据集的问题,MBGD 进行了改良,每一次更新,取当前批次中的一组子样本集合来进行 损失函数(Loss)计算,降低参数更新方差计算,收敛更稳定,并且因为采用子批次构成矩阵运算,更加有优势。 基础优化算法比较 三个经典算法各有优劣,基本可以以下表来判断。 粒度:小 SGDMBGDBGD大速度:慢 BGDMBGDSGD快收敛:低 SGDMBGDBGD高过拟合:低 SGDMBGDBGD高 {\\displaystyle \\begin{aligned} \\text{粒度:} &\\text{小} \\ &SGD 粒度:速度:收敛:过拟合:小 慢 低 低 SGDMBGDBGDBGDMBGDSGDSGDMBGDBGDSGDMBGDBGD大快高高 因为 SGD 每次处理数据单取一个样本点,相比于 MBGD 的当前批次全数据取子集,和 BGD 当前批次扫描全部数据,SGD 更新权重每次计算出的梯度变化幅度相对都会比较大一些,所以不容易在梯度更新上陷入局部最优解。这也是 SGD 较其余两种基本算法的最大优势。建议没有特殊要求,而需要在这三种算法中做选择的话,优先使用 SGD。 当然,他们都有同样的缺点,那就是: 仍存在易陷入局部最小值或鞍点震荡问题,以 BGD 为最 仍存在无法根据不同参数重要程度进行变速权重更新问题,即全权重更新速度统一问题 不过,既然有了疑问,那自然有解决办法。 Copyright © Since 2021 李述博 
(Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_6_2.html":{"url":"Chapter_4/Language/cn/Docs_4_6_2.html","title":"4.6.2 优化算法的优化-应对震荡","keywords":"","body":"4.6.2 优化算法的优化-应对震荡 常用的减震算法,比较容易想到的就是利用 阻尼运动特性 和 加速度,即 动量(Momentum),来减小离散瞬时值的影响。因此,先贤们首先想到的就是梯度迭代动量化。 标准动量(Standard Momentum) 迭代公式: vt=γvt−1+η∇θJ(θ)θt=θt−1−vt {\\displaystyle \\begin{aligned} v_t &= \\gamma v_{t-1} + \\eta \\nabla_\\theta J(\\theta) \\\\ \\theta_t &= \\theta_{t-1} - v_t \\\\ \\end{aligned} } vtθt=γvt−1+η∇θJ(θ)=θt−1−vt 标准动量(Standard Momentum) 是在原有计算权重迭代基础上,通过引入上一次变化值情况,来强化梯度延方向变化趋势。即 SGD/BGD/MBGD + Momentum。 这样做可以使得梯度方向不变的维度,权重迭代速率加快,而方向发生改变的维度,更新速度变慢。并且由于速度此时变化是和 之前状态 有关系的,就不会发生“指向突变”的情况,有助于减小震荡和跃出鞍点。 超参数 γ\\gammaγ 被称为 阻尼系数,或遗忘因子。一般取 γ=0.9\\gamma = 0.9γ=0.9 ,表示经验重要程度。 然而,单纯的动量处理却也存在其他问题。最明显的就是,因为动量叠加,没有修正逻辑的纯动量叠加,会导致每一次的轻微误差也随着时间一起叠加,导致当前时刻 ttt 时,实际梯度变化速率要远大于实际值,阻尼因子设定过小和初速度过大都可能会久久不能收敛。所以,在动量化的基础上,我们更希望能够有修正方法来减小误差的累积。 幸运的是 Nesterov Y. 在1983年提出的 NAG 很好的解决了这个问题。 涅斯捷罗夫梯度加速(NAG [Nesterov Accelerated Gradient]) 迭代公式: vt=γvt−1+η∇θJ(θ−γvt−1)θt=θt−1−vt {\\displaystyle \\begin{aligned} v_t &= \\gamma v_{t-1} + \\eta \\nabla_\\theta J(\\theta-\\gamma v_{t-1}) \\\\ \\theta_t &= \\theta_{t-1} - v_t \\\\ \\end{aligned} } vtθt=γvt−1+η∇θJ(θ−γvt−1)=θt−1−vt 涅斯捷罗夫梯度加速(NAG [Nesterov Accelerated Gradient]) 较标准动量化处理来说,用来计算当前梯度方向的时候,计算 损失函数(Loss) 采用的是基于当前上一次梯度变化值预测的,当前状态下,下一次可能的维度权重。以这个预测的维度权重来计算当前位置的方向梯度变化,来修正动量化算法。这样,当我们计算当前 ttt 时梯度变化速度的时候,就可以从一定程度上避免掉误差堆积导致的问题。 这里借用一下 Hinton 课程 [17] 中的图来说明效果: 图 4-36 NAG 加速作用过程示意图[17] 可以看出,蓝色(blue vector)是 标准动量 的过程,会先计算当前的梯度,然后在更新后的累积梯度后会有一个大的跳跃。绿色是 NAG 会先在前一步 棕色(brown vector) 的累积梯度上有一个大的跳跃,然后衡量梯度做 红色(red vector) 修正偏移。 这种预期的更新可以避免我们走的太快。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_6_3.html":{"url":"Chapter_4/Language/cn/Docs_4_6_3.html","title":"4.6.3 优化算法的优化-应对重点强(弱)化更新","keywords":"","body":"4.6.3 优化算法的优化-应对重点强(弱)化更新 另一个问题,就是针对性处理对结果影响更大/更小的权重,让重要的迭代的迭代更谨慎,而不重要的获得更快衰减。以保证优势权重,剔除不必要影响。 自适应梯度算法(AdaGrad/AGA [Adaptive Gradient Algorithm]) 迭代公式: gt,i=∇θJ(θi)Gt,i=∑τ=1tgτ,i2θt+1,i=θt,i−ηGt,i+ϵ˙gt,i {\\displaystyle \\begin{aligned} g_{t,i} &= \\nabla_\\theta J(\\theta_i) \\\\ G_{t,i} &= \\sum _{\\tau=1} ^{t} g_{\\tau, i}^2 \\\\ \\theta_{t+1,i} &= \\theta_{t,i} - \\frac{\\eta}{\\sqrt{G_{t,i}+\\epsilon}} \\dot{} g_{t,i} \\\\ \\end{aligned} } gt,iGt,iθt+1,i=∇θJ(θi)=τ=1∑tgτ,i2=θt,i−√Gt,i+ϵη˙gt,i 以 Gt,iG_{t,i}Gt,i 为当前索引为 [i][_i][i] 的参数,所对应从 111 到时刻 ttt 的 所有梯度平方和。 自适应梯度算法(AdaGrad/AGA [Adaptive Gradient Algorithm]) 是将 SGD 的统一学习速率修改为,有一定预测成分在内的,参数对应独立更新的处理方式。这样处理的好处是,每一个不同参数都会根据当前自身变化和总模型结果关系的差异,独立的进行变更,变化大的会更快,变化小的会更慢。减少了手动调节学习速率的次数。 缺点也比较明显: 前期需手工设置一个全局的初始学习率,过大值会导致惩罚变化不明显,过小会提前停止 中后期自适应分母会不断累积导致学习速率不断收缩,趋于非常小从而可能提前结束训练 因此,我们有了改进版 RMSprop 法。 均方根传播法(RMSprop) 迭代公式: gt,i=∇θJ(θi),E[g2]t,i=γE[g2]t−1,i+(1−γ)gt,i2Δθt,i=−ηE[g2]t,i+ϵgt,i=−ηRMS[g]t,igt,iθt+1,i=θt,i+Δθt,i=θt,i−ηE[g2]t,i+ϵgt,i {\\displaystyle \\begin{aligned} g_{t,i} &= \\nabla_\\theta J(\\theta_i) \\quad , \\quad E[g^2]_{t,i} = \\gamma E[g^2]_{t-1,i} + (1-\\gamma)g_{t,i}^2 \\\\ \\Delta \\theta_{t,i} &= - \\frac{\\eta}{\\sqrt{E[g^2]_{t,i}+\\epsilon}}g_{t,i} = - \\frac{\\eta}{RMS[g]_{t,i}}g_{t,i} \\\\ \\theta_{t+1,i} &= \\theta_{t,i} + \\Delta \\theta_{t,i} =\\theta_{t,i} - \\frac{\\eta}{\\sqrt{E[g^2]_{t,i}+\\epsilon}}g_{t,i} \\\\ \\end{aligned} } 
gt,iΔθt,iθt+1,i=∇θJ(θi),E[g2]t,i=γE[g2]t−1,i+(1−γ)gt,i2=−√E[g2]t,i+ϵηgt,i=−RMS[g]t,iηgt,i=θt,i+Δθt,i=θt,i−√E[g2]t,i+ϵηgt,i 以 E[g2]t,iE[g^2]_{t,i}E[g2]t,i 为当前索引为 [i][_i][i] 的参数,所对应从 111 到时刻 ttt 的所有梯度均方和,有: RMS[g]t,i=E[g2]t,i+ϵ RMS[g]_{t,i}=\\sqrt{E[g^2]_{t,i}+\\epsilon} RMS[g]t,i=√E[g2]t,i+ϵ 因为学习速率变化采用的是 梯度均方和(RMS)。所以,某一维度变化较大时,RMS 较大;变化较小时,RMS 较小。这样就保证了各个维度的变化速率是基于同一个变化量级的,同时也避免了 AdaGrad 中后期的学习速率极速下降,过早停止的问题。而且,因为 RMS 采用近似算法,极大降低了内存消耗(毕竟不需要记录每一次的迭代值了) 不过,RMSprop 可以看出,仍然依赖于全局学习速率 的设定,那么是否能够继续改进不依赖呢? 如果对比两个方法的过程中单位差异,或许能找到答案。 AdaGrad 和 RMSprop 单位问题 我们知道,很多单位是有实际价值的。比如是米(meter),天(day)等,就有具体物理含义。所以,对于迭代使用的加速度 Δθt\\Delta\\theta_tΔθt ,一个很自然的期望是,的单位和是保持一致的。 但是: Δx∝g∝∂f∂x∝1x \\Delta x \\propto g \\propto \\frac{\\partial f}{\\partial x} \\propto \\frac{1}{x} Δx∝g∝∂x∂f∝x1 有 Δx\\Delta xΔx 和 ggg 为同单位,而与 xxx 的单位互逆。即 x−1x^{-1}x−1 表示的瞬时变化才与 Δx\\Delta xΔx 和 ggg 为同单位。 也就是说,对于 AdaGrad 和 RMSprop 来说,Δθt\\Delta\\theta_tΔθt 权重变化量最终得到的结果,其单位和 θt\\theta_tθt 单位并不一致,而是对应时间单位的倒数。而我们要的 权重 θt\\theta_tθt 是时间单位的。 如果我们用牛顿法使 Δx=Ht−1gt\\Delta x =H_t^{-1 }g_tΔx=Ht−1gt , HtH_tHt 为 Hessian 矩阵,即所有参数指定 ttt 时刻二阶偏导数方阵,有: Δx∝H−1g∝∂f∂x∂2f∂2x∝1x \\Delta x \\propto H^{-1 }g \\propto \\frac{\\tfrac{\\partial f}{\\partial x}}{\\tfrac{\\partial^2 f}{\\partial^2 x}} \\propto \\frac{1}{x} Δx∝H−1g∝∂2x∂2f∂x∂f∝x1 上述变化后,便能将 xxx 、 Δx\\Delta xΔx 和 ggg 单位一致化。但是 Hessian 矩阵计算量太大,我们没办法直接使用。所以,我们还需要模拟退火牛顿法,有: Δx=∂f∂x∂2f∂2x⇒Δxt=−∑τ=1t−1ΔxτE[g2]t+ϵ \\Delta x = \\frac{\\frac{\\partial f}{\\partial x}}{\\frac{\\partial ^2 f}{\\partial ^2 x}} \\Rightarrow \\Delta x_t = -\\frac{\\sqrt{\\sum{ _{\\tau=1} ^{t-1}} \\Delta x_\\tau} }{\\sqrt{E[g^2]_t+\\epsilon}} Δx=∂2x∂2f∂x∂f⇒Δxt=−√E[g2]t+ϵ√∑τ=1t−1Δxτ 上式在 ∞\\infty∞ 位置近似等价。 如此,既可以保证单位,又能简化运算。同时我们发现,Δθt\\Delta\\theta_tΔθt 的更新在这种拟合下,后续迭代不再依赖于全局学习速率 η\\etaη 。 于是,便有了 AdaDelta 算法。 自适应梯度算法改进版(AdaDelta/ADGA [Adaptive Delta Gradient Algorithm]) 迭代公式: gt,i=∇θJ(θi),E[g2]t,i=γE[g2]t−1,i+(1−γ)gt,i2Δθt,i=−RMS[Δθ]t−1,iRMS[g]t,igt,iθt+1,i=θt,i+Δθt,i=θt,i−RMS[Δθ]t−1,iRMS[g]t,igt,i {\\displaystyle \\begin{aligned} g_{t,i} &= \\nabla_\\theta J(\\theta_i) \\quad , \\quad E[g^2]_{t,i} = \\gamma E[g^2]_{t-1,i} + (1-\\gamma)g_{t,i}^2 \\\\ \\Delta \\theta_{t,i} &= - \\frac{RMS[\\Delta \\theta]_{t-1,i}}{RMS[g]_{t,i}}g_{t,i} \\\\ \\theta_{t+1,i} &= \\theta_{t,i} + \\Delta \\theta_{t,i} =\\theta_{t,i} - \\frac{RMS[\\Delta \\theta]_{t-1,i}}{RMS[g]_{t,i}}g_{t,i} \\\\ \\end{aligned} } gt,iΔθt,iθt+1,i=∇θJ(θi),E[g2]t,i=γE[g2]t−1,i+(1−γ)gt,i2=−RMS[g]t,iRMS[Δθ]t−1,igt,i=θt,i+Δθt,i=θt,i−RMS[g]t,iRMS[Δθ]t−1,igt,i 以 E[g2]t,iE[g^2]_{t,i}E[g2]t,i 为当前索引为 [i][_i][i] 的参数,所对应从 111 到时刻 ttt 的所有梯度均方和,有: RMS[g]t,i=E[g2]t,i+ϵ RMS[g]_{t,i}=\\sqrt{E[g^2]_{t,i}+\\epsilon} RMS[g]t,i=√E[g2]t,i+ϵ 相较于前两种,AdaDelta 具有优势: 结合了 AdaGrad 善于处理稀疏梯度和 RMSprop 善于处理非平稳目标的优点 不需要依赖于 全局学习速率的设置 是一种相对理想的,针对强弱重点的梯度优化算法了。 目前,我们所有的处理方式都是秩针对性的解决单一问题。那么有没有什么方法,可以结合两类的优点呢?既解决鞍点,又能自适应学习速率呢? 
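在回答这个问题之前,不妨先用一段 C 语言示意,对照上述三种自适应算法对单个参数的一步更新;梯度 g 由调用方给出,η、γ、ε 等超参数均取示意性的常见经验值,仅用于体会三者在累积量处理上的差异:

#include <stdio.h>
#include <math.h>

#define EPS 1e-8   /* 防除零的小常量(示意取值) */

/* AdaGrad:G 累积历史梯度平方和 */
double adagrad_step(double theta, double g, double *G, double eta) {
    *G += g * g;
    return theta - eta / sqrt(*G + EPS) * g;
}

/* RMSprop:E[g^2] 改为指数滑动平均,避免学习速率过早收缩 */
double rmsprop_step(double theta, double g, double *Eg2, double eta, double gamma) {
    *Eg2 = gamma * (*Eg2) + (1.0 - gamma) * g * g;
    return theta - eta / sqrt(*Eg2 + EPS) * g;
}

/* AdaDelta:额外维护更新量平方的滑动平均 E[dx^2],从而不再依赖全局学习速率 */
double adadelta_step(double theta, double g, double *Eg2, double *Edx2, double gamma) {
    *Eg2 = gamma * (*Eg2) + (1.0 - gamma) * g * g;
    double dx = -sqrt(*Edx2 + EPS) / sqrt(*Eg2 + EPS) * g;
    *Edx2 = gamma * (*Edx2) + (1.0 - gamma) * dx * dx;
    return theta + dx;
}

int main(void) {
    double g = 0.3, theta = 1.0;
    double G = 0.0, Eg2_rms = 0.0, Eg2_ada = 0.0, Edx2 = 0.0;
    printf("AdaGrad : %f\n", adagrad_step(theta, g, &G, 0.01));
    printf("RMSprop : %f\n", rmsprop_step(theta, g, &Eg2_rms, 0.01, 0.9));
    printf("AdaDelta: %f\n", adadelta_step(theta, g, &Eg2_ada, &Edx2, 0.95));
    return 0;
}

回到上面的问题: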
当然有,那就是 Adam 自适应实时评估算法。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_6_4.html":{"url":"Chapter_4/Language/cn/Docs_4_6_4.html","title":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation])","keywords":"","body":"4.6.4 自适应实时评估算法(Adam [Adaptive Moment Estimation]) 自适应实时评估算法(Adam [Adaptive Moment Estimation]),相当于RMSprop 和 Momentum 结合的一种算法,标准Adam 可以认为是 一阶AdaDelta 的动量改进版。 迭代公式: gt,i=∇θJ(θi)mt,i=β1mt−1,i+(1−β1)gt,ivt,i=β2 vt−1,i+(1−β2)gt,i2Δθt,i=−ηv^t,i+ϵm^t,iθt+1,i=θt,i+Δθt,i=θt,i−ηv^t,i+ϵm^t,i {\\displaystyle \\begin{aligned} g_{t,i} &= \\nabla_\\theta J(\\theta_i) \\\\ m_{t,i} &= \\beta_1 m_{t-1,i} + (1-\\beta_1) g_{t,i} \\\\ v_{t,i} &= \\beta_2 \\ v_{t-1,i} + (1-\\beta_2) g_{t,i}^2 \\\\ \\Delta \\theta_{t,i} &= - \\frac{\\eta}{\\sqrt{\\hat{v}_{t,i}}+\\epsilon}\\hat{m}_{t,i} \\\\ \\theta_{t+1, i} &= \\theta_{t,i} +\\Delta \\theta_{t,i} =\\theta_{t,i} - \\frac{\\eta}{\\sqrt{\\hat{v}_{t,i}}+\\epsilon}\\hat{m}_{t,i} \\\\ \\end{aligned} } gt,imt,ivt,iΔθt,iθt+1,i=∇θJ(θi)=β1mt−1,i+(1−β1)gt,i=β2 vt−1,i+(1−β2)gt,i2=−√v^t,i+ϵηm^t,i=θt,i+Δθt,i=θt,i−√v^t,i+ϵηm^t,i 其中 m^t\\hat{m}_tm^t 、 v^t\\hat{v}_tv^t 是我们为了防止 mmm 、 vvv 被初始化时为 000 导致向 000 偏移而做的 偏差校正值,有: m^t=mt1−β1v^t=vt1−β2 {\\displaystyle \\begin{aligned} \\hat{m}_t &= \\frac{m_t}{1-\\beta_1} \\\\ \\hat{v}_t &= \\frac{v_t}{1-\\beta_2} \\\\ \\end{aligned} } m^tv^t=1−β1mt=1−β2vt 取 经验系数 β1\\beta_1β1 、 β1\\beta_1β1 ,Hinton建议 β1=0.9\\beta_1 = 0.9β1=0.9 ,β2=0.999\\beta_2 = 0.999β2=0.999 取 η\\etaη 防爆因子,建议 ϵ=10e-8\\epsilon = \\text{10e-8}ϵ=10e-8 避免干扰运算 Adam 很好的结合了前辈们的各种优化处理手段,成为了集大成之优化函数。因此,Adam是被经常使用的,现代主流优化函数之一。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_6_5.html":{"url":"Chapter_4/Language/cn/Docs_4_6_5.html","title":"4.6.5 优化算法对比与使用建议","keywords":"","body":"4.6.5 优化算法对比与使用建议 这里引用一下特斯拉人工智能主管 安德烈·卡尔帕蒂(Andrej Karpathy) 的 在线 Demo(使用的是 pytouch) ,来做一下演示。 我们需要将脚本改成如下(增加 Adam): // lets use an example fully-connected 2-layer ReLU net var layer_defs = []; layer_defs.push({type:'input', out_sx:24, out_sy:24, out_depth:1}); layer_defs.push({type:'fc', num_neurons:20, activation:'relu'}); layer_defs.push({type:'fc', num_neurons:20, activation:'relu'}); layer_defs.push({type:'softmax', num_classes:10}); // below fill out the trainer specs you wish to evaluate, and give them names for legend var LR = 0.01; // learning rate var BS = 8; // batch size var L2 = 0.001; // L2 weight decay nets = []; trainer_defs = []; trainer_defs.push({learning_rate:10*LR, method: 'sgd', momentum: 0.0, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:LR, method: 'sgd', momentum: 0.9, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:LR, method: 'nesterov', momentum: 0.9, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:LR, method: 'adagrad', eps: 1e-6, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:1.0, method: 'adadelta', eps: 1e-6, ro:0.95, batch_size:BS, l2_decay:L2}); trainer_defs.push({learning_rate:LR, method: 'adam', eps: 1e-6, betas:[0.9, 0.999], batch_size:BS, l2_decay:L2}); // names for all trainers above legend = ['sgd', 'sgd+momentum', 'Nesterov', 'AdaGrad', 'AdaDelta', 'Adam']; 在运行一小段时间后(大概 11 k 经处理样本左右),有如下的结果: 感兴趣的读者,可以自行前往地址: https://cs.stanford.edu/people/karpathy/convnetjs/demo/trainers.html 观看更为直观的展示。 通过对比,我们也发现了问题。针对震荡优化的几个算法,在速度上不太有优势;而针对强弱重点的算法,又不是太稳定。 但 Adam 综合表现始终良好,证明了其优秀的可用性。 
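作为对 4.6.4 节 Adam 迭代公式的工程化示意,下面给出一段基于 NumPy 的最简单步更新草图。需要说明的是,这只是原理演示:函数名 adam_step 以及 params、grads 等变量命名均为示例假设;偏差校正按原始 Adam 论文的 β^t 形式书写(正文中的简化式相当于首个时代的情形);实际工程中请直接使用框架内置的优化器实现:

import numpy as np

def adam_step(params, grads, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    # 一阶动量 m 与二阶动量 v,对应 4.6.4 中的 m_t 与 v_t
    m = beta1 * m + (1 - beta1) * grads
    v = beta2 * v + (1 - beta2) * grads ** 2
    # 偏差校正(此处采用原始论文的 beta**t 形式)
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    # 权重更新:theta_{t+1} = theta_t - lr * m_hat / (sqrt(v_hat) + eps)
    params = params - lr * m_hat / (np.sqrt(v_hat) + eps)
    return params, m, v

# 用法示意:最小化 sum(params**2),其梯度为 2*params
rng = np.random.default_rng(0)
params = rng.standard_normal(10)
m, v = np.zeros_like(params), np.zeros_like(params)
for t in range(1, 101):
    grads = 2 * params
    params, m, v = adam_step(params, grads, m, v, t, lr=0.1)
print(float(np.sum(params ** 2)))  # 迭代后损失应较初始值显著下降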
至此,我们可以得出大致结论: 如果数据稠密,实际上简单的算法就能得到鲁棒性很好的结果。参考使用 标准动量 的 SGD/BGD/MBGD + Momentum 。加动量既可以保证相对快的训练速度,也可以一定程度避免局部最小值。 如果数据稀疏,因为需要对关键特征点进行提取,所以需要用一些自适应算法。对于简单凸性和相对不复杂的数据,可以采用 L1、L2正则化 + 组合分桶。而复杂一些的,就需要采用Adagrad, Adadelta, RMSprop, Adam 等方式了。 如果关键参数更多的出现在运算后期,即梯度稀疏一定程度后,则Adam 比 RMSprop 效果会好。这时 Adam 明显是最好的选择。 按照这样的策略,灵活且合理的选取优化器。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_7.html":{"url":"Chapter_4/Language/cn/Docs_4_7.html","title":"4.7 模型结构速览","keywords":"","body":"4.7 模型结构速览 现在,基本了解了神经网络的主要工程元件后,即能设计简单模型结构,做一些训练了。 不过,在起步阶段,我们还需要决定具体使用哪一种模型类型,来构建面向目标的神经网络。可供选择的类型,其实在本章的开篇就已介绍,即深度神经网络(DNN [Deep Neural Network])的分类(见 4.1)。 这里我们主要对 当下主流的 CNN、RNN、GAN、Transformer 类别,进行说明。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_7_1.html":{"url":"Chapter_4/Language/cn/Docs_4_7_1.html","title":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network])","keywords":"","body":"4.7.1 卷积神经网络(CNN [Convolutional Neural Network]) 卷积神经网络(CNN [Convolutional Neural Network]),是对采用 卷积核(Convolutional Kernel),配合 层叠网格结构 构成的流水线,来进行特征提取的一类神经网络的统称。该类型最为擅长抽象图片或更复杂信息的高维特征。 仍以 AlexNet 为例,原工程示意图之前已有展示: 图 4-37 完整的 Alexnet 示意图(工程版) 我们用它来做一个,基于 MINST 手写字母图像集的,简单字母分类识别模型。 如下所示: 图 4-38 以 AlexNet 部署的 MINST 字母识别模型 可以发现,该模型在层级设计上,前半部分使用到了一系列由多个 更偏重功能性 的特殊隐藏层,即卷积层(Conv)、池化层(Pool),以连接构成复杂结构。之后,在经过一个独立的平化层(Flatten Layer)处理多通道数据完毕,才到达我们熟知类似之前介绍的,三层简易结构组成 多层感知器(MLP)朴素神经网络(Simple Neural Network) 的部分。 CNN 层级分类 显然,新引入的 卷积层(Conv) 、 池化层(Pool) 、 平化层(Flatten Layer),从神经网络结构上划分,仍然属于隐藏层。但所偏重的处理却更为细分。单纯使用结构体系的称谓,已经不能体现其具体作用了。 介于此,我们不得不采用 功能分类 方式,细化隐藏层类,来扩展对 CNN 的结构描述能力。 于是,结合原本输入层、输出层的相对准确描述,通常情况,我们能够将 CNN 的层类型分为 6 类。分别是: 输入层(Input Layer),处理接收样本的前处理准备工作; 卷积层(Conv [Convolutional Layer]),处理卷积核的移动 和 相关数据过滤工作; 池化层(Pool [Pooling Layer]),处理压缩/提升参数量的工作; 平化层(Flat [Flatten Layer]),处理接收样本的前处理准备工作; 全连接层(FC [Fully Connected Layer]),完成神经网络提取后特征的权重迭代部分; 输出层(Output Layer),完成最终特征向量结果输出,交由损失函数处理 不过,因为是按照针对处理任务进行的划分,因此,不同的 CNN 模型,在分类上会有或多或少的不同。例如,有些没有平化层,而有些则会多出独立的激活层(专门用于激活函数生效的隐藏层)。所以需要根据实际情况,做些许调整。但分类原则仍然依照上述标准。 为了区别于基础 输入层、输出层、隐藏层 概念,我们在介绍时统一采用 CNN 前缀(如,CNN-Input),表明特殊的分类形式。 CNN 输入层(CNN-Input) CNN 输入层(CNN-Input) 所做的工作和传统神经网络的输入层工作有一定的区分,除了要完成初步激活外,还需要对输入样本数据做一定的 预处理(Pre-Process) 工作。此类主要为 特征工程(Feature Engineering) 的相关工作,包括但不限于: 数据过滤(Data Filtering),初步筛选部分可以直接从样本本身判断出来的无效数据; 标准化(Standardization),将数据转为均值 0 且标准差为 1 的分布,无关原分布特征; 归一化(Normalization),将样本数据映射到一定范围区间内,放大或缩小范围; 中心化(Zero-Centered),将样本数据转为均值 0 但保留原有分布特征; 这里的 归一化(Normalization),指的是将参与训练的数据,归一到 统一尺度 下处理。虽然 [0, 1][0,\\ 1][0, 1] 范围因契合概率特征,常作为考虑范围之一,但并不一定都会选在 [0, 1][0,\\ 1][0, 1] 范围。例如我们在音视频中,以 RGB 作为输入数据时,更希望保留 [0, 255][0,\\ 255][0, 255] 的离散范围作为样本 。 除此之外,还有对原有数据的去关联性、离散化、量化转移。因此,输入层的工作,也被经常称为 数据预处理(Data Preprocessing)。这一部分存在相当多的工作,本书在章节末尾的书目推荐中,已列出相关参考推荐,感兴趣可以自行前往了解。 CNN 卷积层(CNN-Conv) CNN 卷积层(CNN-Conv) 主要采用一些滤波器算法,来针对性的提取特征信息。这一过程中使用的滤波器(Filter)即是我们在本书第三章第二节中介绍的一类类型,涵盖了之前所提到的常用滤波手段在内的一系列滤波处理方式。而这也是 CNN 的核心概念之一。 在 CNN 中,一般将 采用滤波器称为卷积核(Kernel)。 图 4-39 CNN 卷积层的计算过程示例 通过卷积操作,不断的从输入样本(CNN 一般是多维数据)中,提取出滤波后的特征。这一过程实际上是对最终被用来作为训练的输出特征向量,所在高维投影信息的一种过滤和逼近。通过对多层 CNN 卷积层的加权训练,来实现简洁观察到样本集代表数据的本质特征。 由于卷积操作的特点,CNN 最适合被 GPU 加速的运算,即是卷积核运算。 CNN 池化层(CNN-Pool) CNN 池化层(CNN-Pool) 是除了 CNN 卷积层外的另一种特征提取方式。它本身其实也可归类为一种算子简单的 CNN 卷积层。为了区别,我们把池化层的卷积核,称为池化核。 因此,CNN 池化层具有 CNN 卷积层的所有特点,并一样利于 GPU 化加速。 池化算子根据前一级输入,一般为 2×22 \\times 22×2 或 3×33 \\times 33×3 大小,移动步长为了避免范围覆盖,会取用等核大小步长。 常见的池化算子(Pooling Operator)主要有两种,分别是: 
最大值池化(Max-Pooling),以池化核内最大值为输出; 核均值池化(Avg-Pooling),以池化核内所有值的均值作为输出; 这两类都是 向下采样(Subsampling) 过程,效果如下: 图 4-40 CNN 池化层的计算过程示例 [18] 除此外,还有各种类型的其它池化算法,例如:混合池化(Mixed Pooling)、线性探测池化(Linear Probing Pooling)[19] 、向上采样(Upsampling)的全局池化(Global Pooling)等。 方法不一而足。 但池化层的目的,始终是对前级输入的一种,引入相对对抗性的校准方式。使得特征的小范围内值得到突出,或用以磨平部分核内数据干扰的手段。 CNN 平化层(CNN-Flat) CNN 平化层(CNN-Flat),从字面意义理解,即把输入变换到指定维度大小数据当特殊处理层。它的意义在于,为传统 MLP 的神经网络部分,提供可供其学习的输入特征。 因此,常见的平化层操作,即将前一级输入直接按照顺序延展到指定维度即可。 图 4-41 CNN 平化层的计算过程示例 通常情况,我们会选择输出扁平化到 一维张量(1-dim Tensor,即 的有 n×1n \\times 1n×1 个元素的向量)。这个过程如上图展示。 CNN 全连接层(CNN-FC)& CNN 输出层(CNN-Output) CNN 全连接层(CNN-FC) & CNN 输出层(CNN-Output),相比之前几类,和它俩在 MLP 时期的作用基本无变化。 CNN 全连接层(CNN-FC) 相当于前文中三层朴素神经网络里的隐藏层; CNN 输出层(CNN-Output) 相当于前文中三层朴素神经网络里的输出层; 此处就不再赘述。 需要注意的是,CNN 输出层(CNN-Output)的输出结果,才是我们 在训练阶段 中,用来 交付损失函数计算,并参与优化器迭代权重的部分。为区别于其它,有时会被称为模型的 关键特征向量(Key Vector)。 CNN 网络结构 卷积神经网络存在远超 MLP 的层级,带来的变换远非只停留于对层级功能的细化上。在网络结构层面,也逐渐由处理针对任务性质的差异,产生了在 CNN 整体结构内,更为明确的区分。我们一般将位于神经网络内,专项执行单一主要任务的内部子模块,称为 子网结构(Sub-Network Structure),或简称为 子网(Subnet)。 同样于层级分类情况,对于主要目的不同的 CNN ,其子网结构也不完全相同。 但一般而言,大体可以分为 3 个子网,分别是: 特征提取(FE [Feature Extraction])子网,用于提炼原始信息至高级特征; 特征选择(FS [Feature Selection])子网,用于将高级特征抽象至最终输出特征向量; 结果输出(RO [Result Output])子网,用于输出最终的处理结果; 从分类可见,特征选择子网和特征提取子网,在卷积神经网络中的作用,基本等同于传统机器学习过程中,特征的选择和提取在传统逻辑回归和聚类分析中,所起到的作用一致。但其作用范围是整张网络内,所有的过程中特征。这一点还是有较大维度上的差异的。 在具体实践中,是什么样的情况呢? 我们以 AlexNet 部署物体识别的 CNN 分类模型为例,有: 图 4.7.1-6 以 AlexNet 部署的 ImageNet 物体识别模型 在例子中,3 个子网结构各包含了多个 AlexNet 的不同层级: 特征提取子网(FE),包含 输入层、平化层,以及从输入层至平化层间的多个池化层、卷积层,共同组成; 特征选择子网(FS),在本例中根据功能也被称为 分类子网(Classification Subnet),包含接收平化层输出的相邻隐藏层至输出层前一级隐藏层。这些隐藏层都是全链接层,以此完成特征向量提炼。 结果输出子网(RO),则是在接收 FS 输出后,最终生成特征向量的传统 MLP 组成。例子中采用了 SoftMax 连接函数,完成了对样本的 概率分布(Probabilistic Distribution) 归一化向量输出。 需要注意的是,例子中由于是训练好的模型,并没有画出当模型还在训练时,损失函数生效的阶段。不过,在我们经过前几节的讲解后,还是可以判断得到,其生效位置正是在 RO 之后。 训练阶段的 CNN ,正是接收了结果输出子网的特征向量,以此作为迭代的损失函数输入。 那么 CNN 有哪些适合的优势场景呢? 
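在进入场景讨论之前,这里先用一段基于 NumPy 的最简示意代码,把前文的卷积层(Conv)、池化层(Pool)、平化层(Flat)在特征提取子网(FE)中的数据流串联起来。强调一下,这只是单通道、无偏置、无激活函数的原理草图:卷积核取随机值,输入尺寸 28×28 仅为假设(类似 MNIST 灰度图),并非任何实际模型的实现:

import numpy as np

def conv2d(x, kernel, stride=1):
    # 朴素的单通道 valid 卷积,仅用于演示卷积核在输入上的滑动过滤
    kh, kw = kernel.shape
    oh = (x.shape[0] - kh) // stride + 1
    ow = (x.shape[1] - kw) // stride + 1
    out = np.zeros((oh, ow))
    for i in range(oh):
        for j in range(ow):
            patch = x[i*stride:i*stride+kh, j*stride:j*stride+kw]
            out[i, j] = np.sum(patch * kernel)
    return out

def max_pool2d(x, size=2):
    # 最大值池化,移动步长取等于池化核大小(与正文描述一致)
    oh, ow = x.shape[0] // size, x.shape[1] // size
    out = np.zeros((oh, ow))
    for i in range(oh):
        for j in range(ow):
            out[i, j] = np.max(x[i*size:(i+1)*size, j*size:(j+1)*size])
    return out

# 特征提取子网(FE)数据流示意:Conv -> Pool -> Flatten
image = np.random.rand(28, 28)            # 假设的 28x28 灰度输入
kernel = np.random.randn(3, 3)            # 一个 3x3 卷积核(随机值,未经训练)
feature_map = conv2d(image, kernel)       # 得到 26x26 特征图
pooled = max_pool2d(feature_map, size=2)  # 2x2 最大值池化后为 13x13
flattened = pooled.reshape(-1)            # 平化为一维张量,交由后续全连接层(FS/RO 子网)
print(feature_map.shape, pooled.shape, flattened.shape)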
CNN 的常见场景 考虑到 CNN 的特点,其实只要是超过一维的样本数据,都能够以 CNN 来进行相关作业。这也决定了 CNN 具有极强的普适性。 目前上,工业界对 CNN 的运用已经涵盖了: 图像分类,如:手势识别、动作识别、人脸识别等; 图像分割,如:背景分离、智能抠图、轮廓融合等; 语义分割,如:物体分类、车辆检测等; 语音识别,如:文本转译、同声传录、情感分析等; 除此外,包括 2016 年名声大噪的 AlphaGo ,也是采用的 CNN 多模型混合架构。足以见得其巨大的发挥空间。虽然 2022 年因 OpenAI 的 ChatGPT 引起 LLM Transformer 浪潮,让 CNN 的热度有所减退,但并不能阻碍它成为目前最好用的模型结构选择之一。 相信未来,我们仍然能够一睹 CNN 回归 LLM 多模态语言大模型的风采。 至此,CNN 的初级概念和网络结构,基本介绍完毕。有了这些知识背景,在了解 CNN 的各种类型网络的设计时,亦能窥得大概。其余就需要仔细钻研论文,以了解全貌了。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_7_2.html":{"url":"Chapter_4/Language/cn/Docs_4_7_2.html","title":"4.7.2 循环神经网络(RNN [Recurrent Neural Network])","keywords":"","body":"4.7.2 循环神经网络(RNN [Recurrent Neural Network]) 循环神经网络(RNN [Recurrent Neural Network]),是指为了应对序列数据类型而专门设计的一种,具有一定程度 长期记忆(Long Term Memory) 和 短期记忆(Short Term Memory) 能力的神经网络模型类型。即然被称为“循环”神经网络,则循环在整个 RNN 流水线过程中,起到了至关重要的作用。 那么,具体时如何“循环”的呢?这一点需要从网络的权重更新细节说起。 RNN 自 MLP 的改进 在前文中,我们认识了朴素神经网络的代表多层感知器(MLP)。并通过分析 输入层、隐藏层、输出层,了解了具体的推理过程。一个典型的三层 MLP ,有如下展示(见 4.2): 图 4-43 经典层分类在简单神经网络中位置示意图(切片) 取图例所示,记时代 ttt 时,有输入 i⃗=[i1, i2, i3, i4]\\vec{i} = [i_1,\\ i_2,\\ i_3,\\ i_4]i⃗=[i1, i2, i3, i4] , 隐藏层存在变化 z⃗=W⋅i⃗+b⃗\\vec{z} = W \\cdot \\vec{i} + \\vec{b}z⃗=W⋅i⃗+b⃗ 使输出为 z⃗=[z1, z2, z3]\\vec{z} = [z_1,\\ z_2,\\ z_3]z⃗=[z1, z2, z3] ,而输出层在接收处理 z⃗\\vec{z}z⃗ 后得当次迭代结果为 o⃗=[o1, o2]\\vec{o} = [o_1,\\ o_2]o⃗=[o1, o2] 。则上图就有时代 ttt 的简化表示: 图 4-44 经典层分类在简单神经网络中位置简化图(切片) 在 MLP 中,每一个时代(Epoch)的权重更新过程,都不会受到前一个时代的影响,并且也未影响即将参与训练的后一个时代。这种现象,被称为 “无记忆”(No-Memory),即每一次权重更新只和当前时代的训练有关。 为了解决这个问题,1982 年,约翰·霍普菲尔德 (John Hopfield) 基于 威廉·伦茨(Wilhelm Lenz, 1888 - 1957) 和 恩斯特·伊辛(Ernst Ising, 1990 - 1998) 等人提出的 伦茨-伊辛模型(Lanz-Ising Model) 思想 ,改善了基础 MLP ,创建了首个被称为 “霍普菲尔德网络模型(HN [Hopfield Network])” 的类现代 RNN 神经网络,用于处理时序优化。该模型试图在神经元间引入趋势关联性,来实现一定程度的短期记忆。虽然是相当有开创性的工作,但由于需要保存较多变元受控影响参数过多,且由于核内只采用了 Tanh 简单引入非线性,却无其他处理,从而存在 梯度消失(Vanishing Gradient),导致不太具有工程优势。 受到启发,塞普·霍克雷特(Sepp Hochreiter) 和 尤尔根·施密特胡伯 (Jürgen Schmidhuber) 于 1991 年提出了 长短期记忆(LSTM [Long Short-Term Memory]) 模型 [20] ,通过简化迭代参数到隐藏层神经元结构内,引入了具有一定程度时间性的权重 W(t)W(t)W(t) 和偏移 b(t)b(t)b(t) ,代替了原有单步权重 WWW 和偏移 bbb 。相比 HN 的独立时序权重影响参数, LSTM 相当于用相对复杂结构,替换原 z⃗=W⋅i⃗+b⃗\\vec{z} = W \\cdot \\vec{i} + \\vec{b}z⃗=W⋅i⃗+b⃗ 在时序上的单步计算的方式,而非在式子本身上施加额外干预。 假设我们三个时代,分别是 t−1t - 1t−1 、 ttt 、 t+1t + 1t+1 ,那么改进后的 LSTM 有: 图 4-45 长短期记忆(LSTM)网络结构类比 MLP 的时序关联性示意图 如此,使得权重和偏移也参与到了迭代中,进而通过两者在时序上的传递,实现了过往训练结果的历史传递。并同时,减小了非样本量的人为干扰。 而 LSTM 也不出意料,成为了 RNN 模型的 经典代表。当下我们所谈论的 RNN 结构,基本都可归类为 LSTM 的变体。 RNN 元胞(Cell) 显然,原有通过分层(Layer)来对神经网络过程区分的方式,是不足以描述 RNN 在时间上的复杂过程的。而从时序角度来看,每个时代(Epoch)的一次迭代过程,都可以被抽象为重复且工程独立的计算模块。于是, RNN 中,我们将单个传统层级神经网络(NN)在时刻 ttt 的一次 完整计算,称为位于 ttt 的 RNN 元胞(Cell)。如图 4.7.2-3 中绿色框体内的部分。 同时,LSTM 也对单元内的传统网络部分进行了进一步精简,取消了层中神经元(Neuron)的固定状态,并打破了层的阈限,采用 计算节点(Node) 的泛化称谓,分化解构了层和层内的结构性与功能性。这种更接近现代电子电路设计的处理方式,也是为何 RNN 类型网络会更容易被 硬件化(Hardwareization) 的主要原因。 对于 RNN 元胞(Cell),有些文献中将其译为神经元,这从仿生学角度来说无可厚非。但以神经网络命名歧义上讲,会发现并不合适。因此,本书参照早期机器学习(ML)的 元胞自动机 和 受限玻尔兹曼机(RBM) 中,对元胞(Cell)的定义,来代称 RNN 原文献对 Cell 的表述。两者在概念和作用上都更为接近,是以为更贴切的意译。 另外,RNN 计算节点(Node),其实就是 MPL 意义上的节点(见 4.2),只是在功能上存在从 RNN 角度出发的独特分类。为做区别,我们称之为 RNN 节点(RNN-Node)。 RNN 节点(RNN-Node) 回到正题,怎么理解 RNN 节点(RNN-Node)呢? 
RNN 节点(RNN-Node) 即为 RNN 中,对算子(Operator)进行最小粒度能力整合的 基本组成 成分。相较于层本身的双重特性来说,单元更加强调自身的功能性,并将自身的结构性完全交由元胞结构设计来表示。而从单元功能来说,主要分为两种: 门(Gate)节点,用来控制元胞内外特征,对元胞影响的单核运算,根据功能有多种子类; 层(Layer)节点,遵循 MLP 标准层内神经元特性的类层功能组件; 因此,层(Layer)节点,其实就是朴素神经网络中的层(Layer)。而各类 门(Gate)节点 和 各个节点间的数据流组合方式,即元胞驱动公式(Cell Formula),才是 RNN 元胞结构上的独特之处。 图 4-46 长短期记忆(LSTM)的三连元胞结构图(即绿框内实际情况)[20] 上图中,紫色部分即为 门(Gate)节点,而黄色部分则为 层(Layer)节点。箭头代表数据流向。工程上,通常将门节点和门的前一级输入,共同作为门来表示。这在工程导向的数据驱动流程图上就有所体现。 想要明确三者间的关系,就需要结合实际模型来理解。这里我们仍基于 LSTM 来说明。 RNN 长短期记忆(LSTM)模型 在图 4.7.2-4 中,我们仅从宏观的角度,用类 MLP 的便于承接的方式说明 RNN 的时序性。而 LSTM 真正元胞内的流程,是如下所示(数据驱动流程图): 图 4-47 长短期记忆(LSTM)的元胞结构详情 [21] 其中, 以 ttt 代表迭代时代; 以 ccc 代表 元胞状态(Cell State) 向量,代表元胞长期记忆内的高维特征; 以 hhh 代表 隐藏状态(Hidden State) 向量,代表元胞短期记忆内的高维特征; 以 fff 代表 遗忘门(Forget Gate) 输出,遗忘门用于随机或策略(训练)的丢弃前一级输入; 以 iii 代表 输入门(Input Gate) 输出,输入门控制(训练)需被长期记忆的高维特征; 以 ooo 代表 输出门(Output Gate) 输出,输出门控制(训练)需被短期记忆的高维特征; 以 XXX 代表元胞的输入特征,即对 RNN 而言经样本预处理后的输入; 以 OOO 代表元胞的输出特征,可见 ttt 时有 Ot=htO_t = h_tOt=ht ,即 LSTM 输出为当前元胞隐藏状态向量 ; 由此,我们引申出了 LSTM 的 元胞驱动公式(Cell Formula) : Input:XtGate:{it=Sigmod(Xt⋅Bi + ht−1⋅Wi)ft=Sigmod(Xt⋅Bf + ht−1⋅Wf)ot=Sigmod(Xt⋅Bo + ht−1⋅Wo)Cell State:{ct=Sigmod(ft⊗ct−1 + it⊗c^t)c^t=Tanh(Xt⋅Bg + ht−1⋅Wg)Hidden State:ht=Tanh(ct)⊗otOutput:Ot=ht {\\displaystyle \\begin{aligned} Input: &\\quad X_t \\\\ Gate: &\\begin{cases} i_t &= Sigmod(X_t \\cdot B^i \\ +\\ h_{t-1} \\cdot W^i ) \\\\ f_t &= Sigmod(X_t \\cdot B^f \\ +\\ h_{t-1} \\cdot W^f ) \\\\ o_t &= Sigmod(X_t \\cdot B^o \\ +\\ h_{t-1} \\cdot W^o ) \\end{cases} \\\\ Cell\\ State: &\\begin{cases} c_t &= Sigmod(f_t \\otimes c_{t-1} \\ +\\ i_t \\otimes \\hat{c}_t ) \\\\ \\hat{c}_t &= Tanh(X_t \\cdot B^g \\ +\\ h_{t-1} \\cdot W^g ) \\end{cases} \\\\ Hidden\\ State: &\\quad h_t = Tanh(c_t) \\otimes o_t \\\\ Output: &\\quad O_t = h_t \\\\ \\end{aligned} } Input:Gate:Cell State:Hidden State:Output:Xt⎩⎪⎨⎪⎧itftot=Sigmod(Xt⋅Bi + ht−1⋅Wi)=Sigmod(Xt⋅Bf + ht−1⋅Wf)=Sigmod(Xt⋅Bo + ht−1⋅Wo){ctc^t=Sigmod(ft⊗ct−1 + it⊗c^t)=Tanh(Xt⋅Bg + ht−1⋅Wg)ht=Tanh(ct)⊗otOt=ht 式子中, 权重(Weight) 和 偏移(Bias) 分别为 WWW 和 BBB 采用 矩阵形式表示。 而 {Wi, Wf, Wo, Wg}\\{W^i,\\ W^f,\\ W^o,\\ W^g \\}{Wi, Wf, Wo, Wg} 和 {Bi, Bf, Bo, Bg}\\{B^i,\\ B^f,\\ B^o,\\ B^g \\}{Bi, Bf, Bo, Bg} ,则分别代表着 遗忘门(Forget Gate) 、 输入门(Input Gate) 、 输出门(Output Gate) 和 基本元(Standard Neuron) 的权重和偏移。它们都将参与 RNN 训练中的迭代,即我们要训练的对象。 我们将这种权重 WWW 和偏移 BBB 参与训练,并在时序上以相反于时间流向传递(指从后时间节点的观察角度)历史锚点的方式,称为 随时间反向传播(BPTT [Back Propagation Through Time])。而我们在前文中所介绍的 反向传播(BP [Back Propagation]),在这种意义下,则为 当期反向传播(BPIE [Back Propagation In Epoch])。BPTT 考虑了时间的影响,相当于引入时序概念的升级版 BP(即 BPIE)。也正是这样,BPTT 仍然具有 BP 的一切相关特性,同时却额外的具有了历史因素 ,谨慎二者差异。 这就是一个 LSTM 的 RNN 元胞的基本构成了。除了 LSTM 外,还有各种改进类型,如:引入了 “窥视孔连接(Peephole)”的 Peephole-LSTM,采用了门循环控制单元(GRU [Gated Recurrent Unit])的 GRU-LSTM 等。这些变体所改进的皆是 LSTM 的内部流结构,有了现在的基础,读者亦可独立了解了。此处给出对比图例,以简单供参考: 图 4-48 LSTM 与 GRU-LSTM 的元胞结构对比 那么 RNN 有哪些适合的优势场景呢? 
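在进入场景讨论之前,这里给出一段基于 NumPy 的单步 LSTM 元胞计算草图,用以直观对应上文的元胞驱动公式。需要说明的是:为保持示例简洁,权重采用 Wx(输入权重)、Wh(循环权重)、b(偏移)的合并命名,与正文中按门分列的 W/B 记号略有差异;元胞状态一行采用常见实现形式 c_t = f_t⊗c_{t-1} + i_t⊗ĉ_t(不再额外套用 Sigmoid);所有数值均为随机示例,并非任何实际模型:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_cell_step(x_t, h_prev, c_prev, Wx, Wh, b):
    # 一次完整的 LSTM 元胞计算,门顺序为 i(输入门)、f(遗忘门)、o(输出门)、g(候选元胞状态)
    d_h = h_prev.shape[0]
    z = x_t @ Wx + h_prev @ Wh + b
    i = sigmoid(z[0*d_h:1*d_h])        # 输入门
    f = sigmoid(z[1*d_h:2*d_h])        # 遗忘门
    o = sigmoid(z[2*d_h:3*d_h])        # 输出门
    c_hat = np.tanh(z[3*d_h:4*d_h])    # 候选元胞状态 ĉ_t
    c_t = f * c_prev + i * c_hat       # 元胞状态(长期记忆)
    h_t = o * np.tanh(c_t)             # 隐藏状态(短期记忆),也即当期输出 O_t
    return h_t, c_t

# 用法示意:输入维度 8,隐藏维度 4,顺序处理一条长度为 5 的随机序列
d_x, d_h = 8, 4
rng = np.random.default_rng(0)
Wx = rng.standard_normal((d_x, 4 * d_h)) * 0.1
Wh = rng.standard_normal((d_h, 4 * d_h)) * 0.1
b = np.zeros(4 * d_h)
h, c = np.zeros(d_h), np.zeros(d_h)
for x_t in rng.standard_normal((5, d_x)):
    h, c = lstm_cell_step(x_t, h, c, Wx, Wh, b)
print(h.shape, c.shape)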
RNN 的常见场景 考虑到 RNN 的特点,RNN 类模型最为擅长的基本在于需要考虑时间关联性的场景。 目前上,工业界对 RNN 的运用已经涵盖了: 自然语言处理(NLP),如:文本分析(智能输入法)、机器翻译、语音助手等; 音视频生成,如:音乐生成、视频生成、合成编辑、自动裁剪等; 时序预测,如:股票分析、气象预测、医疗诊断等; 不过随着 Transformer 的兴起,RNN 在 NLP 领域的地位早已面临着极大挑战(自 2018 年 BERT 达成 SOTA 以来)。2023 年中的 Google Bard(BERT,GPT-3,Transformer) 大战 OpenAI ChatGPT-4(ChatGPT-4,Transformer) ,以 Bard 的糟糕(相对)表现失败告终。最终又进一步促成了 Google 加速推进了另一个用 Transformer 做为主体的 Gemini 大语言模型(LLM)发布,来扳回颜面。 而这精彩纷呈的大语言模型大战中,采用 RNN 作为骨架的 OpenAI Jukebox(12B 参数)和 EleutherAI GPT-NeoX(20B 参数),甚至没有激起水花。可见一斑。 如果 RNN 在短期内没有进一步突破,可见 Transformer 会逐步取而代之。但这,并不意味着 RNN 会退出历史舞台。技术永远都是博弈的过程,在人工智能的终极命题被解决前,无人能够断言。 需要注意的是,RNN 从始至终意图解决的都是“记忆”问题,而非 CNN 所解决的“提取”问题。两者 并不冲突,甚至还可以适度融合,即组合形成 CNN+RNN 融合模型(Hybrid Model)。由 CNN 的特征提取(FE)子网得倒高级特征,再经过 RNN 代替原 CNN 的特征选择(FS)子网和结果输出(RO)子网,实现对高级特征的时间敏感训练。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/Docs_4_7_3.html":{"url":"Chapter_4/Language/cn/Docs_4_7_3.html","title":"4.7.3 自注意力网络(Transformer)","keywords":"","body":"4.7.3 自注意力网络(Transformer) 自注意力网络(Transformer) 是一种基于自注意力机制(Self-Attention)的深度神经网络结构分类。最初的设计原型来自于 2015 年由 约书亚·本吉奥(Yoshua Bengio,1964~Present) 有关神经网络翻译模型优化而提出的设想 [22] 。 本吉奥 通过引入一种模拟生物在感知、学习、思考过程中自然而然产生的,主被动关注关键点的生理机制,来解决传统 机器学习(ML)编码器-解码器(Encoder-Decoder) 翻译模型(Translation Models)的 长句压缩固定长度(Fixed-Length) 特征,导致潜层语意缺失的问题。这种模拟方式,能够将长句(Long Sequence)中的信息,抽象为固定长度(Fixed-Length)向量的集合,而非 以单一向量的形式进行后续相关语意分解工作。 基于该研究,Google AI 实验室终于在 2017 年以 《Attention Is All You Need》[23] 一文,确定了基本的网络结构设计。 从此,开启了 Transformer 类型网络的快速发展之路。 Transformer 核心成分 & 核心机制 自注意力机制(Self-Attention) 是 Transformer 的 核心机制。而 Transformer 网络结构,则是与自注意力机制提取特征 配套,用于达成转译目的(不一定非要是语言)的整个编解码器 系统作业流水线。 图 4-49 Transformer 网络结构 [23] Transformer 在结构中,参考了 RNN 和早期 自编解码网络系统的特点,采用 时序上以串行元胞(Cell),而单次迭代中编解码(Encoder-Decoder)并行(Parallelization) 的方式,结合两者各自所长。 如此设计的原因,一方面来自于 Transformer 意图改善 RNN 对 序列到序列(Seq2Seq) 任务的处理能力。这便要求,其本身对输入样本的高级特征提取,能够拆解直接关联性,但却又可以保留更高维度潜藏的逻辑信息。另一方面,注意力机制只能提炼解构关联性特征,但我们需要的结果却是对原有输入的另一种未改变原意的表述,决定了必须有编解码类似的结构(至少目前为止)对高级特征进行非对称的压缩和还原。 自注意力机制,保证了前一点。整体结构,保证了后一点。而 注意力单元(Attention Unit),则是用于完成自注意力处理的组成框架的一部分(图中橙色)。 根据最精简实现, 从 注意力单元(Attention Unit) 上,Transformer 包括了两个分步主成分: 放缩点积注意力(SDPA),用于完成对于一次 Transformer 输入的单一注意力筛选; 多头注意力(MHA),通过对放缩点积的平行组合,完成对同一输入的多点注意力检测; 从 网络结构(Network Structure) 上,Transformer 包括了两个主体模块: 编码器(Encoder)模块,用于完成对样本输入的高级特征提取; 解码器(Decoder)模块,用于完成对样本输入的转译结果输出; 通俗来说,以人做类比,两者一个属于 Transformer 的“灵魂”,一个属于 Transformer 的“身躯”。从宏观角度,是互为一体的存在。这与前两节 CNN 、RNN 里,结构占相对主导地位(即框架为改进的关键)的情况有所不同。而是皆有突破。 正是这样,Transformer 才称为深度学习领域,在模型结构基本方法论上,近年来相对具有较大革新突破的工具。 刚刚我们提到的注意力单元,是对自注意力机制的实现。从上可知,Transformer 基本的一切都是围绕其运转的。那么,什么是自注意力机制(Self-Attention)呢? 
自注意力机制(Self-Attention) 自注意力机制(Self-Attention) 是 Transformer 中,一种目的用于模拟生物注意力的数学量化范式。是 一类方法的方法论。 想要能够区分轻重,最容易想到的方式便是加权了。因此,在 Transformer 中,对注意力的数值定义便是: 对输入序列元素,根据输入查询元素键值对,以求动态计算加权平均值的权重。 所以,我们计算注意力,需要的输入数据序列就必须包含三个重要的信息维度,分别是: 查询信息,即查询(Query),代表当前输入具有可能被关注(查询)的意义表征; 关键信息,即键值(Keys),代表当前输入在查询(Query)下可能提供的信息特征; 标值信息,即取值(Values),代表与键值(Key)关联的量化查询积分; 最终,由三者共同借由算法组成 输出特征(Output Fratures),即 加权平均值的权重,作为 Transformer 的神经网络内高级特征。 图 4-50 Transformer 的输入转换过程 [23] 显然,虽然键值和取值可以按照 传统键值对(Key-Value Pair) 形式直接配对(当然也可以有复杂映射实现)。但从查询到键值还需要有一定转换方式,通过它来实现将查询可能的关注点,分散到对应的键值上,从而构成可以计算的结果。这个转换方式,就是 评分函数(Score Function)。 一般而言,评分函数可分为两类: 简单评分函数(Simple Scorer),此类以单步函数来完成粗糙的映射,如直接 Sigmod 复杂评分函数(Complex Scorer),此类用朴素神经网络或模型来完成映射,如 MLP 评分 而现有大部分 Transformer 中,采用的都是第二个类型的实现。包括基础 Transformer 的 SDPA 和 MHA 在内,皆属于此类。 当评分函数确定后,每个查询都能拆解到键值上,并获取对应积分了。此时,就需要由 输出函数(Output Function) 来将各个部分组合成最终的高级特征向量进行输出。 假设, 以 QQQ 代表查询, 以 SeqSeqSeq 表示输入序列, 以角标 [i][_i][i] 代表所处输入序列 SeqSeqSeq 的位置,有 iii 处键值 KKK 取积分 VVV , 以 OutOutOut 代表经过注意力筛选后,的高级特征向量, 记评分函数为 fscoref_{score}fscore ,输出函数为 foutf_{out}fout 则有: wi=fscore(Ki, Q)∑fscore(Ki, Q)Out=∑fout(wi⋅Vi) {\\displaystyle \\begin{aligned} w_i &= \\frac{f_{score}(K_i,\\ Q)}{\\sum{f_{score}(K_i,\\ Q)}} \\\\ Out &= \\sum{f_{out}(w_i \\cdot V_i)} \\\\ \\end{aligned} } wiOut=∑fscore(Ki, Q)fscore(Ki, Q)=∑fout(wi⋅Vi) 如此,我们便得到了 注意力量化公式。实际上,这种按序列计算的过程,常被直接 以矩阵运算代替。公式的意义更多在于原理描述。即,自注意力机制工程化的指导过程。依照上式,便可以通过构建评分函数和输出函数,来设计 Transformer 里的注意力单元了。 经典 Transformer 中,将这一步分为了两个组成。 放缩点积注意力(SDPA [Scaled Dot-Product Attention]) 放缩点积注意力(SDPA [Scaled Dot-Product Attention]) 被用于计算单点注意力,即只生成一个注意力高级特征输出。 图 4-51 Transformer 的 SDPA 单元 [23] 如图,红框部分便是 SDPA 的评分函数,而蓝框部分则为 SDPA 的输出函数。 我们记一个输入序列,其序列长度为 TTT 而查询维度为 ddd 。这里需要解释一下,什么是序列长度和查询维度。我们举个例子,如果有一条查询为 Q⃗=[[0.12, 3.57], [0.71, 1.14], [0.95, 0.63]]\\vec{Q} = [[0.12,\\ 3.57],\\ [0.71,\\ 1.14],\\ [0.95,\\ 0.63]]Q⃗=[[0.12, 3.57], [0.71, 1.14], [0.95, 0.63]] ,那么我们就称 单条查询的维度 为 d=2d=2d=2 ,而 总共有长度 为 T=3T=3T=3 条查询。即 查询维度就是一条查询所包含的参数数目,而 序列长度就是单次输入样本包含查询的数目。 当 ddd 确定,对于长度 TTT 的输入数据序列,就有 查询 Q∈RT×dQ \\in \\mathbb{R}^{T \\times d}Q∈RT×d 、 键值 K∈RT×dK \\in \\mathbb{R}^{T \\times d}K∈RT×d 、 取值 V∈RT×dV \\in \\mathbb{R}^{T \\times d}V∈RT×d ,即三者都是 T×dT \\times dT×d 大小的矩阵。则 SDPA 的 评分函数(Score Function) 有如下表示: fscore(K, Q)=softmax(Q⋅KTd) {\\displaystyle \\begin{aligned} f_{score}(K,\\ Q) = softmax \\left( \\frac{Q \\cdot K^T}{\\sqrt{d}} \\right) \\\\ \\end{aligned} } fscore(K, Q)=softmax(√dQ⋅KT) 而输出时采用的 输出函数(Output Function),就是一个取值与评分结果的矩阵点积(Dot-Product),这也是 SDPA 名称的原因。即: fout(fscore, V)=fscore⋅VOutput=softmax(Q⋅KTd)⋅V {\\displaystyle \\begin{aligned} f_{out} &(f_{score},\\ V) = f_{score} \\cdot V \\\\ Output &= softmax \\left( \\frac{Q \\cdot K^T}{\\sqrt{d}} \\right) \\cdot V \\\\ \\end{aligned} } foutOutput(fscore, V)=fscore⋅V=softmax(√dQ⋅KT)⋅V 过程中 1d\\tfrac{1}{\\sqrt{d}}√d1 即 缩放因子(Scale Factor)。而 Mask 操作是可选的,一般过程中作用于 fscoref_{score}fscore 的 SoftMax 操作之前,已经完成点积和缩放的 (Q⋅KTd)\\left( \\tfrac{Q \\cdot K^T}{\\sqrt{d}} \\right)(√dQ⋅KT) 这个 T×TT \\times TT×T 大小的矩阵。通过屏蔽部分数据或进行简单过滤,来进一步加工交给 Softmax 的输入。 实际操作时,可以在 编码器(Encoder) 阶段引入 Mask 层来做 部分参数优化,加速训练。而 解码器(Decoder) 需要用 Mask 来做 零值处理。即,将 (Q⋅KTd)\\left( \\tfrac{Q \\cdot K^T}{\\sqrt{d}} \\right)(√dQ⋅KT) 结果中的部分数据标记为 0 或极小值(如 1e-12 ,避免权重消失),组成不完整数据。 在经过一系列运算后,根据矩阵点乘的特性,最终输出为具有 Output∈RT×dOutput \\in \\mathbb{R}^{T \\times d}Output∈RT×d 的大小的 单次注意力张量(Tensor)。 不过,我们想要的是有多个关注点的高维特征,单个注意力无法满足要求。 这就需要 MHA 了。 多头注意力(MHA [Multi-Head Attention]) 多头注意力(MHA [Multi-Head Attention]) 是对多个单头注意力,即放缩点积注意力(SDPA),处理的加权复合。 千万需要小心的是,正是在 MHA 中,我们引入了真正用于 持久训练 的迭代用权重参数,构成参数矩阵参与模型训练。 图 4-52 
Transformer 的 MHA 单元与 SDPA 单元的关系 如图,蓝色气泡内便是 SDPA 单元。在图例中,由 hhh 个 SDPA 单元,经过链接层(Concat 为简写),和线性归一化(目的是为了保证输入输出等大),构成了最终 MHA 的输出。 所以,从另一个角度来看,链接层函数就相当于 MHA 的评分函数,线性归一化则是输出函数。而 MHA 真正意义上的输入,即每个 SDPA 输入的集合。有: 图 4-53 Transformer 的 MHA 单元 [23] 上方即为 MHA 在 Transformer 中的基本算子表示。红框部分便是 MHA 的评分函数,而蓝框部分则为 MHA 的输出函数。可见,评分函数和输出函数的概念,也是相对于被选择作为参考的单元本身而言的。 我们仍然取一个输入序列(MHA 和 SDPA 都是对同一序列的操作,仅目标输出不同),其序列长度为 TTT 而查询维度为 ddd 。 记当前一个 MHA 总共有 hhh 个 SDPA 单元,每个单元按照顺序,由角标 [i][_i][i] 表示序号。则,对于顺序 iii 的 SDPA 单元输入,有查询 Qi∈RT×dQ_i \\in \\mathbb{R}^{T \\times d}Qi∈RT×d 、 键值 Ki∈RT×dK_i \\in \\mathbb{R}^{T \\times d}Ki∈RT×d 、 取值 Vi∈RT×dV_i \\in \\mathbb{R}^{T \\times d}Vi∈RT×d ,即三者都是 T×dT \\times dT×d 大小的矩阵。并有经过 SDPA 处理后的输出 Outputi∈RT×dOutput_i \\in \\mathbb{R}^{T \\times d}Outputi∈RT×d ,简记为 Oi∈RT×dO_i \\in \\mathbb{R}^{T \\times d}Oi∈RT×d 交付链接。 由于采用了多组 SDPA 组合,我们不再能以固定形式,确定每个 SDPA 输入的重要程度。因此,需要对每个构成 MHA 的 SDPA 算子的输入 [Qi, Ki, Vi][Q_i,\\ K_i,\\ V_i][Qi, Ki, Vi] 进行确权,来通过训练得到实际 MHA 的输入的初始关注点。 介于这一点,我们对每一组顺序 的 SDPA 单元输入进行加权。引入 输入权重(Input Wights),根据加权对象,分为 iii 组查询权重 WiQ∈Rd×TW^Q_i \\in \\mathbb{R}^{d \\times T}WiQ∈Rd×T 、 iii 组键值权重 WiK∈Rd×TW^K_i \\in \\mathbb{R}^{d \\times T}WiK∈Rd×T 、 iii 组取值权重 WiV∈Rd×TW^V_i \\in \\mathbb{R}^{d \\times T}WiV∈Rd×T 。 注意,加权需要用和加权对象维度转置(Transpose)的矩阵。 加权后,顺序 iii 的 SDPA 算子的输入就变为了 [Qi⋅WiQ, Ki⋅WiK, Vi⋅WiV][Q_i \\cdot W^Q_i,\\ K_i \\cdot W^K_i,\\ V_i \\cdot W^V_i][Qi⋅WiQ, Ki⋅WiK, Vi⋅WiV] 。同时,这也是为什么 MHA 中,Q、K、V 需要经过一次线性归一化。即目的是为了保证每一组的输入在样本值上的价值等同。 调整后,MHA 的 SDPA 计算公式 化为: Oi=softmax(QiWiQ⋅(KiWiK)Td)⋅ViWiV=SDPA(QiWiQ, KiWiK, ViWiV) {\\displaystyle \\begin{aligned} O_i &= softmax \\left( \\frac{Q_iW^Q_i \\cdot (K_iW^K_i)^T}{\\sqrt{d}} \\right) \\cdot V_iW^V_i \\\\ &= SDPA(Q_i W^Q_i,\\ K_i W^K_i,\\ V_i W^V_i) \\\\ \\end{aligned} } Oi=softmax(√dQiWiQ⋅(KiWiK)T)⋅ViWiV=SDPA(QiWiQ, KiWiK, ViWiV) 使得 MHA 的评分函数(Score Function)有如下表示: fscore(K, Q, V)=Concat(O1, O2, ⋯ , Oi) {\\displaystyle \\begin{aligned} f_{score}(K,\\ Q, \\ V) = Concat \\left( O_1,\\ O_2,\\ \\cdots \\ ,\\ O_i \\right) \\\\ \\end{aligned} } fscore(K, Q, V)=Concat(O1, O2, ⋯ , Oi) 其中,连接函数(Concat [Connection Function])是简单全链接。即,将每一个 SDPA 的输出 OiO_iOi 顺序拼接,构成 (FC=∑Oi)∈RT×dh(FC =\\sum O_i )\\in \\mathbb{R}^{T \\times dh}(FC=∑Oi)∈RT×dh 的输出。 而输出时采用的输出函数(Output Function),存在迭代的 目的权重(Target Wight) 矩阵 WO∈Rhd×TW^O \\in \\mathbb{R}^{hd \\times T}WO∈Rhd×T ,以权重代表注意力积分并参与训练(即动态的积分)。有: fout(fscore, WO)=linear(fscore⋅WO)Output=linear(Concat(O1, O2, ⋯ , Oi)⋅WO) {\\displaystyle \\begin{aligned} f_{out} &(f_{score},\\ W^O) = linear(f_{score} \\cdot W^O) \\\\ Output &= linear(Concat \\left( O_1,\\ O_2,\\ \\cdots \\ ,\\ O_i \\right) \\cdot W^O) \\\\ \\end{aligned} } foutOutput(fscore, WO)=linear(fscore⋅WO)=linear(Concat(O1, O2, ⋯ , Oi)⋅WO) 其中,线性归一化算子(Linear) 其实同 MHA 的 SDPA 输入线性归一化一样,目的在于归一化 MHA 的输出以取得我们想要的多关注点高维特征,并同时让输出保持与输入相同的维度大小。即,通过 linear(fscore⋅WO)linear(f_{score} \\cdot W^O)linear(fscore⋅WO) ,让原本 (fscore⋅WO)∈RT×dh(f_{score} \\cdot W^O) \\in \\mathbb{R}^{T \\times dh}(fscore⋅WO)∈RT×dh 大小的数据,通过以 T×dT \\times dT×d 大小分块,分为 hhh 块叠加求均值,来使最终输出的 Output∈RT×dOutput \\in \\mathbb{R}^{T \\times d}Output∈RT×d 大小。 所以,MHA 的完整处理公式为: fout(fscore, WO)=linear(fscore⋅WO)linear(fscore⋅WO)=∑h(fscore⋅WO)i∑(fscore⋅WO)iOutput=linear(Concat(O1, O2, ⋯ , Oi)⋅WO) {\\displaystyle \\begin{aligned} f_{out} &(f_{score},\\ W^O) = linear(f_{score} \\cdot W^O) \\\\ linear &(f_{score} \\cdot W^O) = \\sum^h \\frac{(f_{score} \\cdot W^O)_i}{\\sum (f_{score} \\cdot W^O)_i} \\\\ Output &= linear(Concat \\left( O_1,\\ O_2,\\ \\cdots \\ ,\\ O_i \\right) \\cdot W^O) \\\\ \\end{aligned} } 
foutlinearOutput(fscore, WO)=linear(fscore⋅WO)(fscore⋅WO)=∑h∑(fscore⋅WO)i(fscore⋅WO)i=linear(Concat(O1, O2, ⋯ , Oi)⋅WO) 至此,特征提取完毕。 由 MHA 的输出 Output∈RT×dOutput \\in \\mathbb{R}^{T \\times d}Output∈RT×d 和权重矩阵 [WO, ∑[WiQ, WiK, WiV]][W^O,\\ \\sum [W^Q_i,\\ W^K_i,\\ W^V_i] ][WO, ∑[WiQ, WiK, WiV]] ,参与到 Transformer 训练的内部过程。 Transformer 的辅助处理单元 在正式开始 Transformer 的网络结构讲解前。我们还需要了解一下,自注意力网络(Transformer)中的 其它辅助机制。 在经典结构中,Transformer 除了使用自注意力来完成特征提取外,还使用了由 ResNet 提出在当时已经相对成熟的 残差连接(Residual Connection) 技术,并使用简单 前馈控制(Feed Forward) 来修正 MHA 特征,提供非线性和引入深层次的 隐藏权重(Hidden Wight) 参与训练。 图 4-54 Transformer 辅助机制作用位置 图中红框的部分,即为这两个机制起作用的位置。一般,在 Transformer 中,将其分别称为 前馈控制单元(FFU [Feed Forward Unit]) 和 加和标准化单元(ANU [Add & Norm Unit])。 记两者的输入为 XXX ,输出为 X^\\hat{X}X^ 。 大部分情况下前馈控制单元的输入 XXX 都为 MHA 的输出,即 X=MHAOutput∈RT×dX = MHA_{Output} \\in \\mathbb{R}^{T \\times d}X=MHAOutput∈RT×d 但也有例外。加和标准化单元则需要两个输入。不过,在这两个单元的处理中,我们为了保证输入前后特征张量(Tensor)的一致性,要求不论 FFU 还是 ANU,都必须实现输入输出大小相等。 所以,在整个 Transformer 中,FFU 和 ANU 都有 X,X^∈RT×dX,\\hat{X} \\in \\mathbb{R}^{T \\times d}X,X^∈RT×d 。 而两者的 驱动公式(Core Formula),则为: FFU:{Input: XOutput: X^=ReLU(X⋅W1+B1)⋅W2+B2ANU:{Input: X1, X2Output: X^=Norm(X1+X2) {\\displaystyle \\begin{aligned} FFU: &\\begin{cases} Input &: \\ X \\\\ Output &: \\ \\hat{X} = ReLU(X \\cdot W_1 + B_1) \\cdot W_2 + B_2 \\end{cases} \\\\ ANU: &\\begin{cases} Input &: \\ X_1,\\ X_2 \\\\ Output &: \\ \\hat{X} = Norm(X_1 + X_2) \\end{cases} \\\\ \\end{aligned} } FFU:ANU:{InputOutput: X: X^=ReLU(X⋅W1+B1)⋅W2+B2{InputOutput: X1, X2: X^=Norm(X1+X2) 每一个 FFU 都能为我们引入一套权重 W=[W1T×d, W2T×d]W = [{W_1}^{T \\times d},\\ {W_2}^{T \\times d}]W=[W1T×d, W2T×d] 和偏移 B=[B1T×d, B2T×d]B= [{B_1}^{T \\times d},\\ {B_2}^{T \\times d}]B=[B1T×d, B2T×d] 参与训练。而 ANU 则负责通过 归一化(Normalization) 将样本数据映射到一定范围区间内,保证前级输出的统一尺度衡量,加速模型收敛。 所有原件准备就绪,Transformer 网络结构就非常容易理解了。 Transformer 网络结构 在本节开始时提到,自注意力网络(Transformer)从结构角度分为编码器(Encoder)和 解码器(Decoder)。两者在整体上分别对同一个序列(Sequence)进行不同处理。 图 4-55 Transformer 编解码器示意图 如图,蓝框内部分即编码器(Encoder)的构成,红框内部分则是解码器(Decoder)。 编码器(Encoder) 接收正常顺序的序列,如:“I am eating an apple” 经过 位子编码(Positional Encoding),再以特征工程提炼出的 [Q, K, V][Q,\\ K,\\ V][Q, K, V] 。 之后,交由 MHA 提取高级特征,并将提取的高级特征经过一次 ANU 归一化。最终,归一化的高级特征通过 FFU 加隐藏的核心权重和偏移,再次经由一次 ANU 归一化,完成当前时代的编码部分处理。记编码器的输出为 OencO_{enc}Oenc ,显然 OencO_{enc}Oenc 有 T×dT \\times dT×d 大小。 解码器(Decoder) 接收被标记过的序列,如:“I am eating an apple” 经过标记(Shifted Right)变为 “\\ I am eating an apple” ,再由特征工程提炼出的 [Q, K, V][Q,\\ K,\\ V][Q, K, V] 输入。 标记(Shifted Right) 的作用是为了区分每一个序列的起点,例子里我们采用的是 “\\” ,当然也可以用其他标志。 之后,交由 加遮罩(Mask)的 MHA 提取高级特征,并 ANU 归一化。这里的 遮罩,就是前文中提到的 SDPA 的可选 Mask 操作,即解码器对 (Q⋅KTd)\\left( \\tfrac{Q \\cdot K^T}{\\sqrt{d}} \\right)(√dQ⋅KT) 的零值处理。简单的 Mask 有: Mask=[0 ,1 ,1 ,⋯, 10 ,0 ,1 ,⋯, 10 ,0 ,0 ,⋯, 1⋮,⋮ ,⋮ ,⋯, ⋮0 ,0 ,0 ,⋯, 0]T×d {\\displaystyle \\begin{aligned} &Mask = \\begin{bmatrix} & 0 \\ , & 1 \\ , & 1 \\ , \\cdots,\\ & 1 \\\\ & 0 \\ , & 0 \\ , & 1 \\ , \\cdots,\\ & 1 \\\\ & 0 \\ , & 0 \\ , & 0 \\ , \\cdots,\\ & 1 \\\\ & \\vdots, & \\vdots \\ , & \\vdots \\ , \\cdots,\\ & \\vdots \\\\ & 0 \\ , & 0 \\ , & 0 \\ , \\cdots,\\ & 0 \\\\ \\end{bmatrix}_{T \\times d} \\\\ \\end{aligned} } Mask=⎣⎢⎢⎢⎢⎢⎢⎡0 ,0 ,0 ,⋮,0 ,1 ,0 ,0 ,⋮ ,0 ,1 ,⋯, 1 ,⋯, 0 ,⋯, ⋮ ,⋯, 0 ,⋯, 111⋮0⎦⎥⎥⎥⎥⎥⎥⎤T×d 即 mask(Q⋅KTd)mask \\left( \\tfrac{Q \\cdot K^T}{\\sqrt{d}} \\right)mask(√dQ⋅KT) 只保留右上角数据,完成解码器对输入的第一次注意力操作。 接下来,解码器会接受编码器的同序列输出 OencO_{enc}Oenc ,作为一组键值 [K=Oenc, V=Oenc][K = O_{enc},\\ V = O_{enc}][K=Oenc, V=Oenc] 组合,并用前一级 MHA 的 ANU 归一化结果作为查询 QQQ ,合并为一组 [Q, K=Oenc, V=Oenc][Q,\\ K = O_{enc},\\ V = O_{enc}][Q, K=Oenc, V=Oenc] 作为第二个 MHA 的输入。 第二个 MHA 进行常规的 无 Mask 
注意力过程。将第二个 MHA 的输出交由 FFU 加隐藏的核心权重和偏移。在 ANU 归一化后,作为解码器的最终输出。 记解码器的输出为 OdecO_{dec}Odec ,同样有 T×dT \\times dT×d 大小。 或许有心的读者已经注意到,在图例中,编解码器的核心流水线旁边都有一个数量标记 NNN 。这意味着每个编解码都是由 NNN 个这样的流水线构成的。目的是为了将 长序列(Long Sequence),拆分为顺序的单个 单词(Word),即 短序列(Short Sequence),顺序的输入处理。我们将编解码各自的一条完整流水线,称为 编码层(Encoding Layer) 和 解码层(Decoding Layer)。 那么,以解码器输入 “\\ I am eating an apple” 为例。经过分割后,就变成了: 0 - \"\" 1 - \"I\" 2 - \"am\" 3 - \"eating\" 4 - \"an\" 5 - \"apple\" 总共 6 个短句。分别交由 6 个解码层处理。最终的输出也按照解码层的顺序,进行顺序拼接。相当于每一个解码层的 T=1T=1T=1 。而拼接后的结果仍然是 T×dT \\times dT×d 。 这样既保证了模型本身的一定范围实时感知,也解放了模型本身的训练处理机能。在 2017 经典 Transformer 中,建议取 N=6N=6N=6 ,平衡效率。 Transformer 的输出 & 训练迭代 其实,经过之上的一系列工作,最终编码器的输出 OdecO_{dec}Odec ,还需要经过一次 线性归一化(Linear Normalization),再通过 SoftMax 输出概率预测结果 PPP 。预测 PPP 的大小为 T×1T \\times 1T×1 是一组概率数组。 这个输出,才是最终参与模型迭代,用于损失函数的结果。 那么,Transformer 采用的损失函数是什么呢? 即然最终操作的对象是概率值,那么不难想到本质仍然属于分类(Classification)。 因此,Transformer 通常采用 交叉熵损失(Cross Entropy Loss)。即我们在损失函数一节中,提到过的: Loss=1N∑i=1N[∑j=1k−yj⋅log(predictionj)]i {\\displaystyle \\begin{aligned} Loss = \\frac{1}{N} \\sum_{i=1}^N [\\sum_{j=1}^k -y_j \\cdot log(prediction_j)]_i \\\\ \\end{aligned} } Loss=N1i=1∑N[j=1∑k−yj⋅log(predictionj)]i 同理,也可以考虑改用其他的分类项损失。 随后的过程就是深度学习网络(DNN)的通用过程了,用优化算法加速权重迭代。并持续训练,直到模型达成收敛指标。 而部署后,预测结果 PPP 所关联的词汇,就是最终输出。 Transformer 的常见场景 自注意力网络(Transformer)在诞生之后,大部分都被运用在 NLP 由其是 LLM 领域。 目前上,工业界对 Transformer 的运用已经涵盖了: 自然语言处理(NLP),如:文本分析(智能输入法)、机器翻译、语音助手等; 音视频生成,如:音乐生成、视频生成、合成编辑、自动裁剪等; 而配合其他网络结构,如 CNN 的原样本特征提取能力,Transformer 在图形处理上也被大量运用,涵盖了: 图像分类,如:手势识别、动作识别、人脸识别等; 图像分割,如:背景分离、智能抠图、轮廓融合等; 语义分割,如:物体分类、车辆检测等; 语音识别,如:文本转译、同声传录、情感分析等; 时序预测,如:股票分析、气象预测、医疗诊断等; 可以说,Transformer 几乎体现在各种方面。 至此,随着经典模型结构 自注意力网络(Transformer)介绍完毕,基本理论知识也完成了初步的梳理。 从下一章开始,我们将正式步入音视频处理的实践工程领域。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_4/Language/cn/References_4.html":{"url":"Chapter_4/Language/cn/References_4.html","title":"【参考文献】","keywords":"","body":"四、【参考文献】 [1] Hinton, G. E.; Salakhutdinov, R. R. (2006). \"Reducing the Dimensionality of Data with Neural Networks.Science\". 313 (5786): 504–507. [2] Larochelle, H.; Bengio, Y. (2008). \"Classification using discriminative restricted Boltzmann machines\". Proceedings of the 25th international conference on Machine learning - ICML '08. p. 536. [3] Coates, A.; Lee, H.; Ng, A. Y. (2011). \"An analysis of single-layer networks in unsupervised feature learning\". International Conference on Artificial Intelligence and Statistics (AISTATS). [4] Yuxi Li. (2018). \"DEEP REINFORCEMENT LEARNING”. arXiv. [5] Krizhevsky, Alex , I. Sutskever , and G. Hinton. (2012). \"ImageNet Classification with Deep Convolutional Neural Networks.\" NIPS Curran Associates Inc. [6] Y. LeCun, “LeNet-5, convolutional neural networks”. History summary page. [7] Eugenio Culurciello. (2016). \"Navigating the unsupervised learning landscape\". [8] Klambauer G, Unterthiner T, Mayr A, et al. Self-normalizing neural networks[J]. Advances in neural information processing systems, 2017, 30. [9] Misra D. Mish: A self regularized non-monotonic activation function[J]. arXiv preprint arXiv:1908.08681, 2019. [10] Ramachandran P, Zoph B, Le Q V. Searching for activation functions[J]. arXiv preprint arXiv:1710.05941, 2017. [11] Howard A, Sandler M, Chu G, et al. Searching for mobilenetv3[C]//Proceedings of the IEEE/CVF international conference on computer vision. 2019: 1314-1324. [12] Srivastava N, Hinton G, Krizhevsky A, et al. 
Dropout: a simple way to prevent neural networks from overfitting[J]. The journal of machine learning research, 2014, 15(1): 1929-1958. [13] Goodfellow I, Warde-Farley D, Mirza M, et al. Maxout networks[C]//International conference on machine learning. PMLR, 2013: 1319-1327. [14] Hadsell R, Chopra S, LeCun Y. Dimensionality reduction by learning an invariant mapping[C]//2006 IEEE computer society conference on computer vision and pattern recognition (CVPR'06). IEEE, 2006, 2: 1735-1742. [15] Schroff F, Kalenichenko D, Philbin J. Facenet: A unified embedding for face recognition and clustering[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 815-823. [16] Sohn K. Improved deep metric learning with multi-class n-pair loss objective[J]. Advances in neural information processing systems, 2016, 29. [17] Hinton G, Srivastava N, Swersky K. Neural networks for machine learning lecture 6a overview of mini-batch gradient descent[J]. Cited on, 2012, 14(8): 2. [18] Teo Y S, Shin S, Jeong H, et al. Benchmarking quantum tomography completeness and fidelity with machine learning[J]. New Journal of Physics, 2021, 23(10): 103021. [19] Gao M, Wang Q, Lin Z, et al. Tuning Pre-trained Model via Moment Probing[C]//Proceedings of the IEEE/CVF International Conference on Computer Vision. 2023: 11803-11813. [20] Hochreiter, Sepp & Schmidhuber, Jürgen. (1997). Long Short-term Memory. Neural computation. 9. 1735-80. 10.1162/neco.1997.9.8.1735. [21] Lee, Daeil & Koo, Seoryong & Jang, Inseok & Kim, Jonghyun. (2022). Comparison of Deep Reinforcement Learning and PID Controllers for Automatic Cold Shutdown Operation. Energies. 15. 2834. 10.3390/en15082834. [22] Bahdanau D, Cho K, Bengio Y. Neural machine translation by jointly learning to align and translate[J]. arXiv preprint arXiv:1409.0473, 2014. [23] Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need[J]. Advances in neural information processing systems, 2017, 30. 
Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:10 "},"Chapter_5/Language/cn/Apex_5_Introduce.html":{"url":"Chapter_5/Language/cn/Apex_5_Introduce.html","title":"五、音视频帧分析与数据处理","keywords":"","body":"五、音视频帧分析与实践 引言 历经四个章节,我们详细探讨了音频与色彩的相关知识,以及常用算法和机器学习在音视频中的工程方向和理论原型。通过整理并学习这些内容,我们已经对音视频处理的基本概念和技术工具有了初步的了解。而音视频处理的核心任务之一,便是对音视频帧的分析与处理。 音视频帧工程(Audio & Visual/Video Frame Engineering)是音视频工程中的关键环节。音频帧和视频帧分别代表了音频信号和视频信号在时间轴上的离散片段。对这些帧的分析与处理,不仅是实现音视频同步、特效添加、压缩编码等高级功能的基础,也是提升音视频质量和用户体验的关键。 本章节将主要整理说明音视频帧的基本概念、分析方法和简单处理技术。通过对音视频帧的深入理解和操作,我们可以更好地掌握音视频处理的核心技术,为后续的复杂应用与试验打下坚实的基础。 通过本章节的学习,读者将能够掌握音视频帧的基本分析方法和简单处理技术,为进一步深入研究和开发音视频应用提供必要的知识储备。真正进入音视频工程领域的大门。 关键字:音频帧、视频帧、帧分析、简单帧处理、工程实践 目录 5.1 音视频帧与环境准备 5.1.1 常用数学库(Numpy、Pandas、Mateplotlib) 5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio) 5.1.3 视频分析库(PyOpenCV、Color-Science) 5.1.4 其他分析软件 【参考文献】 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-14 11:29:06 "},"Chapter_5/Language/cn/Docs_5_1.html":{"url":"Chapter_5/Language/cn/Docs_5_1.html","title":"5.1 音视频帧与环境准备","keywords":"","body":"5.1 音视频帧 与 环境准备 什么是音视频帧呢?首先需要了解,什么是帧。 帧(Frame) 是 对某一时刻(视频) 或 某小段时间(音频)内 的数据代称。而 音视频帧,则是对音频帧 和 视频帧 的统称。通常而言,我们将 一段音频中一个有效数据块(如:WAV 的音频数据子块、FLAC 的音频数据块、MP3 的帧数据)称为 音频帧(Audio Frame),而将 一张图片所代表的某时刻静态图像信息(如:时刻 t 完整图像解压缩后的 YUV 数据)称为 视频帧(Visual/Video Frame)。 当然,音视频的离散采样,决定了其本身皆为 离散数据的时序排列数据集。但从相对角度来看,如果称视频帧为离散的,那么音频帧在这样的尺度下,就是连续的。因此,对音频的分析更多是从 音频整体角度,或 范围内的局部情况 分析。很少单一的局限于某个时间点。而对视频的分析则分为,是对 包含视频整体时空情况 的 动态分析,还是只对 某一固定时刻 的 静态分析。对比之下稍显隔离。不过从某种意义上讲,这也是因为视频数据本身所包含的维度更高,而更容易的被拆解以获取更多信息所致。 所以,在不考虑网络的情况下,我们通常将 视频分析(Video Analysis) 的两种类型,独立称为 视频流分析(Video Stream Analysis) 和 视频帧分析(Video Frame Analysis)。而 音频分析(Audio Analysis) 则不再细分。即 音频流分析(Audio Stream Analysis)同 音频分析(Audio Analysis)技术性一致。 需要注意的是,当引入网络条件时,音视频流分析在网络流传输的语义前提下另有所指。同时,在 流协议背景时,也同样是指协议层面的特征,切勿将三者混淆。 本章节我们讨论的音视频分析,特指对音视频的直观特征分析,即对其基础信息的分析。以此为目标,进行一些简单工程。 常用库准备 在开始搭建分析环境之前,还需要对常用的工具库进行简单的介绍。由于分析所采用的工程手段,多为以 Python 为脚本语言编写的简单处理流,因此,我们需要使用到的基本库,皆为 Python 工具库。 于是为方便后续索引、使用、总结,从库功能性上做简单归类,可以分为:常用数学库、视频分析库 和 音频分析库。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:11 "},"Chapter_5/Language/cn/Docs_5_1_1.html":{"url":"Chapter_5/Language/cn/Docs_5_1_1.html","title":"5.1.1 常用数学库(Numpy、Pandas、Mateplotlib)","keywords":"","body":"5.1.1 常用数学库(NumPy、Pandas、Mateplotlib) 工程里对 数据分析和科学计算 的过程中,常用数学库是不可或缺的工具。这些库不仅提供了高效的数据处理能力,还为我们提供了 丰富的数学函数 和 可视化工具。其中,最为重要的库有三个,即 NumPy、Pandas、Mateplotlib,分别对应 [ 基础计算、数理统计、图表绘制 ] 的需求。 NumPy(Numerical Python) NumPy(Numerical Python) 是 用于科学计算的基础库,提供了针对 N 维数组/张量 及其 衍生类型 生命周期方法的结构化封装,和用于 协助处理这些数组/张量的丰富函数库 [1] 。这使得我们可以通过其进行快速的矩阵运算和其他数学操作,而不必再单独定义实现某些通用的算法。例如前文提到的傅立叶变换,其变体,或其逆变换(FFT、DFT、IDFT etc.)。除此之外,NumPy 还包含了线性代数、统计函数、随机数生成等功能模块,是数据分析和机器学习的基础工具之一。 主要功能: 提供基础数据结构(ndarray)和数据类型(dtype),作为 N 维数组/张量 数据载体 完善的基础数学函数库,包括 基础统计、线性代数、傅立叶变换、随机数生成 广泛的扩展数学函数库,包括 金融函数、伽马函数等 于特殊函数库中(numpy.special) 相对完善的内存管理和索引体系,并支持内存映射能力,即可处理超出内存大小数据集 提供完整的数据交互体系,在数据结构化、字符串操作、I/O 操作上与其他库有 较高兼容 基础库(np.)的常用函数(简,仅列出名称): 算术运算: add, subtract, multiply, divide, power, mod, remainder 比较运算: greater, greater_equal, less, less_equal, equal, not_equal 逻辑运算: logical_and, logical_or, logical_not, logical_xor 基本统计: mean, median, std, var, min, max, sum, cumsum, prod, cumprod 排序搜索: sort, argsort, argmax, argmin, searchsorted 三角函数: sin, cos, tan, arcsin, arccos, arctan, arctan2 双曲函数: sinh, cosh, tanh, arcsinh, arccosh, arctanh 指数对数: exp, expm1, log, 
log10, log2, log1p 矩阵运算: dot, vdot, inner, outer, matmul 直方图: histogram, histogram2d, histogramdd 多项式(需依托 np.poly1d 多项式类): poly, polyval, polyfit, roots, polyder, polyint 线性代数扩展(np.linalg.)的常用函数(简,仅列出名称): 矩阵分解: cholesky, qr, svd 求逆和解线性方程组: inv, pinv, solve 特征值和特征向量: eig, eigh, eigvals, eigvalsh 矩阵范数(L1/L2/inf): norm 矩阵行列式和秩: det, matrix_rank 傅立叶变换扩展(np.fft.)的常用函数(简,仅列出名称): 一维傅里叶变换: fft, ifft 二维傅里叶变换: fft2, ifft2 多维傅里叶变换: fftn, ifftn 一维快速傅立叶法: rfft, irfft 一维亥姆霍兹变换: hfft, ihfft 随机数生成扩展(np.random.)的常用函数(简,仅列出名称): 简单随机: rand, randn, randint, choice 概率分布: normal, uniform, binomial, poisson, exponential, beta, gamma, chisquare 乱序函数: shuffle, permutation 随机种子: seed 其他如 特殊函数扩展(np.special.) 等,在具体使用时,可自行前往 官网档案馆 查阅。 Pandas(Python Data Analysis Library) Pandas(Python Data Analysis Library) 是 用于数据操作和分析的强大工具库,提供了针对 数理统计服务 的 高效格式类型和相关统计分析工具,在处理 结构化数据 方面具有巨大优势 [2] 。尤其是对于 表格类数据 的处理。我们可以通过其 DataFrame 和 Series 这两个核心类型,轻松的获取 经数组化后能提供给 NumPy 处理的数据集。进而允许我们更方便地进行数据的清洗、修改和分析操作。此外,对于科学统计类的时间序列数据,Pandas 亦能完美解析到需要使用的格式。是辅助我们进行统计工作和数据预处理的利器。 主要功能: 高效的 数据结构(即,DataFrame 、Series 和 两者关联方法) 丰富的 时序结构(即,DatetimeIndex, Timedelta, Period 时刻/时间/时差) 丰富的 数据清洗、数据转换、数据标准化 能力 支持 多种格式 I/O 操作,如 CSV、Excel、SQL、JSON 等 通用格式类型 提供诸如时间序列数据的索引、切片、重采样、滚动窗口等,时间序列数据处理能力 提供对 缺失值、异常值、重复数据 等问题数据的,检测、填充、转换、过滤能力 基础库(pd.)的常用函数(简,仅列出名称): 数据结构: , , 时序结构: , , 数据创建: read_csv, read_excel, read_sql, read_json, read_html, read_clipboard, read_parquet, read_feather, read_orc, read_sas, read_spss, read_stata, read_hdf, read_pickle 数据导出: to_csv, to_excel, to_sql, to_json, to_html, to_clipboard, to_parquet, to_feather, to_orc, to_sas, to_spss, to_stata, to_hdf, to_pickle 数据变换: assign, drop, rename, pivot, pivot_table, melt, stack, unstack, get_dummies 数据聚合: groupby, agg, aggregate, transform, apply, rolling, expanding, resample 数据清洗: isnull, notnull, dropna, fillna, replace, interpolate, duplicated, drop_duplicates 数据合并: merge, concat, join, append 选择过滤: loc, iloc, at, iat, ix 基本统计: mean, median, std, var, min, max, sum, cumsum, prod, cumprod, describe 数据结构扩展(pd.Series, pd.DataFrame)的辅助方法(简,仅列出名称): 方法: append, drop, drop_duplicates, dropna, fillna, replace, interpolate, isnull, notnull, unique, value_counts, apply, map, astype, copy, shift, diff, pct_change, rank, sort_values, sort_index 方法: append, drop, drop_duplicates, dropna, fillna, replace, interpolate, isnull, notnull, pivot, pivot_table, melt, stack, unstack, get_dummies, merge, concat, join, groupby, agg, aggregate, transform, apply, rolling, expanding, resample, sort_values, sort_index, rank, describe, corr, cov, hist, boxplot, plot 时间序列扩展(pd.DatetimeIndex, pd.Timedelta, pd.Period)的辅助方法(简): 方法: to_pydatetime, to_period, to_series, to_frame, normalize, strftime, snap, shift, tz_convert, tz_localize, floor, ceil, round 方法: total_seconds, to_pytimedelta, components, is_leap_year 方法: asfreq, start_time, end_time, to_timestamp, strftime 这些方法和结构类型,涵盖了数据创建、选择、过滤、变换、聚合、清洗、合并、时间序列处理以及数据输入输出等多个方面,进而使得 Pandas 成为了数据科学和数据分析领域的基础工具,亦被广泛应用于数据清洗、数据变换、数据分析、数据可视化等任务。 不过,在 可视化方面,我们一般不会使用 Pandas 自身的绘制模块所提供的绘图功能,而是采用更为专业的 Matplotlib 库协助获取结果。实际上 Pandas 自身的绘制模块(pd.plotting.)在过程方面,也是采用的 Matplotlib 做为绘制执行器。调用绘图模块,仅仅是调用了封装好的绘制流而已,而这并不是 Pandas 所擅长的部分。 其他如 日期类型扩展(pd.DateOffset) 等,在具体使用时,可自行前往 官网档案馆 查阅。 Matplotlib Matplotlib(Mathematics Python Plotting Library)是基于 Python 语言开发,专用于数据图形化的高级图表绘制库。在数据科学、工程、金融、统计等领域有着广泛的应用 [3] 。通过库所包含的各种核心及辅助模块,我们能够轻松的 将经由 NumPy 和 Pandas 处理后的数据,以静态、动态 或 交互式图的方式展示出来。它提供了 丰富的绘图功能,可以被用于生成各种类型的图表,如折线图、柱状图、散点图、直方图等。而灵活的 API 设计,则允许我们在自定义图表的各个方面,进行相对自由的定制。因此,其成为了工程中 首选的数据可视化工具,帮助我们更为 直观地展示数据分析 的结果。 主要功能: 
支持包括 折线图、柱状图、热力图、3D 复合等,丰富的绘图类型 高可定制化 的展示细节,包括 图例、命名、注释、线条、样式等几乎所有图表元素 高可交互性 的图表操作,且与 大部分不同平台的 GUI 库(如 Qt、wxWidgets)兼容 多种输出格式支持,如 PNG、PDF、SVG 等 与主流科学计算库(如 NumPy、Pandas、SciPy 等)的 无缝集成 基础库(matplotlib.pyplot. as plt.)的常用函数(简,仅列出名称): 图形容器: , , 样式类型: 略(如 等,有关样式有较多扩展库,详见官方文档) 创建图形和子图: figure, subplot, subplots, add_subplot, subplots_adjust 图形导入: imread, imshow 绘图函数: plot, scatter, bar, barh, hist, pie, boxplot, errorbar, fill, fill_between, stackplot, stem, step 图形属性: title, xlabel, ylabel, xlim, ylim, xticks, yticks, grid, legend, text, annotate 图形样式: style.use, set_cmap, get_cmap, colormaps 线条样式: set_linestyle, set_linewidth, set_color, set_marker, set_markersize 文本样式: set_fontsize, set_fontweight, set_fontstyle, set_fontname 布局样式: tight_layout, subplots_adjust, get_current_fig_manager 交互工具: ginput, waitforbuttonpress, connect, disconnect 事件处理: mpl_connect, mpl_disconnect 图形保存: savefig 颜色映射(matplotlib.cm. as cm.)的常用函数(简,仅列出名称): 映射对象(颜色映射结构): 映射注册与获取: get_cmap, register_cmap 常用映射: viridis, plasma, inferno, magma 图形容器(plt.Figure, plt.Axes)的常用函数(简,仅列出名称): 方法: add_subplot, add_axes, subplots, subplots_adjust, savefig, clf, gca, tight_layout, subplots_adjust, get_current_fig_manager 方法: plot, scatter, bar, barh, hist, pie, boxplot, errorbar, fill, fill_between, stackplot, stem, step, set_title, set_xlabel, set_ylabel, set_xlim, set_ylim, set_xticks, set_yticks, grid, legend, text, annotate, cla, twinx, twiny, set_aspect, set_facecolor 3D 绘图(mpl_toolkits.mplot3d.)的常用函数(简,仅列出名称): 3D 图形容器: 3D 图形属性: set_xlabel, set_ylabel, set_zlabel, set_xlim, set_ylim, set_zlim, view_init 常用通用方法: text, annotate, grid, legend, set_aspect, set_facecolor 其他如 描绘效果扩展(matplotlib.patheffects) 等,在具体使用时,可自行前往 官网档案馆 查阅。 三个关键基础库介绍完毕,那么现在,让我们用它们做些简单的数据练习。 简单练习:用 常用数学库 完成 加州房地产信息统计 为了更贴近数据处理中所面临的真实情况,我们这里使用 Google 开源的 加利福尼亚州模拟房地产统计信息,作为数据源。 练习事例按照标准工程工作流进行。 第一步,确立已知信息: 数据来源:房地产统计 CSV 格式(.csv)表 [本地文件] 处理环境:依赖 ,Python 脚本执行 工程目标: 1) 根据数据获取 归一化后的房价,并以经纬度为横纵坐标,颜色表示处理结果 2) 根据数据获取 人均占有房间数,并以经纬度为横纵坐标,颜色表示处理结果 第二步,准备执行环境: 检测是否已经安装了 Python 和 pip(对应 Python 版本 2.x) 或 pip3(对应 Python 版本 3.x) 包管理器: python --version pip --version 若 Python 和 pip 不存在,则需要去 Python 官网(https://www.python.org/downloads/) 下载对应当前主机平台的安装文件。而 pip 的安装(如果未随安装包安装的话),需要先准备安装脚本。 # Windows curl -o %TEMP%\\get-pip.py https://bootstrap.pypa.io/get-pip.py # MacOS & Linux curl -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py 之后,执行如下命令安装: # Windows python -m ensurepip --upgrade python %TEMP%\\get-pip.py # MacOS & Linux python -m ensurepip --upgrade python /tmp/get-pip.py 但这样的分平台执行方式,不够简单。所以,我们考虑将 整个 pip 安装过程封装成一个面向全平台的 Python 脚本,如果需要安装时,直接运行该脚本即可。而脚本需要做的事,是检测 pip 未安装的情况下,执行对应当前 Python 版本的 pip 安装过程。有: import os import subprocess import sys import tempfile import urllib.request def is_pip_installed(): try: subprocess.run([sys.executable, \"-m\", \"pip\", \"--version\"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return True except subprocess.CalledProcessError: return False def download_get_pip(temp_dir): url = \"https://bootstrap.pypa.io/get-pip.py\" file_path = os.path.join(temp_dir, \"get-pip.py\") print(f\"Downloading {url} to {file_path}...\") urllib.request.urlretrieve(url, file_path) return file_path def run_get_pip(file_path): print(f\"Running {file_path}...\") subprocess.run([sys.executable, file_path], check=True) def main(): if is_pip_installed(): print(\"pip is already installed.\") else: # Create a temporary directory with tempfile.TemporaryDirectory() as temp_dir: # Download get-pip.py file_path = download_get_pip(temp_dir) # Run get-pip.py 
run_get_pip(file_path) if __name__ == \"__main__\": main() 将上方的脚本保存为 install_pip.py 文件。我们只需要 将该脚本拷贝到相应平台,并执行脚本 即可: python install_pip.py 同理,对于案例中需要使用到的 NumPy、Pandas、Matplotlib 三库。我们也采用自动化脚本进行检测和安装。创建脚本 install_math_libs.py 如下: import subprocess import sys def is_package_installed(package_name): try: subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return True except subprocess.CalledProcessError: return False def install_package(package_name): print(f\"Installing {package_name}...\") subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", package_name], check=True) subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True) def main(): packages = [\"numpy\", \"pandas\", \"matplotlib\"] for package in packages: if is_package_installed(package): print(f\"{package} is already installed.\") else: install_package(package) print(f\"{package} has been installed.\") if __name__ == \"__main__\": main() 随后,使用 Python 执行脚本: python install_math_libs.py 如果包已安装,则会输出 \"[基础数学库] is already installed.\"。如果包未安装,则会安装该包并输出 \"[基础数学库] has been installed.\",并显示包的详细信息。 到此,完成基础库的环境准备工作。 第三步,数据预处理: 现在,我们正式进入事例的工作流。 随后的步骤,我们建立 practice_1_mathetics_libs_using.py 脚本后,在其中处理。 首先,在新建脚本的头部添加: import math import numpy as np import pandas as pd import matplotlib.pyplot as plt import matplotlib.cm as cm import matplotlib.gridspec as gridspec from mpl_toolkits.mplot3d import Axes3D 导入工程使用的核心库。 根据 ,我们需要的目标可视化数据,来自于对 CSV 表中数据做简单处理所得。因此,首先应将表中有效数据提取出来,有: california_housing_dataframe = pd.read_csv( \"https://download.mlcc.google.cn/mledu-datasets/california_housing_train.csv\", sep=\",\") california_housing_dataframe = california_housing_dataframe.reindex( np.random.permutation(california_housing_dataframe.index) ) 其中,california_housing_dataframe.reindex 的目的是打乱 样本数据 的行顺序。用以确保数据在后续处理和分析过程中是 随机的,有助于避免因数据顺序带来的偏差。 我们的两个目标关键数据,分别为 “归一化后的房价” 和 “人均占有房间数”,而这两个量并不在原表中。需根据 california_housing_dataframe 已有数据,通过 计算获取 这两个值。而为了区别用处起见(例如,后续我们需要用 “人均占有房间数” 作为回归特征,来建立其与 “归一化后的房价” 的线性回归模型),我们定义两个方法,分别用于 生成补充 “人均占有房间数” 的新特征表,和 只有遴选特征计算得到 “归一化后的房价” 的靶向特征: def preprocess_features(data): \"\"\" Preprocess the input features from the data. Args: data (pd.DataFrame): The input data containing various features. Returns: pd.DataFrame: A DataFrame containing the selected and processed features. \"\"\" selected_features = data[ [\"latitude\", \"longitude\", \"housing_median_age\", \"total_rooms\", \"total_bedrooms\", \"population\", \"households\", \"median_income\"] ] processed_features = selected_features.copy() processed_features[\"rooms_per_person\"] = ( data[\"total_rooms\"] / data[\"population\"] ) return processed_features def preprocess_targets(data, need_normalize): \"\"\" Preprocess the target values from the data. Args: data (pd.DataFrame): The input data containing the target values. need_normalize: Whether to normalize the output median_house_value Returns: pd.DataFrame: A DataFrame containing the processed target values. 
\"\"\" output_targets = pd.DataFrame() output_targets['median_house_value_is_high'] = ( (data['median_house_value'] > 265000).astype(float) ) output_targets[\"median_house_value\"] = ( data[\"median_house_value\"] / 1000.0 ) if need_normalize: output_targets[\"median_house_value\"] /= output_targets[\"median_house_value\"].max() return output_targets 通过 preprocess_features 方法,建立包含 rooms_per_person 信息的新 pd.DataFrame 用于 和 补充替换 原 california_housing_dataframe 数据的作用,而作为基础信息使用。通过 preprocess_targets 方法,建立只有 median_house_value 信息的新 pd.DataFrame 用于处理 。 调用两个方法,并取 CSV 表的头部 17000 个数据作为有效数据,有: total_examples = preprocess_features(california_housing_dataframe.head(17000)) total_targets = preprocess_targets(california_housing_dataframe.head(17000), True) print(\"total::\\n\") print(total_examples.describe()) print(total_targets.describe()) 其中,total_examples 即新特征表,total_targets 即靶向特征。获得预处理完毕的数据,可以开始进行绘制了。 第四步,结果可视化: 当下我们已经取得了需要的数据内容,只用通过 Matplotlib 将数据展示即可。由于 中存在 两种图样类型。为了方便起见,我们依然采用封装的形式,将对应类型图表的绘制流程函数化使用。有: def ploting_2d_histogram(examples, targets): \"\"\" Plot a 2D histogram of the examples and targets. Args: examples (pd.DataFrame): The input features to plot. targets (pd.DataFrame): The target values to plot. Returns: None \"\"\" # Create a new figure with a specified size plt.figure(figsize=(13.00, 9.00)) # Add a 2D subplot to the figure plt.subplot(1, 1, 1) # Set the title and labels for the 2D plot plt.title(\"California Housing Validation Data\") plt.xlabel(\"Longitude\") plt.ylabel(\"Latitude\") plt.autoscale(False) plt.ylim([32, 43]) plt.xlim([-126, -112]) # Create a 2D scatter plot plt.scatter( examples[\"longitude\"], examples[\"latitude\"], cmap=\"coolwarm\", c=targets ) # Display the plot plt.show() def ploting_3d_histogram(examples, targets, z_label): \"\"\" Plot a 3D histogram of the examples and targets. Args: examples (pd.DataFrame): The input features to plot. targets (pd.DataFrame): The target values to plot. 
z_label (string): The Z-Label descriptions Returns: None \"\"\" # Create a new figure with a specified size fig = plt.figure(figsize=(13.00, 9.00)) # Add a 3D subplot to the figure ax = fig.add_subplot(111, projection='3d') # Set the title and labels for the 3D plot ax.set_title(\"California Housing 3D Data\") ax.set_xlabel(\"Longitude\") ax.set_ylabel(\"Latitude\") ax.set_zlabel(z_label) # Create a 3D scatter plot scatter = ax.scatter( examples[\"longitude\"], examples[\"latitude\"], targets, c=targets, cmap=\"coolwarm\" ) # Add a color bar which maps values to colors cbar = fig.colorbar(scatter, ax=ax, shrink=0.5, aspect=5) cbar.set_label('Color State') # : Set initial view angle ax.view_init(elev=30, azim=30) # Display the plot plt.show() 而在完成函数化后,绘制的过程就很简单了,直接调用方法即可: ploting_2d_histogram(total_examples, total_targets[\"median_house_value\"]) ploting_3d_histogram(total_examples, total_targets[\"median_house_value\"], \"Median House Value (in $1000's)\") ploting_3d_histogram(total_examples, total_examples[\"rooms_per_person\"], \"Rooms/Person\") 最终,通过 Python 执行 practice_1_mathetics_libs_using.py 脚本,就能得到想要的结果了。执行成功会获得 3 张图表: 图 5-1 模拟加利福利亚房价中位值 2D 热力图 图 5-2 模拟加利福利亚区域房价中位值 3D 热力图 图 5-3 模拟加利福利亚人均占有房间数 3D 热力图 至此,对基础库的练习完毕。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:11 "},"Chapter_5/Language/cn/Docs_5_1_2.html":{"url":"Chapter_5/Language/cn/Docs_5_1_2.html","title":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio)","keywords":"","body":"5.1.2 音频分析库(SoundFile、PyAudio、Librosa、Aubio) 在完成对基础库的熟悉后,我们接下来需要做的就是对工程中,音视频分析的相关核心功能库的学习。以音频分析库为切入点。 如果期望对 一段音频(或音频流)进行解读,根据我们已有的认知,将当前的音频数据从封装的音频格式,还原为采样模拟信号对应的 PCM 数字信号载体 只是第一步。该操作是后续所有工作的起点。 而音频格式在前文已有介绍,分为 三大类别,即 无压缩编码格式、无损压缩编码格式、有损压缩编码格式。虽然能通过一些针对 某单个类型 或 类型族 的 音频编解码库 来做解码工作,但我们在分析过程中,更希望能够通过 简单而统一 的方式,排除掉格式本身细部的工程干扰。使我们能够更关注于对 音频所含有信息本身 的分析。 既然如此,为何不直接使用大名鼎鼎的 FFMpeg 来完成从 编解码到分析,甚至是 重排、编辑 等操作呢? 
其中的关键就在于,FFMpeg 虽然功能强大,但在以 实时处理、数据集成、特征提取 等为主要应用场景的音频分析情况下,FFMpeg 并不具备足够的优势。更不用提 Python 的使用环境 和 对断点调试临时插值,与 基础库的高度兼容 方面的要求了(尤其对 模型训练时,提取的数据能够 直接被训练过程使用 的这一点)。 所以,音频分析场景,除非只需要当前音视频数据的 元信息(Metadata),即 头部信息(Header),一般会采用以下这些库来进行。至于 FFMpeg ,在实际使用中会把其核心能力局限于 编解码 和 转码 的范围里,虽然 其核心库 和 辅助插件 是包含了包括滤镜在内的多种功能的,但通常我们只会以 最简形式接入。这一部分,伴随着网络推拉流协议和更贴近于规格的编解码协议库(如 x264 等),将在本系列书籍的进阶篇中细讲。此处暂不做更进一步的讨论。 现在,让焦点回到音频分析库上。常用的音频分析库主要有四个,为 SoundFile、PyAudio、Librosa、Aubio,分别对应 [ 音频文件读写、音频流数据的输入输出、工程乐理分析、实时音频处理 ] 的需求。 SoundFile(Python Sound File) SoundFile(PySoundFile [Python Sound File]) 是一个 用于读写音频文件的 Python 库,主要被用于解码(或者编码)常用的 音频格式文件 [4] 。例如前文介绍过的 WAV、AIFF、FLAC 等大多数常见音频格式,SoundFile 都已完整支持。并且,通过 SoundFile 取出的音频数据,可以和其他音频分析库(如 Librosa、Aubio 等)和科学计算库(如 NumPy、SciPy 等)配合使用。 实际上,SoundFile 核心能力来自于 C开源库 Libsndfile,正是 Libsndfile 为它 提供了多种音频文件格式的支撑。而 PySoundFile 则可以看做是 Libsndfile 这个 C语言库的 Python 套接访问入口。因此,如果我们在常规工程中存在对音频文件的读写需求,不妨考虑采用 Libsndfile 来处理,它的官网位于 http://www.mega-nerd.com/libsndfile/ ,含有该库的相关技术参数。 主要功能: 支持 WAV、AIFF、FLAC、OGG 等多种常见 音频文件格式,适用于 广泛的 音频读写需求 支持长音频处理,提供快速读写大文件的功能,并可用于临时性的(分块)流式处理 提供 高可定制化的 API,允许用户自定义音频处理流程和数据操作,适合快速分析 允许以不同的数据格式(如浮点型、整型)读取和写入音频数据,及 基本元数据访问 与主流科学计算库(如 NumPy、Pandas、SciPy 等)的 无缝集成 单一的文件操作专精库,不存在多个子模块,仅有有限但明确的 API 入口 基础库(sf.)的常用函数(简,仅列出名称): 数据结构: 关联文件: open 音频读写: read, write 基本信息: info 核心类(sf.SoundFile 即 )的常用函数(简,仅列出名称): 基础参数: samplerate, channels, format, subtype, endian, frames 帧位索引: seek, tell 数据访问: read, write, read_frames, write_frames 分块读写: buffer_read, buffer_write 由上可知,SoundFile 本身的调用极其简便,但已满足完整的音频文件读写需求。开源项目位于 Github:bastibe/python-soundfile。使用细节,可自行前往 官方档案馆查阅。 PyAudio(Python Audio) PyAudio(Python Audio) 是音频分析中 常用的音频输入输出操作库,即 音频 I/O 库 [5] 。换句话说,它提供了一组工具和函数,使得开发者可以在项目的 Python 程序中,利用 PyAudio 已有的函数接口,快速进行音频的流式(这里指本地流)录制和输出。同 SoundFile 一样,PyAudio 依赖于底层 C语言库 PortAudio 的帮助,而其内核 PortAudio 库实则为一个 专精于多种操作系统上运行(即跨 Windows、MacOS、Linux 平台)的底层音频输入输出(I/O)库。 所以,与 SoundFile 注重于对音频文件(即本地音频流结果)的操作不同,PyAudio 或者说 PortAudio 的操作重点,在于 处理对 “实时” 音频流的捕获和析出。实时音频流,是能够被连续处理传输的音频数据,例如采样自麦克风输入模数转换后的持续不断的数字信号,或者取自播放音频的连续到来分块数据,即 过程中音频数据。 由此,音频分析中常用 PyAudio 来完成对被分析音频的 “启停转播”(Play/Stop/Seek/Pause),所谓 音频本地流控(LASC [Local Audio Stream Control])。 主要功能: 专业音频本地流控 Python 库,支持实时音频流的捕获和播放,适合 实时音频处理任务 稳定的 跨平台兼容性,完整覆盖主流操作系统,包括 Windows、macOS 和 Linux 灵活的 音频流配置,提供多种配置选项,如采样率、通道数、样本格式、缓冲区大小等 提供 接入式回调,支持使用回调函数处理音频数据,适合低延迟的实时音频分析 与主流科学计算库 和 其他音频库(如 SoundFile)的 无缝集成 单一的音频本地流读写专精库,不存在多个子模块,仅有有限但明确的 API 入口 基础库(pyaudio.)由于特殊的套接设计,仅用于创建 即 PortAudio 实例: 数据结构: 、 创建实例: PyAudio 核心类(pyaudio.PyAudio 即 设备实例)的常用函数(简,仅列出名称): 销毁实例: terminate 联音频流: open (返回 实例,通过 stream_callback 参数配置回调) 设备查询: get_device_count, get_device_info_by_index, get_host_api_count, get_default_input_device_info, get_default_output_device_info, get_host_api_info_by_index, get_device_info_by_host_api_device_index 参数查验: get_sample_size, is_format_supported 核心类的(pyaudio.Stream 即 音频流实例)的常用函数(简,仅列出名称): 音频流启停: start_stream, stop_stream 音频流关闭: close(注意, 的 open 状态来自于设备实例,亦是其初始状态) 流状态检测: is_active, is_stopped 流数据读写: read, write 余下使用细节,可自行前往 项目官网 ,或 官方档案馆查阅。 上述关键函数已包含 PyAudio 的 几乎全部调用,但并没有列出 PyAudio 回调格式。这是因为,这一部分正是 PyAudio 分析适用性的关键。在具体使用中,PyAudio 回调 的设定方式,和回调各参数意义与取值,是我们留意的重点。 参考 PyAudio 0.2.14 当前最新版,回调的设置方式和格式都是固定的,有: def callback(in_data, frame_count, time_info, in_status): # 在此处处理音频数据(例如,进行实时分析或处理) return (out_data, out_status) p = pyaudio.PyAudio() stream = p.open( format=p.get_format_from_width(2), channels=1 if sys.platform == 'darwin' else 2, rate=44100, input=True, output=True, stream_callback=callback ) 其中,callback(in_data, frame_count, time_info, status) 即 回调传入,包含四个关键参: in_data 为 
音频数据的输入流,通常配合 np.frombuffer(in_data, dtype=np.int16) 读取数据 frame_count 为 输入流当前数据对应音频帧数,即当前 in_data 数据覆盖的 帧数 time_info 是一个包含了 三个设备相关时间戳 的 数据字典,有参数(注意表述): input_buffer_adc_time 表示 输入音频数据被 ADC 处理时的时间戳(如果适用) output_buffer_dac_time 表示 输出音频数据被 DAC 处理时的时间戳(如果适用) current_time 表示 当前时间,即 当前调用触发时的系统时间戳 in_status 是 记录当前输入回调时,流状态的枚举类标识。可取三个状态常量,分别是: pyaudio.paContinue 表示 流继续,即恢复播放和正常播放时的状态,也是默认状态 pyaudio.paComplete 表示 流完成,即代指当前输入流数据为最末尾的一组 pyaudio.paAbort 表示 流中止,即立刻停止时触发,一般为紧急关流或异常情况 在 callback 处理完毕后,回调要求以 return (out_data, out_status) 的 格式返回。同样: out_data 为 音频数据的输出流,根据协定好的音频 PCM 位数对应的格式输出,一般同输入 out_status 是 记录当前输出的状态,同 in_status 的可取值一致,一般同 in_status 不变 配置好 callback 后,我们该如何使用呢?只需要于 实例调用 open 开启流 实体时,以 stream_callback=callback 将 函数句柄以参数传入 即可生效。而这里的 callback 也可 根据具体情况修改命名,比如 audio_analyze_callback 。 随之就可以在回调中,完成分析作业了。 Librosa Librosa 是一个功能强大且易于使用的 音频/乐理(工程)科学分析原生 Python 库,成体系的提供了用于 音频特征提取、节拍节奏分析、音高(工程)估计、音频效果器(滤波、特效接口) 等处理的算法实现。其设计理念来自于 SciPy 2015 年的第十四届 Python 科学大会中,有关音频处理、音频潜藏信息提取与分析快捷化的讨论 [6] 。因此,在设计之初就完全采用了,与其他科学计算库(如 NumPy、SciPy)和可视化库(主要指 Matplotlib)的 无缝集成。而极强的分析能力和可操作性(工程层面),使 Librosa 成为了我们做 音频分析与操作时的重要工具。 必须熟练掌握。 主要功能: 临时处理友好,提供简便的方法,在必要时做临时读取和写入音频文件,支持多种格式 快速时频转换,提供短时傅里叶变换(STFT)、常规Q变换(CQT)等,方便时频域分析 音频特征提取,支持对梅尔频率倒谱系数(MFCC)、色度特征、频谱对比度等特征提取 节拍节奏分析,具有节拍跟踪、起音检测等,音乐(工程)分析能力 分割与重采样,提供音频分割与重采样工具,便于快速分析对比 调音与音频特效,具有音高估计和调音功能,并支持音频时间伸缩和音高变换等音频效果 当然还有最重要的【无缝集成】特性 基础库(librosa.)的常用函数(简,仅列出名称): 音频加载: load, stream 音频生成: clicks, tone, chirp 简化分析: to_mono, resample, get_duration, get_samplerate 时频分析: stft, istft, reassigned_spectrogram, cqt, icqt, hybrid_cqt, pseudo_cqt, vqt, iirt, fmt, magphase 时域校准: autocorrelate, lpc, zero_crossings, mu_compress, mu_expand 谐波分析: interp_harmonics, salience, f0_harmonics, phase_vocoder 相位校准: griffinlim, griffinlim_cqt 响度单位换算: amplitude_to_db, db_to_amplitude, power_to_db, db_to_power, perceptual_weighting, frequency_weighting, multi_frequency_weighting, A_weighting, B_weighting, C_weighting, D_weighting, pcen 时轴单位换算: frames_to_samples, frames_to_time, samples_to_frames, samples_to_time, time_to_frames, time_to_samples, blocks_to_frames, blocks_to_samples, blocks_to_time 频率单位换算: hz_to_note, hz_to_midi, hz_to_svara_h, hz_to_svara_c, hz_to_fjs, midi_to_hz, midi_to_note, midi_to_svara_h, midi_to_svara_c, note_to_midi, note_to_svara_h, note_to_svara_c, hz_to_mel, hz_to_octs, mel_to_hz, octs_to_hz, A4_to_tuning, tuning_to_A4 基底频率生成: fft_frequencies, cqt_frequencies, mel_frequencies, tempo_frequencies, fourier_tempo_frequencies 乐理乐谱工具: key_to_notes, key_to_degrees, mela_to_svara, mela_to_degrees, thaat_to_degrees, list_mela, list_thaat, fifths_to_note, interval_to_fjs, interval_frequencies, pythagorean_intervals, plimit_intervals 乐理音高音调: pyin, yin, estimate_tuning, pitch_tuning, piptrack 适配杂项: samples_like, times_like, get_fftlib, set_fftlib 图表显示扩展(librosa.display.)的常用函数(简,仅列出名称,依赖于 Matplotlib): 数据可视化: specshow, waveshow 坐标轴设置: TimeFormatter, NoteFormatter, SvaraFormatter, FJSFormatter, LogHzFormatter, ChromaFormatter, ChromaSvaraFormatter, ChromaFJSFormatter, TonnetzFormatter 适配杂项: cmap, AdaptiveWaveplot 音频特征提取(librosa.feature.)的常用函数(简,仅列出名称): 工程频谱特征: chroma_stft, chroma_cqt, chroma_cens, chroma_vqt, melspectrogram, mfcc, rms, spectral_centroid, spectral_bandwidth, spectral_contrast, spectral_flatness, spectral_rolloff, poly_features, tonnetz, zero_crossing_rate 乐理节奏特征: tempo, tempogram, fourier_tempogram, tempogram_ratio 特征计算: delta, stack_memory 反向逆推: inverse.mel_to_stft, inverse.mel_to_audio, inverse.mfcc_to_mel, inverse.mfcc_to_audio 起音检测扩展(librosa.onset.)的常用函数(简,仅列出名称): 峰值检测: 
onset_detect 小值回溯: onset_backtrack 强度统计: onset_strength, onset_strength_multi 节拍节奏扩展(librosa.beat.)的常用函数(简,仅列出名称): 节拍追踪: beat_track 主位脉冲: plp 语谱分解扩展(librosa.decompose.)的常用函数(简,仅列出名称): 特征矩阵分解: decompose 源分离滤波: hpss, nn_filter 音频效果器扩展(librosa.effects.)的常用函数(简,仅列出名称): 谐波乐源分离: hpss, harmonic, percussive 时间伸缩: time_stretch 时序混音: remix 音高移动: pitch_shift 信号操控: trim, split, preemphasis, deemphasis 时域分割扩展(librosa.segment.)的常用函数(简,仅列出名称): 自相似性: cross_similarity, path_enhance 重复矩阵: recurrence_matrix, lag_to_recurrence 延迟矩阵: timelag_filter, recurrence_to_lag 时域聚类: agglomerative, subsegment 顺序模型扩展(librosa.sequence.)的常用函数(简,仅列出名称): 顺序对齐: dtw, rqa 维特比(Viterbi)解码: viterbi, viterbi_discriminative, viterbi_binary 状态转移矩阵: transition_uniform, transition_loop, transition_cycle, transition_local 跨库通用扩展(librosa.util.)的常用函数(简,仅列出名称): 数组转换: frame, pad_center, expand_to, fix_length, fix_frames, index_to_slice, softmask, stack, sync, axis_sort, normalize, shear, sparsify_rows, buf_to_float, tiny 条件匹配: match_intervals, match_events 统计运算: localmax, localmin, peak_pick, nils, cyclic_gradient, dtype_c2r, dtype_r2c, count_unique, is_unique, abs2, phasor 输入评估: valid_audio, valid_int, valid_intervals, is_positive_int 本库样例: example, example_info, list_examples, find_files, cite 具体使用细节,可自行前往项目 官方档案馆查阅 。 Librosa 在音频方面,涵盖了大多数基本的科学分析手段,足够一般工程使用。 但在 数据科学方面 和 集成性 的高度倾注,也让 Librosa 的 实时性相对有所降低(本质为复杂度和精度上升,所伴随算力消耗的升高)。可若此时我们对误差有相对较高的容忍度,且更希望音频处理足够实时和高效时,就得采用 Aubio 库来达成这一点了。Aubio 和 Librosa 的特性相反,是满足这种情况有效补充手段。 Aubio Aubio 是主要用于 音乐信息检索(MIR [Music Information Retrieval]) 的 跨平台轻量级分析库。设计之初就是期望实时进行 MIR 使 Aubio 采用了 C语言 作为库的核心语言。不过,因其已在自身的开源项目中,实现了 Python 的套接调用入口 [7] ,我们仍然可以在 Python 中使用。 功能性方面,Aubio 和 Librosa 在音频浅层信息处理上,如果排除效率因素,则几乎不相上下。但 Aubio 的处理效率,不论从整体架构还是本位支撑上,都着实比 Librosa 更加高效。 因此,在音频分析领域,对于类似 ‘音高检测’ 等以实时性作为主要求的分析点,我们常采用 Aubio 而不是 Librosa 处理。而对于 梅尔频率倒谱系数(MFCC)之类的科学分析,则多数用 Librosa 解决,虽然 Aubio 也有此功能。除此外,科学分析不以 Aubio 合并解决的另一原因,还在于 Aubio 对主流科学计算库的兼容程度,要略逊 Librosa 一筹,并向当局限。即有利有弊。 此外,相比 Librosa,Aubio 仅能提供相对基础的分析。 主要功能: 实时处理能力,面向低延迟的音频处理能力,专为快速高效设计 专精通用检测,提供 节拍检测、起音检测、音符分割等通用基础音频分析 简易实时效果,提供快速重采样、过滤、归一化能力,只能实现部分简易效果 跨平台支持,可以在主流操作系统(Windows、macOS、Linux)上运行 有限集成性,提供 Python 入口,虽不完美兼容计算库,但仍可有效利用实时特性 受局限的调用方式,但官方提供了很多样例,学习门槛较低 基础库(aubio.)对常用过程的类封装(简,仅列出名称): 数据读写: 、 乐理分析: 、 、 、 、 频谱分析: 、 、 、 、 、 简易滤波: 一些常用过程封装的常用操作简示(非所有,仅列出名称): 音高 相关:[entity]([source]), [entity].set_unit, [entity].set_tolerance 节奏 相关:[entity]([source]), [entity].get_bpm 起音 检测:[entity]([source]), [entity].set_threshold 音频写入 类: [entity].close 音频读取 类: [entity].seek, [entity].close 官方样例,可从 项目官网 获取,而各个封装结构内的 额外参数配置/获取方式,可查阅 官方档案馆查阅 。 由于是 C语言库,其 Python 套接后的使用形式,也 相对更接近 C 的使用习惯。所以,Aubio 的的过程类,在创建实体时就需要传入配置参数,如下例: # 创建音频源读取实例 source = aubio.source('example.wav', 44100, 512) # 创建音频写入实例 sink = aubio.sink('output.wav', 44100, 1) # 创建音高检测实例 pitch_o = aubio.pitch(\"yin\", 1024, 512, 44100) pitch_o.set_unit(\"Hz\") pitch_o.set_silence(-40) # 创建节拍检测实例 tempo_o = aubio.tempo(\"default\", 1024, 512, 44100) # 创建起音检测实例 onset_o = aubio.onset(\"default\", 1024, 512, 44100) # 创建音调检测实例 notes_o = aubio.notes(\"default\", 1024, 512, 44100) # 创建离散余弦变换实例 dct_o = aubio.cqt(16) # 创建快速傅里叶变换实例 fft_o = aubio.fft(1024) # 创建梅尔频率倒谱系数实例 mfcc_o = aubio.mfcc(40, 1024, 44100) # 创建滤波器组实例 filterbank_o = aubio.filterbank(40, 1024) # 创建频谱描述符实例 specdesc_o = aubio.specdesc(aubio.specdesc_type.centroid, 1024) # 创建相位声码器实例 pvoc_o = aubio.pvoc(1024, 512) 上述过程中,我们进行了一些配置,基本涵盖了 Aubio 在 Python 上的 大部分经常被使用到的实用功能 。以上例中的配置,对创建的实体意义进行说明,有: 音频读取():读取 example.wav,采样率 44100 Hz,每次读取 512 帧 音频写入():写入 output.wav,采样率 44100 Hz,单声道 音高检测():yin 
算法,窗口 1024/跳频 512/采样率 44100 Hz,静音阈 -40 dB 节拍检测():使用默认算法,窗口 1024,跳频 512,采样率 44100 Hz 起音检测():使用默认算法,窗口 1024,跳频 512,采样率 44100 Hz 音调检测():使用默认音集,窗口 1024,跳频 512,采样率 44100 Hz 离散余弦变换():离散余弦变换,以 16 个由短至长余弦周期构成解集(见前文) 快速傅里叶变换():快速傅里叶变换,窗口 1024 梅尔频率倒谱系数():提取 MFCC,梅尔带 40,窗口 1024,采样率 44100 Hz 滤波器组():分解为 40 个频率带,窗口 1024 频谱描述符():提取频谱描述符,计算频谱流,窗口 1024 相位声码器():配置相位声码器,窗口 1024/每次取 512 个样本 (即跳频 512) 而其使用时的方式,由于是以 __call__ 的 Python 调用实现的,有: # 读取音频数据并处理 while True: samples, read = source() # 音高检测 pitch = pitch_o(samples)[0] print(f\"Detected pitch: {pitch} Hz\") # 节拍检测 is_beat = tempo_o(samples) if is_beat: print(f\"Beat detected at {source.positions}\") # 起音检测 is_onset = onset_o(samples) if is_onset: print(f\"Onset detected at {source.positions}\") # 音调检测 notes = notes_o(samples) print(f\"Detected notes: {notes}\") # 离散余弦变换 dct_data = dct_o(samples) print(f\"DCT Data: {dct_data}\") # 快速傅里叶变换 fft_data = fft_o(samples) print(f\"FFT Data: {fft_data}\") # 提取梅尔频率倒谱系数 mfcc_data = mfcc_o(samples) print(f\"MFCC Data: {mfcc_data}\") # 滤波处理 filtered_data = filterbank_o(samples) print(f\"Filtered Data: {filtered_data}\") # 提取频谱描述符 specdesc_data = specdesc_o(samples) print(f\"Spectral Descriptor: {specdesc_data}\") # 使用 pvoc 对象处理样本 spec = pvoc_o(samples) spectrogram.append(spec) # 写入音频数据 sink(samples, read) if read 即,直接用创建并配置好的对应功能实体,循环取 获取的 采样片段 samples 传入,就可以得到检测处理结果了。可见,Aubio 的使用非常的 “面向过程”,创建出的实体,与其说是 “对象”,不如说是对 “过程的封装”。 从 Aubio 的设计体现出了,其作为库的有限调用方式,并没有为使用者提供基于调用侧的功能扩展入口。 所以,除实时处理外,Aubio 的能力有限。只适合作为 补充手段 应用于分析中。 四个关键音频库介绍完毕,那么现在,让我们用它们做些简单的实践。 简单练习:用 常用音频库 完成 带有实时频响图的音频播放器 为了相对可能的便利,我们需要让这个练习用播放器有一个 UI 界面,且能根据需要的自主选择音频文件。而 波形图(Waveform) 就是整个音频所有频段在 波形切面(TLS) 叠加后的投影。 对于界面,我们需要引入 Tkinter 库来协助进行绘制。Tkinter 是 Python 标准模块其中之一,专用于创建图形用户界面(GUI)的工具,提供了一系列简易的按钮、图表、交互组件和标准布局。这里只需了解即可。 练习事例按照标准工程工作流进行。 第一步,确立已知信息: 数据来源:用户自选的 \".wav .flac *.mp3\" 音频格式文件(如需可自行在源码中拓展) 处理环境:依赖 、,Python 脚本执行 工程目标: 1) 提供一个具有 GUI 的简易音频格式文件播放器,自选择播放音频文件,可控播放/暂停 2) 图形界面显示选定音频文件的波形图,并提供 Seekbar 可进行 Seek 操作 第二步,准备执行环境: 检测是否已经安装了 Python 和 pip(对应 Python 版本 2.x) 或 pip3(对应 Python 版本 3.x) 包管理器。此步骤同我们在 的练习 中的操作一致,执行脚本即可: python install_pip.py python install_math_libs.py 完成对 Python 环境 的准备和 的安装。具体脚本实现,可回顾上一节。 同理,对于 的准备工作,我们也按照脚本方式进行流程化的封装。创建自动化脚本 install_acoustic_libs.py 如下: import subprocess import sys import platform def is_package_installed(package_name): try: subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return True except subprocess.CalledProcessError: return False def install_package(package_name): print(f\"Installing {package_name}...\") subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", package_name], check=True) subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True) def is_portaudio_installed(): try: if platform.system() == \"Darwin\": # macOS result = subprocess.run([\"brew\", \"list\", \"portaudio\"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) elif platform.system() == \"Linux\": result = subprocess.run([\"dpkg\", \"-s\", \"portaudio19-dev\"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) else: return True # Assume portaudio is handled manually on other platforms return result.returncode == 0 except subprocess.CalledProcessError: return False def install_portaudio(): if platform.system() == \"Darwin\": # macOS print(\"Installing portaudio using Homebrew...\") subprocess.run([\"brew\", \"install\", \"portaudio\"], check=True) elif platform.system() == \"Linux\": print(\"Installing portaudio using 
APT...\") subprocess.run([\"sudo\", \"apt-get\", \"install\", \"-y\", \"portaudio19-dev\"], check=True) else: print(\"Please install portaudio manually for your platform.\") sys.exit(1) def main(): packages = [\"soundfile\", \"pyaudio\", \"librosa\"] for package in packages: if package == \"pyaudio\": if not is_portaudio_installed(): install_portaudio() if is_package_installed(package): print(f\"{package} is already installed.\") else: install_package(package) print(f\"{package} has been installed.\") else: if is_package_installed(package): print(f\"{package} is already installed.\") else: install_package(package) print(f\"{package} has been installed.\") if __name__ == \"__main__\": main() 此处有个流程上的关键,即 PyAudio 依赖于 PortAudio 库提供的 音频输入输出设备拨接。我们需要在安装 PyAudio 前,先行安装 PortAudio 以保证 PyAudio 的正常执行,否则会报如下的 IO访问错误: OSError: [Errno -9986] Internal PortAudio error PyAudio 的安装过程由于 未配置对 PortAudio 的强依赖标注,且 PortAudio 并未提供 pip 的可用包。因此,不会在 pip 包管理安装过程中,自行获取前置库。需要我们 手动在脚本中完成 检测 与 安装。 随后,使用 Python 执行脚本: python install_acoustic_libs.py 如果包已安装,则会输出 \"[基础音频库] is already installed.\"。如果包未安装,则会安装该包并输出 \"[基础音频库] has been installed.\",并显示包的详细信息。 到此,完成音频库的环境准备工作。 为什么建议 采用执行脚本的形式,对需要的库进行准备流水封装呢?因为这是一个非常好的习惯。而随着工作的积累,相关的 工具库快速部署脚本会逐步的累积,形成足够支撑大部分情况的 一键部署工具集。在这过程中,工程师 可以养成对环境准备以流水线方式处理的逻辑链,使之后再遇到新的情况时,也能快速的理清思维,便于减轻维护工作压力。 第三步,搭建音频播放器: 由于只是个简易播放器,我们选择在单一文件中实现所有基本功能。 首先,需要思考一下,必要包含于 GUI 的交互组件都有哪些。有: 停止(Stop):用于在音频开始播放后,停止播放并重置音频到起始位置; 播放/暂停(Play/Pause):用于控制音频的播放,与过程中暂停; 打开(Open):用于满足选择要播放的音频格式文件; 进度条(Seekbar):用于提供 Seek 功能,并实时显示播放进度 而纯粹的用于显示展示于 GUI 的组件,只有: 波形图(Waveform):在 “打开” 选择音频文件后,显示该音频波形图; 至此,我们获得了此播放器的基本交互逻辑。 图 5-4 简易音频播放器的交互逻辑关系示意图 根据上图交互关系,将每一个节点作为函数封装,就能轻松完成相关实现了。编写代码: import tkinter as tk from tkinter import filedialog import numpy as np import soundfile as sf import pyaudio import threading import queue import matplotlib.pyplot as plt from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg class AudioPlayer: def __init__(self, root): self.root = root self.root.title(\"Simple Audio Player\") # Initialize pyaudio self.pyaudio_instance = pyaudio.PyAudio() # Create control buttons frame self.control_frame = tk.Frame(self.root) self.control_frame.pack(side=tk.TOP, fill=tk.X) self.stop_button = tk.Button(self.control_frame, text=\"Stop\", command=self.stop_audio) self.stop_button.pack(side=tk.LEFT) self.play_pause_button = tk.Button(self.control_frame, text=\"Play\", command=self.toggle_play_pause) self.play_pause_button.pack(side=tk.LEFT) self.open_button = tk.Button(self.control_frame, text=\"Open\", command=self.open_file) self.open_button.pack(side=tk.LEFT) self.playing = False self.audio_data = None self.fs = None self.current_frame = 0 self.stream = None # Create matplotlib figure and axes for waveform display self.fig, self.ax_waveform = plt.subplots(figsize=(6, 3.6)) self.canvas = FigureCanvasTkAgg(self.fig, master=self.root) self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1) # Create progress bar self.progress_frame = tk.Frame(self.root) self.progress_frame.pack(side=tk.TOP, fill=tk.X) self.progress_bar = tk.Scale(self.progress_frame, from_=0, to=1000, orient=tk.HORIZONTAL, showvalue=0) self.progress_bar.pack(fill=tk.X, expand=True) # Timer to update waveform line self.update_interval = 1 # milliseconds # Create thread event to stop update thread self.update_thread_event = threading.Event() # Queue for inter-thread communication self.queue = queue.Queue() # Flag variable to detect if the progress bar is being dragged self.is_seeking = False self.was_playing = False # Mark the 
playback state when seeking # Bind events self.progress_bar.bind(\"\", self.on_seek_start) self.progress_bar.bind(\"\", self.on_seek_end) self.progress_bar.bind(\"\", self.on_seek) # Start thread to update progress bar self.root.after(self.update_interval, self.update_progress_bar) def open_file(self): file_path = filedialog.askopenfilename(filetypes=[(\"Audio Files\", \"*.wav *.flac *.mp3\")]) if file_path: self.audio_data, self.fs = sf.read(file_path, dtype='float32') self.current_frame = 0 duration = len(self.audio_data) / self.fs self.progress_bar.config(to=duration * 1000) # Set the maximum value of the progress bar to the audio duration in milliseconds self.play_pause_button.config(text=\"Play\") self.playing = False self.plot_waveform() def toggle_play_pause(self): if self.playing: self.play_pause_button.config(text=\"Play\") self.playing = False self.pause_audio() self.update_thread_event.set() # Stop update thread else: self.play_pause_button.config(text=\"Pause\") self.playing = True self.update_thread_event.clear() # Clear update thread event threading.Thread(target=self.play_audio).start() def audio_callback(self, in_data, frame_count, time_info, status): end_frame = self.current_frame + frame_count data = self.audio_data[self.current_frame:end_frame].tobytes() self.current_frame = end_frame self.queue.put(end_frame / self.fs * 1000) # Current time (milliseconds) if self.current_frame >= len(self.audio_data): return (data, pyaudio.paComplete) return (data, pyaudio.paContinue) def pause_audio(self): if self.stream is not None: self.stream.stop_stream() self.stream.close() self.stream = None def play_audio(self): self.stream = self.pyaudio_instance.open( format=pyaudio.paFloat32, channels=self.audio_data.shape[1], rate=self.fs, output=True, stream_callback=self.audio_callback ) self.stream.start_stream() def stop_audio(self): self.playing = False self.current_frame = 0 if self.stream is not None: self.stream.stop_stream() self.stream.close() self.stream = None self.play_pause_button.config(text=\"Play\") # Reset the red line to the beginning self.update_thread_event.set() # Stop update thread self.plot_waveform() # Reset waveform plot self.progress_bar.set(0) def plot_waveform(self): self.ax_waveform.clear() time_axis = np.linspace(0, len(self.audio_data) / self.fs, num=len(self.audio_data)) self.ax_waveform.plot(time_axis, self.audio_data) self.ax_waveform.set_title(\"Waveform\") self.ax_waveform.set_xlabel(\"Time (s)\") # Set x-axis label to seconds self.ax_waveform.set_ylabel(\"Amplitude\") self.canvas.draw() def update_progress_bar(self): try: while not self.queue.empty(): current_time = self.queue.get_nowait() if not self.is_seeking: # Only update when not dragging the progress bar self.progress_bar.set(current_time) except queue.Empty: pass self.root.after(self.update_interval, self.update_progress_bar) def on_seek_start(self, event): self.was_playing = self.playing # Record the playback state when seeking if self.playing: self.toggle_play_pause() # Pause playback self.is_seeking = True # Mark that the progress bar is being dragged def on_seek(self, event): # Update current_frame in real-time value = self.progress_bar.get() self.current_frame = int(float(value) / 1000 * self.fs) def on_seek_end(self, event): self.is_seeking = False # Mark that dragging has ended self.plot_waveform() # Update waveform plot if self.was_playing: # If it was playing before, resume playback self.toggle_play_pause() def seek(self, value): if self.audio_data is not None: self.current_frame = 
int(float(value) / 1000 * self.fs) if __name__ == \"__main__\": root = tk.Tk() app = AudioPlayer(root) root.mainloop() 有运行效果如下: 图 5-5 简易音频播放器的运行效果图 至此,对音频库的练习完毕。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:11 "},"Chapter_5/Language/cn/Docs_5_1_3.html":{"url":"Chapter_5/Language/cn/Docs_5_1_3.html","title":"5.1.3 视频分析库(PyOpenCV、Color-Science)","keywords":"","body":"5.1.3 视频分析库(PyOpenCV、Color-Science) 和音频一样,外部工程里,对视频的分析处理焦点多在于 帧分析,并非于流分析。或者说,有关音视频编解码与网络流的评估,是属于完整编解码工程内部范畴。其更多的是与网络子系统进行结合,并依托于诸如 ITU-T(或其他音视频组织,较少)提出的相关协议(如 H.264、H.265 等)约束之标准规格背景,来作为整体工程中的 子评估系统。所以,音视频流分析(Audio/Video Stream Analysis)和编解码协议是强耦合的,一般会将之归属于 编解码器内部监测 部分,平行于项目的正常作业流水线,来监控各个环节。 而 视频帧分析(Video Frame Analysis) 或 帧处理(Frame Processing) 的有效介入点,是在 编码前(Before encoding) 和 解码后(After decoding)。此时,我们用来处理的数据,已经是纯粹的 色彩格式(Color Format) 数据了。 以解码为例,在解码后的必要环节是什么样的呢? 图 5-6 简易音频播放器的运行效果图 首先,是 颜色空间转换,亦是大量使用第二章知识的地方。一般解码后的图像因为考虑到存储空间成本,会采用 传输格式(Transport Format),即 YUV 体系色彩格式。 不过,只凭借 YUV 是无法做为 唯一且足够泛化的 随后步骤起点的。这并不是指 YUV体系的色彩格式 无法直接交由如 OpenGL、DirectX、Vulkan 等驱动处理,相反这些驱动内部往往已经通过 模式编程方法,完成了一些 固定格式自硬件抽象层(HAL)的映射式转换工作(原理同第二章中,已讲解并推导过的色彩空间转换,部分算子的硬件化实现在驱动层面的组合)。同理于 RGB,在硬件支持的情况下,直接以 YUV 上屏在流程上会更简短。可当我们的目的是需要对每一帧的图片,做 基于传统图形学算法上的调整,或 为模型进行特征分析/提取的预处理 时,未经存储空间压缩并贴近人自然感受的 原色格式(Primaries Format),即 RGB 体系色彩格式,还是会更便于操作。 另外,并不一定是由 YUV 转 RGB,在某些场景,我们也会要求将 RGB 转 YUV,或完成两个体系内的其他细分类型互转。所以,具体如何转换是 由后续步骤所需的输入而定,相当灵活。 在色彩格式转换后,则是 帧分析与预处理步骤。这一步完成 对前者输出帧数据的特征提取与解析。将会使用到相关的分析方法,例如 二维傅立叶 或其他 基础图像算法、滤波核 或 模型接入。此处也是我们本节进行操作的重点。 最后一步是 GPU 上屏缓冲和通信,则需要由 选定的图形驱动(Vulkan 等)来建立相应的信道,提供指令通信和显存更新功能。本节中,这些相关的环境和上屏更新,是由 Python 的 Tinker 界面库走系统 UI 环境 或 常用视频分析库(如 OpenCV)在 库内自行维护。暂不需要我们介入。 而当需要项目自行处理驱动和 GPU 通信环境上下文维护时,整个渲染引擎的部分,都应当在 同一个主体环境下(也可以用代表其通信句柄名的,实时上下文/通信上下文,来代指),辅助其他(如果需要)用于 时间片复用 或 GPU 信令预封装 的 辅助环境(如 延迟上下文 或 类似的自定义指令组装结构)使用。从而方便各个 前后关联密切环节的处理结果,在 GPU 资源池中实现互通。 这一涉及驱动资源协同和池化设计的部分,就属于 图形引擎(Graphics Engine) 的关键处理技术之一了。让我们在未来的进阶一册中再单独讲解。 常用的视频分析库主要有两个,为 Colour-Science、PyOpenCV,分别对应 [ 颜色科学综合分析、图像处理与科学计算 ] 的需求。常被用于 工程原型验证(即设计思路的验证) 和 外部(指工程外)帧分析。 尤其是 PyOpenCV,该库是重中之重。不仅是视频分析的核心库,在业务中也会经常直接使用到它的 C++ 内核。 Colour-Science(Color-Science) Colour-Science(Color-Science) 是一个专注于 色彩科学计算、光谱分析、色彩转换 和 色彩管理 的 Python 计算库。其由 Colour Developers 开发和维护,旨在为色彩科学领域的研究和应用提供一个 全面而强大的工具集 [8] 。注意区别库名为 Colour-Science 。 主要功能: 色彩空间转换,支持 CIE 标准下的 RGB、XYZ、LAB、LUV 等各种 色彩空间 转换与互转 支持色彩科学如 黑体辐射、辐射亮度、色温 等的 物理量评估 提供感官量与科学量间的换算,支持 配色函数 和 CIE 统一化色彩差异对比计算 支持由设备制造商提供的 LUT、CSV、XRite 等 不同种色彩配置文件 校准、评估、转换 能够提供完备的色彩学分析图表可视化能力 Colour-Science 是一个 相当齐全的色彩科学库,其方法基本涵盖了现行大部分通用(或较广范围使用)的色彩规格,并实现了相互间的联结。通过它,我们能够轻易的将不同色彩系统内的自定义变量等内部概念,转换到统一 CIE 规格下衡量。当然,也可以反向提供相应的配置内容。 由于库的体量过于巨大,此处仅列出部分相对高频次使用的函数,仅供参考。 核心模块(colour.)的常用函数(简,仅列出名称): 色彩空间: RGB_COLOURSPACES, RGB_to_XYZ, XYZ_to_RGB, XYZ_to_Lab, Lab_to_XYZ, xyY_to_XYZ, XYZ_to_xyY, LMS_to_XYZ, XYZ_to_LMS, UCS_to_XYZ, XYZ_to_UCS 色彩比对: XYZ_to_xy, xy_to_XYZ, XYZ_to_uv, uv_to_XYZ 色温转换: xy_to_CCT, CCT_to_xy 色彩感知: chromatic_adaptation, contrast_sensitivity_function, corresponding_chromaticities_prediction 色差计算: delta_E (CIE 1976, CIE 1994, CIE 2000, CMC etc.), index_stress (Kruskal’s Standardized Residual Sum of Squares) 光度计算: lightness, whiteness, yellowness, luminance, luminous_flux, luminous_efficacy, luminous_efficiency, 光谱处理: 光谱分析的主体类, sd_to_XYZ, sd_blackbody, sd_ones, sd_zeros, sd_gaussian, sd_CIE_standard_illuminant_A sd_CIE_illuminant_D_series 颜色代数: table_interpolation, kernel_nearest_neighbour, kernel_linear, kernel_sinc, kernel_lanczos, kernel_cardinal_spline, 数据读写: read_image, write_image, 
read_LUT, write_LUT, read_sds_from_csv_file, write_sds_to_csv_file, read_spectral_data_from_csv_file, read_sds_from_xrite_file, 辅助模块(colour..)的常用函数(简,仅列出名称): 绘图可视化(plotting.): plot_single_colour_swatch, plot_multi_colour_swatches, plot_single_sd, plot_multi_sds, plot_single_illuminant_sd, plot_multi_illuminant_sds, plot_single_lightness_function, plot_multi_lightness_functions, plot_single_luminance_function, plot_multi_luminance_functions 读写扩展(io.): image_specification_OpenImageI LUT_to_LUT, 色彩模型(models.): RGB_COLOURSPACE_CIE_RGB, RGB_COLOURSPACE_BT709, RGB_COLOURSPACE_BT2020, RGB_COLOURSPACE_DCI_P3, RGB_COLOURSPACE_sRGB 色温扩展(temperature.): mired_to_CCT, CCT_to_mired, xy_to_CCT_CIE_D, CCT_to_xy_CIE_D 光谱恢复(recovery.): sd_Jakob2019, LUT3D_Jakob2019, XYZ_to_sd_Jakob2019, find_coefficients_Jakob2019 代数扩展(algebra.): euclidean_distance, manhattan_distance, eigen_decomposition, vecmul Colour 开源项目 位于:Github:colour-science/colour 。使用细节,可自行前往官方档案馆查阅:官方档案馆查阅 。 PyOpenCV(Python Entry of Open Source Computer Vision Library) PyOpenCV(Python OpenCV) 是 计算机视觉和图像机器学习 OpenCV 库 的 官方 Python 套接接口,项目自 Intel 奠基,现由 OpenCV 开源开发社区进行维护 [9] 。其核心 OpenCV 覆盖了数百个计算机视觉算法,并 官方预训练好了 大量用于 传统 CV 的 ML 功能线下模型(详见 Github:OpenCV-contrib/Modules ),囊括从 简单图像处理 到 复杂应用的视觉任务,如边缘检测、图像滤波、基础变换(旋转、缩放、错切、仿射变换)、对象检测等,都可通过调用其方法功能实现。并且,考虑到机器学习拓展性,本身提供了 对模型训练和推理的相关扩展接口,方便处理中使用。 此外,OpenCV 有着对图片、视频文件、视频流(本地流、网络流)等数据源的完整支持,使得基本大部分涉及视频的分析工作,都能够用该库一库解决。非常强大。但其是一个以计算机视觉和 2D 图像处理为核心的库,具有 有限 的 3D 功能,并不专注于全面的 3D 图形学处理。 另外需要注意的是 OpenCV 并不是 专门用于进行深度学习的框架,虽然能够进行推理,可 并不能 达到最好的资源利用效率和训练与推理性能。这点在应用或非分析工程中,当存在大量模型处理需求或模型流水线时,应该考虑。 主要功能: 图像处理,支持图像读取、写入、滤波、变换、边缘检测等基本操作 视频处理,支持视频文件的读取、写入、帧捕获和视频流处理 特征检测,提供关键点检测和特征匹配,如 SIFT、SURF、ORB 等 对象检测,支持 Haar 级联分类器、深度学习模型(如 YOLO、SSD)等 机器学习,支持多种机器学习算法,如 SVM、KNN、决策树等 三维重建,提供立体匹配、相机标定、三维重建功能(有限) 图像分割,支持阈值分割、轮廓检测、分水岭算法等 相机补益,支持镜头畸变校正和图像增强 运动分析,提供光流计算和运动跟踪功能 图像拼接,支持全景图像拼接和图像对齐 GPU 加速,部分算法支持 GPU 加速,提升计算性能 高级图像处理,支持图像金字塔、模板匹配、霍夫变换(Hough)等高级操作 丰富的库和模块,集成了大量的图像处理和分析工具 良好的库兼容性,可以与 NumPy、SciPy 等科学计算库结合使用 多模型格式支持,支持 Caffe、TensorFlow、ONNX(关键) 等多种框架的模型格式 跨平台支持,可以在主流操作系统(Windows、macOS、Linux)上运行 由于 OpenCV 对 API 入口进行了统一,以下模块调用前缀皆为 “cv2.”,比如 “cv2.add”,后续如无特殊说明,则按此依据。 因为 OpenCV 的复杂度,我们参考官方的 核心库(对应 opencv-python) 和 扩展库(opencv-contrib-python) 两大分类,将主要的常用函数和封装,也拆分为 两部分描述。 首先,是核心库(opencv-python)所包含的内部模块。 核心模块(cv2.core)的常用函数(简,仅列出名称): 基本数据结构: 、 、 、 、 基本算法和操作: add, subtract, multiply, divide, absdiff 线性代数: solve, invert, determinant, eigen 随机数生成: , randu, randn 类型转换: convertScaleAbs, normalize 数据操作: minMaxLoc, meanStdDev, reduce 输入输出: imread, imwrite, imdecode, imencode 时间操作: getTickCount, getTickFrequency, getCPUTickCount 图像克隆和复制: copyMakeBorder 数学函数: exp, log, sqrt, pow 图像处理模块(cv2.imgproc)的基础函数(简,仅列出名称): 基本图像变换: resize, warpAffine, warpPerspective 颜色空间转换: cvtColor, inRange 图像滤波: GaussianBlur, medianBlur, bilateralFilter, blur 阈值处理: threshold, adaptiveThreshold 直方图处理: calcHist, equalizeHist 几何变换: getRotationMatrix2D, getAffineTransform, getPerspectiveTransform 图像金字塔: pyrUp, pyrDown 图像插值: linearPolar, remap 直线与形状绘制: line, rectangle, circle, ellipse, putText 图像处理模块(cv2.imgproc)的结构分析与形态学(Morphology)函数(简,仅列出名称): 边缘检测: Canny, Sobel, Laplacian, Scharr 霍夫变换: HoughLines, HoughLinesP, HoughCircles 轮廓检测: findContours, drawContours 形态学操作: morphologyEx, erode, dilate 矩形拟合: boundingRect, minAreaRect 圆形拟合: minEnclosingCircle 椭圆拟合: fitEllipse 多边形拟合: approxPolyDP 凸闭包计算: convexHull, convexityDefects 形状匹配: matchShapes 视频读写模块(cv2.videoio)的常用函数(简,仅列出名称): 视频捕获: , isOpened, read, release 视频写入: , write, release 视频属性: get, set (归属 创建的流句柄所有) 视频编码: 
图形用户界面模块(cv2.highgui)的常用函数(简,仅列出名称): 创建窗口: namedWindow 显示图像: imshow 等待键盘事件: waitKey 销毁窗口: destroyWindow, destroyAllWindows 鼠标事件: setMouseCallback 滑动条(Trackbar): createTrackbar, getTrackbarPos, setTrackbarPos 传统机器学习对象检测模块(cv2.objdetect)的常用函数(简,仅列出名称): 分类器实例: 使用分类器检测对象: detectMultiScale 保存和加载 XML 分类器文件: save, load (为 加载分类器) 官方提供的 XML 分类器文件,位于 OpenCV 的安装目录,主要有两类,加载方式一致: data/haarcascades 为 Haar 分类器(矩形像素差)的指定目标训练所得分类特征 data/lbpcascades 为 LBP 分类器(纹理描述符)的指定目标训练所得分类特征 特征检测与匹配模块(cv2.features2d)的常用函数(简,仅列出名称): 特征检测对象: 、 、 、 、 特征匹配对象: 、 特征检测创建: SIFT_create, SURF_create, ORB_create, FastFeatureDetector_create, BRISK::create 特征描述获取: compute, detect(由 [xx]_create 创造的对应特征检测方法的对象调用) 特征匹配: match, knnMatch(由 等特征匹配对象调用) 关键点绘制: drawKeypoints, drawMatches 相机校正与三维影射模块(cv2.calib3d)的常用函数(简,仅列出名称): 相机校正: findChessboardCorners, cornerSubPix, calibrateCamera, initUndistortRectifyMap, undistort, undistortPoints, getOptimalNewCameraMatrix 立体校正: stereoCalibrate, stereoRectify, stereoBM_create, stereoSGBM_create 匹配校正: correctMatches 3D 重建: reprojectImageTo3D 基本矩阵与本质矩阵(重要): findFundamentalMat, findEssentialMat, recoverPose 三角化: triangulatePoints 图像分割模块(cv2.segmentation)的常用函数(简,仅列出名称): 阈值分割: threshold, adaptiveThreshold(同 [结构分析与形态学函数] 已并入基础库) 路径分割: findContours, drawContours(同 [结构分析与形态学函数] 已并入基础库) 形态学分割: morphologyEx(套接,基于图像形状 膨胀、腐蚀、开/闭运算,增减益) 分水岭算法: watershed 图割(Graph Cut)算法: grabCut 超像素分割(需引入 opencv-contrib-python 扩展的 cv2.ximgproc 模块): ximgproc.createSuperpixelLSC 为创建 线性光谱聚类(LSC) 超像素分割器 ximgproc.createSuperpixelSLIC 为创建 简单线性迭代聚类(SLIC) 超像素分割器 ximgproc.createSuperpixelSEEDS 为创建 能量驱动采样(SEEDS) 超像素分割器 图像拼接模块(cv2.stitching)的常用函数(简,仅列出名称): 图像拼接对象: 图像拼接创建: create, createStitcher 设置参数: setPanoConfidenceThresh, setWaveCorrection(由 对象调用) 图像拼接: stitch(由 对象调用) 特征检索: featuresFinder(由 对象调用) 图像修复与 HDR 模块(cv2.photo)的常用函数(简,仅列出名称): 图像修复: inpaint 去噪: fastNlMeansDenoising, fastNlMeansDenoisingColored HDR 合成: createMergeDebevec, createMergeMertens, createMergeRobertson 色调映射: createTonemap, createTonemapDrago, createTonemapMantiuk, createTonemapReinhard 辐射校正: createCalibrateDebevec, createCalibrateRobertson 图像质量评估模块(cv2.quality)的常用函数(简,仅列出名称): 图像质量评估对象(重要): 无参考图像空间质量评估(BRISQUE) 评估实例 梯度幅度相似性偏差(GMSD) 评估实例 通用像素点间均方误差(MSE) 评估实例 像素峰值信噪比(PSNR) 评估实例 结构相似性指数(SSIM) 评估实例 图像质量评估创建: create 图像质量评估计算: compute 预训练模型加载: load(继承自 的关键方法) 文本处理模块(cv2.text)的常用函数(简,仅列出名称): 文本检测对象: 文本识别对象: , 文本检测创建: createERFilterNM1, createERFilterNM2 文本识别创建: createOCRHMMDecoder, createOCRHMMTransitionsTable 文本检测: detectRegions 文本识别: run(由所创建 调用) 字符识别: loadOCRHMMClassifierNM, loadOCRHMMClassifierCNN 视频分析模块(cv2.video)的常用函数(简,仅列出名称): 背景建模: , 光流计算: calcOpticalFlowFarneback(HS 法), calcOpticalFlowPyrLK(LK 法) 运动检测: CamShift, meanShift 视频稳定化: estimateRigidTransform, findTransformECC 轨迹跟踪模块(cv2.tracking)的常用函数,用于物体跟踪(重要,节省算力),仅列出名称: 跟踪器对象: 、 单目标跟踪( 跟踪器): Tracker_create, TrackerKCF_create, TrackerMIL_create, TrackerBoosting_create, TrackerMedianFlow_create, TrackerTLD_create, TrackerGOTURN_create, TrackerMOSSE_create, TrackerCSRT_create 多目标跟踪( 跟踪器集): MultiTracker_create, add 跟踪初始化: init 跟踪当前帧: update 其次,是扩展库(opencv-contrib-python)所包含的额外模块。 扩展库涵盖了较多 传统计算机视觉(CV)高级算法,部分使用配参会较核心库更为复杂。同时,其中涉及 3D 匹配 的功能,大部分会用到 空间位姿计算(Spatial Posture Calculation) 来表示物体 在场景中的定位情况。而对于此类涉及具有实际意义 3D 场景或物体的算法,想要展示其处理结果,一般都需要用构建空间化的渲染管线完成,而无法再直接使用 Matplotlib 做快速绘制(除非引入外部位姿库,或自实现)。介于此,有关 3D 绘制的部分,我们于未来再行讨论。 现在,让我们来看都有哪些 功能扩展。 生物识别扩展模块(cv2.bioinspired)的常用函数(简),用于感知模拟(重要): 视网膜模型(需 opencv-contrib-python 扩展的 cv2.bioinspired_Retina 模块),通过(cv2.)bioinspired_Retina.create 创建实例: 视网膜模拟类型实例 .clearBuffers 初始化清空模型历史缓冲 .run 
运行模型分析传入数据 .getParvo 获取视网膜小细胞(Parvo Cells)的感知模拟 .getMagno 获取视网膜大细胞(Magno Cells)的感知模拟 .write 配置视网膜模型参数,需要 .xml 格式的模型参数配置文件 .setupIPLMagnoChannel 设置视网膜大细胞通道数 .setupOPLandIPLParvoChannel 设置视网膜小细胞通道数 脉冲神经网络对象(需 opencv-contrib-python 扩展的 cv2.bioinspired 模块),通过(cv2.)bioinspired.TransientAreasSegmentationModule.create 创建实例: 脉冲神经网络进行瞬态区域检测实例 .run 运行模型分析传入数据 .getSegmentationPicture 获取检测结果 结构光扩展模块(cv2.structured_light)的常用函数(简,仅列出名称): 扫描蒙皮光栅生成器(需 opencv-contrib-python 扩展的 cv2.structured_light 模块),通过(cv2.)structured_light..create 创建实例: 、 .setWhiteThreshold 设置白色阈值 .setBlackThreshold 设置黑色阈值 .getImagesForShadowMasks 获取阴影校验图像(用于结构光解码) .generate 生成用于投影到被扫描物体上的光栅化蒙皮(锚点定位,必须) 扫描结果范式解码(需 opencv-contrib-python 扩展的 cv2.structured_light 模块),方法提供自 继承的 父类: 实物结构光光栅化投影解码器 .decode 解码捕获的光栅投影 三维重建,需要用到核心库三维影射模块(cv2.calib3d)能力: triangulatePoints, reprojectImageTo3D, convertPointsFromHomogeneous 表面检测点对特征匹配(PPF)扩展模块(cv2.ppf_match_3d)的常用函数,简: 点云模型(需 opencv-contrib-python 扩展的 cv2.ppf_match_3d 模块),通过(cv2.) ppf_match_3d.loadPLYSimple 加载 多边形点云格式(PLY [Polygon File Format])文件(.ply),来创建点云模型实例: 模型被加载 PLY 文件的光栅化与法线等信息,以 OpenCV 的 Mat 格式储存 模型检测器(基于局部几何特征匹配),即粗配准(Coarse Global Registeration)。需要在使用(cv2.)ppf_match_3d. 创建时指定 关联采样步长(relativeSamplingStep)决定使用时的模型检测精度,值越小则越严格(精确匹配): 采用点对特征匹配(Point Pair Features)算法的场景模型检测 .trainModel 将点云模型传入检测器训练,制作指定模型的场景内检测器 .match 使用训练好的模型检测器实例,检测 3D 场景内模型/位姿匹配 位姿匹配器(基于初始位姿特征匹配),即精配准(Fine Local Registeration)。需要在使用(cv2.)surface_matching. 创建时,对使用的 临近点迭代(ICP [Iterative Closest Point]) 算法进行初始设定 [10] 。位姿匹配器是对 粗配准 结果的进一步优化,用于细化点位,需要注意, 有这些参数: iterations 为 ICP 算法的最大迭代次数 tolerence 为 ICP 算法的收敛容差,变换矩阵更新差值小于该值时,停止迭代 rejectionScale 为 ICP 剔除放缩因子,剔除点对距离大于该因子乘平均距离时的点对 numLevels 为 ICP 点云对齐时的分辨率像素金字塔层数,层数越多越耗时,越精确 sampleType 为 ICP 点云对齐 采样类型,一般为 0 默认值 numMaxCorr 为 ICP 算法的最大对应点对(Point Pairs)数,可调节模型结果精度 位姿匹配器执行后,可以取得 源模型(Model)在场景(Scene)中的具体点位的场景内位置情况。常被用于 SLAM、场景重建、3D 环境分析。以: .registerModelToScene 注册物体点云到场景,来获关键点场景内的位姿矩阵 得到经过 ICP 校准后的 PPF 结果(需要在调用 .registerModelToScene 方法时,传入 PPF 返回的各点位姿矩阵数组)。 二维条码定位校准 ArUco 标记模块(cv2.aruco)的常用函数(简,仅列出名称): 创建标记字典: aruco.Dictionary_create, aruco.getPredefinedDictionary 标记检测: aruco.detectMarkers 标记绘制: aruco.drawDetectedMarkers, aruco.drawDetectedCornersCharuco 标记校准: aruco.calibrateCameraAruco 姿态估计: aruco.estimatePoseSingleMarkers, aruco.estimatePoseBoard, aruco.estimatePoseCharucoBoard 标记板创建: aruco.GridBoard_create, aruco.CharucoBoard_create 坐标面绘制: aruco.drawPlanarBoard Charuco 标记: aruco.drawCharucoDiamond, aruco.detectCharucoDiamond, aruco.interpolateCornersCharuco 机器学习模块(cv2.ml)常用方法封装(简,仅列出名称),提供传统机器学习分类算法: 数据准备: ml.TrainData_create 支持向量机: ml.SVM_create, .trainAuto, .predict K 近邻: ml.KNearest_create, .train, .findNearest 决策树: ml.DTrees_create, .train, .predict 随机森林: ml.RTrees_create, .train, .predict 加速树分类: ml.Boost_create, .train, .predict 正态贝叶斯分类器: ml.NormalBayesClassifier_create, .train, .predict 神经网络: ml.ANN_MLP_create, .train, .predict EM 聚类: ml.EM_create, .trainEM, .trainM, .predict 深度学习模块(cv2.dnn)常用方法封装(简,仅列出名称),提供深度学习单一模型前向推理: 模型加载: , dnn.readNet, dnn.readNetFromCaffe, dnn.readNetFromTensorflow, dnn.readNetFromTorch, dnn.readNetFromONNX, dnn.readNetFromDarknet 输入处理: dnn.blobFromImage, dnn.blobFromImages 输入设置: .setInput 推理后端: .setPreferableBackend, .setPreferableTarget 模型推理: .forward GPU 加速扩展模块(cv2.cuda)的常用函数,是同名基础模块算法 CUDA 加速版,简: GPU 信息: cuda.getCudaEnabledDeviceCount, cuda.printCudaDeviceInfo 内存管理: , cuda.registerPageLocked, cuda.unregisterPageLocked 图像处理: cuda.cvtColor, cuda.resize, cuda.threshold, cuda.warpAffine, cuda.warpPerspective 图像滤波: cuda.createBoxFilter, 
cuda.createGaussianFilter, cuda.createSobelFilter, cuda.createLaplacianFilter, cuda.createCannyEdgeDetector 特征检测: cuda.ORB_create, cuda.SURF_CUDA_create 立体匹配: cuda.createStereoBM, cuda.createStereoBeliefPropagation, cuda.createStereoConstantSpaceBP 视频处理: cuda.createBackgroundSubtractorMOG, cuda.createBackgroundSubtractorMOG2 光流计算: cuda.calcOpticalFlowFarneback, cuda.calcOpticalFlowPyrLK 空频变换: cuda.dft(1D/2D 离散傅立叶), cuda.mulSpectrums(频域乘) 图像金字塔: cuda.pyrUp, cuda.pyrDown 以上只列出了少部分常用的函数,仅覆盖了 OpenCV 的部分常用基础能力。 更多的使用细节,可自行前往项目 官方档案馆查阅 。 注意,上文中,并行计算扩展模块(cv2.parallel)并未例入其中。因为其主要为库内部加速,且对外的自定义函数自由度太高,使用时应对可能存在数据访问冲突进行自管理。考虑到必要程度不高(存在替代方案且库本身的 CUDA 加速就能满足性能要求),不太建议使用。 仍然如前,让我们用它们做些简单的实践。 简单练习:用 常用视频库 完成 带有均色分析的简易单人脸跟踪识别 这次,我们尝试完成,用 OpenCV 的 传统机器学习对象检测 和 视频分析对象跟踪算法 来实现对 单一人脸的识别与跟踪。且对人脸区域的 RGB、XYZ、LAB 三类色彩空间通道均值进行实时监测,绘制历史图表并显示在 UI 界面。 由于 OpenCV 提供了部分图形功能,能够做基础绘图(点、线、几何面等)。我们直接选用 OpenCV 来创建练习的图形用户界面(GUI)。而色彩分析则用在此领域更专业的 Colour-Science 完成。 练习事例按照标准工程工作流进行。 第一步,确立已知信息: 数据来源:使用电脑自带(或默认外接)摄像头的采样作为输入 处理环境:依赖 、,Python 脚本执行 工程目标: 1) 提供一个具有 GUI 的简易单人脸(Single Face)区域监测,并在监测到人脸后跟踪 2) 对人脸区域内的像素值进行关于 RGB、XYZ、LAB 色彩空间的区域内均值分析 第二步,准备执行环境: 检测是否已经安装了 Python 和 pip(对应 Python 版本 2.x) 或 pip3(对应 Python 版本 3.x) 包管理器。此步骤同我们在 的练习 中的操作一致,执行脚本即可: python install_pip.py python install_math_libs.py 完成对 Python 环境 的准备和 的安装。具体脚本实现,可回顾上一节。 同理,对于 的准备工作,我们也按照脚本方式进行流程化的封装。创建自动化脚本 install_grapic_libs.py 如下: import subprocess import sys def is_package_installed(package_name): try: subprocess.run([sys.executable, \"-m\", \"pip\", \"show\", package_name], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return True except subprocess.CalledProcessError: return False def install_package(package_name): print(f\"Installing {package_name}...\") try: subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", package_name], check=True) print(f\"{package_name} has been installed.\") except subprocess.CalledProcessError: print(f\"Failed to install {package_name}. 
Please try installing it manually.\") def main(): packages = [\"colour-science\", \"opencv-python\", \"opencv-contrib-python\"] for package in packages: if is_package_installed(package): print(f\"{package} is already installed.\") else: install_package(package) if __name__ == \"__main__\": main() 这套脚本流程应该相当熟悉了。随后,使用 Python 执行脚本: python install_grapic_libs.py 如果包已安装,则会输出 \"[基础视频库] is already installed.\"。如果包未安装,则会安装该包并输出 \"[基础视频库] has been installed.\",并显示包的详细信息。 到此,完成音频库的环境准备工作。 第三步,搭建人脸检测分析 Demo: 际上,这一次的 Demo 较上节的 来说,在交互逻辑上会少很多内容(基本没有操作上的交互)。但其功能逻辑链路,会比 要深一些。所以,我们可以把 功能上的诉求按照同一条执行流水线,进行概念原型设计。 而细化的两个 就是执行流水线的 “必要目标节点”,有关键步骤图: 图 5-7 人脸检测分析 Demo 处理过程节点示意图 至此,我们获得了此播放器的基本运行逻辑。根据上图节点作函数封装,构建实时处理流水线。编写代码: import cv2 import numpy as np import colour from collections import deque # 加载 Haar 级联分类器用于人脸检测 face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml') # 打开摄像头 cap = cv2.VideoCapture(0) # 初始化跟踪器标志 init_tracker = False tracker = None # 定义一个队列来保存历史颜色数据 history_length = 100 # 只保留最近 100 帧的数据 history_rgb = [deque(maxlen=history_length) for _ in range(3)] history_xyz = [deque(maxlen=history_length) for _ in range(3)] history_lab = [deque(maxlen=history_length) for _ in range(3)] def calculate_colour_metrics(frame, bounding_box): x, y, w, h = bounding_box face_roi = frame[int(y):int(y + h), int(x):int(x + w)] # 计算 RGB 平均值 mean_rgb = np.mean(face_roi, axis=(0, 1)) / 255.0 # 归一化到 [0, 1] 范围 # 获取 D65 光源的色度坐标 illuminant = colour.CCS_ILLUMINANTS['CIE 1931 2 Degree Standard Observer']['D65'] # 转换到 XYZ 颜色空间 mean_xyz = colour.RGB_to_XYZ(mean_rgb, colour.RGB_COLOURSPACES['sRGB'], illuminant=illuminant) # 转换到 Lab 颜色空间 mean_lab = colour.XYZ_to_Lab(mean_xyz, illuminant) return mean_rgb, mean_xyz, mean_lab def draw_graph(frame, data, position, colors, title): \"\"\" 在 frame 上绘制图表 :param frame: 要绘制图表的帧 :param data: 要绘制的数据(deque) :param position: 图表的位置 :param colors: 图表的颜色列表 :param title: 图表的名称 \"\"\" graph_height = 100 graph_width = 200 x, y = position # 创建半透明背景 overlay = frame.copy() cv2.rectangle(overlay, (x, y - graph_height), (x + graph_width, y), (0, 0, 0), -1) alpha = 0.5 # 透明度 cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame) # 绘制坐标轴 cv2.line(frame, (x, y), (x + graph_width, y), (0, 0, 0), 1) cv2.line(frame, (x, y), (x, y - graph_height), (0, 0, 0), 1) # 绘制图表名称 cv2.putText( frame, title, (x, y - graph_height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1 ) # 绘制数据曲线 for channel, color in enumerate(colors): if len(data[channel]) > 1: for i in range(1, len(data[channel])): cv2.line( frame, (x + int((i - 1) * graph_width / (history_length - 1)), y - int(data[channel][i - 1] * graph_height)), (x + int(i * graph_width / (history_length - 1)), y - int(data[channel][i] * graph_height)), color, 1 ) while True: # 读取摄像头帧 ret, frame = cap.read() if not ret: break # 转换为灰度图像 gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if not init_tracker: # 检测人脸 faces = face_cascade.detectMultiScale( gray, scaleFactor=1.1s, minNeighbors=5, minSize=(120, 120), # 增大最小尺寸以减少局部特征检测 flags=cv2.CASCADE_SCALE_IMAGE ) # 如果检测到人脸,选择最大的矩形框初始化跟踪器 if len(faces) > 0: # 选择最大的矩形框 largest_face = max(faces, key=lambda rect: rect[2] * rect[3]) x, y, w, h = largest_face bounding_box = (x, y, w, h) # 确保检测到的是整张人脸而不是局部特征(例如通过宽高比) aspect_ratio = w / h if 0.75 有运行效果如下: 图 5-8 带有均色分析的简易单人脸跟踪识别 Demo 效果图 完成针对视频分析库的练习。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-14 10:15:42 
"},"Chapter_5/Language/cn/Docs_5_1_4.html":{"url":"Chapter_5/Language/cn/Docs_5_1_4.html","title":"5.1.4 其他分析软件","keywords":"","body":"5.1.4 其他分析软件 除了使用前三节提及的开源库,以自编辑脚本的方式进行分析外。工作中也能采用其他收费或免费的 第三方分析软件,处理需要查验的数据。 那么,有哪些音视频常用的此类工具软件呢?这里我们做下简单罗列和介绍。 Audiacity Audacity 是一款全球广受欢迎且 免费的全平台音频编辑/录音软件。虽然一般常用它来做音乐编辑、声音设计、播客制作等实用需求,但其功能完善,也可以处理类似 音频修复、简单音频信号分解 等分析场景。 图 5-9 Audacity 界面展示 对于一些音频基础信息分析,比如:绘制频响切面(FLS)、测算 RMS 等。此外,Audacity 也支持插件能力扩展,我们可以去 Audacity 的 官网插件入口,查询我们需要的 额外扩展。 软件获取自 Audacity 的 官网地址,下载其最新版本。 Sonic Visualiser Sonic Visualiser(SV) 是一款专门用于音频科学分析的工具软件,由 英国伦敦大学(Queen Mary University of London) 的 音频和音乐技术研究小组 开发,并选择了 GNU 通用公共许可证(GPL)开放免费使用。相比 Audacity,SV 更为的强大而专业,能轻易做到 Audacity 不能做到的事情,比如:语谱图(Spectrogram) 分析、和声分析、音高检测 等高级功能。 图 5-10 Sonic Visualiser 界面展示 因此,对于需要 更细化音频分析 和 深度可视化能力 时,我们通常会 优先选择采用 SV 协助解决问题。类似 音频杂音分析、外部噪音分析 等,就可以用其初步处理。大部分时候,都可以通过 SV 的结果,判断出问题成因。 软件可自 Sonic Visualiser 的 官网地址 获取,同样也是一款 全平台软件。 Elecard StreamEye Studio Elecard StreamEye Studio(SES) 是一套包含总共 5 个独立应用程序和命令行工具(CLI)的 专业图像/视频分析工作中心。中心包括了 StreamEye、Stream Analyzer、Video Quality Estimator、Quality Gates 和 YUV Viewer,分别被用于 [深入分析编码视频序列(流-序列)、计算视频质量指标(流-指标)、编码流语法分析(流-规格)、编码流参数分析(流-参数)、传输格式分析(YUV)]。由于提供了 CLI,使得 SES 能够被用来进行稳定的自动化测试,并用于生成数据报告。 图 5-11 SES 的 StreamEye 界面展示 大多数情况下,SES 是被用来做 流分析(Stream Analyze) 的工作的,不过因为其本身的完备程度,我们也可以用它来进行一些 局部范围内的帧分析(Frame Analyze),比如 光流检测、运动矢量检测、超像素分割情况及局部像素分割可视化 的处理,或用来 查验 YUV 数据的准确性。 而要真正发挥 SES 的强大能力,则还需等到音视频编解码部分时,才能体现。 软件可自 Elecard StreamEye Studio 的官 获取。作为专业软件,其 基础功能是免费的,高级功能会包含更多的编解码规格,并提供更精细的分析能力。 除了我们介绍的这 3款 软件之外,还存在大量的第三方软件,比如 Adobe 系列 的 Adobe Audition 和 After Effects 等也可以用于分析(当然费用也需要预备一定的开支)。而通过我们之前介绍的开源库,和 诸如 FFMpeg 库等本身提供的命令行工具,同样也能做到。 这些工具总的来说,可以按照两类划分:综合分析 和 自动化运用。 三方软件擅长的主要是综合分析,能够较为轻松的从整体视野来处理问题; 开源库和 CLI 则更多在工程中被用于,验证原型 和 音视频工程的自动化治理(质量/性能报告)。 所以,在具体的工程中,还需 结合起来灵活使用。 Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-14 11:28:05 "},"Chapter_5/Language/cn/References_5.html":{"url":"Chapter_5/Language/cn/References_5.html","title":"【参考文献】","keywords":"","body":"五、【参考文献】 [1] Oliphant, Travis E. Guide to numpy. Vol. 1. USA: Trelgol Publishing, 2006. [2] Snider, L. A., and S. E. Swedo. \"PANDAS: current status and directions for research.\" Molecular psychiatry 9, no. 10 (2004): 900-907. [3] Tosi, Sandro. Matplotlib for Python developers. Packt Publishing Ltd, 2009. [4] SoundFile library (SoundFile: An audio library based on libsndfile, 2023. Available at: https://pysoundfile.readthedocs.io/) for audio file read/write operations, which is based on the libsndfile library (Erik de Castro Lopo. libsndfile: A C library for reading and writing sound files, 2023. Available at: http://www.mega-nerd.com/libsndfile/). [5] H. Pham, “Pyaudio website.” https://people.csail.mit.edu/hubert/pyaudio, 2006. [6] McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg, and Oriol Nieto. “librosa: Audio and music signal analysis in python.” In Proceedings of the 14th python in science conference, pp. 18-25. 2015. [7] Brossier, Paul M. \"The aubio library at mirex 2006.\" Synthesis (2006). [8] Colour Developers. 2023. Colour-Science. Version 0.3.16. Accessed October 5, 2023. https://www.colour-science.org/. [9] Bradski, Gary, and Adrian Kaehler. \"OpenCV.\" Dr. Dobb’s journal of software tools 3.2 (2000). [10] P. J. Besl and N. D. McKay, \"A method for registration of 3-D shapes,\" in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 14, no. 2, pp. 239-256, Feb. 
1992, doi: 10.1109/34.121791. Copyright © Since 2021 李述博 (Arikan.Li) , All Rights Reserved all right reserved,powered by GitbookLast Updated: 2024-09-12 12:11:11 "}}