From 1bce06b44c4b18562aecc3e93b55a5aa7252ed13 Mon Sep 17 00:00:00 2001 From: <> Date: Sat, 7 Dec 2024 11:15:08 +0000 Subject: [PATCH] Deployed 44d3474 with MkDocs version: 1.6.1 --- .nojekyll | 0 404.html | 519 ++ LLM_Serving_Survey/index.html | 996 +++ assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.83f73b43.min.js | 16 + assets/javascripts/bundle.83f73b43.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + 
assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.6ce7567c.min.js | 42 + .../workers/search.6ce7567c.min.js.map | 7 + assets/stylesheets/main.6f8fc17f.min.css | 1 + assets/stylesheets/main.6f8fc17f.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + imgs/1.png | Bin 0 -> 74678 bytes imgs/LLM-Serving-Survey/image-1.png | Bin 0 -> 46926 bytes imgs/LLM-Serving-Survey/image-2.png | Bin 0 -> 22037 bytes imgs/LLM-Serving-Survey/image-3.png | Bin 0 -> 92191 bytes imgs/LLM-Serving-Survey/image.png | Bin 0 -> 44353 bytes imgs/mstct-1.png | Bin 0 -> 294877 bytes imgs/mstct-10.png | Bin 0 -> 32748 bytes imgs/mstct-11.png | Bin 0 -> 29590 bytes imgs/mstct-12.png | Bin 0 -> 60609 bytes imgs/mstct-13.png | Bin 0 -> 24930 bytes imgs/mstct-14.png | Bin 0 -> 17154 bytes imgs/mstct-15.png | Bin 0 -> 187760 bytes imgs/mstct-2.png | Bin 0 -> 427666 bytes imgs/mstct-3.png | Bin 0 -> 192682 bytes imgs/mstct-5.png | Bin 0 -> 55876 bytes imgs/mstct-6.png | Bin 0 -> 30437 bytes imgs/mstct-7.png | Bin 0 -> 257875 bytes imgs/mstct-8.png | Bin 0 -> 231841 bytes imgs/mstct-9.png | Bin 0 -> 22746 bytes imgs/yolov7-1.jpg | Bin 0 -> 609831 bytes imgs/yolov7-2.png | Bin 0 -> 40572 bytes index.html | 1116 +++ objectdet/MSTCT/index.html | 853 +++ objectdet/index.html | 775 ++ objectdet/yolov6/index.html | 905 +++ objectdet/yolov7/index.html | 635 ++ search/search_index.json | 1 + .../index.html" | 1522 ++++ security/index.html | 880 +++ .../index.html" | 1237 +++ sitemap.xml | 3 + sitemap.xml.gz | Bin 0 -> 127 bytes 78 files changed, 16719 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 LLM_Serving_Survey/index.html create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.83f73b43.min.js create mode 100644 assets/javascripts/bundle.83f73b43.min.js.map create mode 100644 
assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 
assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js.map create mode 100644 assets/stylesheets/main.6f8fc17f.min.css create mode 100644 assets/stylesheets/main.6f8fc17f.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 imgs/1.png create mode 100644 imgs/LLM-Serving-Survey/image-1.png create mode 100644 imgs/LLM-Serving-Survey/image-2.png create mode 100644 imgs/LLM-Serving-Survey/image-3.png create mode 100644 imgs/LLM-Serving-Survey/image.png create mode 100644 imgs/mstct-1.png create mode 100644 imgs/mstct-10.png create mode 100644 imgs/mstct-11.png create mode 100644 imgs/mstct-12.png create mode 100644 imgs/mstct-13.png create mode 100644 imgs/mstct-14.png create mode 100644 imgs/mstct-15.png create mode 100644 imgs/mstct-2.png create mode 100644 imgs/mstct-3.png create mode 100644 imgs/mstct-5.png create mode 100644 imgs/mstct-6.png create mode 100644 imgs/mstct-7.png create mode 100644 imgs/mstct-8.png create mode 100644 imgs/mstct-9.png create mode 100644 imgs/yolov7-1.jpg create mode 100644 imgs/yolov7-2.png create mode 100644 index.html create mode 100644 objectdet/MSTCT/index.html create mode 100644 objectdet/index.html create mode 100644 objectdet/yolov6/index.html create mode 100644 objectdet/yolov7/index.html create mode 100644 search/search_index.json create mode 100644 "security/DNN\346\250\241\345\236\213\345\222\214\345\212\240\351\200\237\345\231\250\347\232\204\347\241\254\344\273\266\345\256\211\345\205\250\347\273\274\350\277\260/index.html" create mode 100644 security/index.html create mode 100644 
"security/\350\256\241\347\256\227\346\234\272\350\247\206\350\247\211\351\242\206\345\237\237\347\232\204\347\211\251\347\220\206\345\257\271\346\212\227\346\224\273\351\230\262\347\273\274\350\277\260/index.html" create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..59df0fa --- /dev/null +++ b/404.html @@ -0,0 +1,519 @@ + + + +
+ + + + + + + + + + + + + + +经典GPU架构中有很多SM,每个SM中包含了一些SRAM,用以在线程间进行数据交换和同步。
+encompass CPU:[19, 222] +移动边缘设备:[71] +ASICs: [202, 307] +加速器:TPU[136], FPGA[284]
+分为算法创新和系统优化
+ +使用低bit的数据来表示32位的数。分为QAT和PTQ。PTQ的代表有GPTQ[88]的W4A16和SmoothQuant[268]的W8A8。量化也应用于上下文压缩CacheGen[169]和微调QLoRA[70], PEQA[142]。
+主要是由上下文KV Cache带来的,KV Cache会动态放缩。传统的方法比如FasterTransformer是通过定长的内存预分配来管理的。vLLM[150] 提出了PagedAttention,将KV Cache分割为多个不连续的内存block。SpecInfer[177]提出了Tree Attention和深度优先的树遍历来评估剩余KV Cache的内存分配,主要应用在多个请求共享prompt前缀的情况。LightLLM[21]则提出了一个细粒度的基于token block的内存管理机制来进行内存管理。
+请求调度算法是为了最大化资源利用、保证响应时间等服务级别的目标。分为动态batch[33],preemption[114],priority[191],swapping[39],model selection[107],cost efficiency[295],负载均衡和资源分配[259]。LLM独有的特征:模型规模大、迭代的自回归解码机制、不确定的输出长度和上下文信息的状态管理。
+selective batching和iteration-level scheduling策略,主要有vLLM、RayLLM[27]以及TensorRT-LLM的in-flight batching。SpecInfer采用投机采样来迭代地选择下一个请求。FastServe[261]使用较短完成时间来代替FCFS。SARATHI[31]解决模型初始化时的bubble问题。
+在初始阶段TensorRT-LLM基于上下文长度来选择cuBLAS和FlashAttention。还有优化是将GEMM替换为GEMV。vLLM提出是为了提高模型的吞吐量,FlexFlow-Serve利用SpecInfer是为了降低延迟。
+主要关注在GLM的serving问题,关注GPU上的推理高效问题[190, 297],新型编译器[78]。模型压缩和量化[126]。分布式训练[42, 175]
+