
Commit

Built site for gh-pages
alexchen4ai committed Feb 23, 2024
1 parent e8b0bcf commit 8015185
Showing 12 changed files with 37 additions and 61 deletions.
2 changes: 1 addition & 1 deletion .nojekyll
@@ -1 +1 @@
-d52de7b8
+5bb44101
6 changes: 1 addition & 5 deletions about.html
@@ -95,11 +95,7 @@
</li>
<li class="nav-item">
<a class="nav-link" href="./notes.html">
<span class="menu-text">Notes</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="./chat_model.html">
<span class="menu-text">LLM code example</span></a>
<span class="menu-text">Articles</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="./hello.html">
6 changes: 1 addition & 5 deletions hello.html
@@ -158,11 +158,7 @@
</li>
<li class="nav-item">
<a class="nav-link" href="./notes.html">
<span class="menu-text">Notes</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="./chat_model.html">
<span class="menu-text">LLM code example</span></a>
<span class="menu-text">Articles</span></a>
</li>
<li class="nav-item">
<a class="nav-link active" href="./hello.html" aria-current="page">
6 changes: 1 addition & 5 deletions index.html
@@ -94,11 +94,7 @@
</li>
<li class="nav-item">
<a class="nav-link" href="./notes.html">
<span class="menu-text">Notes</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="./chat_model.html">
<span class="menu-text">LLM code example</span></a>
<span class="menu-text">Articles</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="./hello.html">
8 changes: 2 additions & 6 deletions notes.html
@@ -156,11 +156,7 @@
</li>
<li class="nav-item">
<a class="nav-link active" href="./notes.html" aria-current="page">
<span class="menu-text">Notes</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="./chat_model.html">
<span class="menu-text">LLM code example</span></a>
<span class="menu-text">Articles</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="./hello.html">
@@ -224,7 +220,7 @@ <h1 class="title">Research notes</h1>

<div class="quarto-listing quarto-listing-container-default" id="listing-listing">
<div class="list quarto-listing-default">
<div class="quarto-post image-right" data-index="0" data-categories="Large Language Models" data-listing-date-sort="1708588800000" data-listing-file-modified-sort="1708672260761" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="11" data-listing-word-count-sort="2110">
<div class="quarto-post image-right" data-index="0" data-categories="Large Language Models" data-listing-date-sort="1708588800000" data-listing-file-modified-sort="1708672636360" data-listing-date-modified-sort="NaN" data-listing-reading-time-sort="11" data-listing-word-count-sort="2127">
<div class="thumbnail">
<p><a href="./notes/Large Language Model/llm_eval.html" class="no-external"></a></p><a href="./notes/Large Language Model/llm_eval.html" class="no-external">
<p class="card-img-top"><img src="images/LLM_eval.png" class="thumbnail-image card-img"/></p>
13 changes: 13 additions & 0 deletions notes.xml
@@ -47,6 +47,19 @@ Tip
<p>Benchmarks for evaluating large language models come in various forms, each serving a unique purpose. They can be broadly categorized into general benchmarks, which assess overall performance, and specialized benchmarks, designed to evaluate the model’s proficiency in specific areas such as understanding the Chinese language or its ability to perform function calls.</p>
</div>
</div>
<div class="callout callout-style-default callout-tip callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Tip
</div>
</div>
<div class="callout-body-container callout-body">
+<p>Consider taking Stanford CS224U if you want to learn more of the fundamentals of LLM evaluation.</p>
+</div>
+</div>
<section id="llm-leaderboard" class="level2">
<h2 class="anchored" data-anchor-id="llm-leaderboard">LLM leaderboard</h2>
<p>Numerous leaderboards exist for Large Language Models (LLMs), each compiled based on the benchmarks of these models. By examining these leaderboards, we can identify which benchmarks are particularly effective and informative for evaluating the capabilities of LLMs.</p>
6 changes: 1 addition & 5 deletions notes/Diffusion Model/sd.html
@@ -97,11 +97,7 @@
</li>
<li class="nav-item">
<a class="nav-link" href="../../notes.html">
<span class="menu-text">Notes</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../chat_model.html">
<span class="menu-text">LLM code example</span></a>
<span class="menu-text">Articles</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../hello.html">
19 changes: 14 additions & 5 deletions notes/Large Language Model/llm_eval.html
@@ -97,11 +97,7 @@
</li>
<li class="nav-item">
<a class="nav-link" href="../../notes.html">
<span class="menu-text">Notes</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../chat_model.html">
<span class="menu-text">LLM code example</span></a>
<span class="menu-text">Articles</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../hello.html">
@@ -259,6 +255,19 @@ <h1 class="title d-none d-lg-block">Large language model evaluation</h1>
<p>Benchmarks for evaluating large language models come in various forms, each serving a unique purpose. They can be broadly categorized into general benchmarks, which assess overall performance, and specialized benchmarks, designed to evaluate the model’s proficiency in specific areas such as understanding the Chinese language or its ability to perform function calls.</p>
</div>
</div>
<div class="callout callout-style-default callout-tip callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Tip
</div>
</div>
<div class="callout-body-container callout-body">
+<p>Consider taking Stanford CS224U if you want to learn more of the fundamentals of LLM evaluation.</p>
+</div>
+</div>
<section id="llm-leaderboard" class="level2">
<h2 class="anchored" data-anchor-id="llm-leaderboard">LLM leaderboard</h2>
<p>Numerous leaderboards exist for Large Language Models (LLMs), each compiled based on the benchmarks of these models. By examining these leaderboards, we can identify which benchmarks are particularly effective and informative for evaluating the capabilities of LLMs.</p>
6 changes: 1 addition & 5 deletions notes/Large Language Model/moe.html
@@ -160,11 +160,7 @@
</li>
<li class="nav-item">
<a class="nav-link" href="../../notes.html">
<span class="menu-text">Notes</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../chat_model.html">
<span class="menu-text">LLM code example</span></a>
<span class="menu-text">Articles</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../hello.html">
6 changes: 1 addition & 5 deletions notes/Large Language Model/rl_llm.html
@@ -160,11 +160,7 @@
</li>
<li class="nav-item">
<a class="nav-link" href="../../notes.html">
<span class="menu-text">Notes</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../chat_model.html">
<span class="menu-text">LLM code example</span></a>
<span class="menu-text">Articles</span></a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../hello.html">
14 changes: 0 additions & 14 deletions search.json
@@ -128,20 +128,6 @@
"section": "",
"text": "Large language model evaluation\n\n\n\n\n\n\nLarge Language Models\n\n\n\n\n\n\n\n\n\n11 min\n\n\n\n\n\n\n\n\n\n\n\n\nMixture of expert\n\n\n\n\n\n\nLarge Language Models\n\n\n\n\n\n\n\n\n\n5 min\n\n\n\n\n\n\n\n\n\n\n\n\nScalable diffusion models with transformers\n\n\n\n\n\n\nDiffusion Model\n\n\n\n\n\n\n\n\n\n1 min\n\n\n\n\n\n\n\n\n\n\n\n\nReinforcement learning for large language model\n\n\n\n\n\n\nLarge Language Models\n\n\n\n\n\n\n\n\n\n19 min\n\n\n\n\n\n\nNo matching items"
},
{
"objectID": "chat_model.html",
"href": "chat_model.html",
"title": "Chat model demo",
"section": "",
"text": "The chat models are very different from the other models. We should spend more time for the data, especially during training.\n\nfrom transformers import AutoTokenizer\n\ntokenizer = AutoTokenizer.from_pretrained(\"mistralai/Mistral-7B-Instruct-v0.1\")\n\nchat = [\n {\"role\": \"user\", \"content\": \"Hello, how are you?\"},\n {\"role\": \"assistant\", \"content\": \"I'm doing great. How can I help you today?\"},\n {\"role\": \"user\", \"content\": \"I'd like to show off how chat templating works!\"},\n]\n\ntokenizer.apply_chat_template(chat, tokenize=False)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\"&lt;s&gt;[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?&lt;/s&gt; [INST] I'd like to show off how chat templating works! [/INST]\"\n\n\n\n# each model has different tokenizer! We can access the chat templates for each model\ntokenizer.chat_template\n\n\"{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}\"\n\n\n\n# During training, we should also use the chat templates to rearrange the dataset\nfrom transformers import AutoTokenizer\nfrom datasets import Dataset\n\ntokenizer = AutoTokenizer.from_pretrained(\"HuggingFaceH4/zephyr-7b-beta\")\n\nchat1 = [\n {\"role\": \"user\", \"content\": \"Which is bigger, the moon or the sun?\"},\n {\"role\": \"assistant\", \"content\": \"The sun.\"},\n]\nchat2 = [\n {\"role\": \"user\", \"content\": \"Which is bigger, a virus or a bacterium?\"},\n {\"role\": \"assistant\", \"content\": \"A bacterium.\"},\n]\n\ndataset = Dataset.from_dict({\"chat\": [chat1, chat2]})\ndataset = dataset.map(\n lambda x: {\n \"formatted_chat\": tokenizer.apply_chat_template(\n x[\"chat\"], tokenize=False, add_generation_prompt=False\n )\n }\n)\nprint(dataset[\"formatted_chat\"][0])\n\n\n\nWe can also fill the missing tokens in the middle of the sentence. We just need to tell the language model the content before and after the missing part. And the model will handle the missing part automatically.\n\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\ntokenizer = AutoTokenizer.from_pretrained(\n \"stabilityai/stable-code-3b\", trust_remote_code=True\n)\nmodel = AutoModelForCausalLM.from_pretrained(\n \"stabilityai/stable-code-3b\",\n trust_remote_code=True,\n torch_dtype=\"auto\",\n # attn_implementation=\"flash_attention_2\",\n)\nmodel.cuda()\n\n# &lt;fim_prefix&gt; is the prefix code before the missiong part\n# &lt;fim_suffix&gt; is the suffix code after the missiong part\n# &lt;fim_middle&gt; is the missing part, and we add the token in the final part so that the model can predict it\ninputs = tokenizer(\n \"&lt;fim_prefix&gt;def fib(n):&lt;fim_suffix&gt; else:\\n return fib(n - 2) + fib(n - 1)&lt;fim_middle&gt;\",\n return_tensors=\"pt\",\n).to(model.device)\ntokens = model.generate(\n **inputs,\n max_new_tokens=48,\n temperature=0.2,\n do_sample=True,\n)\nprint(tokenizer.decode(tokens[0], skip_special_tokens=True))"
},
{
"objectID": "chat_model.html#fill-in-middle-model",
"href": "chat_model.html#fill-in-middle-model",
"title": "Chat model demo",
"section": "",
"text": "We can also fill the missing tokens in the middle of the sentence. We just need to tell the language model the content before and after the missing part. And the model will handle the missing part automatically.\n\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\ntokenizer = AutoTokenizer.from_pretrained(\n \"stabilityai/stable-code-3b\", trust_remote_code=True\n)\nmodel = AutoModelForCausalLM.from_pretrained(\n \"stabilityai/stable-code-3b\",\n trust_remote_code=True,\n torch_dtype=\"auto\",\n # attn_implementation=\"flash_attention_2\",\n)\nmodel.cuda()\n\n# &lt;fim_prefix&gt; is the prefix code before the missiong part\n# &lt;fim_suffix&gt; is the suffix code after the missiong part\n# &lt;fim_middle&gt; is the missing part, and we add the token in the final part so that the model can predict it\ninputs = tokenizer(\n \"&lt;fim_prefix&gt;def fib(n):&lt;fim_suffix&gt; else:\\n return fib(n - 2) + fib(n - 1)&lt;fim_middle&gt;\",\n return_tensors=\"pt\",\n).to(model.device)\ntokens = model.generate(\n **inputs,\n max_new_tokens=48,\n temperature=0.2,\n do_sample=True,\n)\nprint(tokenizer.decode(tokens[0], skip_special_tokens=True))"
},
{
"objectID": "about.html",
"href": "about.html",
6 changes: 1 addition & 5 deletions sitemap.xml
@@ -10,7 +10,7 @@
</url>
<url>
<loc>https://alexchen4ai.github.io/blog/notes/Large Language Model/llm_eval.html</loc>
-<lastmod>2024-02-23T07:11:00.761Z</lastmod>
+<lastmod>2024-02-23T07:17:16.360Z</lastmod>
</url>
<url>
<loc>https://alexchen4ai.github.io/blog/index.html</loc>
@@ -20,10 +20,6 @@
<loc>https://alexchen4ai.github.io/blog/notes.html</loc>
<lastmod>2024-02-03T23:13:18.582Z</lastmod>
</url>
-<url>
-<loc>https://alexchen4ai.github.io/blog/chat_model.html</loc>
-<lastmod>2024-02-04T06:59:51.701Z</lastmod>
-</url>
<url>
<loc>https://alexchen4ai.github.io/blog/about.html</loc>
<lastmod>2024-02-09T08:17:04.643Z</lastmod>
