Add warning log option

microsoft · Apr 17, 2024 · aca309d
1 parent 435b897
commit aca309d
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 2 deletions.
diff --git a/src/logging.cpp b/src/logging.cpp
@@ -1,3 +1,6 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 #include "generators.h"
 #include "json.h"
 #include <iostream>
@@ -14,6 +17,8 @@ void SetLogBool(std::string_view name, bool value) {
     g_log.enabled = value;
   else if (name == "ansi_tags")
     g_log.ansi_tags = value;
+  else if (name == "warning")
+    g_log.warning = value;
   else if (name == "generate_next_token")
     g_log.generate_next_token = value;
   else if (name == "append_next_tokens")

diff --git a/src/logging.h b/src/logging.h
@@ -1,3 +1,6 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 #pragma once
 
 namespace Generators {
@@ -9,12 +12,13 @@ struct LogItems {
   // Special log related entries
   bool enabled{};        // Global on/off for all logging
   bool ansi_tags{true};  // Use ansi SGR color & style tags to make console output easier to read
+  bool warning{true};   // warning messages, like options that were set but don't apply
 
   // Loggable actions, will always have the name below with the log entry
   bool generate_next_token{};
   bool append_next_tokens{};
-  bool hit_eos{true};  // Only works for CPU non beam search
-  bool hit_max_length{true};
+  bool hit_eos{};  // Only works for CPU non beam search
+  bool hit_max_length{};
   bool model_input_values{};   // Dump the input tensor shapes & values before the model runs
   bool model_output_shapes{};  // Before the model runs there are only the output shapes, no values in them. Useful for pre Session::Run debugging
   bool model_output_values{};  // After the model runs the output tensor values can be displayed

diff --git a/src/models/kv_cache.cpp b/src/models/kv_cache.cpp
@@ -119,6 +119,9 @@ KV_Cache::KV_Cache(const Model& model, State& state)
       layer_count_{model_.config_->model.decoder.num_hidden_layers},
       past_present_share_buffer_{state_.params_->search.past_present_share_buffer && state_.params_->search.num_beams == 1 && model_.device_type_ == DeviceType::CUDA},
       shape_{state_.params_->BatchBeamSize(), model.config_->model.decoder.num_key_value_heads, 0, model.config_->model.decoder.head_size} {
+  if (g_log.enabled && g_log.warning && past_present_share_buffer_ != state_.params_->search.past_present_share_buffer)
+    Log("warning", "past_present_share_buffer search option set to true, but has been disabled due to the current configuration. See https://aka.ms/generate_config for details");
+
   pasts_.resize(layer_count_ * 2);
   presents_.reserve(layer_count_ * 2);
 
@@ -181,6 +184,7 @@ void KV_Cache::Add() {
 }
 
 void KV_Cache::Update(std::span<const int32_t> beam_indices, int current_length) {
+  // When the past & present buffers are shared there is nothing to update here, so exit early
   if (past_present_share_buffer_)
     return;