
Commit

Minor Jenkins fix (#212)
1. Applied Ruff formatting, since the previous commit was not formatted properly.
2. Updated the Jenkins script.

---------

Signed-off-by: Amit Raj <[email protected]>
quic-amitraj authored and quic-rishinr committed Jan 10, 2025
1 parent de97e39 commit 40751a2
Showing 10 changed files with 52 additions and 52 deletions.
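Most of the line changes below are mechanical reformatting. They match the output of a newer Ruff formatter style (likely the 2025 style guide shipped with Ruff 0.9; this is an inference from the commit date, not something stated in the commit): assert messages are parenthesized instead of wrapping the condition, long conditional context managers are parenthesized, and operators inside f-string expressions get surrounding spaces. A small self-contained illustration of the assert change:

import numpy as np

pytorch_hf_tokens = np.array([1, 2, 3])
pytorch_kv_tokens = np.array([1, 2, 3])

# Style before this commit: the condition is wrapped and the message hangs off ").all()".
assert (
    pytorch_hf_tokens == pytorch_kv_tokens
).all(), "Tokens don't match"

# Style after this commit: the condition stays on one line and the message is parenthesized.
assert (pytorch_hf_tokens == pytorch_kv_tokens).all(), (
    "Tokens don't match"
)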
6 changes: 3 additions & 3 deletions QEfficient/cloud/finetune.py
@@ -65,9 +65,9 @@ def main(**kwargs):
# TODO: may have to init qccl backend, next try run with torchrun command
torch_device = torch.device(device)
assert torch_device.type != "cpu", "Host doesn't support single-node DDP"
assert (
torch_device.index is None
), f"DDP requires specification of device type only, however provided device index as well: {torch_device}"
assert torch_device.index is None, (
f"DDP requires specification of device type only, however provided device index as well: {torch_device}"
)
dist.init_process_group(backend=train_config.dist_backend)
# from here onward "qaic/cuda" will automatically map to "qaic:i/cuda:i", where i = process rank
getattr(torch, torch_device.type).set_device(dist.get_rank())
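
As a side note on the assertion reformatted above: torch.device only carries an index when one is given explicitly, which is what the DDP check relies on. A tiny illustration, using "cuda" purely as a stand-in device type:

import torch

assert torch.device("cuda").index is None   # type only -> passes the DDP check
assert torch.device("cuda:0").index == 0    # explicit index -> would trip the assert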
22 changes: 11 additions & 11 deletions QEfficient/finetune/utils/train_utils.py
@@ -96,7 +96,7 @@ def train(

# Start the training loop
for epoch in range(train_config.num_epochs):
print(f"Starting epoch {epoch+1}/{train_config.num_epochs}")
print(f"Starting epoch {epoch + 1}/{train_config.num_epochs}")
print(f"train_config.max_train_step: {train_config.max_train_step}")
# stop when the maximum number of training steps is reached
if max_steps_reached:
@@ -108,7 +108,7 @@ def train(
total_length = len(train_dataloader) // train_config.gradient_accumulation_steps
pbar = tqdm(
colour="blue",
desc=f"Training Epoch: {epoch+1}",
desc=f"Training Epoch: {epoch + 1}",
total=total_length,
dynamic_ncols=True,
)
@@ -123,9 +123,9 @@ def train(
break
batch = {k: v.to(device) for k, v in batch.items()} # move the batch elements to qaic device

with torch.autocast(
device_type=device, dtype=torch.float16
) if train_config.use_autocast else nullcontext():
with (
torch.autocast(device_type=device, dtype=torch.float16) if train_config.use_autocast else nullcontext()
):
# an additional condition can be put here to avoid opByOpVerifier getting triggered for each step
if train_config.opByOpVerifier:
with qaic_debug.OpByOpVerifierMode(
@@ -183,7 +183,7 @@ def train(
model.save_pretrained(train_config.output_dir + f"/trained_weights/step_{step}")

pbar.set_description(
f"Training Epoch: {epoch+1}/{train_config.num_epochs}, step {step+1}/{len(train_dataloader)} completed (loss: {loss.detach().float()})"
f"Training Epoch: {epoch + 1}/{train_config.num_epochs}, step {step + 1}/{len(train_dataloader)} completed (loss: {loss.detach().float()})"
)
if train_config.save_metrics:
save_to_json(
@@ -244,11 +244,11 @@ def train(
if train_config.run_validation:
if eval_epoch_loss < best_val_loss:
best_val_loss = eval_epoch_loss
print(f"best eval loss on epoch {epoch+1} is {best_val_loss}")
print(f"best eval loss on epoch {epoch + 1} is {best_val_loss}")
val_loss.append(float(eval_epoch_loss))
val_prep.append(float(eval_ppl))
print(
f"Epoch {epoch+1}: train_perplexity={train_perplexity:.4f}, train_epoch_loss={train_epoch_loss:.4f}, epoch time {epoch_end_time}s"
f"Epoch {epoch + 1}: train_perplexity={train_perplexity:.4f}, train_epoch_loss={train_epoch_loss:.4f}, epoch time {epoch_end_time}s"
)

# Saving the results every epoch to plot later
@@ -322,9 +322,9 @@ def evaluation(model, train_config, eval_dataloader, local_rank, tokenizer, devi
# Ensure no gradients are computed for this scope to save memory
with torch.no_grad():
# Forward pass and compute loss
with torch.autocast(
device_type=device, dtype=torch.float16
) if train_config.use_autocast else nullcontext():
with (
torch.autocast(device_type=device, dtype=torch.float16) if train_config.use_autocast else nullcontext()
):
outputs = model(**batch)
loss = outputs.loss

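The same conditional-autocast construct appears in both train() and evaluation(). A minimal standalone sketch of the pattern (the helper name and arguments here are illustrative, not taken from the repository):

from contextlib import nullcontext

import torch

def forward_with_optional_autocast(model, batch, device_type, use_autocast):
    # Enter float16 autocast only when mixed precision is requested; otherwise use a no-op context.
    ctx = torch.autocast(device_type=device_type, dtype=torch.float16) if use_autocast else nullcontext()
    with ctx:
        outputs = model(**batch)
    return outputs.loss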
4 changes: 2 additions & 2 deletions scripts/Jenkinsfile
@@ -48,15 +48,15 @@ pipeline {
}
stage('Run Non-CLI QAIC Tests') {
steps {
timeout(time: 60, unit: 'MINUTES') {
timeout(time: 70, unit: 'MINUTES') {
sh '''
sudo docker exec ${BUILD_TAG} bash -c "
cd /efficient-transformers &&
. preflight_qeff/bin/activate &&
mkdir -p $PWD/Non_qaic &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/Non_qaic &&
pytest tests -m '(not cli) and (on_qaic) and (not qnn)' -n 4 --junitxml=tests/tests_log2.xml &&
pytest tests -m '(not cli) and (on_qaic) and (not qnn)' -n 3 --junitxml=tests/tests_log2.xml &&
deactivate"
'''
}
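
The functional change in the Jenkinsfile is the stage timeout (60 to 70 minutes) and the pytest-xdist worker count (-n 4 to -n 3), presumably to ease contention on the shared QAIC devices; the reason is not stated in the commit. The -m expression selects tests by their pytest markers. A hypothetical sketch of how such markers appear on test functions (only the marker names come from the command; the test bodies are placeholders):

import pytest

@pytest.mark.on_qaic
def test_selected_by_the_expression():
    assert True  # matches "(not cli) and (on_qaic) and (not qnn)"

@pytest.mark.cli
def test_excluded_cli():
    assert True  # excluded by "(not cli)"

@pytest.mark.on_qaic
@pytest.mark.qnn
def test_excluded_qnn():
    assert True  # excluded by "(not qnn)"

The CI stage then runs the same selection with: pytest tests -m '(not cli) and (on_qaic) and (not qnn)' -n 3.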
6 changes: 3 additions & 3 deletions scripts/perplexity_computation/calculate_perplexity.py
@@ -200,7 +200,7 @@ def torch_perplexity(

loop_time = time.time() - loop_s
logger.info(
f"E2E Sample Time: {(loop_time)/batch_size:.4f}s\t E2E TOKENS/S : {((ctx_len-prompt_len)*batch_size)/loop_time:.2f}"
f"E2E Sample Time: {(loop_time) / batch_size:.4f}s\t E2E TOKENS/S : {((ctx_len - prompt_len) * batch_size) / loop_time:.2f}"
)

del outputs
@@ -332,7 +332,7 @@ def calculate_perplexity(

loop_time = time.time() - loop_s
logger.info(
f"e2e sample time: {(loop_time)/batch_size:.4f}s\t e2e tokens/s : {((ctx_len-prompt_len)*batch_size)/loop_time:.2f}"
f"e2e sample time: {(loop_time) / batch_size:.4f}s\t e2e tokens/s : {((ctx_len - prompt_len) * batch_size) / loop_time:.2f}"
)

avg_loss = total_loss / total_tokens
@@ -415,7 +415,7 @@ def main():
print(f"Dataset Stride: {args.stride}", file=fp)
print(f"Overall Loss: {loss}", file=fp)
print(f"Perplexity: {perplexity}", file=fp)
print(f"Total time for evaluation: {(time.time()-start_time)/3600.0} hrs", file=fp)
print(f"Total time for evaluation: {(time.time() - start_time) / 3600.0} hrs", file=fp)
if isinstance(args.model_type, str) and args.model_type == "torch":
print("\n*******************************************************", file=fp)
print(f"Torch Original Perplexity: {perplexity}", file=fp)
6 changes: 3 additions & 3 deletions tests/base/test_onnx_transforms.py
@@ -77,9 +77,9 @@ def test_split_tensors_transform(tmp_path):
>
test_split () => ()
<
float[1, 32] tensor0 = [ "location": "{external_tensors_file}", "offset": "0", "length": "{32*4}" ],
float[1, 32] tensor1 = [ "location": "{external_tensors_file}", "offset": "{32*4}", "length": "{32*4}" ],
float[1, 16] tensor2 = [ "location": "{external_tensors_file}", "offset": "{64*4}", "length": "{16*4}" ]
float[1, 32] tensor0 = [ "location": "{external_tensors_file}", "offset": "0", "length": "{32 * 4}" ],
float[1, 32] tensor1 = [ "location": "{external_tensors_file}", "offset": "{32 * 4}", "length": "{32 * 4}" ],
float[1, 16] tensor2 = [ "location": "{external_tensors_file}", "offset": "{64 * 4}", "length": "{16 * 4}" ]
>
{{
}}
12 changes: 6 additions & 6 deletions tests/qnn_tests/test_causal_lm_models_qnn.py
@@ -86,9 +86,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(

pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model)

assert (
pytorch_hf_tokens == pytorch_kv_tokens
).all(), "Tokens don't match for HF PyTorch model output and KV PyTorch model output"
assert (pytorch_hf_tokens == pytorch_kv_tokens).all(), (
"Tokens don't match for HF PyTorch model output and KV PyTorch model output"
)

onnx_model_path = qeff_model.export()
ort_tokens = api_runner.run_kv_model_on_ort(onnx_model_path)
@@ -109,9 +109,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
cloud_ai_100_tokens = exec_info.generated_ids[0] # Because we always run for single input and single batch size
gen_len = ort_tokens.shape[-1]
assert (
ort_tokens == cloud_ai_100_tokens[:, :gen_len]
).all(), "Tokens don't match for ONNXRT output and Cloud AI 100 output."
assert (ort_tokens == cloud_ai_100_tokens[:, :gen_len]).all(), (
"Tokens don't match for ONNXRT output and Cloud AI 100 output."
)

# testing for CB models
model_hf, _ = load_causal_lm_model(model_config)
6 changes: 3 additions & 3 deletions tests/text_generation/test_text_generation.py
@@ -98,6 +98,6 @@ def test_generate_text_stream(
for decoded_tokens in text_generator.generate_stream_tokens(Constants.INPUT_STR, generation_len=max_gen_len):
stream_tokens.extend(decoded_tokens)

assert (
cloud_ai_100_output == stream_tokens
), f"Deviation in output observed while comparing regular execution and streamed output: {cloud_ai_100_output} != {stream_tokens}"
assert cloud_ai_100_output == stream_tokens, (
f"Deviation in output observed while comparing regular execution and streamed output: {cloud_ai_100_output} != {stream_tokens}"
)
18 changes: 9 additions & 9 deletions tests/transformers/models/test_causal_lm_models.py
@@ -110,9 +110,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(

pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model)

assert (
pytorch_hf_tokens == pytorch_kv_tokens
).all(), "Tokens don't match for HF PyTorch model output and KV PyTorch model output"
assert (pytorch_hf_tokens == pytorch_kv_tokens).all(), (
"Tokens don't match for HF PyTorch model output and KV PyTorch model output"
)

onnx_model_path = qeff_model.export()
ort_tokens = api_runner.run_kv_model_on_ort(onnx_model_path, is_tlm=is_tlm)
@@ -133,9 +133,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
cloud_ai_100_tokens = exec_info.generated_ids[0] # Because we always run for single input and single batch size
gen_len = ort_tokens.shape[-1]
assert (
ort_tokens == cloud_ai_100_tokens[:, :gen_len]
).all(), "Tokens don't match for ONNXRT output and Cloud AI 100 output."
assert (ort_tokens == cloud_ai_100_tokens[:, :gen_len]).all(), (
"Tokens don't match for ONNXRT output and Cloud AI 100 output."
)

# testing for CB models
model_hf, _ = load_causal_lm_model(model_config)
@@ -204,9 +204,9 @@ def test_causal_lm_export_with_deprecated_api(model_name):
new_api_ort_tokens = api_runner.run_kv_model_on_ort(new_api_onnx_model_path)
old_api_ort_tokens = api_runner.run_kv_model_on_ort(old_api_onnx_model_path)

assert (
new_api_ort_tokens == old_api_ort_tokens
).all(), "New API output does not match old API output for ONNX export function"
assert (new_api_ort_tokens == old_api_ort_tokens).all(), (
"New API output does not match old API output for ONNX export function"
)


@pytest.mark.on_qaic
12 changes: 6 additions & 6 deletions tests/transformers/spd/test_spd_inference.py
@@ -74,9 +74,9 @@ def get_padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int):
"""
num_chunks = -(input_len // -prefill_seq_len) # ceil divide without float
input_len_padded = num_chunks * prefill_seq_len # Convert input_len to a multiple of prefill_seq_len
assert (
input_len_padded <= ctx_len
), "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
assert input_len_padded <= ctx_len, (
"input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
)
return input_len_padded
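
The negative floor division above is an integer ceiling divide. A quick check of the identity, in plain Python with no project code involved:

# -(a // -b) == ceil(a / b) for positive integers a and b.
assert -(7 // -4) == 2   # 7 tokens in chunks of 4 -> 2 chunks
assert -(8 // -4) == 2
assert -(9 // -4) == 3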


@@ -325,9 +325,9 @@ def test_spec_decode_inference(
for prompt, generation in zip(prompts, batch_decode):
print(f"{prompt=} {generation=}")
# validation check
assert mean_num_accepted_tokens == float(
num_speculative_tokens + 1
), f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens+1}"
assert mean_num_accepted_tokens == float(num_speculative_tokens + 1), (
f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens + 1}"
)
del target_model_session
del draft_model_session
generated_ids = np.asarray(generated_ids).flatten()
12 changes: 6 additions & 6 deletions tests/transformers/test_transformer_pytorch_transforms.py
@@ -320,9 +320,9 @@ def test_awq_to_matmulnbits_transform(in_features, out_features):
assert transformed
new_out = new_module(rand_data)
assert isinstance(new_module, QuantLinearORT)
assert compare_original_vs_kv_model_pt_outputs(
old_out, new_out, tolerance=1e-8
), "Test failed because MAE is greater than tolerance"
assert compare_original_vs_kv_model_pt_outputs(old_out, new_out, tolerance=1e-8), (
"Test failed because MAE is greater than tolerance"
)


@pytest.mark.parametrize("in_features", [4096, 4096])
@@ -349,6 +349,6 @@ def test_gptq_to_matmulnbits_transform(in_features, out_features):
assert transformed
new_out = new_module(rand_data)
assert isinstance(new_module, QuantLinearORT)
assert compare_original_vs_kv_model_pt_outputs(
old_out, new_out, tolerance=1e-4
), "Test failed because MAE is greater than tolerance"
assert compare_original_vs_kv_model_pt_outputs(old_out, new_out, tolerance=1e-4), (
"Test failed because MAE is greater than tolerance"
)
