profile.txt — 75 lines (73 loc) · 16.3 KB
1 source /workspace/setup.sh
2 vim ~/.ssh/authorized_keys
3 exit
4 vim ~/.ssh/authorized_keys
5 exit
6 source ../sglang_env/bin/activate
7 history
8 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-prompt 1 --ctx-len 8192 --tp-size 4
9 source /workspace/setup.sh
10 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-prompt 1 --ctx-len 8192 --tp-size 4
11 git status
12 git checkout .
13 git log
14 git status
15 git log
16 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-prompt 1 --ctx-len 8192 --tp-size 4
17 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -e CUDA_VISIBLE_DEVICES=0,1,2,3 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
18 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -e "CUDA_VISIBLE_DEVICES=0,1,2,3" -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
19 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
20 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop "CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4"
21 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
22 export CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
23 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
24* python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt
25 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt
26 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt 1
27 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt 1
28 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.89 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt 1
29 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt 1
30 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_16384 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
31 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_16384 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
32 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
33 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.8 --load-format dummy --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
34 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
35 git add -A
36 rm -rf /workspace/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-70B
37 git add -A
38 git commit -m "disk not enought"
39 git push origin feat/partial_eviction
40 df -h .
41 lsblk
42 df -h
43 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --mul-qs 8192 --tp-size 4 --num-prompt 0 --ctx-len 512
44 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 512
45 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 1024
46 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 2048
47 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 2048
48 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 4096
49 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 256
50 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 128
51 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 64
52 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 8192 --mul-qs 14166
53 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 14166 --mul-qs 14166
54 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 14166 --mul-qs 14166
55 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 14166 --mul-qs 14166
56 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 14166 --mul-qs 28232
57 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 28232
58 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 4096
59 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 1024
60 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 4096 --mul-qs 256
61 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 256
62 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 512
63 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 4096 --mul-qs 512
64 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 4096 --mul-qs 1024
65 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 16384 --mul-qs 512
66 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 32768 --mul-qs 512
67 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 2 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 256
68 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 16
69 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 1
70 python model_equation_aio_regression.py
71 git add -A
72 history > profile.txt
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-70B --host 0.0.0.0 --mem-fraction-static 0.75 --context-length 4096 --enable-flashinfer --schedule-heuristic lpm --tp-size 4 --load-format dummy