profile.txt — 75 lines (73 loc) · 16.3 KB
1 source /workspace/setup.sh
2 vim ~/.ssh/authorized_keys
3 exit
4 vim ~/.ssh/authorized_keys
5 exit
6 source ../sglang_env/bin/activate
7 history
8 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-prompt 1 --ctx-len 8192 --tp-size 4
9 source /workspace/setup.sh
10 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-prompt 1 --ctx-len 8192 --tp-size 4
11 git status
12 git checkout .
13 git log
14 git status
15 git log
16 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-prompt 1 --ctx-len 8192 --tp-size 4
17 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -e CUDA_VISIBLE_DEVICES=0,1,2,3 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
18 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -e "CUDA_VISIBLE_DEVICES=0,1,2,3" -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
19 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
20 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop "CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4"
21 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
22 export CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
23 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 1 --ctx-len 8192 --mul-qs 8192 --tp-size 4
24* python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt
25 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt
26 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt 1
27 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.85 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt 1
28 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.89 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt 1
29 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_8192 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --ctx-len 8192 --mul-qs 8192 --tp-size 4 --num-prompt 1
30 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_16384 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 8192 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
31 CUDA_VISIBLE_DEVICES=0,1,2,3 nsys profile -w true -t cuda,nvtx -o 70b_prompt_16384 -f true --capture-range=cudaProfilerApi --capture-range-end stop python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
32 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.9 --load-format dummy --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
33 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.8 --load-format dummy --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
34 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --mul-qs 8192 --tp-size 4 --num-prompt 1 --ctx-len 16384
35 git add -A
36 rm -rf /workspace/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-70B
37 git add -A
38 git commit -m "disk not enought"
39 git push origin feat/partial_eviction
40 df -h .
41 lsblk
42 df -h
43 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --mul-qs 8192 --tp-size 4 --num-prompt 0 --ctx-len 512
44 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 512
45 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 1024
46 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.8 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 2048
47 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 2048
48 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 4096
49 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 256
50 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 128
51 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 64
52 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 8192 --mul-qs 14166
53 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 14166 --mul-qs 14166
54 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 14166 --mul-qs 14166
55 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 14166 --mul-qs 14166
56 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 14166 --mul-qs 28232
57 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 28232
58 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 4096
59 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 0 --tp-size 4 --num-prompt 1 --ctx-len 1024
60 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 4096 --mul-qs 256
61 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 256
62 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 512
63 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 4096 --mul-qs 512
64 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 4096 --mul-qs 1024
65 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 16384 --mul-qs 512
66 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 32768 --mul-qs 512
67 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 2 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 256
68 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 16
69 CUDA_VISIBLE_DEVICES=0,1,2,3 python multi_node/profile_model_forwarding.py --model-path meta-llama/Meta-Llama-3-70B --load-format dummy --mem-fraction-static 0.75 --stream-interval 1 --context-length 33000 --enable-flashinfer --num-mul 1 --tp-size 4 --num-prompt 0 --ctx-len 8192 --mul-qs 1
70 python model_equation_aio_regression.py
71 git add -A
72 history > profile.txt
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-70B --host 0.0.0.0 --mem-fraction-static 0.75 --context-length 4096 --enable-flashinfer --schedule-heuristic lpm --tp-size 4 --load-format dummy