dev-hitesh-gupta
commited on
Commit
•
4eae4b0
1
Parent(s):
83739db
Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- log_1725411796.txt +0 -0
- log_1725411883.txt +0 -0
- log_1725411949.txt +0 -0
- log_1725412477.txt +0 -0
- log_1725412528.txt +0 -0
- log_1725412706.txt +0 -0
- log_1725412729.txt +0 -0
- log_1725412766.txt +0 -0
- log_1725413132.txt +0 -0
- log_1725413248.txt +0 -0
- log_1725413416.txt +0 -0
- log_1725413811.txt +0 -0
- log_1725413837.txt +0 -0
- log_1725414865.txt +0 -0
- log_1725415318.txt +0 -0
- log_1725415584.txt +0 -0
- log_1725415618.txt +0 -0
- log_1725415890.txt +0 -0
- log_1725416792.txt +0 -0
- log_1725416985.txt +0 -0
- log_1725417415.txt +0 -0
- log_1725417643.txt +0 -0
- log_1725417865.txt +0 -0
- log_1725418121.txt +0 -0
- log_1725418385.txt +0 -0
- log_1725418403.txt +0 -0
- log_1725418471.txt +0 -0
- log_1725418487.txt +0 -0
- log_1725418585.txt +0 -0
- log_1725418906.txt +0 -0
- log_1725418928.txt +0 -0
- log_1725418952.txt +0 -0
- log_1725419330.txt +10 -0
- profiling_outputs/iteration_12/rank0.1725411827523394495.pt.trace.json.gz +3 -0
- profiling_outputs/iteration_12/rank0.1725419372986525138.pt.trace.json.gz +3 -0
- profiling_outputs/iteration_12/rank0_key_averages.txt +157 -0
- profiling_outputs/iteration_12/rank0_stacks.txt +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
profiling_outputs/iteration_12/rank0_stacks.txt filter=lfs diff=lfs merge=lfs -text
|
log_1725411796.txt
ADDED
File without changes
|
log_1725411883.txt
ADDED
File without changes
|
log_1725411949.txt
ADDED
File without changes
|
log_1725412477.txt
ADDED
File without changes
|
log_1725412528.txt
ADDED
File without changes
|
log_1725412706.txt
ADDED
File without changes
|
log_1725412729.txt
ADDED
File without changes
|
log_1725412766.txt
ADDED
File without changes
|
log_1725413132.txt
ADDED
File without changes
|
log_1725413248.txt
ADDED
File without changes
|
log_1725413416.txt
ADDED
File without changes
|
log_1725413811.txt
ADDED
File without changes
|
log_1725413837.txt
ADDED
File without changes
|
log_1725414865.txt
ADDED
File without changes
|
log_1725415318.txt
ADDED
File without changes
|
log_1725415584.txt
ADDED
File without changes
|
log_1725415618.txt
ADDED
File without changes
|
log_1725415890.txt
ADDED
File without changes
|
log_1725416792.txt
ADDED
File without changes
|
log_1725416985.txt
ADDED
File without changes
|
log_1725417415.txt
ADDED
File without changes
|
log_1725417643.txt
ADDED
File without changes
|
log_1725417865.txt
ADDED
File without changes
|
log_1725418121.txt
ADDED
File without changes
|
log_1725418385.txt
ADDED
File without changes
|
log_1725418403.txt
ADDED
File without changes
|
log_1725418471.txt
ADDED
File without changes
|
log_1725418487.txt
ADDED
File without changes
|
log_1725418585.txt
ADDED
File without changes
|
log_1725418906.txt
ADDED
File without changes
|
log_1725418928.txt
ADDED
File without changes
|
log_1725418952.txt
ADDED
File without changes
|
log_1725419330.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Step 1 | loss:0.8410570025444031 lr:2.9999999999999997e-06 tokens_per_second_per_gpu:704.4238888314976
|
2 |
+
Step 2 | loss:0.8031501173973083 lr:5.999999999999999e-06 tokens_per_second_per_gpu:1024.7700646671149
|
3 |
+
Step 3 | loss:0.8186150789260864 lr:8.999999999999999e-06 tokens_per_second_per_gpu:1043.6909191161417
|
4 |
+
Step 4 | loss:0.8145633339881897 lr:1.1999999999999999e-05 tokens_per_second_per_gpu:1019.2992522263147
|
5 |
+
Step 5 | loss:0.7742070555686951 lr:1.4999999999999999e-05 tokens_per_second_per_gpu:1036.7547177946178
|
6 |
+
Step 6 | loss:0.8019543886184692 lr:1.7999999999999997e-05 tokens_per_second_per_gpu:1038.9270556360439
|
7 |
+
Step 7 | loss:0.8277010321617126 lr:2.1e-05 tokens_per_second_per_gpu:1036.3345335641322
|
8 |
+
Step 8 | loss:0.7890640497207642 lr:2.3999999999999997e-05 tokens_per_second_per_gpu:1035.6141064563976
|
9 |
+
Step 9 | loss:0.803084671497345 lr:2.6999999999999996e-05 tokens_per_second_per_gpu:1029.8257838448487
|
10 |
+
Step 10 | loss:0.8352741003036499 lr:2.9999999999999997e-05 tokens_per_second_per_gpu:1037.948880337632
|
profiling_outputs/iteration_12/rank0.1725411827523394495.pt.trace.json.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b39ce2e6a8c41b095ccf0ba616e02546cf107212af1f2db0317745f542b4af47
|
3 |
+
size 10297860
|
profiling_outputs/iteration_12/rank0.1725419372986525138.pt.trace.json.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dba010bb1a8b9c4dd17fdf7acd23de237845bf0098d51e3a53636571688deff8
|
3 |
+
size 10243085
|
profiling_outputs/iteration_12/rank0_key_averages.txt
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -------------------------------------------------------------------------------- --------------------------------------------------------------------------- ------------
|
2 |
+
Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg CPU Mem Self CPU Mem CUDA Mem Self CUDA Mem # of Calls Input Shapes Source Location Total FLOPs
|
3 |
+
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -------------------------------------------------------------------------------- --------------------------------------------------------------------------- ------------
|
4 |
+
ProfilerStep* 0.00% 0.000us 0.00% 0.000us 0.000us 806.035ms 22.19% 806.035ms 403.018ms 0 b 0 b 0 b 0 b 2 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
5 |
+
recipes/lora_finetune_single_device.py(622): train
|
6 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
7 |
+
torchtune/config/_parse.py(50): wrapper
|
8 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
9 |
+
|
10 |
+
ampere_bf16_s1688gemm_bf16_128x128_ldg8_f2f_stages_3... 0.00% 0.000us 0.00% 0.000us 0.000us 212.867ms 5.86% 212.867ms 485.997us 0 b 0 b 0 b 0 b 438 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
11 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
12 |
+
torch/_tensor.py(465): backward
|
13 |
+
recipes/lora_finetune_single_device.py(622): train
|
14 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
15 |
+
|
16 |
+
void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816... 0.00% 0.000us 0.00% 0.000us 0.000us 190.122ms 5.23% 190.122ms 1.980ms 0 b 0 b 0 b 0 b 96 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
17 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
18 |
+
torch/_tensor.py(465): backward
|
19 |
+
recipes/lora_finetune_single_device.py(622): train
|
20 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
21 |
+
|
22 |
+
ampere_bf16_s16816gemm_bf16_256x128_ldg8_f2f_stages_... 0.00% 0.000us 0.00% 0.000us 0.000us 100.057ms 2.75% 100.057ms 2.943ms 0 b 0 b 0 b 0 b 34 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
23 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
24 |
+
torch/_tensor.py(465): backward
|
25 |
+
recipes/lora_finetune_single_device.py(622): train
|
26 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
27 |
+
|
28 |
+
void at::native::(anonymous namespace)::cunn_Spatial... 0.00% 0.000us 0.00% 0.000us 0.000us 70.645ms 1.94% 70.645ms 35.322ms 0 b 0 b 0 b 0 b 2 [] -- <built-in function cross_entropy_loss>
|
29 |
+
torch/nn/functional.py(3014): cross_entropy
|
30 |
+
torch/nn/modules/loss.py(1187): forward
|
31 |
+
torch/nn/modules/module.py(1555): _call_impl
|
32 |
+
nn.Module: CrossEntropyLoss_0
|
33 |
+
|
34 |
+
ampere_bf16_s1688gemm_bf16_128x128_ldg8_f2f_stages_3... 0.00% 0.000us 0.00% 0.000us 0.000us 66.469ms 1.83% 66.469ms 2.770ms 0 b 0 b 0 b 0 b 24 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
35 |
+
recipes/lora_finetune_single_device.py(622): train
|
36 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
37 |
+
torchtune/config/_parse.py(50): wrapper
|
38 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
39 |
+
|
40 |
+
void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816... 0.00% 0.000us 0.00% 0.000us 0.000us 64.458ms 1.77% 64.458ms 5.372ms 0 b 0 b 0 b 0 b 12 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
41 |
+
recipes/lora_finetune_single_device.py(622): train
|
42 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
43 |
+
torchtune/config/_parse.py(50): wrapper
|
44 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
45 |
+
|
46 |
+
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 63.230ms 1.74% 63.230ms 124.960us 0 b 0 b 0 b 0 b 506 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
47 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
48 |
+
torch/_tensor.py(465): backward
|
49 |
+
recipes/lora_finetune_single_device.py(622): train
|
50 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
51 |
+
|
52 |
+
void at::native::elementwise_kernel<128, 2, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 59.520ms 1.64% 59.520ms 117.166us 0 b 0 b 0 b 0 b 508 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
53 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
54 |
+
torch/_tensor.py(465): backward
|
55 |
+
recipes/lora_finetune_single_device.py(622): train
|
56 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
57 |
+
|
58 |
+
ampere_bf16_s16816gemm_bf16_256x128_ldg8_f2f_stages_... 0.00% 0.000us 0.00% 0.000us 0.000us 52.740ms 1.45% 52.740ms 2.198ms 0 b 0 b 0 b 0 b 24 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
59 |
+
recipes/lora_finetune_single_device.py(622): train
|
60 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
61 |
+
torchtune/config/_parse.py(50): wrapper
|
62 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
63 |
+
|
64 |
+
ampere_bf16_s1688gemm_bf16_128x128_ldg8_f2f_stages_3... 0.00% 0.000us 0.00% 0.000us 0.000us 47.180ms 1.30% 47.180ms 1.966ms 0 b 0 b 0 b 0 b 24 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
65 |
+
recipes/lora_finetune_single_device.py(622): train
|
66 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
67 |
+
torchtune/config/_parse.py(50): wrapper
|
68 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
69 |
+
|
70 |
+
void at::native::elementwise_kernel<128, 2, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 41.515ms 1.14% 41.515ms 65.688us 0 b 0 b 0 b 0 b 632 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
71 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
72 |
+
torch/_tensor.py(465): backward
|
73 |
+
recipes/lora_finetune_single_device.py(622): train
|
74 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
75 |
+
|
76 |
+
aten::_log_softmax 0.00% 23.940us 0.00% 144.092us 144.092us 35.624ms 0.98% 37.220ms 37.220ms 0 b 0 b 222.77 Mb 0 b 1 [[2, 32016, 912], [], []] -- <built-in function cross_entropy_loss>
|
77 |
+
torch/nn/functional.py(3014): cross_entropy
|
78 |
+
torch/nn/modules/loss.py(1187): forward
|
79 |
+
torch/nn/modules/module.py(1555): _call_impl
|
80 |
+
nn.Module: CrossEntropyLoss_0
|
81 |
+
|
82 |
+
aten::add_ 0.03% 1.329ms 2.03% 90.196ms 477.229us 35.471ms 0.98% 35.471ms 187.675us 0 b 0 b 0 b 0 b 189 [[2, 913, 32, 64, 2], [2, 913, 32, 64, 2], []] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
83 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
84 |
+
torch/_tensor.py(465): backward
|
85 |
+
recipes/lora_finetune_single_device.py(622): train
|
86 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
87 |
+
|
88 |
+
aten::_log_softmax 0.00% 30.790us 0.00% 70.611us 70.611us 35.020ms 0.96% 35.411ms 35.411ms 0 b 0 b 71.32 Mb 0 b 1 [[2, 32016, 292], [], []] -- <built-in function cross_entropy_loss>
|
89 |
+
torch/nn/functional.py(3014): cross_entropy
|
90 |
+
torch/nn/modules/loss.py(1187): forward
|
91 |
+
torch/nn/modules/module.py(1555): _call_impl
|
92 |
+
nn.Module: CrossEntropyLoss_0
|
93 |
+
|
94 |
+
void at::native::(anonymous namespace)::cunn_Spatial... 0.00% 0.000us 0.00% 0.000us 0.000us 32.035ms 0.88% 32.035ms 16.018ms 0 b 0 b 0 b 0 b 2 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
95 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
96 |
+
torch/_tensor.py(465): backward
|
97 |
+
recipes/lora_finetune_single_device.py(622): train
|
98 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
99 |
+
|
100 |
+
void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816... 0.00% 0.000us 0.00% 0.000us 0.000us 31.991ms 0.88% 31.991ms 5.332ms 0 b 0 b 0 b 0 b 6 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
101 |
+
recipes/lora_finetune_single_device.py(622): train
|
102 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
103 |
+
torchtune/config/_parse.py(50): wrapper
|
104 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
105 |
+
|
106 |
+
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 31.808ms 0.88% 31.808ms 82.833us 0 b 0 b 0 b 0 b 384 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
107 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
108 |
+
torch/_tensor.py(465): backward
|
109 |
+
recipes/lora_finetune_single_device.py(622): train
|
110 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
111 |
+
|
112 |
+
aten::add_ 0.04% 1.868ms 2.02% 89.872ms 358.056us 29.127ms 0.80% 29.127ms 116.044us 0 b 0 b 0 b 0 b 251 [[2, 913, 4096], [2, 913, 4096], []] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
113 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
114 |
+
torch/_tensor.py(465): backward
|
115 |
+
recipes/lora_finetune_single_device.py(622): train
|
116 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
117 |
+
|
118 |
+
void pytorch_flash::flash_bwd_dq_dk_dv_loop_seqk_par... 0.00% 0.000us 0.00% 0.000us 0.000us 27.914ms 0.77% 27.914ms 436.163us 0 b 0 b 0 b 0 b 64 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
119 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
120 |
+
torch/_tensor.py(465): backward
|
121 |
+
recipes/lora_finetune_single_device.py(622): train
|
122 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
123 |
+
|
124 |
+
void at::native::elementwise_kernel<128, 2, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 27.004ms 0.74% 27.004ms 355.322us 0 b 0 b 0 b 0 b 76 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
125 |
+
recipes/lora_finetune_single_device.py(622): train
|
126 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
127 |
+
torchtune/config/_parse.py(50): wrapper
|
128 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
129 |
+
|
130 |
+
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 26.862ms 0.74% 26.862ms 38.706us 0 b 0 b 0 b 0 b 694 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
131 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
132 |
+
torch/_tensor.py(465): backward
|
133 |
+
recipes/lora_finetune_single_device.py(622): train
|
134 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
135 |
+
|
136 |
+
void at::native::elementwise_kernel<128, 2, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 25.891ms 0.71% 25.891ms 208.798us 0 b 0 b 0 b 0 b 124 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
137 |
+
recipes/lora_finetune_single_device.py(622): train
|
138 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
139 |
+
torchtune/config/_parse.py(50): wrapper
|
140 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
141 |
+
|
142 |
+
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 25.065ms 0.69% 25.065ms 334.207us 0 b 0 b 0 b 0 b 75 [] -- <built-in method to of Tensor object at 0x7f79fa3fbf10>
|
143 |
+
recipes/lora_finetune_single_device.py(622): train
|
144 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
145 |
+
torchtune/config/_parse.py(50): wrapper
|
146 |
+
recipes/lora_finetune_single_device.py(648): <module>
|
147 |
+
|
148 |
+
void at::native::vectorized_elementwise_kernel<4, at... 0.00% 0.000us 0.00% 0.000us 0.000us 22.140ms 0.61% 22.140ms 43.412us 0 b 0 b 0 b 0 b 510 [] -- ...in method run_backward of torch._C._EngineBase object at 0x7f7ab8266140>
|
149 |
+
torch/autograd/graph.py(763): _engine_run_backward
|
150 |
+
torch/_tensor.py(465): backward
|
151 |
+
recipes/lora_finetune_single_device.py(622): train
|
152 |
+
recipes/lora_finetune_single_device.py(643): recipe_main
|
153 |
+
|
154 |
+
------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -------------------------------------------------------------------------------- --------------------------------------------------------------------------- ------------
|
155 |
+
Self CPU time total: 4.453s
|
156 |
+
Self CUDA time total: 3.632s
|
157 |
+
|
profiling_outputs/iteration_12/rank0_stacks.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4489b7f30fc000a3a5f9a1dcfe8879c00b547f631645074dbaf194fb5f18913
|
3 |
+
size 28216209
|