-
Notifications
You must be signed in to change notification settings - Fork 83
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add weight streaming at runtime #3156
Comments
Looking instead to allocate a certain amount based on liveness, and then overlap running kernels with loading at runtime to shorten the time spent waiting.
Added a stream for copies in this weight_streaming branch (not sure why I can't link it directly to this issue). Currently, the |
It appears that the |
Adding the |
Removed use of |
After some testing it appears that weight streaming does work, although with a few caveats:
Ultimately, this makes weight streaming in its current state only really viable in very niche situations; it would likely require quite a bit more work to make it useful in everyday use. Below is a log of a successful run with weights being streamed (note that when the parameters are allocated, the free memory is lower than the total size of the literals):
root@e5f23edfcfdf:/code/AMDMIGraphX# HIP_HIDDEN_FREE_MEM=45600 ./build/bin/driver run models/resnet50.onnx --enable-weight-streaming --streaming-budget 0 Running [ MIGraphX Version: 2.11.0.e6451cb8f ]: ./build/bin/driver run models/resnet50.onnx --enable-weight-streaming --streaming-budget 0 Compiling ... Reading: models/resnet50.onnx Free memory: 421527552 status: 0 Using weight streaming... Streaming budget: 0 Scratch size: 6422528 Total size of literals: 105378732 Free memory: 266338304 Status: 0 Free memory: 266338304 status: 0 module: "main" @0 = check_context::migraphx::gpu::context -> float_type, {}, {} @1 = hip::hip_allocate_memory[shape=int8_type, {31232000}, {1},id=main:scratch] -> int8_type, {31232000}, {1} @2 = @literal{ ... 
} -> float_type, {1, 64, 112, 112}, {802816, 12544, 112, 1} @3 = load[offset=7062272,end=10273536](@1) -> float_type, {1, 64, 112, 112}, {802816, 12544, 112, 1} @4 = hip::copy_to_gpu(@2,@3) -> float_type, {1, 64, 112, 112}, {802816, 12544, 112, 1} @5 = @literal{0.802063, 0.803199, 0.796995} -> float_type, {3, 1, 1}, {1, 1, 1} @6 = load[offset=602112,end=602124](@1) -> float_type, {3, 1, 1}, {1, 1, 1} @7 = hip::copy_to_gpu(@5,@6) -> float_type, {3, 1, 1}, {1, 1, 1} @8 = load[offset=0,end=602112](@1) -> float_type, {1, 3, 224, 224}, {150528, 50176, 224, 1} @9 = multibroadcast[out_lens={1, 3, 224, 224},out_dyn_dims={}](@7) -> float_type, {1, 3, 224, 224}, {0, 1, 0, 0} data = @param:data -> float_type, {1, 3, 224, 224}, {150528, 50176, 224, 1} @11 = gpu::code_object[code_object=4416,symbol_name=mul_kernel,global=75264,local=1024,](@9,data,@8) -> float_type, {1, 3, 224, 224}, {150528, 50176, 224, 1} @12 = @literal{ ... } -> float_type, {64, 3, 7, 7}, {147, 49, 7, 1} @13 = load[offset=602112,end=639744](@1) -> float_type, {64, 3, 7, 7}, {147, 49, 7, 1} @14 = hip::copy_to_gpu(@12,@13) -> float_type, {64, 3, 7, 7}, {147, 49, 7, 1} @15 = load[offset=3851008,end=7062272](@1) -> float_type, {1, 64, 112, 112}, {802816, 12544, 112, 1} @16 = gpu::code_object[code_object=8992,symbol_name=mlir_convolution_add_relu,global=25088,local=64,](@4,@11,@14,@15) -> float_type, {1, 64, 112, 112}, {802816, 12544, 112, 1} @17 = load[offset=0,end=802816](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @18 = gpu::pooling[mode=max,padding={1, 1, 1, 1},padding_mode=0,stride={2, 2},lengths={3, 3},dilations={1, 1},ceil_mode=0,count_include_pad=0,lp_order=2,dyn_global=0](@16,@17) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @19 = @literal{ ... 
} -> float_type, {64, 1, 1}, {1, 1, 1} @20 = load[offset=802816,end=803072](@1) -> float_type, {64, 1, 1}, {1, 1, 1} @21 = hip::copy_to_gpu(@19,@20) -> float_type, {64, 1, 1}, {1, 1, 1} @22 = multibroadcast[out_lens={1, 64, 56, 56},out_dyn_dims={}](@21) -> float_type, {1, 64, 56, 56}, {0, 1, 0, 0} @23 = load[offset=1769728,end=2572544](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @24 = gpu::code_object[code_object=4416,symbol_name=mul_kernel,global=100352,local=1024,](@22,@18,@23) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @25 = @literal{ ... } -> float_type, {128, 1, 1}, {1, 1, 1} @26 = @literal{ ... } -> float_type, {64, 1, 1}, {1, 1, 1} @27 = load[offset=16640,end=16896](@1) -> float_type, {64, 1, 1}, {1, 1, 1} @28 = hip::copy_to_gpu(@26,@27) -> float_type, {64, 1, 1}, {1, 1, 1} @29 = @literal{ ... } -> float_type, {64, 64, 1, 1}, {64, 1, 1, 1} @30 = load[offset=256,end=16640](@1) -> float_type, {64, 64, 1, 1}, {64, 1, 1, 1} @31 = hip::copy_to_gpu(@29,@30) -> float_type, {64, 64, 1, 1}, {64, 1, 1, 1} @32 = @literal{ ... } -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @33 = load[offset=803072,end=950528](@1) -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @34 = hip::copy_to_gpu(@32,@33) -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @35 = @literal{ ... } -> float_type, {256, 128, 1, 1}, {128, 1, 1, 1} @36 = @literal{ ... 
} -> float_type, {64, 1, 1}, {1, 1, 1} @37 = load[offset=950528,end=1753344](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @38 = multibroadcast[out_lens={1, 64, 56, 56},out_dyn_dims={}](@28) -> float_type, {1, 64, 56, 56}, {0, 1, 0, 0} @39 = gpu::code_object[code_object=4680,symbol_name=add_relu_kernel,global=100352,local=1024,](@24,@38,@37) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @40 = load[offset=16640,end=16896](@1) -> float_type, {64, 1, 1}, {1, 1, 1} @41 = hip::copy_to_gpu(@36,@40) -> float_type, {64, 1, 1}, {1, 1, 1} @42 = load[offset=3522816,end=4325632](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @43 = multibroadcast[out_lens={1, 64, 56, 56},out_dyn_dims={}](@41) -> float_type, {1, 64, 56, 56}, {0, 1, 0, 0} @44 = gpu::code_object[code_object=5920,symbol_name=mlir_convolution_add_relu,global=12544,local=64,](@43,@39,@31,@42) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @45 = load[offset=2572544,end=3375360](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @46 = gpu::code_object[code_object=7560,symbol_name=mlir_convolution,global=6272,local=64,](@44,@34,@45) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @47 = load[offset=0,end=512](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @48 = hip::copy_to_gpu(@25,@47) -> float_type, {128, 1, 1}, {1, 1, 1} @49 = load[offset=1024,end=1606656](@1) -> float_type, {1, 128, 56, 56}, {401408, 3136, 56, 1} @50 = multibroadcast[out_lens={1, 128, 56, 56},out_dyn_dims={}](@48) -> float_type, {1, 128, 56, 56}, {0, 1, 0, 0} @51 = gpu::code_object[code_object=4824,symbol_name=concat_add_relu_kernel,global=100352,local=1024,](@46,@24,@50,@49) -> float_type, {1, 128, 56, 56}, {401408, 3136, 56, 1} @52 = load[offset=1606656,end=1737728](@1) -> float_type, {256, 128, 1, 1}, {128, 1, 1, 1} @53 = hip::copy_to_gpu(@35,@52) -> float_type, {256, 128, 1, 1}, {128, 1, 1, 1} @54 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @55 = @literal{ ... 
} -> float_type, {256, 1, 1}, {1, 1, 1} @56 = load[offset=1738752,end=1739776](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @57 = hip::copy_to_gpu(@55,@56) -> float_type, {256, 1, 1}, {1, 1, 1} @58 = load[offset=1737728,end=1738752](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @59 = hip::copy_to_gpu(@54,@58) -> float_type, {256, 1, 1}, {1, 1, 1} @60 = multibroadcast[out_lens={1, 256, 56, 56},out_dyn_dims={}](@57) -> float_type, {1, 256, 56, 56}, {0, 1, 0, 0} @61 = load[offset=10507520,end=13718784](@1) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @62 = multibroadcast[out_lens={1, 256, 56, 56},out_dyn_dims={}](@59) -> float_type, {1, 256, 56, 56}, {0, 1, 0, 0} @63 = gpu::code_object[code_object=11048,symbol_name=mlir_convolution_mul_add_relu,global=12544,local=64,](@60,@62,@51,@53,@61) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @64 = load[offset=7296256,end=10507520](@1) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @65 = gpu::code_object[code_object=6792,symbol_name=mlir_convolution,global=12544,local=64,](@51,@53,@64) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @66 = @literal{ ... } -> float_type, {64, 256, 1, 1}, {256, 1, 1, 1} @67 = load[offset=65792,end=131328](@1) -> float_type, {64, 256, 1, 1}, {256, 1, 1, 1} @68 = hip::copy_to_gpu(@66,@67) -> float_type, {64, 256, 1, 1}, {256, 1, 1, 1} @69 = @literal{ ... } -> float_type, {64, 1, 1}, {1, 1, 1} @70 = @literal{ ... 
} -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @71 = load[offset=1671424,end=1818880](@1) -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @72 = hip::copy_to_gpu(@70,@71) -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @73 = load[offset=0,end=256](@1) -> float_type, {64, 1, 1}, {1, 1, 1} @74 = hip::copy_to_gpu(@69,@73) -> float_type, {64, 1, 1}, {1, 1, 1} @75 = load[offset=868608,end=1671424](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @76 = multibroadcast[out_lens={1, 64, 56, 56},out_dyn_dims={}](@74) -> float_type, {1, 64, 56, 56}, {0, 1, 0, 0} @77 = gpu::code_object[code_object=5920,symbol_name=mlir_convolution_add_relu,global=12544,local=64,](@76,@63,@68,@75) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @78 = @literal{ ... } -> float_type, {256, 64, 1, 1}, {64, 1, 1, 1} @79 = load[offset=0,end=65536](@1) -> float_type, {256, 64, 1, 1}, {64, 1, 1, 1} @80 = hip::copy_to_gpu(@78,@79) -> float_type, {256, 64, 1, 1}, {64, 1, 1, 1} @81 = @literal{ ... } -> float_type, {64, 1, 1}, {1, 1, 1} @82 = load[offset=65536,end=65792](@1) -> float_type, {64, 1, 1}, {1, 1, 1} @83 = hip::copy_to_gpu(@81,@82) -> float_type, {64, 1, 1}, {1, 1, 1} @84 = load[offset=1818880,end=2621696](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @85 = multibroadcast[out_lens={1, 64, 56, 56},out_dyn_dims={}](@83) -> float_type, {1, 64, 56, 56}, {0, 1, 0, 0} @86 = gpu::code_object[code_object=9632,symbol_name=mlir_convolution_add_relu,global=6272,local=64,](@85,@77,@72,@84) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @87 = load[offset=4084992,end=7296256](@1) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @88 = gpu::code_object[code_object=10128,symbol_name=mlir_convolution_add,global=12544,local=128,](@65,@86,@80,@87) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @89 = @literal{ ... 
} -> float_type, {256, 1, 1}, {1, 1, 1} @90 = load[offset=3281152,end=3282176](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @91 = hip::copy_to_gpu(@89,@90) -> float_type, {256, 1, 1}, {1, 1, 1} @92 = @literal{ ... } -> float_type, {64, 1, 1}, {1, 1, 1} @93 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @94 = load[offset=3280128,end=3281152](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @95 = hip::copy_to_gpu(@93,@94) -> float_type, {256, 1, 1}, {1, 1, 1} @96 = @literal{ ... } -> float_type, {64, 256, 1, 1}, {256, 1, 1, 1} @97 = load[offset=3211520,end=3277056](@1) -> float_type, {64, 256, 1, 1}, {256, 1, 1, 1} @98 = hip::copy_to_gpu(@96,@97) -> float_type, {64, 256, 1, 1}, {256, 1, 1, 1} @99 = load[offset=0,end=256](@1) -> float_type, {64, 1, 1}, {1, 1, 1} @100 = hip::copy_to_gpu(@92,@99) -> float_type, {64, 1, 1}, {1, 1, 1} @101 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @102 = load[offset=3278080,end=3279104](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @103 = hip::copy_to_gpu(@101,@102) -> float_type, {256, 1, 1}, {1, 1, 1} @104 = @literal{ ... } -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @105 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @106 = load[offset=3277056,end=3278080](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @107 = hip::copy_to_gpu(@105,@106) -> float_type, {256, 1, 1}, {1, 1, 1} @108 = multibroadcast[out_lens={1, 256, 56, 56},out_dyn_dims={}](@103) -> float_type, {1, 256, 56, 56}, {0, 1, 0, 0} @109 = load[offset=256,end=3211520](@1) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @110 = multibroadcast[out_lens={1, 256, 56, 56},out_dyn_dims={}](@107) -> float_type, {1, 256, 56, 56}, {0, 1, 0, 0} @111 = gpu::code_object[code_object=4816,symbol_name=mul_add_relu_kernel,global=200704,local=1024,](@108,@88,@110,@109) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @112 = @literal{ ... } -> float_type, {256, 64, 1, 1}, {64, 1, 1, 1} @113 = @literal{ ... 
} -> float_type, {64, 1, 1}, {1, 1, 1} @114 = load[offset=3277056,end=3277312](@1) -> float_type, {64, 1, 1}, {1, 1, 1} @115 = hip::copy_to_gpu(@113,@114) -> float_type, {64, 1, 1}, {1, 1, 1} @116 = load[offset=3282176,end=4084992](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @117 = multibroadcast[out_lens={1, 64, 56, 56},out_dyn_dims={}](@115) -> float_type, {1, 64, 56, 56}, {0, 1, 0, 0} @118 = gpu::code_object[code_object=5920,symbol_name=mlir_convolution_add_relu,global=12544,local=64,](@117,@111,@98,@116) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @119 = load[offset=803072,end=950528](@1) -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @120 = hip::copy_to_gpu(@104,@119) -> float_type, {64, 64, 3, 3}, {576, 9, 3, 1} @121 = load[offset=256,end=803072](@1) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @122 = multibroadcast[out_lens={1, 64, 56, 56},out_dyn_dims={}](@100) -> float_type, {1, 64, 56, 56}, {0, 1, 0, 0} @123 = gpu::code_object[code_object=9632,symbol_name=mlir_convolution_add_relu,global=6272,local=64,](@122,@118,@120,@121) -> float_type, {1, 64, 56, 56}, {200704, 3136, 56, 1} @124 = load[offset=803072,end=868608](@1) -> float_type, {256, 64, 1, 1}, {64, 1, 1, 1} @125 = hip::copy_to_gpu(@112,@124) -> float_type, {256, 64, 1, 1}, {64, 1, 1, 1} @126 = multibroadcast[out_lens={1, 256, 56, 56},out_dyn_dims={}](@91) -> float_type, {1, 256, 56, 56}, {0, 1, 0, 0} @127 = load[offset=7296256,end=10507520](@1) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @128 = multibroadcast[out_lens={1, 256, 56, 56},out_dyn_dims={}](@95) -> float_type, {1, 256, 56, 56}, {0, 1, 0, 0} @129 = gpu::code_object[code_object=14512,symbol_name=mlir_convolution_mul_add_add_relu,global=12544,local=128,](@126,@88,@128,@123,@125,@127) -> float_type, {1, 256, 56, 56}, {802816, 3136, 56, 1} @130 = @literal{ ... 
} -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @131 = load[offset=12113152,end=12702976](@1) -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @132 = hip::copy_to_gpu(@130,@131) -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @133 = @literal{ ... } -> float_type, {128, 256, 1, 1}, {256, 1, 1, 1} @134 = load[offset=131584,end=262656](@1) -> float_type, {128, 256, 1, 1}, {256, 1, 1, 1} @135 = hip::copy_to_gpu(@133,@134) -> float_type, {128, 256, 1, 1}, {256, 1, 1, 1} @136 = @literal{ ... } -> float_type, {128, 1, 1}, {1, 1, 1} @137 = load[offset=0,end=512](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @138 = hip::copy_to_gpu(@136,@137) -> float_type, {128, 1, 1}, {1, 1, 1} @139 = load[offset=10507520,end=12113152](@1) -> float_type, {1, 128, 56, 56}, {401408, 3136, 56, 1} @140 = multibroadcast[out_lens={1, 128, 56, 56},out_dyn_dims={}](@138) -> float_type, {1, 128, 56, 56}, {0, 1, 0, 0} @141 = gpu::code_object[code_object=13344,symbol_name=mlir_convolution_add_relu,global=6272,local=64,](@140,@129,@135,@139) -> float_type, {1, 128, 56, 56}, {401408, 3136, 56, 1} @142 = load[offset=0,end=401408](@1) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @143 = gpu::code_object[code_object=6280,symbol_name=mlir_convolution,global=6400,local=64,](@141,@132,@142) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @144 = @literal{ ... 
} -> float_type, {128, 1, 1}, {1, 1, 1} @145 = load[offset=401408,end=401920](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @146 = hip::copy_to_gpu(@144,@145) -> float_type, {128, 1, 1}, {1, 1, 1} @147 = load[offset=1610240,end=2814464](@1) -> float_type, {1, 384, 28, 28}, {301056, 784, 28, 1} @148 = step[axes={2, 3},steps={2, 2}](@129) -> float_type, {1, 256, 28, 28}, {802816, 3136, 112, 2} @149 = multibroadcast[out_lens={1, 128, 28, 28},out_dyn_dims={}](@146) -> float_type, {1, 128, 28, 28}, {0, 1, 0, 0} @150 = gpu::code_object[code_object=5104,symbol_name=add_relu_noop_concat_noop_kernel,global=150528,local=1024,](@143,@149,@148,@147) -> float_type, {1, 384, 28, 28}, {301056, 784, 28, 1} @151 = @literal{ ... } -> float_type, {128, 1, 1}, {1, 1, 1} @152 = load[offset=0,end=512](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @153 = hip::copy_to_gpu(@151,@152) -> float_type, {128, 1, 1}, {1, 1, 1} @154 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @155 = @literal{ ... } -> float_type, {512, 384, 1, 1}, {384, 1, 1, 1} @156 = load[offset=512,end=786944](@1) -> float_type, {512, 384, 1, 1}, {384, 1, 1, 1} @157 = hip::copy_to_gpu(@155,@156) -> float_type, {512, 384, 1, 1}, {384, 1, 1, 1} @158 = load[offset=788992,end=791040](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @159 = hip::copy_to_gpu(@154,@158) -> float_type, {512, 1, 1}, {1, 1, 1} @160 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @161 = load[offset=786944,end=788992](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @162 = hip::copy_to_gpu(@160,@161) -> float_type, {512, 1, 1}, {1, 1, 1} @163 = @literal{ ... 
} -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @164 = load[offset=4190720,end=4780544](@1) -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @165 = hip::copy_to_gpu(@163,@164) -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @166 = load[offset=7325184,end=8930816](@1) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @167 = gpu::code_object[code_object=9608,symbol_name=mlir_convolution,global=6656,local=128,](@150,@157,@166) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @168 = load[offset=4780544,end=6386176](@1) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @169 = multibroadcast[out_lens={1, 512, 28, 28},out_dyn_dims={}](@162) -> float_type, {1, 512, 28, 28}, {0, 1, 0, 0} @170 = multibroadcast[out_lens={1, 512, 28, 28},out_dyn_dims={}](@159) -> float_type, {1, 512, 28, 28}, {0, 1, 0, 0} @171 = gpu::code_object[code_object=16680,symbol_name=mlir_convolution_mul_add_relu,global=6656,local=128,](@169,@170,@150,@157,@168) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @172 = @literal{ ... } -> float_type, {128, 1, 1}, {1, 1, 1} @173 = load[offset=512,end=1024](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @174 = hip::copy_to_gpu(@172,@173) -> float_type, {128, 1, 1}, {1, 1, 1} @175 = @literal{ ... 
} -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @176 = load[offset=1024,end=263168](@1) -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @177 = hip::copy_to_gpu(@175,@176) -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @178 = load[offset=663552,end=1064960](@1) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @179 = multibroadcast[out_lens={1, 128, 28, 28},out_dyn_dims={}](@153) -> float_type, {1, 128, 28, 28}, {0, 1, 0, 0} @180 = gpu::code_object[code_object=9120,symbol_name=mlir_convolution_add_relu,global=6400,local=64,](@179,@171,@177,@178) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @181 = load[offset=1024,end=402432](@1) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @182 = multibroadcast[out_lens={1, 128, 28, 28},out_dyn_dims={}](@174) -> float_type, {1, 128, 28, 28}, {0, 1, 0, 0} @183 = gpu::code_object[code_object=10272,symbol_name=mlir_convolution_add_relu,global=6400,local=64,](@182,@180,@165,@181) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @184 = @literal{ ... } -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @185 = load[offset=402432,end=664576](@1) -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @186 = hip::copy_to_gpu(@184,@185) -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @187 = load[offset=3122176,end=4727808](@1) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @188 = gpu::code_object[code_object=6288,symbol_name=mlir_convolution_add,global=13312,local=64,](@167,@183,@186,@187) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @189 = @literal{ ... } -> float_type, {128, 1, 1}, {1, 1, 1} @190 = load[offset=3121664,end=3122176](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @191 = hip::copy_to_gpu(@189,@190) -> float_type, {128, 1, 1}, {1, 1, 1} @192 = @literal{ ... } -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @193 = @literal{ ... 
} -> float_type, {512, 1, 1}, {1, 1, 1} @194 = load[offset=0,end=2048](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @195 = hip::copy_to_gpu(@193,@194) -> float_type, {512, 1, 1}, {1, 1, 1} @196 = @literal{ ... } -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @197 = @literal{ ... } -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @198 = @literal{ ... } -> float_type, {128, 1, 1}, {1, 1, 1} @199 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @200 = load[offset=4608,end=6656](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @201 = hip::copy_to_gpu(@199,@200) -> float_type, {512, 1, 1}, {1, 1, 1} @202 = load[offset=4096,end=4608](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @203 = hip::copy_to_gpu(@198,@202) -> float_type, {128, 1, 1}, {1, 1, 1} @204 = @literal{ ... } -> float_type, {128, 1, 1}, {1, 1, 1} @205 = @literal{ ... } -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @206 = load[offset=4727808,end=5317632](@1) -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @207 = hip::copy_to_gpu(@205,@206) -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @208 = multibroadcast[out_lens={1, 512, 28, 28},out_dyn_dims={}](@195) -> float_type, {1, 512, 28, 28}, {0, 1, 0, 0} @209 = load[offset=926720,end=2532352](@1) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @210 = multibroadcast[out_lens={1, 512, 28, 28},out_dyn_dims={}](@201) -> float_type, {1, 512, 28, 28}, {0, 1, 0, 0} @211 = gpu::code_object[code_object=4688,symbol_name=mul_add_relu_kernel,global=200704,local=1024,](@208,@188,@210,@209) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @212 = load[offset=0,end=512](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @213 = hip::copy_to_gpu(@204,@212) -> float_type, {128, 1, 1}, {1, 1, 1} @214 = load[offset=406016,end=668160](@1) -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @215 = hip::copy_to_gpu(@196,@214) -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @216 = load[offset=2532352,end=2794496](@1) -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @217 = 
hip::copy_to_gpu(@197,@216) -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @218 = multibroadcast[out_lens={1, 128, 28, 28},out_dyn_dims={}](@203) -> float_type, {1, 128, 28, 28}, {0, 1, 0, 0} @219 = load[offset=4608,end=406016](@1) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @220 = gpu::code_object[code_object=9120,symbol_name=mlir_convolution_add_relu,global=6400,local=64,](@218,@211,@215,@219) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @221 = load[offset=406016,end=807424](@1) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @222 = multibroadcast[out_lens={1, 128, 28, 28},out_dyn_dims={}](@213) -> float_type, {1, 128, 28, 28}, {0, 1, 0, 0} @223 = gpu::code_object[code_object=10272,symbol_name=mlir_convolution_add_relu,global=6400,local=64,](@222,@220,@207,@221) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @224 = load[offset=9975808,end=11581440](@1) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @225 = gpu::code_object[code_object=6288,symbol_name=mlir_convolution_add,global=13312,local=64,](@188,@223,@217,@224) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @226 = @literal{ ... } -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @227 = load[offset=4689920,end=4952064](@1) -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @228 = hip::copy_to_gpu(@226,@227) -> float_type, {512, 128, 1, 1}, {128, 1, 1, 1} @229 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @230 = load[offset=1607680,end=1609728](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @231 = hip::copy_to_gpu(@229,@230) -> float_type, {512, 1, 1}, {1, 1, 1} @232 = @literal{ ... 
} -> float_type, {512, 1, 1}, {1, 1, 1} @233 = load[offset=1605632,end=1607680](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @234 = hip::copy_to_gpu(@232,@233) -> float_type, {512, 1, 1}, {1, 1, 1} @235 = load[offset=0,end=1605632](@1) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @236 = multibroadcast[out_lens={1, 512, 28, 28},out_dyn_dims={}](@231) -> float_type, {1, 512, 28, 28}, {0, 1, 0, 0} @237 = multibroadcast[out_lens={1, 512, 28, 28},out_dyn_dims={}](@234) -> float_type, {1, 512, 28, 28}, {0, 1, 0, 0} @238 = gpu::code_object[code_object=4688,symbol_name=mul_add_relu_kernel,global=200704,local=1024,](@236,@225,@237,@235) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @239 = load[offset=1605632,end=2195456](@1) -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @240 = hip::copy_to_gpu(@192,@239) -> float_type, {128, 128, 3, 3}, {1152, 9, 3, 1} @241 = @literal{ ... } -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @242 = @literal{ ... } -> float_type, {128, 1, 1}, {1, 1, 1} @243 = load[offset=2859008,end=2859520](@1) -> float_type, {128, 1, 1}, {1, 1, 1} @244 = hip::copy_to_gpu(@242,@243) -> float_type, {128, 1, 1}, {1, 1, 1} @245 = load[offset=2596864,end=2859008](@1) -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @246 = hip::copy_to_gpu(@241,@245) -> float_type, {128, 512, 1, 1}, {512, 1, 1, 1} @247 = load[offset=2195456,end=2596864](@1) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @248 = multibroadcast[out_lens={1, 128, 28, 28},out_dyn_dims={}](@244) -> float_type, {1, 128, 28, 28}, {0, 1, 0, 0} @249 = gpu::code_object[code_object=9120,symbol_name=mlir_convolution_add_relu,global=6400,local=64,](@248,@238,@246,@247) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @250 = load[offset=0,end=401408](@1) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @251 = multibroadcast[out_lens={1, 128, 28, 28},out_dyn_dims={}](@191) -> float_type, {1, 128, 28, 28}, {0, 1, 0, 0} @252 = 
gpu::code_object[code_object=10272,symbol_name=mlir_convolution_add_relu,global=6400,local=64,](@251,@249,@240,@250) -> float_type, {1, 128, 28, 28}, {100352, 784, 28, 1} @253 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @254 = load[offset=401408,end=402432](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @255 = hip::copy_to_gpu(@253,@254) -> float_type, {256, 1, 1}, {1, 1, 1} @256 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @257 = load[offset=405504,end=407552](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @258 = hip::copy_to_gpu(@256,@257) -> float_type, {512, 1, 1}, {1, 1, 1} @259 = @literal{ ... } -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @260 = @literal{ ... } -> float_type, {256, 512, 1, 1}, {512, 1, 1, 1} @261 = load[offset=8916992,end=9441280](@1) -> float_type, {256, 512, 1, 1}, {512, 1, 1, 1} @262 = hip::copy_to_gpu(@260,@261) -> float_type, {256, 512, 1, 1}, {512, 1, 1, 1} @263 = load[offset=6557696,end=8916992](@1) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @264 = hip::copy_to_gpu(@259,@263) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @265 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @266 = load[offset=402432,end=403456](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @267 = hip::copy_to_gpu(@265,@266) -> float_type, {256, 1, 1}, {1, 1, 1} @268 = @literal{ ... 
} -> float_type, {512, 1, 1}, {1, 1, 1} @269 = load[offset=403456,end=405504](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @270 = hip::copy_to_gpu(@268,@269) -> float_type, {512, 1, 1}, {1, 1, 1} @271 = multibroadcast[out_lens={1, 512, 28, 28},out_dyn_dims={}](@270) -> float_type, {1, 512, 28, 28}, {0, 1, 0, 0} @272 = load[offset=4952064,end=6557696](@1) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @273 = multibroadcast[out_lens={1, 512, 28, 28},out_dyn_dims={}](@258) -> float_type, {1, 512, 28, 28}, {0, 1, 0, 0} @274 = gpu::code_object[code_object=8752,symbol_name=mlir_convolution_mul_add_add_relu,global=13312,local=64,](@271,@225,@273,@252,@228,@272) -> float_type, {1, 512, 28, 28}, {401408, 784, 28, 1} @275 = load[offset=403456,end=1206272](@1) -> float_type, {1, 256, 28, 28}, {200704, 784, 28, 1} @276 = multibroadcast[out_lens={1, 256, 28, 28},out_dyn_dims={}](@267) -> float_type, {1, 256, 28, 28}, {0, 1, 0, 0} @277 = gpu::code_object[code_object=6432,symbol_name=mlir_convolution_add_relu,global=12800,local=64,](@276,@274,@262,@275) -> float_type, {1, 256, 28, 28}, {200704, 784, 28, 1} @278 = load[offset=0,end=200704](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @279 = gpu::code_object[code_object=8968,symbol_name=mlir_convolution,global=3584,local=64,](@277,@264,@278) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @280 = load[offset=3113984,end=3716096](@1) -> float_type, {1, 768, 14, 14}, {150528, 196, 14, 1} @281 = step[axes={2, 3},steps={2, 2}](@274) -> float_type, {1, 512, 14, 14}, {401408, 784, 56, 2} @282 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@255) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @283 = gpu::code_object[code_object=5104,symbol_name=add_relu_noop_concat_noop_kernel,global=75264,local=1024,](@279,@282,@281,@280) -> float_type, {1, 768, 14, 14}, {150528, 196, 14, 1} @284 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @285 = @literal{ ... 
} -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @286 = @literal{ ... } -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @287 = load[offset=9221120,end=10269696](@1) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @288 = hip::copy_to_gpu(@286,@287) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @289 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @290 = load[offset=1053696,end=1057792](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @291 = hip::copy_to_gpu(@289,@290) -> float_type, {1024, 1, 1}, {1, 1, 1} @292 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @293 = load[offset=2107392,end=2108416](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @294 = hip::copy_to_gpu(@292,@293) -> float_type, {256, 1, 1}, {1, 1, 1} @295 = load[offset=2106368,end=2107392](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @296 = hip::copy_to_gpu(@284,@295) -> float_type, {256, 1, 1}, {1, 1, 1} @297 = load[offset=6861824,end=9221120](@1) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @298 = hip::copy_to_gpu(@285,@297) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @299 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @300 = load[offset=1048576,end=1049600](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @301 = hip::copy_to_gpu(@299,@300) -> float_type, {256, 1, 1}, {1, 1, 1} @302 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @303 = load[offset=2112512,end=2116608](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @304 = hip::copy_to_gpu(@302,@303) -> float_type, {1024, 1, 1}, {1, 1, 1} @305 = @literal{ ... } -> float_type, {1024, 768, 1, 1}, {768, 1, 1, 1} @306 = load[offset=3716096,end=6861824](@1) -> float_type, {1024, 768, 1, 1}, {768, 1, 1, 1} @307 = hip::copy_to_gpu(@305,@306) -> float_type, {1024, 768, 1, 1}, {768, 1, 1, 1} @308 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @309 = load[offset=2108416,end=2112512](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @310 = hip::copy_to_gpu(@308,@309) -> float_type, {1024, 1, 1}, {1, 1, 1} @311 = @literal{ ... 
} -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @312 = load[offset=0,end=1048576](@1) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @313 = hip::copy_to_gpu(@311,@312) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @314 = @literal{ ... } -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @315 = load[offset=1057792,end=2106368](@1) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @316 = hip::copy_to_gpu(@314,@315) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @317 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @318 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @319 = @literal{ ... } -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @320 = load[offset=11177984,end=12226560](@1) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @321 = hip::copy_to_gpu(@319,@320) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @322 = load[offset=3112960,end=3113984](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @323 = hip::copy_to_gpu(@318,@322) -> float_type, {256, 1, 1}, {1, 1, 1} @324 = @literal{ ... 
} -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @325 = load[offset=14734848,end=17094144](@1) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @326 = hip::copy_to_gpu(@324,@325) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @327 = load[offset=1049600,end=1053696](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @328 = hip::copy_to_gpu(@317,@327) -> float_type, {1024, 1, 1}, {1, 1, 1} @329 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@310) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @330 = load[offset=10269696,end=11072512](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @331 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@304) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @332 = gpu::code_object[code_object=11432,symbol_name=mlir_convolution_mul_add_relu,global=7168,local=64,](@329,@331,@283,@307,@330) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @333 = load[offset=2108416,end=2309120](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @334 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@294) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @335 = gpu::code_object[code_object=9504,symbol_name=mlir_convolution_add_relu,global=3584,local=64,](@334,@332,@288,@333) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @336 = load[offset=2309120,end=2509824](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @337 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@296) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @338 = gpu::code_object[code_object=7840,symbol_name=mlir_convolution_add_relu,global=7168,local=128,](@337,@335,@298,@336) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @339 = load[offset=6861824,end=7664640](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @340 = gpu::code_object[code_object=7560,symbol_name=mlir_convolution,global=7168,local=64,](@283,@307,@339) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @341 = 
load[offset=3113984,end=3916800](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @342 = gpu::code_object[code_object=7696,symbol_name=mlir_convolution_add,global=8192,local=64,](@340,@338,@316,@341) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @343 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@328) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @344 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@291) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @345 = load[offset=3916800,end=4719616](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @346 = gpu::code_object[code_object=4688,symbol_name=mul_add_relu_kernel,global=100352,local=1024,](@343,@342,@344,@345) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @347 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@301) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @348 = load[offset=1049600,end=1250304](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @349 = gpu::code_object[code_object=9504,symbol_name=mlir_convolution_add_relu,global=3584,local=64,](@347,@346,@313,@348) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @350 = load[offset=0,end=200704](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @351 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@323) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @352 = gpu::code_object[code_object=7840,symbol_name=mlir_convolution_add_relu,global=7168,local=128,](@351,@349,@326,@350) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @353 = load[offset=9782272,end=10585088](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @354 = gpu::code_object[code_object=7696,symbol_name=mlir_convolution_add,global=8192,local=64,](@342,@352,@321,@353) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @355 = @literal{ ... 
} -> float_type, {1024, 1, 1}, {1, 1, 1} @356 = load[offset=5120,end=9216](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @357 = hip::copy_to_gpu(@355,@356) -> float_type, {1024, 1, 1}, {1, 1, 1} @358 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @359 = load[offset=0,end=1024](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @360 = hip::copy_to_gpu(@358,@359) -> float_type, {256, 1, 1}, {1, 1, 1} @361 = @literal{ ... } -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @362 = load[offset=10585088,end=11633664](@1) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @363 = hip::copy_to_gpu(@361,@362) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @364 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @365 = @literal{ ... } -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @366 = load[offset=5462016,end=6510592](@1) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @367 = hip::copy_to_gpu(@365,@366) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @368 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @369 = load[offset=1024,end=5120](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @370 = hip::copy_to_gpu(@368,@369) -> float_type, {1024, 1, 1}, {1, 1, 1} @371 = @literal{ ... 
} -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @372 = load[offset=6510592,end=8869888](@1) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @373 = hip::copy_to_gpu(@371,@372) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @374 = load[offset=8869888,end=8870912](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @375 = hip::copy_to_gpu(@364,@374) -> float_type, {256, 1, 1}, {1, 1, 1} @376 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@370) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @377 = load[offset=8870912,end=9673728](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @378 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@357) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @379 = gpu::code_object[code_object=4688,symbol_name=mul_add_relu_kernel,global=100352,local=1024,](@376,@354,@378,@377) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @380 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@360) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @381 = load[offset=200704,end=401408](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @382 = gpu::code_object[code_object=9504,symbol_name=mlir_convolution_add_relu,global=3584,local=64,](@380,@379,@363,@381) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @383 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@375) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @384 = load[offset=1204224,end=1404928](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @385 = gpu::code_object[code_object=7840,symbol_name=mlir_convolution_add_relu,global=7168,local=128,](@383,@382,@373,@384) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @386 = load[offset=401408,end=1204224](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @387 = gpu::code_object[code_object=7696,symbol_name=mlir_convolution_add,global=8192,local=64,](@354,@385,@367,@386) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @388 = @literal{ ... 
} -> float_type, {1024, 1, 1}, {1, 1, 1} @389 = load[offset=0,end=4096](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @390 = hip::copy_to_gpu(@388,@389) -> float_type, {1024, 1, 1}, {1, 1, 1} @391 = @literal{ ... } -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @392 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @393 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @394 = load[offset=6464512,end=6465536](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @395 = hip::copy_to_gpu(@393,@394) -> float_type, {256, 1, 1}, {1, 1, 1} @396 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @397 = @literal{ ... } -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @398 = load[offset=1204224,end=2252800](@1) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @399 = hip::copy_to_gpu(@391,@398) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @400 = load[offset=4612096,end=4613120](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @401 = hip::copy_to_gpu(@396,@400) -> float_type, {256, 1, 1}, {1, 1, 1} @402 = load[offset=2252800,end=4612096](@1) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @403 = hip::copy_to_gpu(@397,@402) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @404 = @literal{ ... 
} -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @405 = load[offset=4613120,end=5661696](@1) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @406 = hip::copy_to_gpu(@404,@405) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @407 = load[offset=8192,end=12288](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @408 = hip::copy_to_gpu(@392,@407) -> float_type, {1024, 1, 1}, {1, 1, 1} @409 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@390) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @410 = load[offset=5661696,end=6464512](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @411 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@408) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @412 = gpu::code_object[code_object=4688,symbol_name=mul_add_relu_kernel,global=100352,local=1024,](@409,@387,@411,@410) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @413 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@395) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @414 = load[offset=6465536,end=6666240](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @415 = gpu::code_object[code_object=9504,symbol_name=mlir_convolution_add_relu,global=3584,local=64,](@413,@412,@406,@414) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @416 = load[offset=0,end=200704](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @417 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@401) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @418 = gpu::code_object[code_object=7840,symbol_name=mlir_convolution_add_relu,global=7168,local=128,](@417,@415,@403,@416) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @419 = load[offset=17205248,end=18008064](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @420 = gpu::code_object[code_object=7696,symbol_name=mlir_convolution_add,global=8192,local=64,](@387,@418,@399,@419) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @421 = @literal{ ... 
} -> float_type, {1024, 1, 1}, {1, 1, 1} @422 = @literal{ ... } -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @423 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @424 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @425 = load[offset=11937792,end=11939840](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @426 = hip::copy_to_gpu(@424,@425) -> float_type, {512, 1, 1}, {1, 1, 1} @427 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @428 = load[offset=0,end=1024](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @429 = hip::copy_to_gpu(@427,@428) -> float_type, {256, 1, 1}, {1, 1, 1} @430 = @literal{ ... } -> float_type, {256, 1, 1}, {1, 1, 1} @431 = load[offset=1024,end=2048](@1) -> float_type, {256, 1, 1}, {1, 1, 1} @432 = hip::copy_to_gpu(@430,@431) -> float_type, {256, 1, 1}, {1, 1, 1} @433 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @434 = @literal{ ... } -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @435 = load[offset=12992512,end=15351808](@1) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @436 = hip::copy_to_gpu(@434,@435) -> float_type, {256, 256, 3, 3}, {2304, 9, 3, 1} @437 = load[offset=18008064,end=18012160](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @438 = hip::copy_to_gpu(@433,@437) -> float_type, {1024, 1, 1}, {1, 1, 1} @439 = @literal{ ... } -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @440 = load[offset=12988416,end=12992512](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @441 = hip::copy_to_gpu(@421,@440) -> float_type, {1024, 1, 1}, {1, 1, 1} @442 = load[offset=11939840,end=12988416](@1) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @443 = hip::copy_to_gpu(@422,@442) -> float_type, {1024, 256, 1, 1}, {256, 1, 1, 1} @444 = load[offset=6144,end=10240](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @445 = hip::copy_to_gpu(@423,@444) -> float_type, {1024, 1, 1}, {1, 1, 1} @446 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @447 = @literal{ ... } -> float_type, {1024, 1, 1}, {1, 1, 1} @448 = @literal{ ... 
} -> float_type, {512, 1024, 1, 1}, {1024, 1, 1, 1} @449 = @literal{ ... } -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @450 = load[offset=15351808,end=16400384](@1) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @451 = hip::copy_to_gpu(@449,@450) -> float_type, {256, 1024, 1, 1}, {1024, 1, 1, 1} @452 = load[offset=2048,end=6144](@1) -> float_type, {1024, 1, 1}, {1, 1, 1} @453 = hip::copy_to_gpu(@447,@452) -> float_type, {1024, 1, 1}, {1, 1, 1} @454 = load[offset=9840640,end=11937792](@1) -> float_type, {512, 1024, 1, 1}, {1024, 1, 1, 1} @455 = hip::copy_to_gpu(@448,@454) -> float_type, {512, 1024, 1, 1}, {1024, 1, 1, 1} @456 = load[offset=403456,end=9840640](@1) -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @457 = hip::copy_to_gpu(@439,@456) -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @458 = load[offset=401408,end=403456](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @459 = hip::copy_to_gpu(@446,@458) -> float_type, {512, 1, 1}, {1, 1, 1} @460 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@453) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @461 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@445) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @462 = load[offset=16400384,end=17203200](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @463 = gpu::code_object[code_object=4688,symbol_name=mul_add_relu_kernel,global=100352,local=1024,](@460,@420,@461,@462) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @464 = load[offset=2048,end=202752](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @465 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@432) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @466 = gpu::code_object[code_object=9504,symbol_name=mlir_convolution_add_relu,global=3584,local=64,](@465,@463,@451,@464) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @467 = multibroadcast[out_lens={1, 256, 14, 14},out_dyn_dims={}](@429) -> float_type, {1, 256, 14, 14}, {0, 1, 0, 0} @468 = 
load[offset=15351808,end=15552512](@1) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @469 = gpu::code_object[code_object=7840,symbol_name=mlir_convolution_add_relu,global=7168,local=128,](@467,@466,@436,@468) -> float_type, {1, 256, 14, 14}, {50176, 196, 14, 1} @470 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@438) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @471 = multibroadcast[out_lens={1, 1024, 14, 14},out_dyn_dims={}](@441) -> float_type, {1, 1024, 14, 14}, {0, 1, 0, 0} @472 = load[offset=12992512,end=13795328](@1) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @473 = gpu::code_object[code_object=10416,symbol_name=mlir_convolution_mul_add_add_relu,global=8192,local=64,](@470,@420,@471,@469,@443,@472) -> float_type, {1, 1024, 14, 14}, {200704, 196, 14, 1} @474 = multibroadcast[out_lens={1, 512, 14, 14},out_dyn_dims={}](@426) -> float_type, {1, 512, 14, 14}, {0, 1, 0, 0} @475 = load[offset=11939840,end=12341248](@1) -> float_type, {1, 512, 14, 14}, {100352, 196, 14, 1} @476 = gpu::code_object[code_object=9632,symbol_name=mlir_convolution_add_relu,global=7168,local=64,](@474,@473,@455,@475) -> float_type, {1, 512, 14, 14}, {100352, 196, 14, 1} @477 = load[offset=301056,end=401408](@1) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @478 = gpu::code_object[code_object=7048,symbol_name=mlir_convolution,global=4096,local=128,](@476,@457,@477) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @479 = load[offset=0,end=301056](@1) -> float_type, {1, 1536, 7, 7}, {75264, 49, 7, 1} @480 = step[axes={2, 3},steps={2, 2}](@473) -> float_type, {1, 1024, 7, 7}, {200704, 196, 28, 2} @481 = multibroadcast[out_lens={1, 512, 7, 7},out_dyn_dims={}](@459) -> float_type, {1, 512, 7, 7}, {0, 1, 0, 0} @482 = gpu::code_object[code_object=5232,symbol_name=add_relu_noop_concat_noop_kernel,global=37632,local=1024,](@478,@481,@480,@479) -> float_type, {1, 1536, 7, 7}, {75264, 49, 7, 1} @483 = @literal{ ... 
} -> float_type, {2048, 1536, 1, 1}, {1536, 1, 1, 1} @484 = @literal{ ... } -> float_type, {2048, 1, 1}, {1, 1, 1} @485 = @literal{ ... } -> float_type, {2048, 1, 1}, {1, 1, 1} @486 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @487 = load[offset=4515840,end=17098752](@1) -> float_type, {2048, 1536, 1, 1}, {1536, 1, 1, 1} @488 = hip::copy_to_gpu(@483,@487) -> float_type, {2048, 1536, 1, 1}, {1536, 1, 1, 1} @489 = @literal{ ... } -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @490 = load[offset=313344,end=321536](@1) -> float_type, {2048, 1, 1}, {1, 1, 1} @491 = hip::copy_to_gpu(@484,@490) -> float_type, {2048, 1, 1}, {1, 1, 1} @492 = @literal{ ... } -> float_type, {512, 2048, 1, 1}, {2048, 1, 1, 1} @493 = load[offset=26535936,end=30730240](@1) -> float_type, {512, 2048, 1, 1}, {2048, 1, 1, 1} @494 = hip::copy_to_gpu(@492,@493) -> float_type, {512, 2048, 1, 1}, {2048, 1, 1, 1} @495 = load[offset=301056,end=303104](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @496 = hip::copy_to_gpu(@486,@495) -> float_type, {512, 1, 1}, {1, 1, 1} @497 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @498 = load[offset=303104,end=305152](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @499 = hip::copy_to_gpu(@497,@498) -> float_type, {512, 1, 1}, {1, 1, 1} @500 = load[offset=17098752,end=26535936](@1) -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @501 = hip::copy_to_gpu(@489,@500) -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @502 = @literal{ ... 
} -> float_type, {2048, 512, 1, 1}, {512, 1, 1, 1} @503 = load[offset=321536,end=4515840](@1) -> float_type, {2048, 512, 1, 1}, {512, 1, 1, 1} @504 = hip::copy_to_gpu(@502,@503) -> float_type, {2048, 512, 1, 1}, {512, 1, 1, 1} @505 = load[offset=305152,end=313344](@1) -> float_type, {2048, 1, 1}, {1, 1, 1} @506 = hip::copy_to_gpu(@485,@505) -> float_type, {2048, 1, 1}, {1, 1, 1} @507 = multibroadcast[out_lens={1, 2048, 7, 7},out_dyn_dims={}](@491) -> float_type, {1, 2048, 7, 7}, {0, 1, 0, 0} @508 = load[offset=30730240,end=31131648](@1) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @509 = multibroadcast[out_lens={1, 2048, 7, 7},out_dyn_dims={}](@506) -> float_type, {1, 2048, 7, 7}, {0, 1, 0, 0} @510 = gpu::code_object[code_object=10536,symbol_name=mlir_convolution_mul_add_relu,global=8192,local=64,](@507,@509,@482,@488,@508) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @511 = load[offset=31131648,end=31232000](@1) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @512 = multibroadcast[out_lens={1, 512, 7, 7},out_dyn_dims={}](@499) -> float_type, {1, 512, 7, 7}, {0, 1, 0, 0} @513 = gpu::code_object[code_object=7328,symbol_name=mlir_convolution_add_relu,global=4096,local=128,](@512,@510,@494,@511) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @514 = load[offset=26535936,end=26636288](@1) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @515 = multibroadcast[out_lens={1, 512, 7, 7},out_dyn_dims={}](@496) -> float_type, {1, 512, 7, 7}, {0, 1, 0, 0} @516 = gpu::code_object[code_object=7968,symbol_name=mlir_convolution_add_relu,global=4096,local=128,](@515,@513,@501,@514) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @517 = load[offset=17098752,end=17500160](@1) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @518 = gpu::code_object[code_object=8072,symbol_name=mlir_convolution,global=8192,local=64,](@482,@488,@517) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @519 = load[offset=26636288,end=27037696](@1) -> float_type, {1, 2048, 7, 7}, {100352, 
49, 7, 1} @520 = gpu::code_object[code_object=8592,symbol_name=mlir_convolution_add,global=8192,local=64,](@518,@516,@504,@519) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @521 = @literal{ ... } -> float_type, {2048, 1000}, {1000, 1} @522 = @literal{ ... } -> float_type, {2048, 1, 1}, {1, 1, 1} @523 = load[offset=8200192,end=8208384](@1) -> float_type, {2048, 1, 1}, {1, 1, 1} @524 = hip::copy_to_gpu(@522,@523) -> float_type, {2048, 1, 1}, {1, 1, 1} @525 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @526 = @literal{ ... } -> float_type, {2048, 1, 1}, {1, 1, 1} @527 = @literal{ ... } -> float_type, {512, 2048, 1, 1}, {2048, 1, 1, 1} @528 = load[offset=21845920,end=26040224](@1) -> float_type, {512, 2048, 1, 1}, {2048, 1, 1, 1} @529 = hip::copy_to_gpu(@527,@528) -> float_type, {512, 2048, 1, 1}, {2048, 1, 1, 1} @530 = load[offset=26451872,end=26460064](@1) -> float_type, {2048, 1, 1}, {1, 1, 1} @531 = hip::copy_to_gpu(@526,@530) -> float_type, {2048, 1, 1}, {1, 1, 1} @532 = @literal{ ... } -> float_type, {2048, 512, 1, 1}, {512, 1, 1, 1} @533 = @literal{ ... } -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @534 = load[offset=12406688,end=21843872](@1) -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @535 = hip::copy_to_gpu(@533,@534) -> float_type, {512, 512, 3, 3}, {4608, 9, 3, 1} @536 = load[offset=8212384,end=12406688](@1) -> float_type, {2048, 512, 1, 1}, {512, 1, 1, 1} @537 = hip::copy_to_gpu(@532,@536) -> float_type, {2048, 512, 1, 1}, {512, 1, 1, 1} @538 = @literal{ ... } -> float_type, {512, 1, 1}, {1, 1, 1} @539 = @literal{ ... } -> float_type, {2048, 1, 1}, {1, 1, 1} @540 = load[offset=26443680,end=26451872](@1) -> float_type, {2048, 1, 1}, {1, 1, 1} @541 = hip::copy_to_gpu(@539,@540) -> float_type, {2048, 1, 1}, {1, 1, 1} @542 = @literal{ ... 
} -> float_type, {2048, 1, 1}, {1, 1, 1} @543 = load[offset=8192000,end=8200192](@1) -> float_type, {2048, 1, 1}, {1, 1, 1} @544 = hip::copy_to_gpu(@542,@543) -> float_type, {2048, 1, 1}, {1, 1, 1} @545 = load[offset=26441632,end=26443680](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @546 = hip::copy_to_gpu(@538,@545) -> float_type, {512, 1, 1}, {1, 1, 1} @547 = @literal{ ... } -> float_type, {1000}, {1} @548 = load[offset=8208384,end=8212384](@1) -> float_type, {1000}, {1} @549 = hip::copy_to_gpu(@547,@548) -> float_type, {1000}, {1} @550 = load[offset=0,end=8192000](@1) -> float_type, {2048, 1000}, {1000, 1} @551 = hip::copy_to_gpu(@521,@550) -> float_type, {2048, 1000}, {1000, 1} @552 = load[offset=21843872,end=21845920](@1) -> float_type, {512, 1, 1}, {1, 1, 1} @553 = hip::copy_to_gpu(@525,@552) -> float_type, {512, 1, 1}, {1, 1, 1} @554 = multibroadcast[out_lens={1, 2048, 7, 7},out_dyn_dims={}](@541) -> float_type, {1, 2048, 7, 7}, {0, 1, 0, 0} @555 = multibroadcast[out_lens={1, 2048, 7, 7},out_dyn_dims={}](@531) -> float_type, {1, 2048, 7, 7}, {0, 1, 0, 0} @556 = load[offset=26040224,end=26441632](@1) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @557 = gpu::code_object[code_object=4688,symbol_name=mul_add_relu_kernel,global=100352,local=1024,](@554,@520,@555,@556) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @558 = load[offset=26443680,end=26544032](@1) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @559 = multibroadcast[out_lens={1, 512, 7, 7},out_dyn_dims={}](@546) -> float_type, {1, 512, 7, 7}, {0, 1, 0, 0} @560 = gpu::code_object[code_object=7328,symbol_name=mlir_convolution_add_relu,global=4096,local=128,](@559,@557,@529,@558) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @561 = load[offset=21845920,end=21946272](@1) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @562 = multibroadcast[out_lens={1, 512, 7, 7},out_dyn_dims={}](@553) -> float_type, {1, 512, 7, 7}, {0, 1, 0, 0} @563 = 
gpu::code_object[code_object=7968,symbol_name=mlir_convolution_add_relu,global=4096,local=128,](@562,@560,@535,@561) -> float_type, {1, 512, 7, 7}, {25088, 49, 7, 1} @564 = load[offset=12406688,end=12808096](@1) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @565 = gpu::code_object[code_object=8072,symbol_name=mlir_convolution,global=8192,local=64,](@563,@537,@564) -> float_type, {1, 2048, 7, 7}, {100352, 49, 7, 1} @566 = multibroadcast[out_lens={1, 2048, 7, 7},out_dyn_dims={}](@544) -> float_type, {1, 2048, 7, 7}, {0, 1, 0, 0} @567 = load[offset=8212384,end=8220576](@1) -> float_type, {1, 2048, 1, 1}, {2048, 1, 1, 1} @568 = multibroadcast[out_lens={1, 2048, 7, 7},out_dyn_dims={}](@524) -> float_type, {1, 2048, 7, 7}, {0, 1, 0, 0} @569 = gpu::code_object[code_object=4984,symbol_name=mul_add_add_relu_mul_reduce_sum_kernel,global=131072,local=64,](@566,@520,@565,@568,@567) -> float_type, {1, 2048, 1, 1}, {2048, 1, 1, 1} @570 = multibroadcast[out_lens={1, 1000},out_dyn_dims={}](@549) -> float_type, {1, 1000}, {0, 1} main:#output_0 = @param:main:#output_0 -> float_type, {1, 1000}, {1000, 1} @572 = gpu::code_object[code_object=5648,symbol_name=mlir_reshape_dot_add,global=2048,local=64,](@570,@569,@551,main:#output_0) -> float_type, {1, 1000}, {1000, 1} @573 = @return(@572) |
Part of this is also due to this issue #3310. |
Figure out a way to support weight streaming at runtime, i.e., to be able to fit large models on the GPU without needing to know the literal sizes ahead of time.
`@literal` instructions take up so much time
`@literal` instruction

The text was updated successfully, but these errors were encountered: