//
// Generated by LLVM NVPTX Back-End
//

.version 8.4
.target sm_90a
.address_size 64

	// .globl	triton_per_fused_add_div_mul_pow_sub_sum_0 // -- Begin function triton_per_fused_add_div_mul_pow_sub_sum_0
// Externally sized shared-memory array. The kernel below touches only its
// first two 32-bit words (byte offsets 0 and 4), using them to exchange the
// per-warp partial sums of each block-wide reduction.
.extern .shared .align 16 .b8 global_smem[];
                                        // @triton_per_fused_add_div_mul_pow_sub_sum_0
//
// Kernel: triton_per_fused_add_div_mul_pow_sub_sum_0
//
// Parameters
//   param_0 (.u64, global ptr): destination. The only access is one 32-bit
//                               store to [param_0 + 0] at the very end.
//   param_1 (.u64, global ptr): source. Every global load in the kernel reads
//                               32-bit words from it; they are used as f32.
//   param_2 (.u32)            : declared but never read by this body.
//
// Launch shape: .reqntid 64, 1, 1 (exactly 64 threads, i.e. two warps).
// The index comments below rely on tid being in 0..63.
//
// What it computes (a = param_1 viewed as f32[], t = tid):
//   S1 = sum over threads with (t & 48) != 48 of
//          sum_{k=0..3} (a[b + 4 + k] - a[b + k])^2,
//          b = 16 * ((4*t) / 12) + ((4*t) % 12)
//   S2 = sum over j in {t, t + 64, t + 128} of
//          (a[c + 1] - a[c])^2,  c = 4 * (j / 3) + (j % 3)
//        (the j = t + 192 slot exists but is predicated off)
//   result = ((S1 * C + S2 * C) * 2) * 0.25, C = 0f3DAAAAAB (~ 1/12)
// The result is stored by the thread whose (tid & 63) == 0.
//
// Each block-wide sum is done as: 5-step butterfly shuffle inside each warp,
// lane 0 of each warp writes its partial to shared memory, then threads 0..1
// combine the two partials and thread 0 writes the total to global_smem[0].
//
.visible .entry triton_per_fused_add_div_mul_pow_sub_sum_0(
	.param .u64 .ptr .global .align 1 triton_per_fused_add_div_mul_pow_sub_sum_0_param_0,
	.param .u64 .ptr .global .align 1 triton_per_fused_add_div_mul_pow_sub_sum_0_param_1,
	.param .u32 triton_per_fused_add_div_mul_pow_sub_sum_0_param_2
)
.reqntid 64, 1, 1
{
	.reg .pred 	%p<36>;
	.reg .b16 	%rs<36>;
	.reg .b32 	%r<91>;
	.reg .f32 	%f<62>;
	.reg .b64 	%rd<48>;
	.loc	1 19 0                          // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:19:0
$L__func_begin0:
	.loc	1 19 0                          // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:19:0

// %bb.0:
	// %rd11 = destination pointer, %rd12 = source pointer.
	ld.param.u64 	%rd11, [triton_per_fused_add_div_mul_pow_sub_sum_0_param_0];
	ld.param.u64 	%rd12, [triton_per_fused_add_div_mul_pow_sub_sum_0_param_1];
$L__tmp0:
	.loc	1 26 34                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:26:34
	// %r46 = tid, %r47 = lane within warp (tid & 31), %r48 = tid & 63.
	mov.u32 	%r46, %tid.x;
	and.b32  	%r47, %r46, 31;
	and.b32  	%r48, %r46, 63;
	.loc	1 28 21                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:28:21
	// %p1 is true unless bits 4 and 5 of tid are both set; with 64 threads
	// that means tid < 48. It guards the vector loads of the first sum.
	and.b32  	%r49, %r46, 48;
	setp.ne.s32 	%p1, %r49, 48;
	.loc	1 29 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:29:19
	// ---- Index math for the first sum: x = 4 * tid (four elements/thread).
	cvt.u16.u32 	%rs1, %r46;
	shl.b16 	%rs2, %rs1, 2;
	.loc	1 30 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:30:19
	// %rs5 = x / 12 via multiply-shift: (x * 171) >> 11 equals x / 12 for
	// the 0..252 range left by the mask.
	and.b16  	%rs3, %rs2, 252;
	mul.lo.s16 	%rs4, %rs3, 171;
	shr.u16 	%rs5, %rs4, 11;
	.loc	1 29 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:29:19
	// %rs7 = x - 12 * (x / 12), i.e. x % 12; %r51 is its masked 32-bit copy.
	mul.lo.s16 	%rs6, %rs5, 12;
	sub.s16 	%rs7, %rs2, %rs6;
	cvt.u32.u16 	%r50, %rs7;
	and.b32  	%r51, %r50, 252;
	.loc	1 31 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:31:19
	// ---- Index math for the second sum: four slots per thread,
	// j0 = tid, j1 = tid + 64, j2 = tid + 128, j3 = tid + 192.
	// For each slot the code derives j / 3 (multiply-shift) and j % 3.
	cvt.u16.u32 	%rs8, %r48;
	.loc	1 32 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:32:19
	// j0: %rs10 = j0 / 3 as (j0 * 86) >> 8, valid for j0 in 0..63.
	mul.lo.s16 	%rs9, %rs8, 86;
	shr.u16 	%rs10, %rs9, 8;
	.loc	1 31 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:31:19
	// %rs12 = j0 % 3. Then j1 = j0 | 64 (same as + 64 because j0 < 64).
	mul.lo.s16 	%rs11, %rs10, 3;
	sub.s16 	%rs12, %rs8, %rs11;
	or.b16  	%rs13, %rs8, 64;
	.loc	1 32 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:32:19
	// j1: %rs15 = j1 / 3, same (x * 86) >> 8 form, valid up to 127.
	mul.lo.s16 	%rs14, %rs13, 86;
	shr.u16 	%rs15, %rs14, 8;
	.loc	1 31 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:31:19
	// %rs17 = j1 % 3. %rs18 sets bit 7 and all higher bits (-128 = 0xFF80);
	// only its low 8 bits are consumed later.
	mul.lo.s16 	%rs16, %rs15, 3;
	sub.s16 	%rs17, %rs13, %rs16;
	or.b16  	%rs18, %rs8, -128;
	.loc	1 32 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:32:19
	// j2 = %rs19 = 128 + j0 (mask 191 keeps bit 7 and bits 0..5).
	// %rs21 = j2 / 3 as (j2 * 171) >> 9.
	and.b16  	%rs19, %rs18, 191;
	mul.lo.s16 	%rs20, %rs19, 171;
	shr.u16 	%rs21, %rs20, 9;
	.loc	1 31 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:31:19
	// Low 8 bits of %rs23 = j2 % 3. %rs24 sets bits 6 and up (-64 = 0xFFC0).
	mul.lo.s16 	%rs22, %rs21, 3;
	sub.s16 	%rs23, %rs18, %rs22;
	or.b16  	%rs24, %rs1, -64;
	.loc	1 32 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:32:19
	// j3 = %rs25 = 192 + (tid & 63); %rs27 = j3 / 3 as (j3 * 171) >> 9.
	and.b16  	%rs25, %rs24, 255;
	mul.lo.s16 	%rs26, %rs25, 171;
	shr.u16 	%rs27, %rs26, 9;
	.loc	1 31 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:31:19
	// %r53 = j3 % 3 (low 8 bits of the 16-bit difference).
	mul.lo.s16 	%rs28, %rs27, 3;
	sub.s16 	%rs29, %rs24, %rs28;
	cvt.u32.u16 	%r52, %rs29;
	and.b32  	%r53, %r52, 255;
	.loc	1 33 42                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:33:42
	// %r54 = 16 * (x / 12): row offset in elements for the first sum.
	mul.wide.u16 	%r54, %rs5, 16;
	.loc	1 33 30                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:33:30
	// %rd1 = src + 4 bytes * (16 * (x / 12) + x % 12) + 16 bytes,
	// i.e. element index b + 4 where b = 16 * (x / 12) + x % 12.
	cvt.u64.u32 	%rd13, %r54;
	cvt.u64.u16 	%rd14, %rs7;
	and.b64  	%rd15, %rd14, 252;
	add.s64 	%rd16, %rd15, %rd13;
	shl.b64 	%rd17, %rd16, 2;
	add.s64 	%rd18, %rd12, %rd17;
	add.s64 	%rd1, %rd18, 16;
	// %r5 = 0 is the fallback value for every predicated-off load below.
	mov.b32 	%r5, 0;
	.loc	1 33 47                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:33:47
	// Masked 4-wide load of a[b+4 .. b+7] into %r1..%r4; zeros if !%p1.
	// begin inline asm
	mov.u32 %r1, 0x0;
	mov.u32 %r2, 0x0;
	mov.u32 %r3, 0x0;
	mov.u32 %r4, 0x0;
	@%p1 ld.global.v4.b32 { %r1, %r2, %r3, %r4 }, [ %rd1 + 0 ];
	@!%p1 mov.u32 %r1, %r5;
	@!%p1 mov.u32 %r2, %r5;
	@!%p1 mov.u32 %r3, %r5;
	@!%p1 mov.u32 %r4, %r5;
	// end inline asm
	.loc	1 34 35                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:34:35
	// %r55 = b. OR acts as ADD here because x % 12 < 16 and %r54 is a
	// multiple of 16.
	or.b32  	%r55, %r54, %r51;
	.loc	1 34 30                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:34:30
	mul.wide.u32 	%rd19, %r55, 4;
	add.s64 	%rd2, %rd12, %rd19;
	.loc	1 34 43                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:34:43
	// Masked 4-wide load of a[b .. b+3] into %r9..%r12; zeros if !%p1.
	// begin inline asm
	mov.u32 %r9, 0x0;
	mov.u32 %r10, 0x0;
	mov.u32 %r11, 0x0;
	mov.u32 %r12, 0x0;
	@%p1 ld.global.v4.b32 { %r9, %r10, %r11, %r12 }, [ %rd2 + 0 ];
	@!%p1 mov.u32 %r9, %r5;
	@!%p1 mov.u32 %r10, %r5;
	@!%p1 mov.u32 %r11, %r5;
	@!%p1 mov.u32 %r12, %r5;
	// end inline asm
	.loc	1 35 41                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:35:41
	// 4 * (j / 3) for the four slots of the second sum.
	shl.b16 	%rs30, %rs10, 2;
	shl.b16 	%rs31, %rs15, 2;
	shl.b16 	%rs32, %rs21, 2;
	mul.wide.u16 	%r56, %rs27, 4;
	.loc	1 35 30                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:35:30
	// %rd3..%rd6 = src + 4 bytes * (4 * (j / 3) + j % 3) + 4 bytes for
	// j0..j3, i.e. element index c + 1 with c = 4 * (j / 3) + j % 3.
	cvt.u64.u16 	%rd20, %rs30;
	cvt.u64.u16 	%rd21, %rs12;
	and.b64  	%rd22, %rd21, 255;
	add.s64 	%rd23, %rd22, %rd20;
	shl.b64 	%rd24, %rd23, 2;
	add.s64 	%rd25, %rd12, %rd24;
	add.s64 	%rd3, %rd25, 4;
	cvt.u64.u16 	%rd26, %rs31;
	cvt.u64.u16 	%rd27, %rs17;
	and.b64  	%rd28, %rd27, 255;
	add.s64 	%rd29, %rd28, %rd26;
	shl.b64 	%rd30, %rd29, 2;
	add.s64 	%rd31, %rd12, %rd30;
	add.s64 	%rd4, %rd31, 4;
	cvt.u64.u16 	%rd32, %rs32;
	cvt.u64.u16 	%rd33, %rs23;
	and.b64  	%rd34, %rd33, 255;
	add.s64 	%rd35, %rd34, %rd32;
	shl.b64 	%rd36, %rd35, 2;
	add.s64 	%rd37, %rd12, %rd36;
	add.s64 	%rd5, %rd37, 4;
	cvt.u64.u32 	%rd38, %r56;
	cvt.u64.u16 	%rd39, %rs29;
	and.b64  	%rd40, %rd39, 255;
	add.s64 	%rd41, %rd40, %rd38;
	shl.b64 	%rd42, %rd41, 2;
	add.s64 	%rd43, %rd12, %rd42;
	add.s64 	%rd6, %rd43, 4;
	// %p11 is constant true: the loads for j0..j2 are unconditional.
	mov.pred 	%p11, -1;
	.loc	1 35 46                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:35:46
	// %r17 = a[c0 + 1]
	// begin inline asm
	mov.u32 %r17, 0x0;
	@%p11 ld.global.b32 { %r17 }, [ %rd3 + 0 ];
	@!%p11 mov.u32 %r17, %r5;
	// end inline asm
	// %r19 = a[c1 + 1]
	// begin inline asm
	mov.u32 %r19, 0x0;
	@%p11 ld.global.b32 { %r19 }, [ %rd4 + 0 ];
	@!%p11 mov.u32 %r19, %r5;
	// end inline asm
	mov.b32 	%f1, %r19;
	// %r21 = a[c2 + 1]
	// begin inline asm
	mov.u32 %r21, 0x0;
	@%p11 ld.global.b32 { %r21 }, [ %rd5 + 0 ];
	@!%p11 mov.u32 %r21, %r5;
	// end inline asm
	// %p17 is constant false: the j3 slot never loads, so %r23 (and %r31
	// further down) end up 0 and are not referenced by the arithmetic below.
	mov.pred 	%p17, 0;
	// begin inline asm
	mov.u32 %r23, 0x0;
	@%p17 ld.global.b32 { %r23 }, [ %rd6 + 0 ];
	@!%p17 mov.u32 %r23, %r5;
	// end inline asm
	.loc	1 36 35                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:36:35
	// Element index c = 4 * (j / 3) | (j % 3) for j0..j3; OR acts as ADD
	// because j % 3 < 4.
	or.b16  	%rs33, %rs30, %rs12;
	cvt.u32.u16 	%r57, %rs33;
	and.b32  	%r58, %r57, 255;
	or.b16  	%rs34, %rs31, %rs17;
	cvt.u32.u16 	%r59, %rs34;
	and.b32  	%r60, %r59, 255;
	or.b16  	%rs35, %rs32, %rs23;
	cvt.u32.u16 	%r61, %rs35;
	and.b32  	%r62, %r61, 255;
	or.b32  	%r63, %r56, %r53;
	.loc	1 36 30                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:36:30
	// %rd7..%rd10 = src + 4 bytes * c for j0..j3.
	mul.wide.u32 	%rd44, %r58, 4;
	add.s64 	%rd7, %rd12, %rd44;
	mul.wide.u32 	%rd45, %r60, 4;
	add.s64 	%rd8, %rd12, %rd45;
	mul.wide.u32 	%rd46, %r62, 4;
	add.s64 	%rd9, %rd12, %rd46;
	mul.wide.u32 	%rd47, %r63, 4;
	add.s64 	%rd10, %rd12, %rd47;
	.loc	1 36 42                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:36:42
	// %r25 = a[c0]
	// begin inline asm
	mov.u32 %r25, 0x0;
	@%p11 ld.global.b32 { %r25 }, [ %rd7 + 0 ];
	@!%p11 mov.u32 %r25, %r5;
	// end inline asm
	// %r27 = a[c1]
	// begin inline asm
	mov.u32 %r27, 0x0;
	@%p11 ld.global.b32 { %r27 }, [ %rd8 + 0 ];
	@!%p11 mov.u32 %r27, %r5;
	// end inline asm
	mov.b32 	%f2, %r27;
	// %r29 = a[c2]
	// begin inline asm
	mov.u32 %r29, 0x0;
	@%p11 ld.global.b32 { %r29 }, [ %rd9 + 0 ];
	@!%p11 mov.u32 %r29, %r5;
	// end inline asm
	// j3 slot again predicated off (%p17 is false).
	// begin inline asm
	mov.u32 %r31, 0x0;
	@%p17 ld.global.b32 { %r31 }, [ %rd10 + 0 ];
	@!%p17 mov.u32 %r31, %r5;
	// end inline asm
	.loc	1 33 47                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:33:47
	// ---- First sum, per-thread part: squared differences of the two
	// 4-wide vectors, accumulated with FMAs.
	mov.b32 	%f3, %r2;
	mov.b32 	%f4, %r1;
	.loc	1 34 43                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:34:43
	mov.b32 	%f5, %r10;
	mov.b32 	%f6, %r9;
	.loc	1 37 18                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:37:18
	sub.f32 	%f7, %f4, %f6;
	sub.f32 	%f8, %f3, %f5;
	.loc	1 38 18                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:38:18
	mul.f32 	%f9, %f8, %f8;
	.loc	1 33 47                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:33:47
	mov.b32 	%f10, %r3;
	mov.b32 	%f11, %r4;
	.loc	1 34 43                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:34:43
	mov.b32 	%f12, %r11;
	mov.b32 	%f13, %r12;
	.loc	1 37 18                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:37:18
	sub.f32 	%f14, %f11, %f13;
	sub.f32 	%f15, %f10, %f12;
$L__tmp1:
	.loc	2 256 15                        // standard.py:256:15
	// %f18 = d0^2 + d1^2 + d2^2 + d3^2; %f19 forces it to 0 when !%p1.
	fma.rn.f32 	%f16, %f7, %f7, %f9;
	fma.rn.f32 	%f17, %f15, %f15, %f16;
	fma.rn.f32 	%f18, %f14, %f14, %f17;
	selp.f32 	%f19, %f18, 0f00000000, %p1;
	.loc	2 267 36                        // standard.py:267:36
	// Intra-warp butterfly reduction: XOR-lane exchange with distances
	// 16, 8, 4, 2, 1 (clamp 31, member mask -1 = all lanes), adding after
	// each step. %f29 is the warp total.
	mov.b32 	%r64, %f19;
	shfl.sync.bfly.b32	%r65, %r64, 16, 31, -1;
	mov.b32 	%f20, %r65;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f21, %f19, %f20;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r66, %f21;
	shfl.sync.bfly.b32	%r67, %r66, 8, 31, -1;
	mov.b32 	%f22, %r67;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f23, %f21, %f22;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r68, %f23;
	shfl.sync.bfly.b32	%r69, %r68, 4, 31, -1;
	mov.b32 	%f24, %r69;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f25, %f23, %f24;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r70, %f25;
	shfl.sync.bfly.b32	%r71, %r70, 2, 31, -1;
	mov.b32 	%f26, %r71;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f27, %f25, %f26;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r72, %f27;
	shfl.sync.bfly.b32	%r73, %r72, 1, 31, -1;
	mov.b32 	%f28, %r73;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f29, %f27, %f28;
	.loc	2 267 36                        // standard.py:267:36
	// Cross-warp step. %p27 = lane 0. %r33 = global_smem + 4 * (bit 5 of
	// tid), so each warp's lane 0 stores its total into its own word.
	setp.eq.s32 	%p27, %r47, 0;
	shr.u32 	%r74, %r46, 3;
	and.b32  	%r75, %r74, 4;
	mov.u32 	%r76, global_smem;
	add.s32 	%r33, %r76, %r75;
	mov.b32 	%r34, %f29;
	// begin inline asm
	@%p27 st.shared.b32 [ %r33 + 0 ], %r34;
	// end inline asm
	bar.sync 	0;
	// Threads 0 and 1 (%p28) read back one partial each from
	// global_smem + 4 * tid. Other threads leave %r35 unwritten; whatever
	// they compute from it only feeds the store guarded by %p29.
	setp.lt.s32 	%p28, %r46, 2;
	shl.b32 	%r77, %r46, 2;
	add.s32 	%r36, %r76, %r77;
	// begin inline asm
	@%p28 ld.shared.b32 %r35, [ %r36 + 0 ];
	// end inline asm
	mov.b32 	%f30, %r35;
	shfl.sync.bfly.b32	%r78, %r35, 1, 31, -1;
	mov.b32 	%f31, %r78;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f32, %f30, %f31;
	.loc	2 267 36                        // standard.py:267:36
	// %p29 = (tid < 2) && (tid is even), i.e. only thread 0 writes the
	// block total back to global_smem[0].
	and.b32  	%r79, %r46, 1;
	setp.eq.b32 	%p34, %r79, 1;
	not.pred 	%p35, %p34;
	and.pred  	%p29, %p28, %p35;
	mov.b32 	%r38, %f32;
	// begin inline asm
	@%p29 st.shared.b32 [ %r36 + 0 ], %r38;
	// end inline asm
	bar.sync 	0;
	// %f33 = S1, the block-wide total of the first sum.
	ld.shared.f32 	%f33, [global_smem];
$L__tmp2:
	.loc	1 42 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:42:19
	// ---- Second sum, per-thread part: (a[c+1] - a[c])^2 for j0, j1, j2.
	sub.f32 	%f34, %f1, %f2;
	.loc	1 35 46                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:35:46
	mov.b32 	%f35, %r17;
	mov.b32 	%f36, %r21;
	.loc	1 36 42                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:36:42
	mov.b32 	%f37, %r25;
	mov.b32 	%f38, %r29;
	.loc	1 42 19                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:42:19
	sub.f32 	%f39, %f36, %f38;
	sub.f32 	%f40, %f35, %f37;
	.loc	1 43 20                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:43:20
	mul.f32 	%f41, %f40, %f40;
$L__tmp3:
	.loc	2 267 36                        // standard.py:267:36
	// Barrier between the read of global_smem[0] above and the stores that
	// reuse the same scratch words below (presumably so every thread has
	// read S1 before it is overwritten).
	bar.sync 	0;
	.loc	2 256 15                        // standard.py:256:15
	fma.rn.f32 	%f42, %f34, %f34, %f41;
	fma.rn.f32 	%f43, %f39, %f39, %f42;
	.loc	2 267 36                        // standard.py:267:36
	// Same reduction shape as for S1: butterfly 16/8/4/2/1 inside the warp,
	// lane-0 store to shared, threads 0..1 combine, thread 0 writes total.
	mov.b32 	%r80, %f43;
	shfl.sync.bfly.b32	%r81, %r80, 16, 31, -1;
	mov.b32 	%f44, %r81;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f45, %f43, %f44;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r82, %f45;
	shfl.sync.bfly.b32	%r83, %r82, 8, 31, -1;
	mov.b32 	%f46, %r83;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f47, %f45, %f46;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r84, %f47;
	shfl.sync.bfly.b32	%r85, %r84, 4, 31, -1;
	mov.b32 	%f48, %r85;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f49, %f47, %f48;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r86, %f49;
	shfl.sync.bfly.b32	%r87, %r86, 2, 31, -1;
	mov.b32 	%f50, %r87;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f51, %f49, %f50;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r88, %f51;
	shfl.sync.bfly.b32	%r89, %r88, 1, 31, -1;
	mov.b32 	%f52, %r89;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f53, %f51, %f52;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r40, %f53;
	// begin inline asm
	@%p27 st.shared.b32 [ %r33 + 0 ], %r40;
	// end inline asm
	bar.sync 	0;
	// begin inline asm
	@%p28 ld.shared.b32 %r41, [ %r36 + 0 ];
	// end inline asm
	mov.b32 	%f54, %r41;
	shfl.sync.bfly.b32	%r90, %r41, 1, 31, -1;
	mov.b32 	%f55, %r90;
	.loc	2 256 15                        // standard.py:256:15
	add.f32 	%f56, %f54, %f55;
	.loc	2 267 36                        // standard.py:267:36
	mov.b32 	%r44, %f56;
	// begin inline asm
	@%p29 st.shared.b32 [ %r36 + 0 ], %r44;
	// end inline asm
	bar.sync 	0;
	// %f57 = S2, the block-wide total of the second sum.
	ld.shared.f32 	%f57, [global_smem];
$L__tmp4:
	.loc	1 49 20                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:49:20
	// ---- Epilogue. 0f3DAAAAAB is the f32 nearest to 1/12 and 0f3E800000
	// is 0.25, so %f61 = ((S1 * C + S2 * C) * 2) * 0.25 with C ~ 1/12.
	mul.f32 	%f58, %f57, 0f3DAAAAAB;
	.loc	1 50 20                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:50:20
	fma.rn.f32 	%f59, %f33, 0f3DAAAAAB, %f58;
	.loc	1 52 20                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:52:20
	add.f32 	%f60, %f59, %f59;
	.loc	1 54 20                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:54:20
	mul.f32 	%f61, %f60, 0f3E800000;
	.loc	1 55 4                          // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:55:4
	bar.sync 	0;
	.loc	1 56 71                         // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:56:71
	// Only the thread with (tid & 63) == 0 writes the scalar result to
	// [param_0 + 0].
	setp.eq.s32 	%p33, %r48, 0;
	mov.b32 	%r45, %f61;
	// begin inline asm
	@%p33 st.global.b32 [ %rd11 + 0 ], { %r45 };
	// end inline asm
	.loc	1 56 4                          // cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py:56:4
	ret;
$L__tmp5:
$L__func_end0:
                                        // -- End function
}
	.file	1 "inductor_cache/r4/cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py"
	.file	2 "/home/sahanp/.conda/envs/parity-bench/lib/python3.12/site-packages/triton/language/standard.py"
	// DWARF abbreviation table. It declares four abbreviation codes, each a
	// (tag, has-children, list of attribute/form pairs) record ended by two
	// zero bytes:
	//   1: compile unit (producer, language, name, stmt_list, comp_dir)
	//   2: subprogram without children (name, inline)
	//   3: subprogram with children (low_pc, high_pc, abstract_origin)
	//   4: inlined subroutine (abstract_origin, low_pc, high_pc,
	//      call_file, call_line, call_column)
	// The final extra zero byte terminates the table.
	.section	.debug_abbrev
	{
.b8 1                                   // Abbreviation Code
.b8 17                                  // DW_TAG_compile_unit
.b8 1                                   // DW_CHILDREN_yes
.b8 37                                  // DW_AT_producer
.b8 8                                   // DW_FORM_string
.b8 19                                  // DW_AT_language
.b8 5                                   // DW_FORM_data2
.b8 3                                   // DW_AT_name
.b8 8                                   // DW_FORM_string
.b8 16                                  // DW_AT_stmt_list
.b8 6                                   // DW_FORM_data4
.b8 27                                  // DW_AT_comp_dir
.b8 8                                   // DW_FORM_string
.b8 0                                   // EOM(1)
.b8 0                                   // EOM(2)
.b8 2                                   // Abbreviation Code
.b8 46                                  // DW_TAG_subprogram
.b8 0                                   // DW_CHILDREN_no
.b8 3                                   // DW_AT_name
.b8 8                                   // DW_FORM_string
.b8 32                                  // DW_AT_inline
.b8 11                                  // DW_FORM_data1
.b8 0                                   // EOM(1)
.b8 0                                   // EOM(2)
.b8 3                                   // Abbreviation Code
.b8 46                                  // DW_TAG_subprogram
.b8 1                                   // DW_CHILDREN_yes
.b8 17                                  // DW_AT_low_pc
.b8 1                                   // DW_FORM_addr
.b8 18                                  // DW_AT_high_pc
.b8 1                                   // DW_FORM_addr
.b8 49                                  // DW_AT_abstract_origin
.b8 19                                  // DW_FORM_ref4
.b8 0                                   // EOM(1)
.b8 0                                   // EOM(2)
.b8 4                                   // Abbreviation Code
.b8 29                                  // DW_TAG_inlined_subroutine
.b8 0                                   // DW_CHILDREN_no
.b8 49                                  // DW_AT_abstract_origin
.b8 19                                  // DW_FORM_ref4
.b8 17                                  // DW_AT_low_pc
.b8 1                                   // DW_FORM_addr
.b8 18                                  // DW_AT_high_pc
.b8 1                                   // DW_FORM_addr
.b8 88                                  // DW_AT_call_file
.b8 11                                  // DW_FORM_data1
.b8 89                                  // DW_AT_call_line
.b8 11                                  // DW_FORM_data1
.b8 87                                  // DW_AT_call_column
.b8 11                                  // DW_FORM_data1
.b8 0                                   // EOM(1)
.b8 0                                   // EOM(2)
.b8 0                                   // EOM(3)
	}
	// DWARF version 2 debug-info unit for the kernel in this file. Strings
	// are emitted one ASCII byte per .b8 and end with a 0 byte; their decoded
	// text is given in the comments added before each run.
	// Layout: one compile unit containing an abstract subprogram entry
	// (at unit offset 0x63 = 99), then a concrete subprogram spanning
	// $L__func_begin0..$L__func_end0 with two inlined-subroutine children
	// covering the two $L__tmpN label ranges. Every DW_AT_abstract_origin
	// below holds 99, i.e. refers to that abstract subprogram entry.
	.section	.debug_info
	{
.b32 211                                // Length of Unit
.b8 2                                   // DWARF version number
.b8 0
.b32 .debug_abbrev                      // Offset Into Abbrev. Section
.b8 8                                   // Address Size (in bytes)
.b8 1                                   // Abbrev [1] 0xb:0xcc DW_TAG_compile_unit
// Producer string: "triton"
.b8 116                                 // DW_AT_producer
.b8 114
.b8 105
.b8 116
.b8 111
.b8 110
.b8 0
// Language code as 2-byte little-endian data: 0x0002
.b8 2                                   // DW_AT_language
.b8 0
// Name string: "cr4vciir7373x5oqgqvwt7jscu4ioqd4w3uryi6rm2v3jstfpvgl.py"
.b8 99                                  // DW_AT_name
.b8 114
.b8 52
.b8 118
.b8 99
.b8 105
.b8 105
.b8 114
.b8 55
.b8 51
.b8 55
.b8 51
.b8 120
.b8 53
.b8 111
.b8 113
.b8 103
.b8 113
.b8 118
.b8 119
.b8 116
.b8 55
.b8 106
.b8 115
.b8 99
.b8 117
.b8 52
.b8 105
.b8 111
.b8 113
.b8 100
.b8 52
.b8 119
.b8 51
.b8 117
.b8 114
.b8 121
.b8 105
.b8 54
.b8 114
.b8 109
.b8 50
.b8 118
.b8 51
.b8 106
.b8 115
.b8 116
.b8 102
.b8 112
.b8 118
.b8 103
.b8 108
.b8 46
.b8 112
.b8 121
.b8 0
.b32 .debug_line                        // DW_AT_stmt_list
// Compilation directory string: "inductor_cache/r4"
.b8 105                                 // DW_AT_comp_dir
.b8 110
.b8 100
.b8 117
.b8 99
.b8 116
.b8 111
.b8 114
.b8 95
.b8 99
.b8 97
.b8 99
.b8 104
.b8 101
.b8 47
.b8 114
.b8 52
.b8 0
.b8 2                                   // Abbrev [2] 0x63:0x2d DW_TAG_subprogram
// Name string: "triton_per_fused_add_div_mul_pow_sub_sum_0"
.b8 116                                 // DW_AT_name
.b8 114
.b8 105
.b8 116
.b8 111
.b8 110
.b8 95
.b8 112
.b8 101
.b8 114
.b8 95
.b8 102
.b8 117
.b8 115
.b8 101
.b8 100
.b8 95
.b8 97
.b8 100
.b8 100
.b8 95
.b8 100
.b8 105
.b8 118
.b8 95
.b8 109
.b8 117
.b8 108
.b8 95
.b8 112
.b8 111
.b8 119
.b8 95
.b8 115
.b8 117
.b8 98
.b8 95
.b8 115
.b8 117
.b8 109
.b8 95
.b8 48
.b8 0
.b8 1                                   // DW_AT_inline
// Concrete subprogram covering the whole kernel body.
.b8 3                                   // Abbrev [3] 0x90:0x46 DW_TAG_subprogram
.b64 $L__func_begin0                    // DW_AT_low_pc
.b64 $L__func_end0                      // DW_AT_high_pc
.b32 99                                 // DW_AT_abstract_origin
// First inlined range ($L__tmp1..$L__tmp2): call site file 1, line 41, col 24.
.b8 4                                   // Abbrev [4] 0xa5:0x18 DW_TAG_inlined_subroutine
.b32 99                                 // DW_AT_abstract_origin
.b64 $L__tmp1                           // DW_AT_low_pc
.b64 $L__tmp2                           // DW_AT_high_pc
.b8 1                                   // DW_AT_call_file
.b8 41                                  // DW_AT_call_line
.b8 24                                  // DW_AT_call_column
// Second inlined range ($L__tmp3..$L__tmp4): call site file 1, line 46, col 26.
.b8 4                                   // Abbrev [4] 0xbd:0x18 DW_TAG_inlined_subroutine
.b32 99                                 // DW_AT_abstract_origin
.b64 $L__tmp3                           // DW_AT_low_pc
.b64 $L__tmp4                           // DW_AT_high_pc
.b8 1                                   // DW_AT_call_file
.b8 46                                  // DW_AT_call_line
.b8 26                                  // DW_AT_call_column
.b8 0                                   // End Of Children Mark
.b8 0                                   // End Of Children Mark
	}
	.section	.debug_macinfo	{	}
