//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-24817639
// Cuda compilation tools, release 10.0, V10.0.130
// Based on LLVM 3.4svn
//

.version 6.3
.target sm_70
.address_size 64

	// .globl	_Z22do_computation_207_gpuP2L0iid

// ---------------------------------------------------------------------------
// Kernel _Z22do_computation_207_gpuP2L0iid
//
// Multiplies f64 values in global memory by the scalar param_5, in place.
// Every address is derived from the descriptor pointer param_4 by following
// 64-bit pointers. Only the byte offsets used below are known from this file
// (field names are not visible here):
//   desc + 0        -> f64 array "A"
//   desc + 16       -> table T1, 32-byte entries indexed by j
//   T1[j] + 0       -> f64 array "B_j";   T1[j] + 16 -> table T2 (32-byte entries, index k)
//   T2[k] + 0       -> f64 array "C_jk";  T2[k] + 16 -> table T3 (16-byte entries, index l)
//   T3[l] + 0       -> f64 array "D_jkl"
//
// Parameters:
//   param_0 (s32) n_l : trip count of the innermost l loop          (%r50)
//   param_1 (s32) n_k : trip count of the k loop                    (%r51)
//   param_2 (s32) n_j : trip count of the j loop                    (%r52)
//   param_3 (s32) n_i : bound on the per-thread element index i     (%r53)
//   param_4 (u64)     : generic address of the descriptor           (%rd6)
//   param_5 (f64) s   : scale factor                                (%fd1)
//
// Logic, reading the subtract-and-test comparisons as plain "<" (i.e.
// ignoring 32-bit wraparound):
//   for (base = 0; ; ) {
//     i = base + 128*%ctaid.x + %tid.x;
//     if (i < n_i) {
//       A[i] *= s;
//       for (j = 0; j < n_j; j++) {
//         B_j[i] *= s;
//         for (k = 0; k < n_k; k++) {
//           C_jk[i] *= s;
//           for (l = 0; l < n_l; l++) D_jkl[i] *= s;
//         }
//       }
//     }
//     base += 128*%nctaid.x;  if (!(base < n_i)) break;
//   }
// The l loop, and the k loop when n_l <= 0, are unrolled by 4 with peeled
// remainder iterations. Pointer loads are repeated for every element rather
// than cached in registers.
//
// NOTE(review): the mangled name demangles to
// do_computation_207_gpu(L0*, int, int, double), which does not list the four
// s32 parameters declared here -- presumably the name is inherited from the
// host routine this kernel was outlined from; confirm against the generator.
// ---------------------------------------------------------------------------
.visible .entry _Z22do_computation_207_gpuP2L0iid(
	.param .u32 _Z22do_computation_207_gpuP2L0iid_param_0,
	.param .u32 _Z22do_computation_207_gpuP2L0iid_param_1,
	.param .u32 _Z22do_computation_207_gpuP2L0iid_param_2,
	.param .u32 _Z22do_computation_207_gpuP2L0iid_param_3,
	.param .u64 _Z22do_computation_207_gpuP2L0iid_param_4,
	.param .f64 _Z22do_computation_207_gpuP2L0iid_param_5
)
// At most 128 threads per CTA in x; the index math below hard-codes 128 (shl 7).
.maxntid 128, 1, 1
{
	.reg .pred 	%p<21>;
	.reg .b32 	%r<126>;
	.reg .f64 	%fd<44>;
	.reg .b64 	%rd<186>;


	// Fetch arguments: %r50 = n_l, %r51 = n_k, %r52 = n_j, %r53 = n_i,
	// %rd6 = descriptor address, %fd1 = s.
	ld.param.u32 	%r50, [_Z22do_computation_207_gpuP2L0iid_param_0];
	ld.param.u32 	%r51, [_Z22do_computation_207_gpuP2L0iid_param_1];
	ld.param.u32 	%r52, [_Z22do_computation_207_gpuP2L0iid_param_2];
	ld.param.u32 	%r53, [_Z22do_computation_207_gpuP2L0iid_param_3];
	ld.param.u64 	%rd6, [_Z22do_computation_207_gpuP2L0iid_param_4];
	ld.param.f64 	%fd1, [_Z22do_computation_207_gpuP2L0iid_param_5];
	// %r103 = base of the current sweep over i, starting at 0.
	mov.u32 	%r103, 0;

// Sweep loop head: %r2 = i = base + 128*%ctaid.x + %tid.x.
BB0_1:
	mov.u32 	%r55, %ctaid.x;
	shl.b32 	%r56, %r55, 7;
	add.s32 	%r57, %r103, %r56;
	mov.u32 	%r58, %tid.x;
	add.s32 	%r2, %r57, %r58;
	// Bounds guard: skip the body when i + (1 - n_i) > 0.
	// %r59 holds the constant 1 and is reused further down.
	mov.u32 	%r59, 1;
	sub.s32 	%r60, %r59, %r53;
	add.s32 	%r61, %r2, %r60;
	setp.gt.s32	%p1, %r61, 0;
	@%p1 bra 	BB0_34;

	// %p2 = (n_j - 1 < 0); it is branched on only after the level-0 update.
	add.s32 	%r62, %r52, -1;
	setp.lt.s32	%p2, %r62, 0;
	// Level 0: load the pointer stored at desc+0 (non-coherent .nc load) and
	// scale the f64 at that array + 8*i. %rd1 = 8*i is reused for every array.
	cvta.to.global.u64 	%rd7, %rd6;
	ld.global.nc.u64 	%rd8, [%rd7];
	cvta.to.global.u64 	%rd9, %rd8;
	mul.wide.s32 	%rd1, %r2, 8;
	add.s64 	%rd10, %rd9, %rd1;
	ld.global.f64 	%fd2, [%rd10];
	mul.f64 	%fd3, %fd2, %fd1;
	st.global.f64 	[%rd10], %fd3;
	@%p2 bra 	BB0_34;

	// %rd2 = pointer stored at desc+16 (base of table T1).
	// %r104 counts completed j iterations; %r105 is the j used for addressing.
	ld.global.nc.u64 	%rd12, [%rd7+16];
	cvta.to.global.u64 	%rd2, %rd12;
	mov.u32 	%r104, 0;
	mov.u32 	%r105, %r104;

// j loop head. %p3 = (n_k - 1 < 0) is tested after the level-1 update.
// %rd3 = j (sign-extended), %rd4 = T1 + 32*j = address of entry j.
BB0_4:
	add.s32 	%r65, %r51, -1;
	setp.lt.s32	%p3, %r65, 0;
	cvt.s64.s32	%rd3, %r105;
	mul.wide.s32 	%rd13, %r105, 32;
	add.s64 	%rd4, %rd2, %rd13;
	// Level 1: scale the f64 at (pointer stored at entry j, offset 0) + 8*i.
	ld.global.u64 	%rd14, [%rd4];
	cvta.to.global.u64 	%rd15, %rd14;
	add.s64 	%rd16, %rd15, %rd1;
	ld.global.f64 	%fd4, [%rd16];
	mul.f64 	%fd5, %fd4, %fd1;
	st.global.f64 	[%rd16], %fd5;
	@%p3 bra 	BB0_33;

	// Pick the k-loop variant. %p4 = (n_l - 1 < 0) selects BB0_22, which has
	// no l loop; otherwise BB0_6 runs the k loop with its l loop.
	// %r66 holds the constant 0 (reused below); %r106 = k = 0.
	add.s32 	%r67, %r50, -1;
	setp.lt.s32	%p4, %r67, 0;
	mov.u32 	%r66, 0;
	mov.u32 	%r106, %r66;
	@%p4 bra 	BB0_22;
	bra.uni 	BB0_6;

// k loop without an l loop, unrolled by 4. %r92 = n_k & 3 is the number of
// single iterations peeled off first (BB0_26/28/29); groups of four follow in
// BB0_32. %r119 tracks the k of the next peeled iteration.
BB0_22:
	and.b32  	%r92, %r51, 3;
	mov.u32 	%r119, 0;
	setp.eq.s32	%p14, %r92, 0;
	@%p14 bra 	BB0_23;

	setp.eq.s32	%p15, %r92, 1;
	@%p15 bra 	BB0_25;
	bra.uni 	BB0_26;

// Exactly one peeled iteration: %r120 = 0, k = 0 is handled in BB0_29.
BB0_25:
	mov.u32 	%r120, %r119;
	bra.uni 	BB0_29;

// k loop body (taken when n_l - 1 >= 0). %rd5 = k sign-extended to 64 bits.
BB0_6:
	// Level 2: entry k of the table whose pointer is stored at %rd4+16;
	// scale the f64 at (pointer stored at that entry, offset 0) + 8*i.
	ld.global.u64 	%rd17, [%rd4+16];
	cvta.to.global.u64 	%rd18, %rd17;
	cvt.s64.s32	%rd5, %r106;
	mul.wide.s32 	%rd19, %r106, 32;
	add.s64 	%rd20, %rd18, %rd19;
	ld.global.u64 	%rd21, [%rd20];
	cvta.to.global.u64 	%rd22, %rd21;
	add.s64 	%rd23, %rd22, %rd1;
	ld.global.f64 	%fd6, [%rd23];
	mul.f64 	%fd7, %fd6, %fd1;
	st.global.f64 	[%rd23], %fd7;
	// Set up the l loop: %r107 = -n_l (negative remaining count), %r12 = l = 0.
	// With n_l < 4 the 4-wide loop is skipped and only remainder code runs.
	neg.s32 	%r107, %r50;
	setp.lt.s32	%p5, %r50, 4;
	mov.u32 	%r12, %r66;
	@%p5 bra 	BB0_9;

// l loop, unrolled by 4. Each of the four copies re-walks the pointer chain:
//   load64(%rd4+16) + 32*k -> load64(+16) + 16*l -> load64(+0) + 8*i
// and scales the f64 found there.
BB0_7:
	// Copy 0: index l = %r12.
	ld.global.u64 	%rd24, [%rd4+16];
	cvta.to.global.u64 	%rd25, %rd24;
	shl.b64 	%rd26, %rd5, 5;
	add.s64 	%rd27, %rd25, %rd26;
	ld.global.u64 	%rd28, [%rd27+16];
	cvta.to.global.u64 	%rd29, %rd28;
	mul.wide.s32 	%rd30, %r12, 16;
	add.s64 	%rd31, %rd29, %rd30;
	ld.global.u64 	%rd32, [%rd31];
	cvta.to.global.u64 	%rd33, %rd32;
	add.s64 	%rd34, %rd33, %rd1;
	ld.global.f64 	%fd8, [%rd34];
	mul.f64 	%fd9, %fd8, %fd1;
	// Indices for copies 1..3 are formed before the store of copy 0.
	add.s32 	%r71, %r12, 1;
	add.s32 	%r72, %r12, 2;
	add.s32 	%r73, %r12, 3;
	st.global.f64 	[%rd34], %fd9;
	// Copy 1: index l + 1 (%r71).
	ld.global.u64 	%rd35, [%rd4+16];
	cvta.to.global.u64 	%rd36, %rd35;
	add.s64 	%rd37, %rd36, %rd26;
	ld.global.u64 	%rd38, [%rd37+16];
	cvta.to.global.u64 	%rd39, %rd38;
	mul.wide.s32 	%rd40, %r71, 16;
	add.s64 	%rd41, %rd39, %rd40;
	ld.global.u64 	%rd42, [%rd41];
	cvta.to.global.u64 	%rd43, %rd42;
	add.s64 	%rd44, %rd43, %rd1;
	ld.global.f64 	%fd10, [%rd44];
	mul.f64 	%fd11, %fd10, %fd1;
	st.global.f64 	[%rd44], %fd11;
	// Copy 2: index l + 2 (%r72).
	ld.global.u64 	%rd45, [%rd4+16];
	cvta.to.global.u64 	%rd46, %rd45;
	add.s64 	%rd47, %rd46, %rd26;
	ld.global.u64 	%rd48, [%rd47+16];
	cvta.to.global.u64 	%rd49, %rd48;
	mul.wide.s32 	%rd50, %r72, 16;
	add.s64 	%rd51, %rd49, %rd50;
	ld.global.u64 	%rd52, [%rd51];
	cvta.to.global.u64 	%rd53, %rd52;
	add.s64 	%rd54, %rd53, %rd1;
	ld.global.f64 	%fd12, [%rd54];
	mul.f64 	%fd13, %fd12, %fd1;
	st.global.f64 	[%rd54], %fd13;
	// Copy 3: index l + 3 (%r73).
	ld.global.u64 	%rd55, [%rd4+16];
	cvta.to.global.u64 	%rd56, %rd55;
	add.s64 	%rd57, %rd56, %rd26;
	ld.global.u64 	%rd58, [%rd57+16];
	cvta.to.global.u64 	%rd59, %rd58;
	mul.wide.s32 	%rd60, %r73, 16;
	add.s64 	%rd61, %rd59, %rd60;
	ld.global.u64 	%rd62, [%rd61];
	cvta.to.global.u64 	%rd63, %rd62;
	add.s64 	%rd64, %rd63, %rd1;
	ld.global.f64 	%fd14, [%rd64];
	mul.f64 	%fd15, %fd14, %fd1;
	st.global.f64 	[%rd64], %fd15;
	// l += 4. Loop again while (old %r107 + 7) < 0, i.e. at least four more
	// iterations remain after this group; %r107 becomes old %r107 + 4.
	add.s32 	%r12, %r12, 4;
	add.s32 	%r10, %r107, 4;
	add.s32 	%r74, %r107, 7;
	setp.lt.s32	%p6, %r74, 0;
	mov.u32 	%r107, %r10;
	@%p6 bra 	BB0_7;

	// Remaining count %r10 >= 0 means nothing is left: go to the next k.
	setp.gt.s32	%p7, %r10, -1;
	@%p7 bra 	BB0_21;

// l loop remainder. %r13 = max(l + 1 - n_l, 0) + n_l - l; %r14 = %r13 & 3 is
// the number of single iterations to peel:
//   3 -> BB0_15, 2 -> BB0_16, 1 -> BB0_17, 0 -> BB0_18.
BB0_9:
	sub.s32 	%r76, %r59, %r50;
	add.s32 	%r77, %r76, %r12;
	max.s32 	%r79, %r77, %r66;
	add.s32 	%r80, %r79, %r50;
	sub.s32 	%r13, %r80, %r12;
	and.b32  	%r14, %r13, 3;
	setp.eq.s32	%p8, %r14, 0;
	@%p8 bra 	BB0_10;
	bra.uni 	BB0_11;

// No peeled iteration: the 4-wide loop would start at l = %r12.
BB0_10:
	mov.u32 	%r117, %r12;
	bra.uni 	BB0_18;

BB0_11:
	setp.eq.s32	%p9, %r14, 1;
	@%p9 bra 	BB0_12;
	bra.uni 	BB0_13;

// One peeled iteration at l = %r12.
BB0_12:
	mov.u32 	%r112, %r12;
	bra.uni 	BB0_17;

BB0_13:
	setp.eq.s32	%p10, %r14, 2;
	@%p10 bra 	BB0_14;
	bra.uni 	BB0_15;

// Two peeled iterations starting at l = %r12.
BB0_14:
	mov.u32 	%r110, %r12;
	bra.uni 	BB0_16;

// First of three peeled iterations: index l = %r12.
BB0_15:
	ld.global.u64 	%rd65, [%rd4+16];
	cvta.to.global.u64 	%rd66, %rd65;
	shl.b64 	%rd67, %rd5, 5;
	add.s64 	%rd68, %rd66, %rd67;
	ld.global.u64 	%rd69, [%rd68+16];
	cvta.to.global.u64 	%rd70, %rd69;
	mul.wide.s32 	%rd71, %r12, 16;
	add.s64 	%rd72, %rd70, %rd71;
	ld.global.u64 	%rd73, [%rd72];
	cvta.to.global.u64 	%rd74, %rd73;
	add.s64 	%rd75, %rd74, %rd1;
	ld.global.f64 	%fd16, [%rd75];
	mul.f64 	%fd17, %fd16, %fd1;
	st.global.f64 	[%rd75], %fd17;
	add.s32 	%r110, %r12, 1;
	add.s32 	%r12, %r12, 1;

// Peeled iteration at index %r110.
BB0_16:
	ld.global.u64 	%rd76, [%rd4+16];
	cvta.to.global.u64 	%rd77, %rd76;
	shl.b64 	%rd78, %rd5, 5;
	add.s64 	%rd79, %rd77, %rd78;
	ld.global.u64 	%rd80, [%rd79+16];
	cvta.to.global.u64 	%rd81, %rd80;
	mul.wide.s32 	%rd82, %r110, 16;
	add.s64 	%rd83, %rd81, %rd82;
	ld.global.u64 	%rd84, [%rd83];
	cvta.to.global.u64 	%rd85, %rd84;
	add.s64 	%rd86, %rd85, %rd1;
	ld.global.f64 	%fd18, [%rd86];
	mul.f64 	%fd19, %fd18, %fd1;
	st.global.f64 	[%rd86], %fd19;
	add.s32 	%r112, %r110, 1;
	add.s32 	%r12, %r12, 1;

// Last peeled iteration at index %r112; %r117 = index for the 4-wide loop.
BB0_17:
	ld.global.u64 	%rd87, [%rd4+16];
	cvta.to.global.u64 	%rd88, %rd87;
	shl.b64 	%rd89, %rd5, 5;
	add.s64 	%rd90, %rd88, %rd89;
	ld.global.u64 	%rd91, [%rd90+16];
	cvta.to.global.u64 	%rd92, %rd91;
	mul.wide.s32 	%rd93, %r112, 16;
	add.s64 	%rd94, %rd92, %rd93;
	ld.global.u64 	%rd95, [%rd94];
	cvta.to.global.u64 	%rd96, %rd95;
	add.s64 	%rd97, %rd96, %rd1;
	ld.global.f64 	%fd20, [%rd97];
	mul.f64 	%fd21, %fd20, %fd1;
	st.global.f64 	[%rd97], %fd21;
	add.s32 	%r117, %r112, 1;
	add.s32 	%r12, %r12, 1;

// If %r13 (count pending when the remainder code was entered) is below 4
// (unsigned compare), the l loop is finished for this k.
BB0_18:
	setp.lt.u32	%p11, %r13, 4;
	@%p11 bra 	BB0_21;

	// %r116 = l - n_l: negative remaining count for the 4-wide loop below.
	sub.s32 	%r116, %r12, %r50;

// 4-wide l loop over indices %r117 .. %r117+3, same pointer walk as BB0_7.
BB0_20:
	// Index %r117.
	ld.global.u64 	%rd98, [%rd4+16];
	cvta.to.global.u64 	%rd99, %rd98;
	shl.b64 	%rd100, %rd5, 5;
	add.s64 	%rd101, %rd99, %rd100;
	ld.global.u64 	%rd102, [%rd101+16];
	cvta.to.global.u64 	%rd103, %rd102;
	mul.wide.s32 	%rd104, %r117, 16;
	add.s64 	%rd105, %rd103, %rd104;
	ld.global.u64 	%rd106, [%rd105];
	cvta.to.global.u64 	%rd107, %rd106;
	add.s64 	%rd108, %rd107, %rd1;
	ld.global.f64 	%fd22, [%rd108];
	mul.f64 	%fd23, %fd22, %fd1;
	st.global.f64 	[%rd108], %fd23;
	// Index %r117 + 1.
	ld.global.u64 	%rd109, [%rd4+16];
	cvta.to.global.u64 	%rd110, %rd109;
	add.s64 	%rd111, %rd110, %rd100;
	ld.global.u64 	%rd112, [%rd111+16];
	cvta.to.global.u64 	%rd113, %rd112;
	add.s32 	%r81, %r117, 1;
	mul.wide.s32 	%rd114, %r81, 16;
	add.s64 	%rd115, %rd113, %rd114;
	ld.global.u64 	%rd116, [%rd115];
	cvta.to.global.u64 	%rd117, %rd116;
	add.s64 	%rd118, %rd117, %rd1;
	ld.global.f64 	%fd24, [%rd118];
	mul.f64 	%fd25, %fd24, %fd1;
	st.global.f64 	[%rd118], %fd25;
	// Index %r117 + 2.
	ld.global.u64 	%rd119, [%rd4+16];
	cvta.to.global.u64 	%rd120, %rd119;
	add.s64 	%rd121, %rd120, %rd100;
	ld.global.u64 	%rd122, [%rd121+16];
	cvta.to.global.u64 	%rd123, %rd122;
	add.s32 	%r82, %r117, 2;
	mul.wide.s32 	%rd124, %r82, 16;
	add.s64 	%rd125, %rd123, %rd124;
	ld.global.u64 	%rd126, [%rd125];
	cvta.to.global.u64 	%rd127, %rd126;
	add.s64 	%rd128, %rd127, %rd1;
	ld.global.f64 	%fd26, [%rd128];
	mul.f64 	%fd27, %fd26, %fd1;
	st.global.f64 	[%rd128], %fd27;
	// Index %r117 + 3.
	ld.global.u64 	%rd129, [%rd4+16];
	cvta.to.global.u64 	%rd130, %rd129;
	add.s64 	%rd131, %rd130, %rd100;
	ld.global.u64 	%rd132, [%rd131+16];
	cvta.to.global.u64 	%rd133, %rd132;
	add.s32 	%r83, %r117, 3;
	mul.wide.s32 	%rd134, %r83, 16;
	add.s64 	%rd135, %rd133, %rd134;
	ld.global.u64 	%rd136, [%rd135];
	cvta.to.global.u64 	%rd137, %rd136;
	add.s64 	%rd138, %rd137, %rd1;
	ld.global.f64 	%fd28, [%rd138];
	mul.f64 	%fd29, %fd28, %fd1;
	st.global.f64 	[%rd138], %fd29;
	// Advance by 4; loop while the negative remaining count is still < 0.
	add.s32 	%r117, %r117, 4;
	add.s32 	%r116, %r116, 4;
	setp.lt.s32	%p12, %r116, 0;
	@%p12 bra 	BB0_20;

// Next k: k = (low 32 bits of %rd5) + 1; continue while k - n_k < 0,
// otherwise move on to the next j.
BB0_21:
	cvt.u32.u64	%r84, %rd5;
	add.s32 	%r106, %r84, 1;
	sub.s32 	%r85, %r106, %r51;
	setp.lt.s32	%p13, %r85, 0;
	@%p13 bra 	BB0_6;
	bra.uni 	BB0_33;

// n_k & 3 == 0: no peeled k iteration; the 4-wide loop starts at %r125 = 0.
BB0_23:
	mov.u32 	%r125, %r119;
	bra.uni 	BB0_30;

// Two or three peeled k iterations. %r120 is preset to 1 (%r59).
BB0_26:
	setp.eq.s32	%p16, %r92, 2;
	mov.u32 	%r120, %r59;
	@%p16 bra 	BB0_28;

	// Three peeled iterations: this one is k = 0, so the entry address is the
	// table base itself (pointer stored at %rd4+16, no index added).
	ld.global.u64 	%rd139, [%rd4+16];
	cvta.to.global.u64 	%rd140, %rd139;
	ld.global.u64 	%rd141, [%rd140];
	cvta.to.global.u64 	%rd142, %rd141;
	add.s64 	%rd143, %rd142, %rd1;
	ld.global.f64 	%fd30, [%rd143];
	mul.f64 	%fd31, %fd30, %fd1;
	st.global.f64 	[%rd143], %fd31;
	mov.u32 	%r119, 1;
	mov.u32 	%r120, 2;

// Peeled iteration at k = %r119 (index zero-extended by mul.wide.u32).
BB0_28:
	ld.global.u64 	%rd144, [%rd4+16];
	cvta.to.global.u64 	%rd145, %rd144;
	mul.wide.u32 	%rd146, %r119, 32;
	add.s64 	%rd147, %rd145, %rd146;
	ld.global.u64 	%rd148, [%rd147];
	cvta.to.global.u64 	%rd149, %rd148;
	add.s64 	%rd150, %rd149, %rd1;
	ld.global.f64 	%fd32, [%rd150];
	mul.f64 	%fd33, %fd32, %fd1;
	st.global.f64 	[%rd150], %fd33;
	add.s32 	%r119, %r119, 1;

// Last peeled iteration at k = %r119. Afterwards %r125 = first k of the
// 4-wide loop and %r119 = %r120 + 1 (used for the remaining-count below).
BB0_29:
	ld.global.u64 	%rd151, [%rd4+16];
	cvta.to.global.u64 	%rd152, %rd151;
	mul.wide.s32 	%rd153, %r119, 32;
	add.s64 	%rd154, %rd152, %rd153;
	ld.global.u64 	%rd155, [%rd154];
	cvta.to.global.u64 	%rd156, %rd155;
	add.s64 	%rd157, %rd156, %rd1;
	ld.global.f64 	%fd34, [%rd157];
	mul.f64 	%fd35, %fd34, %fd1;
	st.global.f64 	[%rd157], %fd35;
	add.s32 	%r125, %r119, 1;
	add.s32 	%r119, %r120, 1;

// Skip the 4-wide k loop when n_k < 4 (unsigned compare).
BB0_30:
	setp.lt.u32	%p17, %r51, 4;
	@%p17 bra 	BB0_33;

	// %r124 = %r119 - n_k: negative count of k iterations still to run.
	sub.s32 	%r124, %r119, %r51;

// 4-wide k loop (no l loop): for k = %r125 .. %r125+3, scale the f64 at
// (pointer stored at [load64(%rd4+16) + 32*k]) + 8*i.
BB0_32:
	// k = %r125.
	ld.global.u64 	%rd158, [%rd4+16];
	cvta.to.global.u64 	%rd159, %rd158;
	mul.wide.s32 	%rd160, %r125, 32;
	add.s64 	%rd161, %rd159, %rd160;
	ld.global.u64 	%rd162, [%rd161];
	cvta.to.global.u64 	%rd163, %rd162;
	add.s64 	%rd164, %rd163, %rd1;
	ld.global.f64 	%fd36, [%rd164];
	mul.f64 	%fd37, %fd36, %fd1;
	st.global.f64 	[%rd164], %fd37;
	// k = %r125 + 1.
	ld.global.u64 	%rd165, [%rd4+16];
	cvta.to.global.u64 	%rd166, %rd165;
	add.s32 	%r95, %r125, 1;
	mul.wide.s32 	%rd167, %r95, 32;
	add.s64 	%rd168, %rd166, %rd167;
	ld.global.u64 	%rd169, [%rd168];
	cvta.to.global.u64 	%rd170, %rd169;
	add.s64 	%rd171, %rd170, %rd1;
	ld.global.f64 	%fd38, [%rd171];
	mul.f64 	%fd39, %fd38, %fd1;
	st.global.f64 	[%rd171], %fd39;
	// k = %r125 + 2.
	ld.global.u64 	%rd172, [%rd4+16];
	cvta.to.global.u64 	%rd173, %rd172;
	add.s32 	%r96, %r125, 2;
	mul.wide.s32 	%rd174, %r96, 32;
	add.s64 	%rd175, %rd173, %rd174;
	ld.global.u64 	%rd176, [%rd175];
	cvta.to.global.u64 	%rd177, %rd176;
	add.s64 	%rd178, %rd177, %rd1;
	ld.global.f64 	%fd40, [%rd178];
	mul.f64 	%fd41, %fd40, %fd1;
	st.global.f64 	[%rd178], %fd41;
	// k = %r125 + 3.
	ld.global.u64 	%rd179, [%rd4+16];
	cvta.to.global.u64 	%rd180, %rd179;
	add.s32 	%r97, %r125, 3;
	mul.wide.s32 	%rd181, %r97, 32;
	add.s64 	%rd182, %rd180, %rd181;
	ld.global.u64 	%rd183, [%rd182];
	cvta.to.global.u64 	%rd184, %rd183;
	add.s64 	%rd185, %rd184, %rd1;
	ld.global.f64 	%fd42, [%rd185];
	mul.f64 	%fd43, %fd42, %fd1;
	st.global.f64 	[%rd185], %fd43;
	// Advance by 4; loop while the negative remaining count is still < 0.
	add.s32 	%r125, %r125, 4;
	add.s32 	%r124, %r124, 4;
	setp.lt.s32	%p18, %r124, 0;
	@%p18 bra 	BB0_32;

// Next j: %r105 = (low 32 bits of %rd3) + 1 is the new j for addressing,
// %r104 is the iteration counter; loop while %r104 - n_j < 0.
BB0_33:
	cvt.u32.u64	%r98, %rd3;
	add.s32 	%r105, %r98, 1;
	add.s32 	%r104, %r104, 1;
	sub.s32 	%r99, %r104, %r52;
	setp.lt.s32	%p19, %r99, 0;
	@%p19 bra 	BB0_4;

// Next sweep: base += 128 * %nctaid.x; loop while base - n_i < 0.
BB0_34:
	mov.u32 	%r100, %nctaid.x;
	shl.b32 	%r101, %r100, 7;
	add.s32 	%r103, %r103, %r101;
	sub.s32 	%r102, %r103, %r53;
	setp.lt.s32	%p20, %r102, 0;
	@%p20 bra 	BB0_1;

	ret;
}

	// .globl	_Z24do_computation_2_228_gpuP2L0iid
// ---------------------------------------------------------------------------
// Kernel _Z24do_computation_2_228_gpuP2L0iid
//
// In-place scaling of one f64 array in global memory:
//   for (base = 0; ; ) {
//     i = %tid.x + base + 128*%ctaid.x;
//     if (!(i > n - 1)) arr[i] *= s;      // arr = pointer stored at desc+0
//     base += 128*%nctaid.x;  if (!(base < n)) break;
//   }
//
// Parameters:
//   param_0 (s32) n : element-count bound
//   param_1 (u64)   : generic address of the descriptor; the array pointer is
//                     read from offset 0 of it on every guarded iteration
//   param_2 (f64) s : scale factor
// ---------------------------------------------------------------------------
.visible .entry _Z24do_computation_2_228_gpuP2L0iid(
	.param .u32 _Z24do_computation_2_228_gpuP2L0iid_param_0,
	.param .u64 _Z24do_computation_2_228_gpuP2L0iid_param_1,
	.param .f64 _Z24do_computation_2_228_gpuP2L0iid_param_2
)
// At most 128 threads per CTA in x; matches the fixed shift by 7 below.
.maxntid 128, 1, 1
{
	.reg .pred 	%p_past_end, %p_more;
	.reg .b32 	%r_n, %r_last, %r_base, %r_tid, %r_cta, %r_cta_off, %r_i, %r_ncta, %r_stride;
	.reg .f64 	%fd_scale, %fd_old, %fd_new;
	.reg .b64 	%rd_desc, %rd_desc_g, %rd_arr, %rd_arr_g, %rd_byte_off, %rd_elem;

	// Arguments and loop-invariant values.
	ld.param.f64 	%fd_scale, [_Z24do_computation_2_228_gpuP2L0iid_param_2];
	ld.param.u64 	%rd_desc, [_Z24do_computation_2_228_gpuP2L0iid_param_1];
	ld.param.u32 	%r_n, [_Z24do_computation_2_228_gpuP2L0iid_param_0];
	cvta.to.global.u64 	%rd_desc_g, %rd_desc;
	// %r_last = n - 1: the largest index that still gets processed.
	add.s32 	%r_last, %r_n, -1;
	mov.u32 	%r_base, 0;

// One sweep: each thread handles element i = %tid.x + base + 128*%ctaid.x.
K228_sweep:
	mov.u32 	%r_cta, %ctaid.x;
	shl.b32 	%r_cta_off, %r_cta, 7;
	mov.u32 	%r_tid, %tid.x;
	add.s32 	%r_i, %r_tid, %r_base;
	add.s32 	%r_i, %r_i, %r_cta_off;
	// Threads whose index lies past n - 1 skip the update but keep sweeping.
	setp.gt.s32	%p_past_end, %r_i, %r_last;
	@%p_past_end bra 	K228_advance;

	// arr = pointer stored at desc+0 (non-coherent load); scale arr[i].
	ld.global.nc.u64 	%rd_arr, [%rd_desc_g];
	cvta.to.global.u64 	%rd_arr_g, %rd_arr;
	mul.wide.s32 	%rd_byte_off, %r_i, 8;
	add.s64 	%rd_elem, %rd_arr_g, %rd_byte_off;
	ld.global.f64 	%fd_old, [%rd_elem];
	mul.f64 	%fd_new, %fd_old, %fd_scale;
	st.global.f64 	[%rd_elem], %fd_new;

// Move to the next sweep: base += 128 * %nctaid.x, repeat while base < n.
K228_advance:
	mov.u32 	%r_ncta, %nctaid.x;
	shl.b32 	%r_stride, %r_ncta, 7;
	add.s32 	%r_base, %r_stride, %r_base;
	setp.lt.s32	%p_more, %r_base, %r_n;
	@%p_more bra 	K228_sweep;

	ret;
}

	// .globl	_Z24do_computation_2_231_gpuP2L0iid
// ---------------------------------------------------------------------------
// Kernel _Z24do_computation_2_231_gpuP2L0iid
//
// In-place scaling of f64 arrays reached through a table of 32-byte entries:
//   for (jbase = 0; ; ) {
//     j = %ctaid.x + jbase;
//     if (!(j + (1 - n_j) > 0)) {
//       i = %tid.x;  ibase = 0;
//       do {
//         if (!(i > n_i - 1)) {
//           tab = load64(desc + 16);           // re-read every time
//           arr = load64(tab + 32*j);
//           f64 at arr + 8*i  *=  s;
//         }
//         i += 128;  ibase += 128;
//       } while (ibase < n_i);
//     }
//     jbase += %nctaid.x;  if (!(jbase - n_j < 0)) break;
//   }
//
// Parameters:
//   param_0 (s32) n_i : bound on the per-thread element index i
//   param_1 (s32) n_j : bound on the per-CTA table index j
//   param_2 (u64)     : generic address of the descriptor
//   param_3 (f64) s   : scale factor
// ---------------------------------------------------------------------------
.visible .entry _Z24do_computation_2_231_gpuP2L0iid(
	.param .u32 _Z24do_computation_2_231_gpuP2L0iid_param_0,
	.param .u32 _Z24do_computation_2_231_gpuP2L0iid_param_1,
	.param .u64 _Z24do_computation_2_231_gpuP2L0iid_param_2,
	.param .f64 _Z24do_computation_2_231_gpuP2L0iid_param_3
)
// At most 128 threads per CTA in x; matches the i stride of 128 below.
.maxntid 128, 1, 1
{
	.reg .pred 	%p_j_out, %p_i_out, %p_more_i, %p_more_j;
	.reg .b32 	%r_ni, %r_nj, %r_bias, %r_last_i, %r_j_base, %r_cta, %r_j, %r_j_chk, %r_i, %r_i_base, %r_ncta, %r_j_left;
	.reg .f64 	%fd_scale, %fd_old, %fd_new;
	.reg .b64 	%rd_desc, %rd_desc_g, %rd_tab, %rd_tab_g, %rd_j_off, %rd_entry, %rd_arr, %rd_arr_g, %rd_i_off, %rd_elem;

	// Arguments.
	ld.param.u32 	%r_ni, [_Z24do_computation_2_231_gpuP2L0iid_param_0];
	ld.param.u32 	%r_nj, [_Z24do_computation_2_231_gpuP2L0iid_param_1];
	ld.param.u64 	%rd_desc, [_Z24do_computation_2_231_gpuP2L0iid_param_2];
	ld.param.f64 	%fd_scale, [_Z24do_computation_2_231_gpuP2L0iid_param_3];
	cvta.to.global.u64 	%rd_desc_g, %rd_desc;
	// Loop-invariant values: %r_bias = 1 - n_j (formed as -n_j + 1, the same
	// value modulo 2^32) and %r_last_i = n_i - 1.
	neg.s32 	%r_bias, %r_nj;
	add.s32 	%r_bias, %r_bias, 1;
	add.s32 	%r_last_i, %r_ni, -1;
	mov.u32 	%r_j_base, 0;

// Outer loop: table index j = %ctaid.x + jbase.
K231_next_j:
	mov.u32 	%r_cta, %ctaid.x;
	add.s32 	%r_j, %r_cta, %r_j_base;
	// Skip this j when j + (1 - n_j) > 0.
	add.s32 	%r_j_chk, %r_j, %r_bias;
	setp.gt.s32	%p_j_out, %r_j_chk, 0;
	@%p_j_out bra 	K231_advance_j;

	// Byte offset of entry j (32-byte entries); i starts at %tid.x.
	mul.wide.s32 	%rd_j_off, %r_j, 32;
	mov.u32 	%r_i, %tid.x;
	mov.u32 	%r_i_base, 0;

// Inner loop: element index i, stepping by 128.
K231_next_i:
	setp.gt.s32	%p_i_out, %r_i, %r_last_i;
	@%p_i_out bra 	K231_advance_i;

	// Walk desc+16 -> table entry j -> array pointer, then scale element i.
	ld.global.nc.u64 	%rd_tab, [%rd_desc_g+16];
	cvta.to.global.u64 	%rd_tab_g, %rd_tab;
	add.s64 	%rd_entry, %rd_tab_g, %rd_j_off;
	ld.global.u64 	%rd_arr, [%rd_entry];
	cvta.to.global.u64 	%rd_arr_g, %rd_arr;
	mul.wide.s32 	%rd_i_off, %r_i, 8;
	add.s64 	%rd_elem, %rd_arr_g, %rd_i_off;
	ld.global.f64 	%fd_old, [%rd_elem];
	mul.f64 	%fd_new, %fd_old, %fd_scale;
	st.global.f64 	[%rd_elem], %fd_new;

// The trip test uses ibase (which starts at 0), not i itself.
K231_advance_i:
	add.s32 	%r_i, %r_i, 128;
	add.s32 	%r_i_base, %r_i_base, 128;
	setp.lt.s32	%p_more_i, %r_i_base, %r_ni;
	@%p_more_i bra 	K231_next_i;

// jbase += %nctaid.x; repeat while jbase - n_j < 0.
K231_advance_j:
	mov.u32 	%r_ncta, %nctaid.x;
	add.s32 	%r_j_base, %r_ncta, %r_j_base;
	sub.s32 	%r_j_left, %r_j_base, %r_nj;
	setp.lt.s32	%p_more_j, %r_j_left, 0;
	@%p_more_j bra 	K231_next_j;

	ret;
}

	// .globl	_Z24do_computation_2_236_gpuP2L0iid
// ---------------------------------------------------------------------------
// Kernel _Z24do_computation_2_236_gpuP2L0iid
//
// Multiplies f64 values in global memory by the scalar param_4, in place.
// The element touched for indices (j, k, i) is found by this pointer walk
// (only the byte offsets are known from this file):
//   T1  = load64(desc + 16)                 table of 32-byte entries
//   T2  = load64(T1 + 32*j + 16)            table of 32-byte entries
//   arr = load64(T2 + 32*k)
//   f64 at arr + 8*i  *=  s
//
// Parameters:
//   param_0 (s32) n_i : bound on the per-thread element index i    (%r31)
//   param_1 (s32) n_k : trip count of the sequential k loop        (%r32)
//   param_2 (s32) n_j : bound on the per-CTA index j               (%r33)
//   param_3 (u64)     : generic address of the descriptor          (%rd5)
//   param_4 (f64) s   : scale factor                               (%fd1)
//
// Loop structure, reading the subtract-and-test comparisons as plain "<"
// (i.e. ignoring 32-bit wraparound):
//   j = %ctaid.x + jbase, jbase += %nctaid.x   while jbase < n_j
//   i = %tid.x  + ibase, ibase += 128          while ibase < n_i
//   k = 0 .. n_k-1, unrolled by 4 with peeled remainder iterations
// ---------------------------------------------------------------------------
.visible .entry _Z24do_computation_2_236_gpuP2L0iid(
	.param .u32 _Z24do_computation_2_236_gpuP2L0iid_param_0,
	.param .u32 _Z24do_computation_2_236_gpuP2L0iid_param_1,
	.param .u32 _Z24do_computation_2_236_gpuP2L0iid_param_2,
	.param .u64 _Z24do_computation_2_236_gpuP2L0iid_param_3,
	.param .f64 _Z24do_computation_2_236_gpuP2L0iid_param_4
)
// At most 128 threads per CTA in x; matches the i stride of 128 below.
.maxntid 128, 1, 1
{
	.reg .pred 	%p<14>;
	.reg .b32 	%r<93>;
	.reg .f64 	%fd<24>;
	.reg .b64 	%rd<96>;


	// Fetch arguments: %r31 = n_i, %r32 = n_k, %r33 = n_j,
	// %rd5 = descriptor address, %fd1 = s.
	ld.param.u32 	%r31, [_Z24do_computation_2_236_gpuP2L0iid_param_0];
	ld.param.u32 	%r32, [_Z24do_computation_2_236_gpuP2L0iid_param_1];
	ld.param.u32 	%r33, [_Z24do_computation_2_236_gpuP2L0iid_param_2];
	ld.param.u64 	%rd5, [_Z24do_computation_2_236_gpuP2L0iid_param_3];
	ld.param.f64 	%fd1, [_Z24do_computation_2_236_gpuP2L0iid_param_4];
	// %r80 = jbase, starting at 0.
	mov.u32 	%r80, 0;

// j loop head: %r36 = j = %ctaid.x + jbase. Skip this j when
// j + (1 - n_j) > 0. %r37 holds the constant 1 and is reused below.
BB3_1:
	mov.u32 	%r35, %ctaid.x;
	add.s32 	%r36, %r35, %r80;
	mov.u32 	%r37, 1;
	sub.s32 	%r38, %r37, %r33;
	add.s32 	%r39, %r36, %r38;
	setp.gt.s32	%p1, %r39, 0;
	@%p1 bra 	BB3_21;

	// Nothing to do for this j when n_k - 1 < 0. %r81 = ibase = 0.
	add.s32 	%r41, %r32, -1;
	setp.lt.s32	%p2, %r41, 0;
	mov.u32 	%r81, 0;
	@%p2 bra 	BB3_21;

// i loop head: %r43 = i = %tid.x + ibase. Skip this i when
// i + (1 - n_i) > 0.
BB3_3:
	mov.u32 	%r42, %tid.x;
	add.s32 	%r43, %r42, %r81;
	sub.s32 	%r45, %r37, %r31;
	add.s32 	%r46, %r43, %r45;
	setp.gt.s32	%p3, %r46, 0;
	@%p3 bra 	BB3_20;

	// With n_k < 4 skip the 4-wide loop and run only remainder code
	// (%r9 = k = 0). %r48 holds the constant 0 and is reused below.
	setp.lt.s32	%p4, %r32, 4;
	mov.u32 	%r48, 0;
	mov.u32 	%r9, %r48;
	@%p4 bra 	BB3_8;

	// Setup for the 4-wide k loop: %rd1 = 8*i; %rd2 = address of the pointer
	// field at offset 16 of table entry j (load64(desc+16) + 32*j + 16);
	// %r82 = -n_k is the negative remaining count; %r9 = k = 0.
	mul.wide.s32 	%rd1, %r43, 8;
	cvta.to.global.u64 	%rd6, %rd5;
	ld.global.nc.u64 	%rd7, [%rd6+16];
	cvta.to.global.u64 	%rd8, %rd7;
	mul.wide.s32 	%rd9, %r36, 32;
	add.s64 	%rd10, %rd8, %rd9;
	add.s64 	%rd2, %rd10, 16;
	neg.s32 	%r82, %r32;
	mov.u32 	%r9, 0;

// k loop, unrolled by 4. Each copy reloads the table pointer from %rd2 and
// scales the f64 at load64(load64(%rd2) + 32*k) + 8*i.
BB3_6:
	// Copy 0: k = %r9.
	ld.global.u64 	%rd11, [%rd2];
	cvta.to.global.u64 	%rd12, %rd11;
	mul.wide.s32 	%rd13, %r9, 32;
	add.s64 	%rd14, %rd12, %rd13;
	ld.global.u64 	%rd15, [%rd14];
	cvta.to.global.u64 	%rd16, %rd15;
	add.s64 	%rd17, %rd16, %rd1;
	ld.global.f64 	%fd2, [%rd17];
	mul.f64 	%fd3, %fd2, %fd1;
	// Indices for copies 1..3 are formed before the store of copy 0.
	add.s32 	%r54, %r9, 1;
	add.s32 	%r55, %r9, 2;
	add.s32 	%r56, %r9, 3;
	st.global.f64 	[%rd17], %fd3;
	// Copy 1: k + 1 (%r54).
	ld.global.u64 	%rd18, [%rd2];
	cvta.to.global.u64 	%rd19, %rd18;
	mul.wide.s32 	%rd20, %r54, 32;
	add.s64 	%rd21, %rd19, %rd20;
	ld.global.u64 	%rd22, [%rd21];
	cvta.to.global.u64 	%rd23, %rd22;
	add.s64 	%rd24, %rd23, %rd1;
	ld.global.f64 	%fd4, [%rd24];
	mul.f64 	%fd5, %fd4, %fd1;
	st.global.f64 	[%rd24], %fd5;
	// Copy 2: k + 2 (%r55).
	ld.global.u64 	%rd25, [%rd2];
	cvta.to.global.u64 	%rd26, %rd25;
	mul.wide.s32 	%rd27, %r55, 32;
	add.s64 	%rd28, %rd26, %rd27;
	ld.global.u64 	%rd29, [%rd28];
	cvta.to.global.u64 	%rd30, %rd29;
	add.s64 	%rd31, %rd30, %rd1;
	ld.global.f64 	%fd6, [%rd31];
	mul.f64 	%fd7, %fd6, %fd1;
	st.global.f64 	[%rd31], %fd7;
	// Copy 3: k + 3 (%r56).
	ld.global.u64 	%rd32, [%rd2];
	cvta.to.global.u64 	%rd33, %rd32;
	mul.wide.s32 	%rd34, %r56, 32;
	add.s64 	%rd35, %rd33, %rd34;
	ld.global.u64 	%rd36, [%rd35];
	cvta.to.global.u64 	%rd37, %rd36;
	add.s64 	%rd38, %rd37, %rd1;
	ld.global.f64 	%fd8, [%rd38];
	mul.f64 	%fd9, %fd8, %fd1;
	st.global.f64 	[%rd38], %fd9;
	// k += 4. Loop again while (old %r82 + 7) < 0, i.e. at least four more
	// iterations remain after this group; %r82 becomes old %r82 + 4.
	add.s32 	%r9, %r9, 4;
	add.s32 	%r7, %r82, 4;
	add.s32 	%r57, %r82, 7;
	setp.lt.s32	%p5, %r57, 0;
	mov.u32 	%r82, %r7;
	@%p5 bra 	BB3_6;

	// Remaining count %r7 >= 0 means nothing is left: go to the next i.
	setp.gt.s32	%p6, %r7, -1;
	@%p6 bra 	BB3_20;

// k loop remainder. %rd3 is recomputed like %rd2 above (address of the
// pointer field at offset 16 of table entry j).
// %r10 = max(k + 1 - n_k, 0) + n_k - k; %r11 = %r10 & 3 is the number of
// single iterations to peel: 3 -> BB3_14, 2 -> BB3_15, 1 -> BB3_16,
// 0 -> BB3_17.
BB3_8:
	cvta.to.global.u64 	%rd39, %rd5;
	ld.global.nc.u64 	%rd40, [%rd39+16];
	cvta.to.global.u64 	%rd41, %rd40;
	mul.wide.s32 	%rd42, %r36, 32;
	add.s64 	%rd43, %rd41, %rd42;
	add.s64 	%rd3, %rd43, 16;
	sub.s32 	%r61, %r37, %r32;
	add.s32 	%r62, %r61, %r9;
	max.s32 	%r64, %r62, %r48;
	add.s32 	%r65, %r64, %r32;
	sub.s32 	%r10, %r65, %r9;
	and.b32  	%r11, %r10, 3;
	setp.eq.s32	%p7, %r11, 0;
	@%p7 bra 	BB3_9;
	bra.uni 	BB3_10;

// No peeled iteration: the 4-wide loop would start at k = %r9.
BB3_9:
	mov.u32 	%r92, %r9;
	bra.uni 	BB3_17;

BB3_10:
	setp.eq.s32	%p8, %r11, 1;
	@%p8 bra 	BB3_11;
	bra.uni 	BB3_12;

// One peeled iteration at k = %r9.
BB3_11:
	mov.u32 	%r87, %r9;
	bra.uni 	BB3_16;

BB3_12:
	setp.eq.s32	%p9, %r11, 2;
	@%p9 bra 	BB3_13;
	bra.uni 	BB3_14;

// Two peeled iterations starting at k = %r9.
BB3_13:
	mov.u32 	%r85, %r9;
	bra.uni 	BB3_15;

// First of three peeled iterations: k = %r9. The 8*i offset is recomputed
// in each peeled block.
BB3_14:
	ld.global.u64 	%rd44, [%rd3];
	cvta.to.global.u64 	%rd45, %rd44;
	mul.wide.s32 	%rd46, %r9, 32;
	add.s64 	%rd47, %rd45, %rd46;
	ld.global.u64 	%rd48, [%rd47];
	cvta.to.global.u64 	%rd49, %rd48;
	mul.wide.s32 	%rd50, %r43, 8;
	add.s64 	%rd51, %rd49, %rd50;
	ld.global.f64 	%fd10, [%rd51];
	mul.f64 	%fd11, %fd10, %fd1;
	st.global.f64 	[%rd51], %fd11;
	add.s32 	%r85, %r9, 1;
	add.s32 	%r9, %r9, 1;

// Peeled iteration at k = %r85.
BB3_15:
	ld.global.u64 	%rd52, [%rd3];
	cvta.to.global.u64 	%rd53, %rd52;
	mul.wide.s32 	%rd54, %r85, 32;
	add.s64 	%rd55, %rd53, %rd54;
	ld.global.u64 	%rd56, [%rd55];
	cvta.to.global.u64 	%rd57, %rd56;
	mul.wide.s32 	%rd58, %r43, 8;
	add.s64 	%rd59, %rd57, %rd58;
	ld.global.f64 	%fd12, [%rd59];
	mul.f64 	%fd13, %fd12, %fd1;
	st.global.f64 	[%rd59], %fd13;
	add.s32 	%r87, %r85, 1;
	add.s32 	%r9, %r9, 1;

// Last peeled iteration at k = %r87; %r92 = index for the 4-wide loop.
BB3_16:
	ld.global.u64 	%rd60, [%rd3];
	cvta.to.global.u64 	%rd61, %rd60;
	mul.wide.s32 	%rd62, %r87, 32;
	add.s64 	%rd63, %rd61, %rd62;
	ld.global.u64 	%rd64, [%rd63];
	cvta.to.global.u64 	%rd65, %rd64;
	mul.wide.s32 	%rd66, %r43, 8;
	add.s64 	%rd67, %rd65, %rd66;
	ld.global.f64 	%fd14, [%rd67];
	mul.f64 	%fd15, %fd14, %fd1;
	st.global.f64 	[%rd67], %fd15;
	add.s32 	%r92, %r87, 1;
	add.s32 	%r9, %r9, 1;

// If %r10 (count pending when the remainder code was entered) is below 4
// (unsigned compare), the k loop is finished for this i.
BB3_17:
	setp.lt.u32	%p10, %r10, 4;
	@%p10 bra 	BB3_20;

	// %r91 = k - n_k: negative remaining count; %rd4 = 8*i.
	sub.s32 	%r91, %r9, %r32;
	mul.wide.s32 	%rd4, %r43, 8;

// 4-wide k loop over indices %r92 .. %r92+3, same pointer walk as BB3_6.
BB3_19:
	// k = %r92.
	ld.global.u64 	%rd68, [%rd3];
	cvta.to.global.u64 	%rd69, %rd68;
	mul.wide.s32 	%rd70, %r92, 32;
	add.s64 	%rd71, %rd69, %rd70;
	ld.global.u64 	%rd72, [%rd71];
	cvta.to.global.u64 	%rd73, %rd72;
	add.s64 	%rd74, %rd73, %rd4;
	ld.global.f64 	%fd16, [%rd74];
	mul.f64 	%fd17, %fd16, %fd1;
	st.global.f64 	[%rd74], %fd17;
	// k = %r92 + 1.
	ld.global.u64 	%rd75, [%rd3];
	cvta.to.global.u64 	%rd76, %rd75;
	add.s32 	%r74, %r92, 1;
	mul.wide.s32 	%rd77, %r74, 32;
	add.s64 	%rd78, %rd76, %rd77;
	ld.global.u64 	%rd79, [%rd78];
	cvta.to.global.u64 	%rd80, %rd79;
	add.s64 	%rd81, %rd80, %rd4;
	ld.global.f64 	%fd18, [%rd81];
	mul.f64 	%fd19, %fd18, %fd1;
	st.global.f64 	[%rd81], %fd19;
	// k = %r92 + 2.
	ld.global.u64 	%rd82, [%rd3];
	cvta.to.global.u64 	%rd83, %rd82;
	add.s32 	%r75, %r92, 2;
	mul.wide.s32 	%rd84, %r75, 32;
	add.s64 	%rd85, %rd83, %rd84;
	ld.global.u64 	%rd86, [%rd85];
	cvta.to.global.u64 	%rd87, %rd86;
	add.s64 	%rd88, %rd87, %rd4;
	ld.global.f64 	%fd20, [%rd88];
	mul.f64 	%fd21, %fd20, %fd1;
	st.global.f64 	[%rd88], %fd21;
	// k = %r92 + 3.
	ld.global.u64 	%rd89, [%rd3];
	cvta.to.global.u64 	%rd90, %rd89;
	add.s32 	%r76, %r92, 3;
	mul.wide.s32 	%rd91, %r76, 32;
	add.s64 	%rd92, %rd90, %rd91;
	ld.global.u64 	%rd93, [%rd92];
	cvta.to.global.u64 	%rd94, %rd93;
	add.s64 	%rd95, %rd94, %rd4;
	ld.global.f64 	%fd22, [%rd95];
	mul.f64 	%fd23, %fd22, %fd1;
	st.global.f64 	[%rd95], %fd23;
	// Advance by 4; loop while the negative remaining count is still < 0.
	add.s32 	%r92, %r92, 4;
	add.s32 	%r91, %r91, 4;
	setp.lt.s32	%p11, %r91, 0;
	@%p11 bra 	BB3_19;

// Next i: ibase += 128; loop while ibase - n_i < 0.
BB3_20:
	add.s32 	%r81, %r81, 128;
	sub.s32 	%r77, %r81, %r31;
	setp.lt.s32	%p12, %r77, 0;
	@%p12 bra 	BB3_3;

// Next j: jbase += %nctaid.x; loop while jbase - n_j < 0.
BB3_21:
	mov.u32 	%r78, %nctaid.x;
	add.s32 	%r80, %r78, %r80;
	sub.s32 	%r79, %r80, %r33;
	setp.lt.s32	%p13, %r79, 0;
	@%p13 bra 	BB3_1;

	ret;
}

	// .globl	_Z24do_computation_2_242_gpuP2L0iid
// ---------------------------------------------------------------------------
// Kernel _Z24do_computation_2_242_gpuP2L0iid
//
// Multiplies f64 values in global memory by the scalar param_5, in place.
// The element touched for indices (j, k, l, i) is found by this pointer walk
// (only the byte offsets are known from this file):
//   T1  = load64(desc + 16)                 table of 32-byte entries
//   T2  = load64(T1 + 32*j + 16)            table of 32-byte entries
//   T3  = load64(T2 + 32*k + 16)            table of 16-byte entries
//   arr = load64(T3 + 16*l)
//   f64 at arr + 8*i  *=  s
//
// Parameters:
//   param_0 (s32) n_i : bound on the per-thread element index i    (%r35)
//   param_1 (s32) n_l : trip count of the innermost l loop         (%r36)
//   param_2 (s32) n_k : trip count of the k loop                   (%r37)
//   param_3 (s32) n_j : bound on the per-CTA index j               (%r38)
//   param_4 (u64)     : generic address of the descriptor          (%rd7)
//   param_5 (f64) s   : scale factor                               (%fd1)
//
// Loop structure, reading the subtract-and-test comparisons as plain "<"
// (i.e. ignoring 32-bit wraparound):
//   j = %ctaid.x + jbase, jbase += %nctaid.x   while jbase < n_j
//   i = %tid.x  + ibase, ibase += 128          while ibase < n_i
//   k = 0 .. n_k-1
//   l = 0 .. n_l-1, unrolled by 4 with peeled remainder iterations
// ---------------------------------------------------------------------------
.visible .entry _Z24do_computation_2_242_gpuP2L0iid(
	.param .u32 _Z24do_computation_2_242_gpuP2L0iid_param_0,
	.param .u32 _Z24do_computation_2_242_gpuP2L0iid_param_1,
	.param .u32 _Z24do_computation_2_242_gpuP2L0iid_param_2,
	.param .u32 _Z24do_computation_2_242_gpuP2L0iid_param_3,
	.param .u64 _Z24do_computation_2_242_gpuP2L0iid_param_4,
	.param .f64 _Z24do_computation_2_242_gpuP2L0iid_param_5
)
// At most 128 threads per CTA in x; matches the i stride of 128 below.
.maxntid 128, 1, 1
{
	.reg .pred 	%p<17>;
	.reg .b32 	%r<104>;
	.reg .f64 	%fd<24>;
	.reg .b64 	%rd<136>;


	// Fetch arguments: %r35 = n_i, %r36 = n_l, %r37 = n_k, %r38 = n_j,
	// %rd7 = descriptor address, %fd1 = s.
	ld.param.u32 	%r35, [_Z24do_computation_2_242_gpuP2L0iid_param_0];
	ld.param.u32 	%r36, [_Z24do_computation_2_242_gpuP2L0iid_param_1];
	ld.param.u32 	%r37, [_Z24do_computation_2_242_gpuP2L0iid_param_2];
	ld.param.u32 	%r38, [_Z24do_computation_2_242_gpuP2L0iid_param_3];
	ld.param.u64 	%rd7, [_Z24do_computation_2_242_gpuP2L0iid_param_4];
	ld.param.f64 	%fd1, [_Z24do_computation_2_242_gpuP2L0iid_param_5];
	// %r89 = jbase, starting at 0.
	mov.u32 	%r89, 0;

// j loop head: %r41 = j = %ctaid.x + jbase. Skip this j when
// j + (1 - n_j) > 0. %r42 holds the constant 1 and is reused below.
BB4_1:
	mov.u32 	%r40, %ctaid.x;
	add.s32 	%r41, %r40, %r89;
	mov.u32 	%r42, 1;
	sub.s32 	%r43, %r42, %r38;
	add.s32 	%r44, %r41, %r43;
	setp.gt.s32	%p1, %r44, 0;
	@%p1 bra 	BB4_24;

	// %r90 = ibase = 0.
	mov.u32 	%r90, 0;

// i loop head: %r48 = i = %tid.x + ibase. Skip this i when either
// n_k - 1 < 0 (%p2) or i + (1 - n_i) > 0 (%p3).
BB4_3:
	add.s32 	%r46, %r37, -1;
	setp.lt.s32	%p2, %r46, 0;
	mov.u32 	%r47, %tid.x;
	add.s32 	%r48, %r47, %r90;
	sub.s32 	%r50, %r42, %r35;
	add.s32 	%r51, %r48, %r50;
	setp.gt.s32	%p3, %r51, 0;
	or.pred  	%p4, %p3, %p2;
	@%p4 bra 	BB4_23;

	// %r91 = k used for addressing, %r92 = k iteration counter; both start at 0.
	mov.u32 	%r91, 0;
	mov.u32 	%r92, %r91;

// k loop head: no l iterations for this k when n_l - 1 < 0.
BB4_5:
	add.s32 	%r54, %r36, -1;
	setp.lt.s32	%p5, %r54, 0;
	@%p5 bra 	BB4_22;

	// With n_l < 4 skip the 4-wide loop and run only remainder code
	// (%r11 = l = 0). %r56 holds the constant 0 and is reused below.
	setp.lt.s32	%p6, %r36, 4;
	mov.u32 	%r56, 0;
	mov.u32 	%r11, %r56;
	@%p6 bra 	BB4_10;

	// Setup for the 4-wide l loop: %rd1 = k (sign-extended); %rd2 = address of
	// the pointer field at offset 16 of table entry j
	// (load64(desc+16) + 32*j + 16); %r93 = -n_l is the negative remaining
	// count; %r11 = l = 0; %rd3 = 8*i.
	cvt.s64.s32	%rd1, %r91;
	cvta.to.global.u64 	%rd8, %rd7;
	ld.global.nc.u64 	%rd9, [%rd8+16];
	cvta.to.global.u64 	%rd10, %rd9;
	mul.wide.s32 	%rd11, %r41, 32;
	add.s64 	%rd12, %rd10, %rd11;
	add.s64 	%rd2, %rd12, 16;
	neg.s32 	%r93, %r36;
	mov.u32 	%r11, 0;
	mul.wide.s32 	%rd3, %r48, 8;

// l loop, unrolled by 4. Each copy re-walks the chain from %rd2:
//   load64(%rd2) + 32*k -> load64(+16) + 16*l -> load64(+0) + 8*i
// and scales the f64 found there.
BB4_8:
	// Copy 0: l = %r11.
	ld.global.u64 	%rd13, [%rd2];
	cvta.to.global.u64 	%rd14, %rd13;
	shl.b64 	%rd15, %rd1, 5;
	add.s64 	%rd16, %rd14, %rd15;
	ld.global.u64 	%rd17, [%rd16+16];
	cvta.to.global.u64 	%rd18, %rd17;
	mul.wide.s32 	%rd19, %r11, 16;
	add.s64 	%rd20, %rd18, %rd19;
	ld.global.u64 	%rd21, [%rd20];
	cvta.to.global.u64 	%rd22, %rd21;
	add.s64 	%rd23, %rd22, %rd3;
	ld.global.f64 	%fd2, [%rd23];
	mul.f64 	%fd3, %fd2, %fd1;
	// Indices for copies 1..3 are formed before the store of copy 0.
	add.s32 	%r62, %r11, 1;
	add.s32 	%r63, %r11, 2;
	add.s32 	%r64, %r11, 3;
	st.global.f64 	[%rd23], %fd3;
	// Copy 1: l + 1 (%r62).
	ld.global.u64 	%rd24, [%rd2];
	cvta.to.global.u64 	%rd25, %rd24;
	add.s64 	%rd26, %rd25, %rd15;
	ld.global.u64 	%rd27, [%rd26+16];
	cvta.to.global.u64 	%rd28, %rd27;
	mul.wide.s32 	%rd29, %r62, 16;
	add.s64 	%rd30, %rd28, %rd29;
	ld.global.u64 	%rd31, [%rd30];
	cvta.to.global.u64 	%rd32, %rd31;
	add.s64 	%rd33, %rd32, %rd3;
	ld.global.f64 	%fd4, [%rd33];
	mul.f64 	%fd5, %fd4, %fd1;
	st.global.f64 	[%rd33], %fd5;
	// Copy 2: l + 2 (%r63).
	ld.global.u64 	%rd34, [%rd2];
	cvta.to.global.u64 	%rd35, %rd34;
	add.s64 	%rd36, %rd35, %rd15;
	ld.global.u64 	%rd37, [%rd36+16];
	cvta.to.global.u64 	%rd38, %rd37;
	mul.wide.s32 	%rd39, %r63, 16;
	add.s64 	%rd40, %rd38, %rd39;
	ld.global.u64 	%rd41, [%rd40];
	cvta.to.global.u64 	%rd42, %rd41;
	add.s64 	%rd43, %rd42, %rd3;
	ld.global.f64 	%fd6, [%rd43];
	mul.f64 	%fd7, %fd6, %fd1;
	st.global.f64 	[%rd43], %fd7;
	// Copy 3: l + 3 (%r64).
	ld.global.u64 	%rd44, [%rd2];
	cvta.to.global.u64 	%rd45, %rd44;
	add.s64 	%rd46, %rd45, %rd15;
	ld.global.u64 	%rd47, [%rd46+16];
	cvta.to.global.u64 	%rd48, %rd47;
	mul.wide.s32 	%rd49, %r64, 16;
	add.s64 	%rd50, %rd48, %rd49;
	ld.global.u64 	%rd51, [%rd50];
	cvta.to.global.u64 	%rd52, %rd51;
	add.s64 	%rd53, %rd52, %rd3;
	ld.global.f64 	%fd8, [%rd53];
	mul.f64 	%fd9, %fd8, %fd1;
	st.global.f64 	[%rd53], %fd9;
	// l += 4. Loop again while (old %r93 + 7) < 0, i.e. at least four more
	// iterations remain after this group; %r93 becomes old %r93 + 4.
	add.s32 	%r11, %r11, 4;
	add.s32 	%r9, %r93, 4;
	add.s32 	%r65, %r93, 7;
	setp.lt.s32	%p7, %r65, 0;
	mov.u32 	%r93, %r9;
	@%p7 bra 	BB4_8;

	// Remaining count %r9 >= 0 means nothing is left: go to the next k.
	setp.gt.s32	%p8, %r9, -1;
	@%p8 bra 	BB4_22;

// l loop remainder. %rd4 is recomputed like %rd2 above (address of the
// pointer field at offset 16 of table entry j).
// %r12 = max(l + 1 - n_l, 0) + n_l - l; %r13 = %r12 & 3 is the number of
// single iterations to peel: 3 -> BB4_16, 2 -> BB4_17, 1 -> BB4_18,
// 0 -> BB4_19.
BB4_10:
	cvta.to.global.u64 	%rd54, %rd7;
	ld.global.nc.u64 	%rd55, [%rd54+16];
	cvta.to.global.u64 	%rd56, %rd55;
	mul.wide.s32 	%rd57, %r41, 32;
	add.s64 	%rd58, %rd56, %rd57;
	add.s64 	%rd4, %rd58, 16;
	sub.s32 	%r69, %r42, %r36;
	add.s32 	%r70, %r69, %r11;
	max.s32 	%r72, %r70, %r56;
	add.s32 	%r73, %r72, %r36;
	sub.s32 	%r12, %r73, %r11;
	and.b32  	%r13, %r12, 3;
	setp.eq.s32	%p9, %r13, 0;
	@%p9 bra 	BB4_11;
	bra.uni 	BB4_12;

// No peeled iteration: the 4-wide loop would start at l = %r11.
BB4_11:
	mov.u32 	%r103, %r11;
	bra.uni 	BB4_19;

BB4_12:
	setp.eq.s32	%p10, %r13, 1;
	@%p10 bra 	BB4_13;
	bra.uni 	BB4_14;

// One peeled iteration at l = %r11.
BB4_13:
	mov.u32 	%r98, %r11;
	bra.uni 	BB4_18;

BB4_14:
	setp.eq.s32	%p11, %r13, 2;
	@%p11 bra 	BB4_15;
	bra.uni 	BB4_16;

// Two peeled iterations starting at l = %r11.
BB4_15:
	mov.u32 	%r96, %r11;
	bra.uni 	BB4_17;

// First of three peeled iterations: l = %r11. The 32*k and 8*i offsets are
// recomputed in each peeled block.
BB4_16:
	ld.global.u64 	%rd59, [%rd4];
	cvta.to.global.u64 	%rd60, %rd59;
	mul.wide.s32 	%rd61, %r91, 32;
	add.s64 	%rd62, %rd60, %rd61;
	ld.global.u64 	%rd63, [%rd62+16];
	cvta.to.global.u64 	%rd64, %rd63;
	mul.wide.s32 	%rd65, %r11, 16;
	add.s64 	%rd66, %rd64, %rd65;
	ld.global.u64 	%rd67, [%rd66];
	cvta.to.global.u64 	%rd68, %rd67;
	mul.wide.s32 	%rd69, %r48, 8;
	add.s64 	%rd70, %rd68, %rd69;
	ld.global.f64 	%fd10, [%rd70];
	mul.f64 	%fd11, %fd10, %fd1;
	st.global.f64 	[%rd70], %fd11;
	add.s32 	%r96, %r11, 1;
	add.s32 	%r11, %r11, 1;

// Peeled iteration at l = %r96.
BB4_17:
	ld.global.u64 	%rd71, [%rd4];
	cvta.to.global.u64 	%rd72, %rd71;
	mul.wide.s32 	%rd73, %r91, 32;
	add.s64 	%rd74, %rd72, %rd73;
	ld.global.u64 	%rd75, [%rd74+16];
	cvta.to.global.u64 	%rd76, %rd75;
	mul.wide.s32 	%rd77, %r96, 16;
	add.s64 	%rd78, %rd76, %rd77;
	ld.global.u64 	%rd79, [%rd78];
	cvta.to.global.u64 	%rd80, %rd79;
	mul.wide.s32 	%rd81, %r48, 8;
	add.s64 	%rd82, %rd80, %rd81;
	ld.global.f64 	%fd12, [%rd82];
	mul.f64 	%fd13, %fd12, %fd1;
	st.global.f64 	[%rd82], %fd13;
	add.s32 	%r98, %r96, 1;
	add.s32 	%r11, %r11, 1;

// Last peeled iteration at l = %r98; %r103 = index for the 4-wide loop.
BB4_18:
	ld.global.u64 	%rd83, [%rd4];
	cvta.to.global.u64 	%rd84, %rd83;
	mul.wide.s32 	%rd85, %r91, 32;
	add.s64 	%rd86, %rd84, %rd85;
	ld.global.u64 	%rd87, [%rd86+16];
	cvta.to.global.u64 	%rd88, %rd87;
	mul.wide.s32 	%rd89, %r98, 16;
	add.s64 	%rd90, %rd88, %rd89;
	ld.global.u64 	%rd91, [%rd90];
	cvta.to.global.u64 	%rd92, %rd91;
	mul.wide.s32 	%rd93, %r48, 8;
	add.s64 	%rd94, %rd92, %rd93;
	ld.global.f64 	%fd14, [%rd94];
	mul.f64 	%fd15, %fd14, %fd1;
	st.global.f64 	[%rd94], %fd15;
	add.s32 	%r103, %r98, 1;
	add.s32 	%r11, %r11, 1;

// If %r12 (count pending when the remainder code was entered) is below 4
// (unsigned compare), the l loop is finished for this k.
BB4_19:
	setp.lt.u32	%p12, %r12, 4;
	@%p12 bra 	BB4_22;

	// %r102 = l - n_l: negative remaining count; %rd5 = k (sign-extended);
	// %rd6 = 8*i.
	sub.s32 	%r102, %r11, %r36;
	cvt.s64.s32	%rd5, %r91;
	mul.wide.s32 	%rd6, %r48, 8;

// 4-wide l loop over indices %r103 .. %r103+3, same pointer walk as BB4_8.
BB4_21:
	// l = %r103.
	ld.global.u64 	%rd95, [%rd4];
	cvta.to.global.u64 	%rd96, %rd95;
	shl.b64 	%rd97, %rd5, 5;
	add.s64 	%rd98, %rd96, %rd97;
	ld.global.u64 	%rd99, [%rd98+16];
	cvta.to.global.u64 	%rd100, %rd99;
	mul.wide.s32 	%rd101, %r103, 16;
	add.s64 	%rd102, %rd100, %rd101;
	ld.global.u64 	%rd103, [%rd102];
	cvta.to.global.u64 	%rd104, %rd103;
	add.s64 	%rd105, %rd104, %rd6;
	ld.global.f64 	%fd16, [%rd105];
	mul.f64 	%fd17, %fd16, %fd1;
	st.global.f64 	[%rd105], %fd17;
	// l = %r103 + 1.
	ld.global.u64 	%rd106, [%rd4];
	cvta.to.global.u64 	%rd107, %rd106;
	add.s64 	%rd108, %rd107, %rd97;
	ld.global.u64 	%rd109, [%rd108+16];
	cvta.to.global.u64 	%rd110, %rd109;
	add.s32 	%r82, %r103, 1;
	mul.wide.s32 	%rd111, %r82, 16;
	add.s64 	%rd112, %rd110, %rd111;
	ld.global.u64 	%rd113, [%rd112];
	cvta.to.global.u64 	%rd114, %rd113;
	add.s64 	%rd115, %rd114, %rd6;
	ld.global.f64 	%fd18, [%rd115];
	mul.f64 	%fd19, %fd18, %fd1;
	st.global.f64 	[%rd115], %fd19;
	// l = %r103 + 2.
	ld.global.u64 	%rd116, [%rd4];
	cvta.to.global.u64 	%rd117, %rd116;
	add.s64 	%rd118, %rd117, %rd97;
	ld.global.u64 	%rd119, [%rd118+16];
	cvta.to.global.u64 	%rd120, %rd119;
	add.s32 	%r83, %r103, 2;
	mul.wide.s32 	%rd121, %r83, 16;
	add.s64 	%rd122, %rd120, %rd121;
	ld.global.u64 	%rd123, [%rd122];
	cvta.to.global.u64 	%rd124, %rd123;
	add.s64 	%rd125, %rd124, %rd6;
	ld.global.f64 	%fd20, [%rd125];
	mul.f64 	%fd21, %fd20, %fd1;
	st.global.f64 	[%rd125], %fd21;
	// l = %r103 + 3.
	ld.global.u64 	%rd126, [%rd4];
	cvta.to.global.u64 	%rd127, %rd126;
	add.s64 	%rd128, %rd127, %rd97;
	ld.global.u64 	%rd129, [%rd128+16];
	cvta.to.global.u64 	%rd130, %rd129;
	add.s32 	%r84, %r103, 3;
	mul.wide.s32 	%rd131, %r84, 16;
	add.s64 	%rd132, %rd130, %rd131;
	ld.global.u64 	%rd133, [%rd132];
	cvta.to.global.u64 	%rd134, %rd133;
	add.s64 	%rd135, %rd134, %rd6;
	ld.global.f64 	%fd22, [%rd135];
	mul.f64 	%fd23, %fd22, %fd1;
	st.global.f64 	[%rd135], %fd23;
	// Advance by 4; loop while the negative remaining count is still < 0.
	add.s32 	%r103, %r103, 4;
	add.s32 	%r102, %r102, 4;
	setp.lt.s32	%p13, %r102, 0;
	@%p13 bra 	BB4_21;

// Next k: bump both the addressing index and the counter; loop while
// counter - n_k < 0.
BB4_22:
	add.s32 	%r91, %r91, 1;
	add.s32 	%r92, %r92, 1;
	sub.s32 	%r85, %r92, %r37;
	setp.lt.s32	%p14, %r85, 0;
	@%p14 bra 	BB4_5;

// Next i: ibase += 128; loop while ibase - n_i < 0.
BB4_23:
	add.s32 	%r90, %r90, 128;
	sub.s32 	%r86, %r90, %r35;
	setp.lt.s32	%p15, %r86, 0;
	@%p15 bra 	BB4_3;

// Next j: jbase += %nctaid.x; loop while jbase - n_j < 0.
BB4_24:
	mov.u32 	%r87, %nctaid.x;
	add.s32 	%r89, %r87, %r89;
	sub.s32 	%r88, %r89, %r38;
	setp.lt.s32	%p16, %r88, 0;
	@%p16 bra 	BB4_1;

	ret;
}


