Commit 62200fd6 authored by Leonardo Solis's avatar Leonardo Solis
Browse files

set __constant to 12KB + aoc fp flags


Former-commit-id: d9099aa5
parent 69f8fedf
......@@ -517,7 +517,35 @@ else
SEP_FGRID_FLAG=
endif
FIPO_FLAG = $(FIPOCO_FLAG) $(FIPOIE_FLAG) $(FIPOIA_FLAG) $(FIPOLS1_FLAG) $(FIPOLS2_FLAG) $(FIPOLS3_FLAG) $(COPYPOPENE_FLAG) $(SEP_FGRID_FLAG)
FIPO_FLAG = $(FIPOCO_FLAG) $(FIPOIE_FLAG) $(FIPOIA_FLAG) $(FIPOLS1_FLAG) $(FIPOLS2_FLAG) $(FIPOLS3_FLAG) $(COPYPOPENE_FLAG) $(SEP_FGRID_FLAG)
# =============================
# AOC flags
# =============================
# Switch: use a customized __constant memory size.
#   default : 16 KB
#   custom  : 12 KB = 12288 bytes
CUSTOMSIZE_CONSTMEM = YES
# Switch: floating-point related aoc options.
#   --fp-relaxed : relax the order of fp operations
#   --fpc        : reduce fp rounding operations
RELAXED_REDUCED_FLOATINGPOINT = YES

# Each *_FLAG variable starts out empty and is filled in only when the
# corresponding switch above is exactly YES.
CUSTOMSIZE_CONSTMEM_FLAG =
ifeq ($(CUSTOMSIZE_CONSTMEM),YES)
CUSTOMSIZE_CONSTMEM_FLAG = --const-cache-bytes 12288
endif

RELAXED_REDUCED_FLOATINGPOINT_FLAG =
ifeq ($(RELAXED_REDUCED_FLOATINGPOINT),YES)
RELAXED_REDUCED_FLOATINGPOINT_FLAG = --fp-relaxed --fpc
endif

# Combination of the two optional flag groups, meant to be added to aoc command lines.
AOC_FLAG = $(CUSTOMSIZE_CONSTMEM_FLAG) $(RELAXED_REDUCED_FLOATINGPOINT_FLAG)
# =============================
......@@ -574,7 +602,7 @@ BOARD := Proc10A_X115
# $(KRNL_NAME).aoco
# $(KRNL_NAME).aocx
keremu: $(DEV_SRC)
aoc -march=emulator -v --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR)/$(KRNL_NAME).aocx $(REP) $(FIPO_FLAG) -g
aoc -march=emulator -v --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR)/$(KRNL_NAME).aocx $(REP) $(FIPO_FLAG) $(AOC_FLAG) -g
@echo " "
@echo "==============================================================="
@echo "INFO: emulation files are under: bin/$(KRNL_NAME)(.aoco)(.aocx)"
......@@ -602,7 +630,7 @@ emu: $(TARGET_DIR)/$(TARGET) keremu
# $(KRNL_NAME).aoco
# No actual hardware is built (NO .aocx)
kerrpt: $(DEV_SRC)
aoc --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR)/$(KRNL_NAME).aoco $(REP) $(FIPO_FLAG) -c --report -g
aoc --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR)/$(KRNL_NAME).aoco $(REP) $(FIPO_FLAG) $(AOC_FLAG) -c --report -g
@echo " "
@echo "=================================================================="
@echo "INFO: report files are under: bin/$(KRNL_NAME)/reports/report.html"
......@@ -620,7 +648,7 @@ kerrpt: $(DEV_SRC)
# $(KRNL_NAME).aoco
# $(KRNL_NAME).aocx
hw:
aoc --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR_HW)/$(KRNL_NAME).aocx $(FIPO_FLAG)
aoc --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR_HW)/$(KRNL_NAME).aocx $(FIPO_FLAG) $(AOC_FLAG)
@echo " "
@echo "================================================================"
@echo "INFO: FPGA bitstream is under: bin_hw/$(KRNL_NAME)(.aoco)(.aocx)"
......@@ -628,7 +656,7 @@ hw:
@echo " "
hw-he:
aoc --high-effort --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR_HW)/$(KRNL_NAME).aocx $(FIPO_FLAG)
aoc --high-effort --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR_HW)/$(KRNL_NAME).aocx $(FIPO_FLAG) $(AOC_FLAG)
@echo " "
@echo "================================================================"
@echo "INFO: FPGA bitstream is under: bin_hw/$(KRNL_NAME)(.aoco)(.aocx)"
......@@ -642,7 +670,7 @@ hw-he:
# $(KRNL_NAME).aoco
# $(KRNL_NAME).aocx
hw-prof:
aoc --profile --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR_HW_PROF)/$(KRNL_NAME).aocx $(FIPO_FLAG)
aoc --profile --board $(BOARD) $(DEV_SRC) -o $(TARGET_DIR_HW_PROF)/$(KRNL_NAME).aocx $(FIPO_FLAG) $(AOC_FLAG)
@echo " "
@echo "====================================================================================="
@echo "INFO: instrumented FPGA bitstream is under: bin_hw_profile/$(KRNL_NAME)(.aoco)(.aocx)"
......
......@@ -7,9 +7,9 @@ __kernel __attribute__ ((max_global_work_dim(0)))
void Krnl_Conform(
__global const int* restrict KerConstStatic_rotlist_const,
#if defined (FIXED_POINT_CONFORM)
__constant fixedpt3* restrict KerConstStatic_ref_coords_const, // must be formatted in host
__constant fixedpt3* restrict KerConstStatic_rotbonds_moving_vectors_const, // must be formatted in host
__constant fixedpt3* restrict KerConstStatic_rotbonds_unit_vectors_const, // must be formatted in host
__constant fixedpt3* restrict KerConstStatic_ref_coords_const,
__constant fixedpt3* restrict KerConstStatic_rotbonds_moving_vectors_const,
__constant fixedpt3* restrict KerConstStatic_rotbonds_unit_vectors_const,
#else
__constant float3* restrict KerConstStatic_ref_coords_const,
__constant float3* restrict KerConstStatic_rotbonds_moving_vectors_const,
......@@ -24,7 +24,7 @@ void Krnl_Conform(
#if defined (FIXED_POINT_CONFORM)
__constant fixedpt4* restrict KerConstStatic_ref_orientation_quats_const,
#else
__constant float4* restrict KerConstStatic_ref_orientation_quats_const,
__constant float4* restrict KerConstStatic_ref_orientation_quats_const,
#endif
unsigned short Host_RunId
)
......@@ -44,14 +44,6 @@ void Krnl_Conform(
__local float __attribute__((numbanks(8), bankwidth(16))) loc_coords[MAX_NUM_OF_ATOMS][4];
*/
/*
#if defined (FIXED_POINT_CONFORM)
__local fixedpt genotype [ACTUAL_GENOTYPE_LENGTH];
#else
__local float genotype [ACTUAL_GENOTYPE_LENGTH];
#endif
*/
/*
bool active = true;
*/
......@@ -62,47 +54,6 @@ void Krnl_Conform(
rotlist_localcache [c] = KerConstStatic_rotlist_const [c];
}
/*
#if defined (FIXED_POINT_CONFORM)
__local fixedpt3 ref_coords_localcache [MAX_NUM_OF_ATOMS];
#else
__local float3 ref_coords_localcache [MAX_NUM_OF_ATOMS];
#endif
for (uchar c = 0; c < DockConst_num_of_atoms; c++) {
ref_coords_localcache [c] = KerConstStatic_ref_coords_const [c];
}
*/
/*
#if defined (FIXED_POINT_CONFORM)
__local fixedpt3 rotbonds_moving_vectors_localcache [MAX_NUM_OF_ROTBONDS];
__local fixedpt3 rotbonds_unit_vectors_localcache [MAX_NUM_OF_ROTBONDS];
#else
__local float3 rotbonds_moving_vectors_localcache [MAX_NUM_OF_ROTBONDS];
__local float3 rotbonds_unit_vectors_localcache [MAX_NUM_OF_ROTBONDS];
#endif
for (uchar c = 0; c < Host_num_of_rotbonds; c++) {
rotbonds_moving_vectors_localcache [c] = KerConstStatic_rotbonds_moving_vectors_const[c];
rotbonds_unit_vectors_localcache [c] = KerConstStatic_rotbonds_unit_vectors_const [c];
}
*/
/*
#if defined (FIXED_POINT_CONFORM)
fixedpt4 ref_orientation_quats_const = KerConstStatic_ref_orientation_quats_const[Host_RunId];
fixedpt ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
fixedpt ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
fixedpt ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
fixedpt ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
#else
float4 ref_orientation_quats_const = KerConstStatic_ref_orientation_quats_const[Host_RunId];
float ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
float ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
float ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
float ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
#endif
*/
#pragma max_concurrency 32
while(active) {
char mode;
......@@ -114,7 +65,6 @@ while(active) {
fixedpt sin_theta, cos_theta;
fixedpt3 genrot_unitvec;
fixedpt3 genotype_xyz;
//fixedpt3 loc_coords[MAX_NUM_OF_ATOMS];
fixedpt3 __attribute__ ((
memory,
numbanks(1),
......@@ -130,7 +80,6 @@ while(active) {
float sin_theta, cos_theta;
float3 genrot_unitvec;
float3 genotype_xyz;
//float3 loc_coords[MAX_NUM_OF_ATOMS];
float3 __attribute__ ((
memory,
numbanks(1),
......@@ -219,9 +168,7 @@ while(active) {
if ((rotation_list_element & RLIST_FIRSTROT_MASK) != 0) //if first rotation of this atom
{
/*
atom_to_rotate = ref_coords_localcache [atom_id];
*/
/*atom_to_rotate = ref_coords_localcache [atom_id];*/
atom_to_rotate = KerConstStatic_ref_coords_const [atom_id];
}
else
......@@ -251,16 +198,13 @@ while(active) {
else //if rotating around rotatable bond
{
uint rotbond_id = (rotation_list_element & RLIST_RBONDID_MASK) >> RLIST_RBONDID_SHIFT;
/*
rotation_unitvec = rotbonds_unit_vectors_localcache [rotbond_id];
*/
/*rotation_unitvec = rotbonds_unit_vectors_localcache [rotbond_id];*/
rotation_unitvec = KerConstStatic_rotbonds_unit_vectors_const [rotbond_id];
rotation_angle = genotype [6+rotbond_id];
/*
rotation_movingvec = rotbonds_moving_vectors_localcache [rotbond_id];
*/
/*rotation_movingvec = rotbonds_moving_vectors_localcache [rotbond_id];*/
rotation_movingvec = KerConstStatic_rotbonds_moving_vectors_const [rotbond_id];
//in addition performing the first movement
......@@ -312,19 +256,18 @@ while(active) {
//two rotations should be performed
//(multiplying the quaternions)
{
#if defined (FIXED_POINT_CONFORM)
const fixedpt4 ref_orientation_quats_const = KerConstStatic_ref_orientation_quats_const[Host_RunId];
const fixedpt ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
const fixedpt ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
const fixedpt ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
const fixedpt ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
const fixedpt ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
const fixedpt ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
const fixedpt ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
const fixedpt ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
#else
const float4 ref_orientation_quats_const = KerConstStatic_ref_orientation_quats_const[Host_RunId];
const float ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
const float ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
const float ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
const float ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
const float ref_orientation_quats_const_0 = ref_orientation_quats_const.x;
const float ref_orientation_quats_const_1 = ref_orientation_quats_const.y;
const float ref_orientation_quats_const_2 = ref_orientation_quats_const.z;
const float ref_orientation_quats_const_3 = ref_orientation_quats_const.w;
#endif
//calculating quatrot_left*ref_orientation_quats_const,
......
......@@ -52,25 +52,6 @@ void Krnl_InterE(
*/
char active = 0x01;
/*
__local char atom_types_localcache [MAX_NUM_OF_ATOMS];
*/
/*
#if defined (FIXED_POINT_INTERE)
__local fixedpt64 atom_charges_localcache [MAX_NUM_OF_ATOMS];
#else
__local float atom_charges_localcache [MAX_NUM_OF_ATOMS];
#endif
*/
/*
for (uchar i=0; i<DockConst_num_of_atoms; i++) {
atom_types_localcache [i] = KerConstStatic_atom_types_const [i];
atom_charges_localcache [i] = KerConstStatic_atom_charges_const [i];
}
*/
__global const float* GlobFgrids2 = & GlobFgrids [Host_mul_tmp2];
__global const float* GlobFgrids3 = & GlobFgrids [Host_mul_tmp3];
......@@ -130,9 +111,6 @@ while(active) {
// for each atom
for (uchar atom1_id=0; atom1_id<DockConst_num_of_atoms; atom1_id++)
{
/*
char atom1_typeid = atom_types_localcache [atom1_id];
*/
char atom1_typeid = KerConstStatic_atom_types_const [atom1_id];
float3 loc_coords_atid1 = loc_coords[atom1_id];
......@@ -144,9 +122,6 @@ while(active) {
#if defined (FIXED_POINT_INTERE)
#else
/*
float q = atom_charges_localcache [atom1_id];
*/
float q = KerConstStatic_atom_charges_const [atom1_id];
#endif
......@@ -155,9 +130,6 @@ while(active) {
fixedpt64 fixpt_y = fixedpt64_fromfloat(loc_coords_atid1.y);
fixedpt64 fixpt_z = fixedpt64_fromfloat(loc_coords_atid1.z);
// fixedpt64 fixpt_q = fixedpt64_fromfloat(atom_charges_localcache [atom1_id]);
/*
fixedpt64 fixpt_q = atom_charges_localcache [atom1_id];
*/
fixedpt64 fixpt_q = KerConstStatic_atom_charges_const [atom1_id];
#endif
......
......@@ -39,50 +39,7 @@ void Krnl_IntraE(
*/
char active = 0x01;
/*
__local char atom_types_localcache [MAX_NUM_OF_ATOMS];
__local float atom_charges_localcache [MAX_NUM_OF_ATOMS];
__local float VWpars_AC_localcache [MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES];
__local float VWpars_BD_localcache [MAX_NUM_OF_ATYPES*MAX_NUM_OF_ATYPES];
__local float dspars_S_localcache [MAX_NUM_OF_ATYPES];
__local float dspars_V_localcache [MAX_NUM_OF_ATYPES];
*/
/*
for (uchar i=0; i<DockConst_num_of_atoms; i++) {
atom_types_localcache [i] = KerConstStatic_atom_types_const [i];
atom_charges_localcache [i] = KerConstStatic_atom_charges_const [i];
}
*/
/*
for (uchar i=0; i<Host_square_num_of_atypes; i++) {
if (i < DockConst_num_of_atypes) {
dspars_S_localcache [i] = KerConstStatic_dspars_S_const [i];
dspars_V_localcache [i] = KerConstStatic_dspars_V_const [i];
}
VWpars_AC_localcache [i] = KerConstStatic_VWpars_AC_const [i];
VWpars_BD_localcache [i] = KerConstStatic_VWpars_BD_const [i];
}
*/
/*
printf("%i\n", fixedpt_toint(681391));
printf("%i\n", fixedpt_toint(-772243));
printf("%li\n", fixedpt64_fromint(18));
printf("%li\n", fixedpt64_fromint(-18));
printf("%f\n", fixedpt64_tofloat(178145));
printf("%f\n", fixedpt_tofloat(178145));
*/
/*
// passed correctly
printf("kernel intraE %i \n", DockConst_num_of_intraE_contributors);
*/
__local char3 intraE_contributors_localcache [MAX_INTRAE_CONTRIBUTORS];
for (ushort i=0; i<MAX_INTRAE_CONTRIBUTORS; i++) {
intraE_contributors_localcache [i] = KerConstStatic_intraE_contributors_const [i];
}
......@@ -153,10 +110,6 @@ while(active) {
//#pragma unroll 10
for (ushort contributor_counter=0; contributor_counter<DockConst_num_of_intraE_contributors; contributor_counter++) {
/*
// passed correctly
printf("kernel intraE %i: %i \n", contributor_counter, DockConst_num_of_intraE_contributors);
*/
char3 ref_intraE_contributors_const;
ref_intraE_contributors_const = intraE_contributors_localcache[contributor_counter];
......@@ -186,15 +139,6 @@ while(active) {
printf("Distance: %f\n", distance_leo);
#endif
/*
float distance_pow_2 = distance_leo*distance_leo;
float inverse_distance_pow_2 = native_divide(1.0f, distance_pow_2);
float inverse_distance_pow_4 = inverse_distance_pow_2 * inverse_distance_pow_2;
float inverse_distance_pow_6 = inverse_distance_pow_4 * inverse_distance_pow_2;
float inverse_distance_pow_10 = inverse_distance_pow_6 * inverse_distance_pow_4;
float inverse_distance_pow_12 = inverse_distance_pow_6 * inverse_distance_pow_6;
*/
float partialE1;
float partialE2;
float partialE3;
......@@ -211,22 +155,12 @@ while(active) {
float inverse_distance_pow_10 = inverse_distance_pow_6 * inverse_distance_pow_4;
float inverse_distance_pow_12 = inverse_distance_pow_6 * inverse_distance_pow_6;
/*
char atom1_typeid = atom_types_localcache [atom1_id];
char atom2_typeid = atom_types_localcache [atom2_id];
*/
char atom1_typeid = KerConstStatic_atom_types_const [atom1_id];
char atom2_typeid = KerConstStatic_atom_types_const [atom2_id];
//calculating van der Waals / hydrogen bond term
/*
partialE1 = VWpars_AC_localcache [atom1_typeid*DockConst_num_of_atypes+atom2_typeid]*inverse_distance_pow_12;
*/
partialE1 = KerConstStatic_VWpars_AC_const [atom1_typeid*DockConst_num_of_atypes+atom2_typeid]*inverse_distance_pow_12;
/*
float tmp_pE2 = VWpars_BD_localcache [atom1_typeid*DockConst_num_of_atypes+atom2_typeid];
*/
float tmp_pE2 = KerConstStatic_VWpars_BD_const [atom1_typeid*DockConst_num_of_atypes+atom2_typeid];
if (ref_intraE_contributors_const.z == 1) //H-bond
......@@ -235,18 +169,9 @@ while(active) {
partialE2 = tmp_pE2 * inverse_distance_pow_6;
//calculating electrostatic term
/*
partialE3 = native_divide( (DockConst_coeff_elec*atom_charges_localcache[atom1_id]*atom_charges_localcache[atom2_id]) , (distance_leo*(-8.5525f + native_divide(86.9525f, (1.0f + 7.7839f*native_exp(-0.3154f*distance_leo))))) );
*/
partialE3 = native_divide( (DockConst_coeff_elec*KerConstStatic_atom_charges_const[atom1_id]*KerConstStatic_atom_charges_const[atom2_id]) , (distance_leo*(-8.5525f + native_divide(86.9525f, (1.0f + 7.7839f*native_exp(-0.3154f*distance_leo))))) );
//calculating desolvation term
/*
partialE4 = (
(dspars_S_localcache[atom1_typeid] + DockConst_qasp*fabs(atom_charges_localcache[atom1_id])) * dspars_V_localcache[atom2_typeid] +
(dspars_S_localcache[atom2_typeid] + DockConst_qasp*fabs(atom_charges_localcache[atom2_id])) * dspars_V_localcache[atom1_typeid]) *
DockConst_coeff_desolv*native_exp(-0.0386f*distance_pow_2);
*/
partialE4 = (
(KerConstStatic_dspars_S_const [atom1_typeid] + DockConst_qasp*fabs(KerConstStatic_atom_charges_const[atom1_id])) * KerConstStatic_dspars_V_const [atom2_typeid] +
(KerConstStatic_dspars_S_const [atom2_typeid] + DockConst_qasp*fabs(KerConstStatic_atom_charges_const[atom2_id])) * KerConstStatic_dspars_V_const [atom1_typeid]) *
......
......@@ -448,6 +448,17 @@ First achieving speedup vs i5 cpu core: 3ptb: 59/45 = 1.3x, 1stp: 84/85 = 0.98x
>>> commit "SPEEDUP: reduced scope+removed __local caches"
151. `Krnl_Conform`: cleaned up
152. `Krnl_IntraE`: cleaned up
153. `Krnl_InterE`: cleaned up
154. `Makefile`: added AOC flag for reducing the size of __constant (on-chip mem) from 16KB (default) down to 12KB
careful calculation was done to set this 12KB size
added AOC flags related to floating point: --fp-relaxed --fpc
freq: 178 MHz (43 sec non-instrumented on 10runs 3ptb, 81 sec on 1stp)(instrumented freq: 174 MHz)
First achieving speedup vs i5 cpu core: 3ptb: 59/43 = 1.37x, 1stp: 84/81 = 1.03x
>>> commit "set __constant to 12KB + aoc fp flags"
XXX, Between Conform and InterE, IntraE create a wider channel:
https://www.alteraforum.com/forum/showthread.php?t=55979
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment