Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
docking
ocladock-fpga
Commits
ca97576b
Commit
ca97576b
authored
Sep 07, 2018
by
Leonardo Solis
Browse files
refactored LS-loops to remove bottleneck with II=15
Former-commit-id:
f78cfe4b
parent
a8b7ab8b
Changes
9
Hide whitespace changes
Inline
Side-by-side
ofdock_taskpar_alt/device/Krnl_LS.cl
View file @
ca97576b
...
...
@@ -122,11 +122,25 @@ while(valid) {
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[0]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[0]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS1)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -285,9 +299,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS1_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[0],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
ofdock_taskpar_alt/device/Krnl_LS2.cl
View file @
ca97576b
...
...
@@ -124,12 +124,26 @@ while(valid) {
write_channel_intel(chan_LS2Arbiter_end[1], (rho < DockConst_rho_lower_bound)?true:false);
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[1]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[1]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS2)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -292,9 +306,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS2_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[1],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
ofdock_taskpar_alt/device/Krnl_LS3.cl
View file @
ca97576b
...
...
@@ -125,11 +125,25 @@ while(valid) {
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[2]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[2]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS3)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -292,9 +306,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS3_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[2],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
ofdock_taskpar_alt/device/Krnl_LS4.cl
View file @
ca97576b
...
...
@@ -124,12 +124,26 @@ while(valid) {
write_channel_intel(chan_LS2Arbiter_end[3], (rho < DockConst_rho_lower_bound)?true:false);
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[3]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[3]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS4)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -292,9 +306,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS4_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[3],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
ofdock_taskpar_alt/device/Krnl_LS5.cl
View file @
ca97576b
...
...
@@ -124,12 +124,26 @@ while(valid) {
write_channel_intel(chan_LS2Arbiter_end[4], (rho < DockConst_rho_lower_bound)?true:false);
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[4]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[4]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS5)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -292,9 +306,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS5_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[4],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
ofdock_taskpar_alt/device/Krnl_LS6.cl
View file @
ca97576b
...
...
@@ -124,12 +124,26 @@ while(valid) {
write_channel_intel(chan_LS2Arbiter_end[5], (rho < DockConst_rho_lower_bound)?true:false);
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[5]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[5]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS6)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -292,9 +306,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS6_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[5],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
ofdock_taskpar_alt/device/Krnl_LS7.cl
View file @
ca97576b
...
...
@@ -124,12 +124,26 @@ while(valid) {
write_channel_intel(chan_LS2Arbiter_end[6], (rho < DockConst_rho_lower_bound)?true:false);
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[6]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[6]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS7)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -292,9 +306,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS7_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[6],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
ofdock_taskpar_alt/device/Krnl_LS8.cl
View file @
ca97576b
...
...
@@ -124,12 +124,26 @@ while(valid) {
write_channel_intel(chan_LS2Arbiter_end[7], (rho < DockConst_rho_lower_bound)?true:false);
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[7]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[7]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS8)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -292,9 +306,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS8_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[7],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
ofdock_taskpar_alt/device/Krnl_LS9.cl
View file @
ca97576b
...
...
@@ -124,12 +124,26 @@ while(valid) {
write_channel_intel(chan_LS2Arbiter_end[8], (rho < DockConst_rho_lower_bound)?true:false);
#endif
mem_fence(CLK_CHANNEL_MEM_FENCE);
// read PRNG values from prng_kernel using channels.
// this loop and the following one were initially merged for deeper pipelining.
// however, the PRNG channel read created a bottleneck (II=15).
// the split loops each have II=1.
float float_prng [ACTUAL_GENOTYPE_LENGTH];
for (uchar i=0; i<DockConst_num_of_genes; i++) {
float_prng [i] = read_channel_intel(chan_PRNG2LS_float_prng[8]);
}
mem_fence(CLK_CHANNEL_MEM_FENCE);
// new random deviate
// rho is the deviation of the uniform distribution
for (uchar i=0; i<DockConst_num_of_genes; i++) {
/*
float tmp_prng = read_channel_intel(chan_PRNG2LS_float_prng[8]);
mem_fence(CLK_CHANNEL_MEM_FENCE);
*/
float tmp_prng = float_prng[i];
#if defined (FIXED_POINT_LS9)
fixedpt fixpt_tmp_prng = *(fixedpt*) &tmp_prng;
...
...
@@ -292,9 +306,6 @@ while(valid) {
for
(
uchar
i=0
; i<DockConst_num_of_genes; i++) {
if
(
i
==
0
)
{
float2
evalenergy
=
{*
(
float*
)
&LS_eval,
current_energy}
;
/*
write_channel_intel
(
chan_LS2GA_LS9_evalenergy,
evalenergy
)
;
*/
write_channel_intel
(
chan_LS2GA_evalenergy[8],
evalenergy
)
;
}
mem_fence
(
CLK_CHANNEL_MEM_FENCE
)
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment