Commit e0592524 authored by Leonardo Solis's avatar Leonardo Solis
Browse files

deleted not-needed altera examples


Former-commit-id: be654387
parent 024d457d
# Copyright (C) 2013-2016 Altera Corporation, San Jose, California, USA. All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to
# whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# This agreement shall be governed in all respects by the laws of the State of California and
# by the laws of the United States of America.
# This is a GNU Makefile.
# You must configure ALTERAOCLSDKROOT to point to the root directory of the Intel(R) FPGA SDK for OpenCL(TM)
# software installation.
# See http://www.altera.com/literature/hb/opencl-sdk/aocl_getting_started.pdf
# for more information on installing and configuring the Intel(R) FPGA SDK for OpenCL(TM).
# Recipe echo control: with VERBOSE=1 the ECHO prefix is empty, so make prints
# each recipe line before running it; otherwise ECHO is '@', which silences them.
ifeq ($(VERBOSE),1)
ECHO :=
else
ECHO := @
endif
# Where is the Intel(R) FPGA SDK for OpenCL(TM) software?
# First check: ALTERAOCLSDKROOT must name an existing path ($(wildcard) expands
# to nothing when the variable is unset or the path does not exist).
ifeq ($(wildcard $(ALTERAOCLSDKROOT)),)
$(error Set ALTERAOCLSDKROOT to the root directory of the Intel(R) FPGA SDK for OpenCL(TM) software installation)
endif
# Second check: the path must contain the OpenCL host header, otherwise it is
# not treated as a valid SDK root.
ifeq ($(wildcard $(ALTERAOCLSDKROOT)/host/include/CL/opencl.h),)
$(error Set ALTERAOCLSDKROOT to the root directory of the Intel(R) FPGA SDK for OpenCL(TM) software installation.)
endif
# OpenCL compile and link flags.
# Both are obtained once, at parse time, by running the `aocl` utility through
# the shell; their output is passed to the compiler in the host link rule.
AOCL_COMPILE_CONFIG := $(shell aocl compile-config )
AOCL_LINK_CONFIG := $(shell aocl link-config )
# Compilation flags
# DEBUG=1 adds debug information (-g); any other value builds with -O2.
# The flags are appended, so CXXFLAGS supplied by the caller are kept.
ifeq ($(DEBUG),1)
CXXFLAGS += -g
else
CXXFLAGS += -O2
endif
# Compiler
CXX := g++
# Target
# The host executable is written to $(TARGET_DIR)/$(TARGET), i.e. bin/host.
TARGET := host
TARGET_DIR := bin
# Directories
# INC_DIRS entries become -I options and LIB_DIRS entries become -L options.
INC_DIRS := ../common/inc
LIB_DIRS :=
# Files
# INCS is empty as written ($(wildcard) with no pattern expands to nothing);
# it is still listed as a prerequisite of the host executable.
INCS := $(wildcard )
SRCS := $(wildcard host/src/*.cpp ../common/src/AOCLUtils/*.cpp)
# LIBS entries become -l options.
LIBS := rt pthread
# Make it all!
# Default goal: build the host executable.
all : $(TARGET_DIR)/$(TARGET)

# Host executable target.
# All sources are compiled and linked in a single compiler invocation. The
# executable is rebuilt when the Makefile, any source, or any listed header
# changes. The output directory is an order-only prerequisite (after the '|'):
# it must exist before linking, but a change to its timestamp (for example when
# an .aocx file is copied into it) does not force a rebuild.
$(TARGET_DIR)/$(TARGET) : Makefile $(SRCS) $(INCS) | $(TARGET_DIR)
	$(ECHO)$(CXX) $(CPPFLAGS) $(CXXFLAGS) -fPIC $(foreach D,$(INC_DIRS),-I$D) \
			$(AOCL_COMPILE_CONFIG) $(SRCS) $(AOCL_LINK_CONFIG) \
			$(foreach D,$(LIB_DIRS),-L$D) \
			$(foreach L,$(LIBS),-l$L) \
			-o $(TARGET_DIR)/$(TARGET)

# Create the output directory; -p makes this a no-op if it already exists.
$(TARGET_DIR) :
	$(ECHO)mkdir -p $(TARGET_DIR)

# Standard make targets
# clean removes only the host executable; other files in the output directory
# are left in place.
clean :
	$(ECHO)rm -f $(TARGET_DIR)/$(TARGET)

.PHONY : all clean
<!DOCTYPE html>
<!--
Copyright (C) 2013-2016 Altera Corporation, San Jose, California, USA. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to
whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
This agreement shall be governed in all respects by the laws of the State of California and
by the laws of the United States of America.
-->
<html>
<head>
<meta charset="utf-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge"/>
<title>Asian Option Pricing: Intel FPGA&reg; OpenCL&trade; Design Example</title>
<link rel="stylesheet" href="../common/readme.css" type="text/css">
<script src="https://google-code-prettify.googlecode.com/svn/loader/run_prettify.js"></script>
</head>
<body>
<h1>
<div class="preheading">Intel FPGA<sup>&reg;</sup> OpenCL&trade; Design Example</div>
Asian Option Pricing
</h1>
<p>This readme file for the Asian Option Pricing OpenCL Design Example contains
information about the design example package. For more examples, please
visit the <a href="https://www.altera.com/products/design-software/embedded-software-developers/opencl/developer-zone.html">Intel FPGA OpenCL Design Examples page</a>.</p>
<nav>
<h2>Contents</h2>
<ul>
<li><a href="#Description">Description</a></li>
<li><a href="#Software_Hardware_Requirements">Software &amp; Hardware Requirements</a></li>
<li><a href="#Package_Contents">Package Contents</a></li>
<li><a href="#Compiling_the_OpenCL_Kernel">Compiling the OpenCL Kernel</a></li>
<li><a href="#Compiling_the_Host_Program">Compiling the Host Program</a></li>
<li><a href="#Running_the_Host_Program">Running the Host Program</a></li>
<li><a href="#Release_History">Release History</a></li>
<li><a href="#Legal">Legal</a></li>
<li><a href="#Contacting_Intel">Contacting Intel</a></li>
</ul>
</nav>
<section>
<a id="Description"><h2>Description</h2></a>
<p>This benchmark demonstrates an OpenCL implementation of an Asian Option pricing algorithm
implemented on Altera FPGAs.</p><p>An Asian Option is a financial instrument whose price is path dependent. In this benchmark,
we consider the arithmetic Asian option. This option depends on the average of a number of
sampled points from the start time to the time of maturity. Upon maturity, the average price
is compared to the strike price for the computation of put or call.</p>
</section>
<section>
<a id="Software_Hardware_Requirements"><h2>Software &amp; Hardware Requirements</h2></a>
<p/>
<table class="reqs">
<thead>
<tr>
<th rowspan="3">Requirement</th>
<th rowspan="3">Version</th>
<th colspan="2">OpenCL Kernel</th><th colspan="4">Host Program</th></tr><tr><th rowspan="2">Hardware<br/>Compile</th><th rowspan="2">Emulation<br/>Compile</th><th colspan="2">Hardware</th><th colspan="2">Emulation</th></tr><tr><th>Compile</th><th>Run</th><th>Compile</th><th>Run</th></tr></thead><tbody><tr><td>Quartus Prime Design Software <small>(Quartus II)</small></td><td>16.1 or later</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td><td></td><td></td><td></td><td></td></tr><tr><td>Intel(R) FPGA SDK for OpenCL(TM)</td><td>16.1 or later</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td><td class="req" rowspan="2">&#x02713;<div class="either">(either)</div></td><td class="req" rowspan="2">&#x02713;<div class="either">(either)</div></td><td class="req" rowspan="2">&#x02713;<div class="either">(either)</div></td><td class="req" rowspan="2">&#x02713;<div class="either">(either)</div></td></tr><tr><td>Intel(R) FPGA Runtime Environment for OpenCL(TM)</td><td>16.1 or later</td><td></td><td></td></tr><tr><td>Board Support Package</td><td>16.1-compatible</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td></tr><tr><td>Board Hardware</td><td>-</td><td></td><td></td><td></td><td class="req">&#x02713;</td><td></td><td></td></tr><tr><td>gcc</td><td>4.4.7 or later</td><td></td><td></td><td class="req">&#x02713;</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td><td class="req">&#x02713;</td></tr><tr><td>GNU Make</td><td>3.8.1 or later</td><td></td><td></td><td class="req">&#x02713;</td><td></td><td class="req">&#x02713;</td><td></td></tr></tbody>
</table>
</section>
<section>
<a id="Package_Contents"><h2>Package Contents</h2></a>
<p/>
<table class="pkg-contents">
<thead>
<tr>
<th class="path">Path</th>
<th class="desc">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td class="path"><a href="./" style="padding-left: 0.0ex">asian_option/</a></td>
<td class="desc"></td>
</tr>
<tr class="highlight">
<td class="path"><a href="./asian_option_opencl_fpga.pdf" style="padding-left: 2.0ex">asian_option_opencl_fpga.pdf</a></td>
<td class="desc">Detailed presentation about this design, including optimizations and results.</td>
</tr>
<tr>
<td class="path"><a href="./Makefile" style="padding-left: 2.0ex">Makefile</a></td>
<td class="desc">Makefile for host program</td>
</tr>
<tr>
<td class="path"><a href="./bin/" style="padding-left: 2.0ex">bin/</a></td>
<td class="desc">Host program, AOCX files</td>
</tr>
<tr>
<td class="path"><a href="./device/" style="padding-left: 2.0ex">device/</a></td>
<td class="desc">OpenCL kernel files</td>
</tr>
<tr>
<td class="path"><a href="./device/asian_option.cl" style="padding-left: 4.0ex">asian_option.cl</a></td>
<td class="desc">Top-level OpenCL kernel file</td>
</tr>
<tr>
<td class="path"><a href="./host/" style="padding-left: 2.0ex">host/</a></td>
<td class="desc"></td>
</tr>
<tr>
<td class="path"><a href="./host/src/" style="padding-left: 4.0ex">src/</a></td>
<td class="desc">Host source files</td>
</tr>
</tbody>
</table>
<h3>Additional Documentation</h3>
<div class="doc">
<div class="title"><a href="asian_option_opencl_fpga.pdf">Monte Carlo Pricing of Asian Options on FPGAs using OpenCL</a> <small>(PDF)</small></div>
<div class="desc"><p>This presentation contains detailed information about this design:</p><ul>
<li>Overview of the application</li>
<li>Using Altera's channels vendor extension to efficiently pass data between kernels</li>
<li>Using loop pipelining to efficiently execute single-threaded code</li>
<li>Results with comparisons to other platforms</li>
</ul></div>
</div>
</section>
<section>
<a id="Compiling_the_OpenCL_Kernel"><h2>Compiling the OpenCL Kernel</h2></a>
<p>The top-level OpenCL kernel file is <span class="mono">device/asian_option.cl</span>.</p>
<p>To compile the OpenCL kernel, run:</p>
<div class="command">aoc device/asian_option.cl <span class="nowrap">-o</span> bin/asian_option.aocx<span class="nowrap"></span> <span class="nowrap">--fp-relaxed</span> --board <span class="highlight">&lt;<i>board</i>&gt;</span></div>
<p>where <span class="highlight mono">&lt;<i>board</i>&gt;</span> matches the board you want to target.
The <span class="mono">-o bin/asian_option.aocx</span> argument is used to place the compiled binary
in the location that the host program expects.
</p>
<p>If you are unsure of the boards available, use the following command to list
available boards:</p>
<div class="command">aoc --list-boards</div>
<section>
<h3>Compiling for Emulator</h3>
<p>To use the emulation flow, the compilation command just needs to be modified slightly:</p>
<div class="command">aoc <span class="highlight nowrap">-march=emulator</span> device/asian_option.cl -o bin/asian_option.aocx<span class="nowrap"></span> <span class="nowrap">--fp-relaxed</span> --board &lt;<i>board</i>&gt;</div>
</section>
</section>
<section>
<a id="Compiling_the_Host_Program"><h2>Compiling the Host Program</h2></a>
<p>To compile the host program, run:</p>
<div class="command">make</div>
<p>The compiled host program will be located at <span class="mono">bin/host</span>.</p>
<section>
<h3>Host Preprocessor Definitions</h3>
<p>The host program has the following preprocessor definitions:</p>
<table class="host-defines parameters">
<thead>
<tr>
<th class="name">Define</th>
<th class="type">Type</th>
<th class="default">Default</th>
<th class="desc">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td class="name">-D<span class="highlight">USE_SVM_API</span>=&lt;<i>#</i>&gt;</td>
<td class="type">Optional</td>
<td class="default">0</td>
<td class="desc">
This option when set to 1 will use the OpenCL 2.0 shared virtual memory (SVM) API.
</td>
</tr>
</tbody>
</table>
<p>On Linux, custom values for preprocessor defines can be specified by setting
the value of <span class="mono">CPPFLAGS</span> when invoking the Makefile.</p>
</section>
</section>
<section>
<a id="Running_the_Host_Program"><h2>Running the Host Program</h2></a>
<p>Before running the host program, you should have compiled the OpenCL kernel and the host program. Refer to the above sections if you have not completed those steps.</p>
<p>To run the host program on hardware, execute:</p>
<div class="command">bin/host</div>
<p>The host program supports execution on multiple OpenCL FPGA devices. Example
output of running this application in a system with two boards is shown here:</p><pre class="console-output">
Starting Computations
DEVICE 0: r=0.08 sigma=0.30 T=1.0 S0=30.0 K=29.0 : Resulting Price is 3.133114
DEVICE 1: r=0.08 sigma=0.30 T=1.0 S0=30.0 K=28.0 : Resulting Price is 3.746063
2 Devices ran a total of 4.1943e+011 Simulations
Throughput = 24.19 Billion Simulations / second
<div class="heading"><span>Output</span></div></pre><p>The throughput is printed as the number of simulations per second. A simulation
refers to the key computation in the Monte Carlo algorithm that simulates the movement
of the stock price, as defined in <span class="mono">device/asian_option.cl</span>:</p><pre class="code-block prettyprint">
// Simulate the path movement using geometric brownian motion
S *= drift * exp(vol * gauss_rnd);
arithmetic_average += S;
<div class="heading nocode"><span>Code</span></div></pre><p>Each FPGA is dedicated to computing the Asian Option value for a particular
set of parameters. The host is set up to vary the strike price when multiple OpenCL
FPGA devices are present to demonstrate scalable parallel offload of many option
price calculations.</p><section>
<h3>Running with the Emulator</h3>
<p>Prior to running the emulation flow, ensure that you have compiled the kernel for emulation.
Refer to the above sections if you have not done so. Also, please set up your environment for
emulation. Please see the <a href="http://www.altera.com/literature/hb/opencl-sdk/aocl_programming_guide.pdf">Intel(R) FPGA SDK for OpenCL(TM) Programming Guide</a> for more information.</p>
<p>For this example design, the suggested emulation command is:</p>
<div class="command">CL_CONTEXT_EMULATOR_DEVICE_ALTERA=1 bin/host <span class="nowrap">-sims=100</span></div>
</section>
<section>
<h3>Host Parameters</h3>
<p>The general command-line for the host program is:</p>
<div class="command">bin/host <span class="nowrap">[-<span class="highlight">sims</span>=&lt;<i>#</i>&gt;]</span></div>
<p>where the one parameter is:</p>
<table class="host-params parameters">
<thead>
<tr>
<th class="name">Parameter</th>
<th class="type">Type</th>
<th class="default">Default</th>
<th class="desc">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td class="name">-<span class="highlight">sims</span>=&lt;<i>#</i>&gt;</td>
<td class="type">Optional</td>
<td class="default">100000</td>
<td class="desc">Number of simulations</td>
</tr>
</tbody>
</table>
</section>
<section>
<h3>OpenCL Binary Selection</h3>
<p>The host program requires an OpenCL binary (AOCX) file to run. For this example design, OpenCL binary files should be placed in the
<span class="mono">bin</span> directory.</p>
<p>By default, the host program will look for a binary file in the following order (earlier pattern matches
take priority):</p>
<ol>
<li>A file named <span class="mono">asian_option.aocx</span>.</li>
<li>A file named <span class="mono">asian_option_<span class="highlight">&lt;<i>board</i>&gt;</span>_161.aocx</span>,
where <span class="highlight mono">&lt;<i>board</i>&gt;</span> is the name of the board (as passed as the
<span class="mono">--board</span> argument to <span class="mono">aoc</span>).</li>
</ol>
</section>
</section>
<section>
<a id="Release_History"><h2>Release History</h2></a>
<p/>
<table class="history">
<thead>
<tr>
<th class="version">Example Version</th>
<th class="sdk-version">SDK Version</th>
<th class="date">Date</th>
<th class="changes">Changes</th>
</tr>
</thead>
<tbody>
<tr>
<td class="version">2.3</td>
<td class="sdk-version">16.0</td>
<td class="date">November 2016</td>
<td class="changes"><ul><li>Add SVM API option.</li></ul></td>
</tr>
<tr>
<td class="version">2.2</td>
<td class="sdk-version">16.0</td>
<td class="date">June 2016</td>
<td class="changes"><ul><li>Fixed makefile.</li></ul></td>
</tr>
<tr>
<td class="version">2.1</td>
<td class="sdk-version">14.1</td>
<td class="date">December 2014</td>
<td class="changes"><ul><li>New readme documentation.</li><li>Provide suggested emulation-specific arguments.</li><li>Device file is now <span class="mono">asian_option.cl</span>.</li><li>Remove <span class="mono">__attribute__((task))</span> from kernels (no longer necessary).</li></ul></td>
</tr>
<tr>
<td class="version">2.0</td>
<td class="sdk-version">14.0</td>
<td class="date">July 2014</td>
<td class="changes"><ul><li>Enforced channel ordering for multi-threaded kernels in 14.0 enabled
the removal of the barrier in black_scholes kernel.</li><li>Added optional host argument to override number of
simulations to run.</li></ul></td>
</tr>
<tr>
<td class="version">1.1</td>
<td class="sdk-version">13.1</td>
<td class="date">January 2014</td>
<td class="changes"><ul><li>On Linux, fix possible compilation issues (missing include files).</li></ul></td>
</tr>
<tr>
<td class="version">1.0</td>
<td class="sdk-version">13.1</td>
<td class="date">December 2013</td>
<td class="changes"><ul><li>First release of example.</li></ul></td>
</tr>
</tbody>
</table>
</section>
<section>
<a id="Legal"><h2>Legal</h2></a>
<pre class="license">Copyright (C) 2013-2016 Altera Corporation, San Jose, California, USA. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to
whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
This agreement shall be governed in all respects by the laws of the State of California and
by the laws of the United States of America.
</pre><section><h3>Trademarks</h3><div class="trademark"><p>OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos.</p><p>Product is based on a published Khronos Specification, and has passed the Khronos Conformance Testing Process. Current conformance status can be found at <a href="http://www.khronos.org/conformance">www.khronos.org/conformance</a>.</p></div></section>
</section>
<section>
<a id="Contacting_Intel"><h2>Contacting Intel</h2></a>
<p>Although we have made every effort to ensure that this design example works
correctly, there might be problems that we have not encountered. If you have
a question or problem that is not answered by the information provided in
this readme file or the example's documentation, please contact Intel
support (<a href="http://www.altera.com/myaltera">myAltera</a>).</p>
</section>
</body>
</html>
// Copyright (C) 2013-2016 Altera Corporation, San Jose, California, USA. All rights reserved.
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to
// whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// This agreement shall be governed in all respects by the laws of the State of California and
// by the laws of the United States of America.
/*
* Single Asian Option Pricing
* Author: Deshanand Singh (dsingh@altera.com)
*
* Please see host/src/main.cpp for a complete description of the algorithm
* and README.txt for a description of the expected output and steps to
* compile this benchmark.
*
*/
// Enable the Altera channels vendor extension (needed for the channel
// declarations below) and double-precision floating-point support.
#pragma OPENCL EXTENSION cl_altera_channels : enable
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// Forward declaration so box_muller can be called before its definition.
float2 box_muller(float a, float b);
// Mersenne twister constants
// MT_N is the number of 32-bit words in the generator state.
#define MT_M 397
#define MT_N 624
#define MATRIX_A 0x9908b0dfUL
#define UPPER_MASK 0x80000000UL
#define LOWER_MASK 0x7fffffffUL
// Used to ensure that the uniformly generated random numbers are in the range (0,1)
// CLAMP_ZERO is the smallest positive normalized float (2^-126) and CLAMP_ONE is
// the largest float below 1.0, so neither endpoint of the interval is reachable.
#define CLAMP_ZERO 0x1.0p-126f
#define CLAMP_ONE 0x1.fffffep-1f
// In this implementation, we will create vectors of 64 random numbers per clock cycle
// Each of these random numbers will be used to simulate the movement of a stock price
// for a single timestep. In this case, we are simulating 64 timesteps per clock cycle.
// VECTOR_DIV2 and VECTOR_DIV4 are VECTOR/2 and VECTOR/4 written out as literals;
// they must be updated by hand if VECTOR changes.
#define VECTOR 64
#define VECTOR_DIV2 32
#define VECTOR_DIV4 16
// We will use OpenCL vector types to simplify the code. 64 random numbers can be expressed
// as 4 sets of size-16 vectors provided by OpenCL
typedef float16 vec_float_ty;
typedef uint16 vec_uint_ty;
// 4 channels of random numbers
// Each channel carries one 16-wide float vector per transfer and is declared
// with a depth of 8.
channel vec_float_ty RANDOM_STREAM_0 __attribute__((depth(8)));
channel vec_float_ty RANDOM_STREAM_1 __attribute__((depth(8)));
channel vec_float_ty RANDOM_STREAM_2 __attribute__((depth(8)));
channel vec_float_ty RANDOM_STREAM_3 __attribute__((depth(8)));
// 4 channels of unsigned integer types used to initialize the mersenne twister
// Written by mersenne_twister_init and read by mersenne_twister_generate.
channel vec_uint_ty INIT_STREAM_0 __attribute__((depth(8)));
channel vec_uint_ty INIT_STREAM_1 __attribute__((depth(8)));
channel vec_uint_ty INIT_STREAM_2 __attribute__((depth(8)));
channel vec_uint_ty INIT_STREAM_3 __attribute__((depth(8)));
// Double precision ACCUMULATE_STREAM
// Unfortunately, we do not support channels with double-precision types at this time;
// however, we can get around this with a generic 64-bit channel type
typedef ulong t_64bit;
channel t_64bit ACCUMULATE_STREAM __attribute__((depth(8)));
// NOTE(review): NUM_THREADS is not used in this part of the file; presumably the
// work-item count of a multi-threaded kernel defined further down - confirm.
#define NUM_THREADS 8192
// Produces the initial state of the mersenne twister RNG and streams it out
// over the four INIT_STREAM channels.
//
// The seed is hardcoded to 777. Any value would do, and it could be turned into
// a kernel argument if the host needs to control it.
//
// State words are generated one per loop iteration and collected in a sliding
// window of VECTOR (64) entries. The window is sent out as four 16-wide vectors
// every 64 iterations. MT_N (624) is not a multiple of 64, so the sends are
// aligned to the END of the sequence (iterations 47, 111, ..., 623, ten sends
// in total). The first send therefore carries 16 leading filler entries that
// are written to the channels but never used.
//
// NOTE(review): each iteration adds the current index to the new word, while the
// word being produced is the next element of the state. The reference MT19937
// seeding adds the index of the element being produced - confirm this offset is
// intentional before relying on bit-exact agreement with other implementations.
__kernel void mersenne_twister_init(void)
{
  // Sliding window of the most recent state words, oldest at index 0.
  // Pre-filled with the seed value; these entries are the filler mentioned above.
  uint window[VECTOR];
  #pragma unroll VECTOR
  for (int lane = 0; lane < VECTOR; lane++) {
    window[lane] = 777;
  }

  unsigned int seed_word = 777;

  for (unsigned int idx = 0; idx < MT_N; idx++) {
    // Shift the window down by one entry (fully unrolled) and append the
    // current state word at the top.
    #pragma unroll
    for (int lane = 1; lane < VECTOR; lane++) {
      window[lane - 1] = window[lane];
    }
    window[VECTOR - 1] = seed_word;

    // Advance the seeding recurrence to the next state word.
    const unsigned int mixed = seed_word ^ (seed_word >> 30);
    seed_word = (1812433253U * mixed + idx) & 0xffffffffUL;

    // 47 == (MT_N - 1) modulo VECTOR, so the final iteration is a send.
    const bool send_window = ((idx & (VECTOR - 1)) == 47);
    if (send_window) {
      // Split the 64-entry window into four consecutive 16-wide vectors.
      vec_uint_ty quarter0, quarter1, quarter2, quarter3;
      #pragma unroll VECTOR_DIV4
      for (int k = 0; k < VECTOR_DIV4; k++) {
        quarter0[k] = window[k];
        quarter1[k] = window[k + 1 * VECTOR_DIV4];
        quarter2[k] = window[k + 2 * VECTOR_DIV4];
        quarter3[k] = window[k + 3 * VECTOR_DIV4];
      }

      write_channel_altera(INIT_STREAM_0, quarter0);
      write_channel_altera(INIT_STREAM_1, quarter1);
      write_channel_altera(INIT_STREAM_2, quarter2);
      write_channel_altera(INIT_STREAM_3, quarter3);
    }
  }
}
// This kernel implements the mersenne twister random number generator.
// It is almost a direct implementation of the algorithm shown
// here: http://en.wikipedia.org/wiki/Mersenne_twister
//
__kernel void mersenne_twister_generate(ulong N)
{
unsigned int mt[MT_N];
bool read_from_initialization = true;
ushort num_initializers_read = 0;
for (ulong n=0; n<N/VECTOR+(MT_N/VECTOR+1); n++) {
uint y[VECTOR];
bool write_channel = false;
if (read_from_initialization) {
vec_uint_ty I0 = read_channel_altera(INIT_STREAM_0);
vec_uint_ty I1 = read_channel_altera(INIT_STREAM_1);
vec_uint_ty I2 = read_channel_altera(INIT_STREAM_2);
vec_uint_ty I3 = read_channel_altera(INIT_STREAM_3);
#pragma unroll VECTOR_DIV4
for (int i=0; i<VECTOR_DIV4; i++) {
y[i]=I0[i];
y[i+1*VECTOR_DIV4]=I1[i];
y[i+2*VECTOR_DIV4]=I2[i];
y[i+3*VECTOR_DIV4]=I3[i];
}
if (++num_initializers_read == MT_N/VECTOR+1) read_from_initialization=false;
} else {
// You'll notice quite a lot of this design pattern in this particular example.
// We unroll inner loops fully as much as possible. This technique will generally
// lead to the best performance on the FPGA as long as the resulting pipelined
// implementation can fit within the available resources.
//
#pragma unroll VECTOR
for (int i=0; i<VECTOR; i++) {