// -------------------------------------------------------
//
// -------------------------------------------------------
unsigned int gpu_rand(
			#if defined (RESTRICT_ARGS)
			__global unsigned int* restrict prng_states
			#else
			__global unsigned int* prng_states
			#endif
)
//Device function: returns the next pseudo-random unsigned integer for the
//calling work-item.
//
//Each work-item keeps one generator state in prng_states, in the slot
//selected by get_global_id(0). A call reads that state, advances it by one
//linear congruential step (RAND_A*state+RAND_C), writes the new state back
//to the same slot and returns it.
//
//RAND_A and RAND_C are defined outside this function; according to the
//original note they are the gcc linear congruential generator constants.
//
//If REPRO is defined, the stored state is never read: the constant 1 is
//written to the slot and returned on every call.
{
	//slot of prng_states that belongs to this work-item
	const size_t slot = get_global_id(0);

#if defined (REPRO)
	//reproducible mode: the state is pinned to a fixed value
	const unsigned int next_state = 1;
#else
	//load the current state and advance it by one LCG step
	const unsigned int next_state = (RAND_A*prng_states[slot]+RAND_C);
#endif

	//persist the state so that the next call continues the sequence
	prng_states[slot] = next_state;

	return next_state;
}

// -------------------------------------------------------
//
// -------------------------------------------------------
float gpu_randf(
		#if defined (RESTRICT_ARGS)
		__global unsigned int* restrict prng_states
		#else
		__global unsigned int* prng_states
		#endif
)
//Device function: returns a pseudo-random float that is intended to be
//greater than or equal to 0 and less than 1.
//
//One integer is drawn with gpu_rand() (which also advances the calling
//work-item's state in prng_states), divided by MAX_UINT and scaled by
//0.999999f. MAX_UINT is defined outside this function.
//
//The division is selected at compile time:
//  NATIVE_PRECISION -> native_divide()
//  HALF_PRECISION   -> half_divide()
//  otherwise        -> regular float division
//
//If REPRO is defined, gpu_rand() is not called and the constant 0.55f is
//returned.
{
#if defined (REPRO)
	//reproducible mode: fixed value, PRNG state untouched
	return 0.55f;
#else
	//raw integer sample; this call updates the work-item's PRNG state
	const unsigned int raw = gpu_rand(prng_states);

	//normalize by MAX_UINT, then scale by 0.999999f
	#if defined (NATIVE_PRECISION)
	return native_divide(raw,MAX_UINT)*0.999999f;
	#elif defined (HALF_PRECISION)
	return half_divide(raw,MAX_UINT)*0.999999f;
	#else	// Full precision
	return (((float) raw)/MAX_UINT)*0.999999f;
	#endif
#endif
}

// -------------------------------------------------------
//
// -------------------------------------------------------
void map_angle(__local float* angle)
//Device function: wraps the angle (in degrees) stored at *angle into the
//interval 0...360 by repeatedly subtracting or adding one full turn, and
//writes the wrapped value back to *angle.
//
//NOTE: the loops only make progress while adding/subtracting 360.0f actually
//changes the value, so the input has to be finite and of moderate magnitude;
//for an infinite input the value never changes and the loop does not end.
{
	//work on a private copy and store the result once at the end
	float wrapped = *angle;

	//too large: step down one full turn at a time
	for (; wrapped >= 360.0f; wrapped -= 360.0f)
		;

	//negative: step up one full turn at a time
	for (; wrapped < 0.0f; wrapped += 360.0f)
		;

	*angle = wrapped;
}

// -------------------------------------------------------
//
// -------------------------------------------------------
void gpu_perform_elitist_selection(int    dockpars_pop_size,

	         #if defined (RESTRICT_ARGS)
				    __global float* restrict dockpars_energies_current,
				    __global float* restrict dockpars_energies_next,
				    __global int*   restrict dockpars_evals_of_new_entities,
					#else
				    __global float* dockpars_energies_current,
				    __global float* dockpars_energies_next,
				    __global int*   dockpars_evals_of_new_entities,
					#endif

					     int    dockpars_num_of_genes,

					#if defined (RESTRICT_ARGS)
				    __global float* restrict dockpars_conformations_next,
		        __global const float* restrict dockpars_conformations_current
					#else
				    __global float* dockpars_conformations_next,
		        __global const float* dockpars_conformations_current
					#endif
)
//Device function: performs elitist selection for the calling work-group.
//
//It searches the dockpars_pop_size entries of dockpars_energies_current that
//start at index get_group_id(0) for the lowest energy, and copies that entity
//to position get_group_id(0) of the next generation:
//  - dockpars_energies_next[group]         <- lowest energy found
//  - dockpars_evals_of_new_entities[group] <- 0 (the entity was only copied)
//  - dockpars_conformations_next           <- the first dockpars_num_of_genes
//                                             genes of the winning entity
//Genotypes are GENOTYPE_LENGTH_IN_GLOBMEM floats apart in global memory.
//If several entities share the lowest energy, the first one met by the final
//scan of the per-work-item candidates is kept.
//
//The function contains work-group barriers, so every work-item of the
//work-group has to call it.
//
//NOTE(review): the group ID is used directly as the index of the first entity
//of the run, so presumably only work-groups whose ID is that index call this
//function -- confirm at the call site.
//NOTE(review): dockpars_pop_size >= 1 is assumed; with an empty population
//slot 0 of the scratch arrays would be read without having been written.
{
	//work-item / work-group IDs do not change during the call, query them once
	const size_t local_id = get_local_id(0);
	const size_t group_id = get_group_id(0);

	int entity_counter;
	int gene_counter;

	//per-work-item candidates (energy and entity index) and the overall winner
	__local float best_energies[NUM_OF_THREADS_PER_BLOCK];
	__local int best_IDs[NUM_OF_THREADS_PER_BLOCK];
	float best_energy;
	__local int best_ID;

	//step 1: every work-item takes the entity with its own index as first candidate
	if (local_id < dockpars_pop_size)
	{
		best_energies[local_id] = dockpars_energies_current[group_id+local_id];
		best_IDs[local_id] = local_id;
	}

	//step 2: every work-item scans the remaining entities with a stride of
	//NUM_OF_THREADS_PER_BLOCK and keeps the lowest energy it has seen
	for (entity_counter=NUM_OF_THREADS_PER_BLOCK+local_id;
	     entity_counter<dockpars_pop_size;
	     entity_counter+=NUM_OF_THREADS_PER_BLOCK)
	{
		const float candidate_energy = dockpars_energies_current[group_id+entity_counter];

		if (candidate_energy < best_energies[local_id])
		{
			best_energies[local_id] = candidate_energy;
			best_IDs[local_id] = entity_counter;
		}
	}

	//all candidates have to be in local memory before they are compared
	barrier(CLK_LOCAL_MEM_FENCE);

	//step 3: work-item 0 picks the winner among the candidates
	//(this could be implemented with a tree-like structure
	//which may be slightly faster)
	if (local_id == 0)
	{
		best_energy = best_energies[0];
		best_ID = best_IDs[0];

		for (entity_counter=1;
		     entity_counter<NUM_OF_THREADS_PER_BLOCK;
		     entity_counter++)
		{
			//the bound is tested first: slots with index >= dockpars_pop_size
			//were never written in step 1 and must not be read
			if ((entity_counter < dockpars_pop_size) && (best_energies[entity_counter] < best_energy))
			{
				best_energy = best_energies[entity_counter];
				best_ID = best_IDs[entity_counter];
			}
		}

		//setting energy value of new entity
		dockpars_energies_next[group_id] = best_energy;

		//0 evals were performed for entity selected with elitism (since it was copied only)
		dockpars_evals_of_new_entities[group_id] = 0;
	}

	//best_ID is written by work-item 0 only; wait until it is visible to everyone
	barrier(CLK_LOCAL_MEM_FENCE);

	//step 4: the work-items copy the genes of the winning entity cooperatively
	for (gene_counter=local_id;
	     gene_counter<dockpars_num_of_genes;
	     gene_counter+=NUM_OF_THREADS_PER_BLOCK)
	{
		dockpars_conformations_next[GENOTYPE_LENGTH_IN_GLOBMEM*group_id+gene_counter] = dockpars_conformations_current[GENOTYPE_LENGTH_IN_GLOBMEM*group_id+GENOTYPE_LENGTH_IN_GLOBMEM*best_ID+gene_counter];
	}
}
