The code below calculates the value of PI using a recursive approach. Modify the MAX_PARALLEL_RECURSIVE_LEVEL
value to determine at which recursion depth stop creating tasks. With this approach to create parallelism out of recursive applications: the more tasks you create, the more parallel tasks created but also the lesser work per task. So it is convenient to experiment with the application to understand at which level it creating further tasks do not benefit in terms of performance.
#include <stdio.h>
#include <omp.h>
double pi_r (double h, unsigned depth, unsigned maxdepth, unsigned long long begin, unsigned long long niters)
{
if (depth < maxdepth)
{
double area1, area2;
// Process first half
#pragma omp task shared(area1)
area1 = pi_r (h, depth+1, maxdepth, begin, niters/2-1);
// Process second half
#pragma omp task shared(area2)
area2 = pi_r (h, depth+1, maxdepth, begin+niters/2, niters/2);
#pragma omp taskwait
return area1+area2;
}
else
{
unsigned long long i;
double area = 0.0;
for (i = begin; i <= begin+niters; i++)
{
double x = h * (i - 0.5);
area += (4.0 / (1.0 + x*x));
}
return area;
}
}
double pi (unsigned long long niters)
{
double res;
double h = 1.0 / (double) niters;
#pragma omp parallel shared(res)
{
#define MAX_PARALLEL_RECURSIVE_LEVEL 4
#pragma omp single
res = pi_r (h, 0, MAX_PARALLEL_RECURSIVE_LEVEL, 1, niters);
}
return res * h;
}
int main (int argc, char *argv[])
{
#define NITERS (100*1000*1000ULL)
printf ("PI (w/%d iters) is %lf\n", NITERS, pi(NITERS));
return 0;
}