Lesson 12

Date: 4/16/2014
High Performance Computing (part I)
Linux for Engineering and IT applications


Parallel regions and loops

Exercise
  • Simple "Hello World" program. Every thread executes all of the code enclosed in the parallel region. OpenMP library routines are used to obtain each thread's identifier and the total number of threads.
    C / C++ - Parallel Region Example
    #include <omp.h>
    #include <stdio.h>
    
    /*
     * OpenMP "Hello World".
     *
     * Forks a team of threads; every thread prints its own thread id, and the
     * master thread (id 0) additionally prints the size of the team.
     *
     * Returns 0.
     */
    int main(void)
    {
      int nthreads, tid;

      /* Fork a team of threads with each thread having a private tid variable */
    #pragma omp parallel private(tid)
      {
        /* Obtain and print thread id */
        tid = omp_get_thread_num();
        printf("Hello World from thread = %d\n", tid);

        /*
         * Only master thread does this. nthreads is shared, but thread 0 is
         * the only one that ever writes or reads it, so there is no race.
         */
        if (tid == 0) {
          nthreads = omp_get_num_threads();
          printf("Number of threads = %d\n", nthreads);
        }
      }  /* All threads join master thread and terminate */

      return 0;
    }
    
    Copy the content of the code above into file hello.c, then compile and run it as follows:
    gcc -fopenmp -o hello.x hello.c
    export OMP_NUM_THREADS=4
    ./hello.x
    

    C / C++ - for Directive Example
    #include <stdio.h>
    #include <omp.h>
    
    #define CHUNKSIZE 100
    #define N     1000
    
    /*
     * OpenMP work-sharing "for" example.
     *
     * Fills a[] and b[] with their index values, then computes c[i] = a[i] + b[i]
     * inside a parallel region. Loop iterations are handed out to the threads
     * dynamically in chunks of CHUNKSIZE, and each iteration prints which thread
     * executed it.
     *
     * Returns 0.
     */
    int main(void)
    {
      int i, chunk, tid;
      float a[N], b[N], c[N];

      /* Some initializations */
      for (i = 0; i < N; i++) {
        a[i] = b[i] = (float)i;
      }
      chunk = CHUNKSIZE;

      /*
       * tid must be private: every thread stores its own id in it. Leaving it
       * shared is a data race, and a thread could print another thread's id.
       */
    #pragma omp parallel shared(a,b,c,chunk) private(i,tid)
      {
        /* nowait: skip the barrier at the end of the loop; the region ends next */
    #pragma omp for schedule(dynamic,chunk) nowait
        for (i = 0; i < N; i++) {
          c[i] = a[i] + b[i];

          /* Obtain and print thread id and array index number */
          tid = omp_get_thread_num();
          printf("thread = %d, i = %d\n", tid, i);
        }
      }  /* end of parallel region */

      return 0;
    }
    
    Copy the content of the code above into file for.c, then compile and run it as follows:
    gcc -fopenmp -o for.x for.c
    export OMP_NUM_THREADS=4
    ./for.x
    
    Run ./for.x several times and observe how the array elements are distributed across the threads.




  • Take me to the Course Website