LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #define __KMP_IMP
15 #include "omp.h" /* extern "C" declarations of user-visible routines */
16 #include "kmp.h"
17 #include "kmp_error.h"
18 #include "kmp_i18n.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_stats.h"
22 
23 #if OMPT_SUPPORT
24 #include "ompt-specific.h"
25 #endif
26 
27 #define MAX_MESSAGE 512
28 
29 // flags will be used in future, e.g. to implement openmp_strict library
30 // restrictions
31 
40 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
41  // By default __kmpc_begin() is no-op.
42  char *env;
43  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
44  __kmp_str_match_true(env)) {
45  __kmp_middle_initialize();
46  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
47  } else if (__kmp_ignore_mppbeg() == FALSE) {
48  // By default __kmp_ignore_mppbeg() returns TRUE.
49  __kmp_internal_begin();
50  KC_TRACE(10, ("__kmpc_begin: called\n"));
51  }
52 }
53 
62 void __kmpc_end(ident_t *loc) {
63  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
64  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
65  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
66  // returns FALSE and __kmpc_end() will unregister this root (it can cause
67  // library shut down).
68  if (__kmp_ignore_mppend() == FALSE) {
69  KC_TRACE(10, ("__kmpc_end: called\n"));
70  KA_TRACE(30, ("__kmpc_end\n"));
71 
72  __kmp_internal_end_thread(-1);
73  }
74 }
75 
94 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
95  kmp_int32 gtid = __kmp_entry_gtid();
96 
97  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
98 
99  return gtid;
100 }
101 
116 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
117  KC_TRACE(10,
118  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
119 
120  return TCR_4(__kmp_all_nth);
121 }
122 
129 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
130  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
131  return __kmp_tid_from_gtid(__kmp_entry_gtid());
132 }
133 
139 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
140  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
141 
142  return __kmp_entry_thread()->th.th_team->t.t_nproc;
143 }
144 
151 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
152 #ifndef KMP_DEBUG
153 
154  return TRUE;
155 
156 #else
157 
158  const char *semi2;
159  const char *semi3;
160  int line_no;
161 
162  if (__kmp_par_range == 0) {
163  return TRUE;
164  }
165  semi2 = loc->psource;
166  if (semi2 == NULL) {
167  return TRUE;
168  }
169  semi2 = strchr(semi2, ';');
170  if (semi2 == NULL) {
171  return TRUE;
172  }
173  semi2 = strchr(semi2 + 1, ';');
174  if (semi2 == NULL) {
175  return TRUE;
176  }
177  if (__kmp_par_range_filename[0]) {
178  const char *name = semi2 - 1;
179  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
180  name--;
181  }
182  if ((*name == '/') || (*name == ';')) {
183  name++;
184  }
185  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
186  return __kmp_par_range < 0;
187  }
188  }
189  semi3 = strchr(semi2 + 1, ';');
190  if (__kmp_par_range_routine[0]) {
191  if ((semi3 != NULL) && (semi3 > semi2) &&
192  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
193  return __kmp_par_range < 0;
194  }
195  }
196  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
197  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
198  return __kmp_par_range > 0;
199  }
200  return __kmp_par_range < 0;
201  }
202  return TRUE;
203 
204 #endif /* KMP_DEBUG */
205 }
206 
213 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
214  return __kmp_entry_thread()->th.th_root->r.r_active;
215 }
216 
226 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
227  kmp_int32 num_threads) {
228  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
229  global_tid, num_threads));
230 
231  __kmp_push_num_threads(loc, global_tid, num_threads);
232 }
233 
234 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
235  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
236 
237  /* the num_threads are automatically popped */
238 }
239 
240 #if OMP_40_ENABLED
241 
242 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
243  kmp_int32 proc_bind) {
244  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
245  proc_bind));
246 
247  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
248 }
249 
250 #endif /* OMP_40_ENABLED */
251 
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263  int gtid = __kmp_entry_gtid();
264 
265 #if (KMP_STATS_ENABLED)
266  // If we were in a serial region, then stop the serial timer, record
267  // the event, and start the parallel region timer
268  stats_state_e previous_state = KMP_GET_THREAD_STATE();
269  if (previous_state == stats_state_e::SERIAL_REGION) {
270  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271  } else {
272  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273  }
274  int inParallel = __kmpc_in_parallel(loc);
275  if (inParallel) {
276  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277  } else {
278  KMP_COUNT_BLOCK(OMP_PARALLEL);
279  }
280 #endif
281 
282  // maybe saving just thr_state is enough here
283  {
284  va_list ap;
285  va_start(ap, microtask);
286 
287 #if OMPT_SUPPORT
288  omp_frame_t *ompt_frame;
289  if (ompt_enabled.enabled) {
290  kmp_info_t *master_th = __kmp_threads[gtid];
291  kmp_team_t *parent_team = master_th->th.th_team;
292  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
293  if (lwt)
294  ompt_frame = &(lwt->ompt_task_info.frame);
295  else {
296  int tid = __kmp_tid_from_gtid(gtid);
297  ompt_frame = &(
298  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
299  }
300  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
301  OMPT_STORE_RETURN_ADDRESS(gtid);
302  }
303 #endif
304 
305 #if INCLUDE_SSC_MARKS
306  SSC_MARK_FORKING();
307 #endif
308  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
309  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
310  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
311 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
312 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
313  &ap
314 #else
315  ap
316 #endif
317  );
318 #if INCLUDE_SSC_MARKS
319  SSC_MARK_JOINING();
320 #endif
321  __kmp_join_call(loc, gtid
322 #if OMPT_SUPPORT
323  ,
324  fork_context_intel
325 #endif
326  );
327 
328  va_end(ap);
329  }
330 
331 #if KMP_STATS_ENABLED
332  if (previous_state == stats_state_e::SERIAL_REGION) {
333  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
334  } else {
335  KMP_POP_PARTITIONED_TIMER();
336  }
337 #endif // KMP_STATS_ENABLED
338 }
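/* Illustrative sketch (not part of the upstream source): a compiler typically
   lowers
       #pragma omp parallel
       { body(a, b); }
   into an outlined microtask plus a call to __kmpc_fork_call, roughly:

       void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *a, int *b) {
         body(*a, *b); // parallel region body, run by every thread in the team
       }
       ...
       __kmpc_fork_call(&loc, 2, (kmpc_micro)outlined, &a, &b);

   argc counts only the shared-variable arguments in the ellipsis; the runtime
   supplies the global and bound thread ids as the first two microtask
   parameters. */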
339 
340 #if OMP_40_ENABLED
341 
352 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
353  kmp_int32 num_teams, kmp_int32 num_threads) {
354  KA_TRACE(20,
355  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
356  global_tid, num_teams, num_threads));
357 
358  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
359 }
360 
371 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
372  ...) {
373  int gtid = __kmp_entry_gtid();
374  kmp_info_t *this_thr = __kmp_threads[gtid];
375  va_list ap;
376  va_start(ap, microtask);
377 
378  KMP_COUNT_BLOCK(OMP_TEAMS);
379 
380  // remember teams entry point and nesting level
381  this_thr->th.th_teams_microtask = microtask;
382  this_thr->th.th_teams_level =
383  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
384 
385 #if OMPT_SUPPORT
386  kmp_team_t *parent_team = this_thr->th.th_team;
387  int tid = __kmp_tid_from_gtid(gtid);
388  if (ompt_enabled.enabled) {
389  parent_team->t.t_implicit_task_taskdata[tid]
390  .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
391  }
392  OMPT_STORE_RETURN_ADDRESS(gtid);
393 #endif
394 
395  // check whether __kmpc_push_num_teams was called; if not, set the default
396  // number of teams
397  if (this_thr->th.th_teams_size.nteams == 0) {
398  __kmp_push_num_teams(loc, gtid, 0, 0);
399  }
400  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
401  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
402  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
403 
404  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
405  VOLATILE_CAST(microtask_t)
406  __kmp_teams_master, // "wrapped" task
407  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
408 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
409  &ap
410 #else
411  ap
412 #endif
413  );
414  __kmp_join_call(loc, gtid
415 #if OMPT_SUPPORT
416  ,
417  fork_context_intel
418 #endif
419  );
420 
421  this_thr->th.th_teams_microtask = NULL;
422  this_thr->th.th_teams_level = 0;
423  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
424  va_end(ap);
425 }
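/* Illustrative sketch (assumed lowering, not part of the upstream source):
       #pragma omp teams num_teams(4) thread_limit(8)
   is typically emitted as
       __kmpc_push_num_teams(&loc, gtid, 4, 8);
       __kmpc_fork_teams(&loc, argc, (kmpc_micro)outlined_teams_fn, ...);
   If __kmpc_push_num_teams() was not called, the code above falls back to
   __kmp_push_num_teams(loc, gtid, 0, 0) to choose default team sizes. */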
426 #endif /* OMP_40_ENABLED */
427 
428 // I don't think this function should ever have been exported.
429 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
430 // openmp code ever called it, but it's been exported from the RTL for so
431 // long that I'm afraid to remove the definition.
432 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
433 
446 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
447 // The implementation is now in kmp_runtime.cpp so that it can share static
448 // functions with kmp_fork_call since the tasks to be done are similar in
449 // each case.
450 #if OMPT_SUPPORT
451  OMPT_STORE_RETURN_ADDRESS(global_tid);
452 #endif
453  __kmp_serialized_parallel(loc, global_tid);
454 }
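/* Illustrative sketch (assumption): when the compiler knows a parallel region
   will run on a single thread (e.g. an if(0) clause), it can emit
       __kmpc_serialized_parallel(&loc, gtid);
       outlined(&gtid, &bound_tid, ...); // run the region body on this thread
       __kmpc_end_serialized_parallel(&loc, gtid);
   instead of a full __kmpc_fork_call(). */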
455 
463 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
464  kmp_internal_control_t *top;
465  kmp_info_t *this_thr;
466  kmp_team_t *serial_team;
467 
468  KC_TRACE(10,
469  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
470 
471  /* skip all this code for autopar serialized loops since it results in
472  unacceptable overhead */
473  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
474  return;
475 
476  // Not autopar code
477  if (!TCR_4(__kmp_init_parallel))
478  __kmp_parallel_initialize();
479 
480  this_thr = __kmp_threads[global_tid];
481  serial_team = this_thr->th.th_serial_team;
482 
483 #if OMP_45_ENABLED
484  kmp_task_team_t *task_team = this_thr->th.th_task_team;
485 
486  // we need to wait for the proxy tasks before finishing the thread
487  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
488  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
489 #endif
490 
491  KMP_MB();
492  KMP_DEBUG_ASSERT(serial_team);
493  KMP_ASSERT(serial_team->t.t_serialized);
494  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
495  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
496  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
497  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
498 
499 #if OMPT_SUPPORT
500  if (ompt_enabled.enabled &&
501  this_thr->th.ompt_thread_info.state != omp_state_overhead) {
502  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
503  if (ompt_enabled.ompt_callback_implicit_task) {
504  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
505  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
506  OMPT_CUR_TASK_INFO(this_thr)->thread_num);
507  }
508 
509  // reset (clear) the task id only after unlinking the task
510  ompt_data_t *parent_task_data;
511  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
512 
513  if (ompt_enabled.ompt_callback_parallel_end) {
514  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
515  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
516  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
517  }
518  __ompt_lw_taskteam_unlink(this_thr);
519  this_thr->th.ompt_thread_info.state = omp_state_overhead;
520  }
521 #endif
522 
523  /* If necessary, pop the internal control stack values and replace the team
524  * values */
525  top = serial_team->t.t_control_stack_top;
526  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
527  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
528  serial_team->t.t_control_stack_top = top->next;
529  __kmp_free(top);
530  }
531 
532  // if( serial_team -> t.t_serialized > 1 )
533  serial_team->t.t_level--;
534 
535  /* pop dispatch buffers stack */
536  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
537  {
538  dispatch_private_info_t *disp_buffer =
539  serial_team->t.t_dispatch->th_disp_buffer;
540  serial_team->t.t_dispatch->th_disp_buffer =
541  serial_team->t.t_dispatch->th_disp_buffer->next;
542  __kmp_free(disp_buffer);
543  }
544 #if OMP_50_ENABLED
545  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
546 #endif
547 
548  --serial_team->t.t_serialized;
549  if (serial_team->t.t_serialized == 0) {
550 
551 /* return to the parallel section */
552 
553 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
554  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
555  __kmp_clear_x87_fpu_status_word();
556  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
557  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
558  }
559 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
560 
561  this_thr->th.th_team = serial_team->t.t_parent;
562  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
563 
564  /* restore values cached in the thread */
565  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
566  this_thr->th.th_team_master =
567  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
568  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
569 
570  /* TODO the below shouldn't need to be adjusted for serialized teams */
571  this_thr->th.th_dispatch =
572  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
573 
574  __kmp_pop_current_task_from_thread(this_thr);
575 
576  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
577  this_thr->th.th_current_task->td_flags.executing = 1;
578 
579  if (__kmp_tasking_mode != tskm_immediate_exec) {
580  // Copy the task team from the new child / old parent team to the thread.
581  this_thr->th.th_task_team =
582  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
583  KA_TRACE(20,
584  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
585  "team %p\n",
586  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
587  }
588  } else {
589  if (__kmp_tasking_mode != tskm_immediate_exec) {
590  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
591  "depth of serial team %p to %d\n",
592  global_tid, serial_team, serial_team->t.t_serialized));
593  }
594  }
595 
596  if (__kmp_env_consistency_check)
597  __kmp_pop_parallel(global_tid, NULL);
598 #if OMPT_SUPPORT
599  if (ompt_enabled.enabled)
600  this_thr->th.ompt_thread_info.state =
601  ((this_thr->th.th_team_serialized) ? omp_state_work_serial
602  : omp_state_work_parallel);
603 #endif
604 }
605 
614 void __kmpc_flush(ident_t *loc) {
615  KC_TRACE(10, ("__kmpc_flush: called\n"));
616 
617  /* need an explicit __mf() here since the library uses volatile instead */
618  KMP_MB(); /* Flush all pending memory write invalidates. */
619 
620 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
621 #if KMP_MIC
622 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
623 // We shouldn't need it, though, since the ABI rules require that
624 // * If the compiler generates NGO stores it also generates the fence
625 // * If users hand-code NGO stores they should insert the fence
626 // therefore no incomplete unordered stores should be visible.
627 #else
628  // C74404
629  // This is to address non-temporal store instructions (sfence needed).
630  // The clflush instruction is also addressed (mfence needed).
631  // The non-temporal load movntdqa instruction should probably also be
632  // addressed.
633  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
634  if (!__kmp_cpuinfo.initialized) {
635  __kmp_query_cpuid(&__kmp_cpuinfo);
636  }
637  if (!__kmp_cpuinfo.sse2) {
638  // CPU cannot execute SSE2 instructions.
639  } else {
640 #if KMP_COMPILER_ICC
641  _mm_mfence();
642 #elif KMP_COMPILER_MSVC
643  MemoryBarrier();
644 #else
645  __sync_synchronize();
646 #endif // KMP_COMPILER_ICC
647  }
648 #endif // KMP_MIC
649 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
650 // Nothing to see here; move along.
651 #elif KMP_ARCH_PPC64
652 // Nothing needed here (we have a real MB above).
653 #if KMP_OS_CNK
654  // The flushing thread needs to yield here; this prevents a
655  // busy-waiting thread from saturating the pipeline. flush is
656  // often used in loops like this:
657  // while (!flag) {
658  // #pragma omp flush(flag)
659  // }
660  // and adding the yield here is good for at least a 10x speedup
661  // when running >2 threads per core (on the NAS LU benchmark).
662  __kmp_yield(TRUE);
663 #endif
664 #else
665 #error Unknown or unsupported architecture
666 #endif
667 
668 #if OMPT_SUPPORT && OMPT_OPTIONAL
669  if (ompt_enabled.ompt_callback_flush) {
670  ompt_callbacks.ompt_callback(ompt_callback_flush)(
671  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
672  }
673 #endif
674 }
675 
676 /* -------------------------------------------------------------------------- */
684 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
685  KMP_COUNT_BLOCK(OMP_BARRIER);
686  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
687 
688  if (!TCR_4(__kmp_init_parallel))
689  __kmp_parallel_initialize();
690 
691  if (__kmp_env_consistency_check) {
692  if (loc == 0) {
693  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
694  }
695 
696  __kmp_check_barrier(global_tid, ct_barrier, loc);
697  }
698 
699 #if OMPT_SUPPORT
700  omp_frame_t *ompt_frame;
701  if (ompt_enabled.enabled) {
702  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
703  if (ompt_frame->enter_frame == NULL)
704  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
705  OMPT_STORE_RETURN_ADDRESS(global_tid);
706  }
707 #endif
708  __kmp_threads[global_tid]->th.th_ident = loc;
709  // TODO: explicit barrier_wait_id:
710  // this function is called when the 'barrier' directive is present or at the
711  // implicit barrier at the end of a worksharing construct.
712  // 1) better to add a per-thread barrier counter to a thread data structure
713  // 2) set to 0 when a new team is created
714  // 3) no sync is required
715 
716  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
717 #if OMPT_SUPPORT && OMPT_OPTIONAL
718  if (ompt_enabled.enabled) {
719  ompt_frame->enter_frame = NULL;
720  }
721 #endif
722 }
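/* Illustrative sketch (assumption): an explicit
       #pragma omp barrier
   is lowered to a single call such as
       __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
   The same entry point also implements the implicit barrier at the end of
   worksharing constructs that are not marked nowait. */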
723 
724 /* The BARRIER for a MASTER section is always explicit */
731 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
732  int status = 0;
733 
734  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
735 
736  if (!TCR_4(__kmp_init_parallel))
737  __kmp_parallel_initialize();
738 
739  if (KMP_MASTER_GTID(global_tid)) {
740  KMP_COUNT_BLOCK(OMP_MASTER);
741  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
742  status = 1;
743  }
744 
745 #if OMPT_SUPPORT && OMPT_OPTIONAL
746  if (status) {
747  if (ompt_enabled.ompt_callback_master) {
748  kmp_info_t *this_thr = __kmp_threads[global_tid];
749  kmp_team_t *team = this_thr->th.th_team;
750 
751  int tid = __kmp_tid_from_gtid(global_tid);
752  ompt_callbacks.ompt_callback(ompt_callback_master)(
753  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
754  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
755  OMPT_GET_RETURN_ADDRESS(0));
756  }
757  }
758 #endif
759 
760  if (__kmp_env_consistency_check) {
761 #if KMP_USE_DYNAMIC_LOCK
762  if (status)
763  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
764  else
765  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
766 #else
767  if (status)
768  __kmp_push_sync(global_tid, ct_master, loc, NULL);
769  else
770  __kmp_check_sync(global_tid, ct_master, loc, NULL);
771 #endif
772  }
773 
774  return status;
775 }
776 
785 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
786  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
787 
788  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
789  KMP_POP_PARTITIONED_TIMER();
790 
791 #if OMPT_SUPPORT && OMPT_OPTIONAL
792  kmp_info_t *this_thr = __kmp_threads[global_tid];
793  kmp_team_t *team = this_thr->th.th_team;
794  if (ompt_enabled.ompt_callback_master) {
795  int tid = __kmp_tid_from_gtid(global_tid);
796  ompt_callbacks.ompt_callback(ompt_callback_master)(
797  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
798  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
799  OMPT_GET_RETURN_ADDRESS(0));
800  }
801 #endif
802 
803  if (__kmp_env_consistency_check) {
804  if (global_tid < 0)
805  KMP_WARNING(ThreadIdentInvalid);
806 
807  if (KMP_MASTER_GTID(global_tid))
808  __kmp_pop_sync(global_tid, ct_master, loc);
809  }
810 }
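/* Illustrative sketch (assumption): the master construct
       #pragma omp master
       { body(); }
   is lowered to a conditional pair of calls
       if (__kmpc_master(&loc, gtid)) {
         body();
         __kmpc_end_master(&loc, gtid);
       }
   so only the thread for which __kmpc_master() returns 1 executes the body
   and the matching __kmpc_end_master(). */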
811 
819 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
820  int cid = 0;
821  kmp_info_t *th;
822  KMP_DEBUG_ASSERT(__kmp_init_serial);
823 
824  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
825 
826  if (!TCR_4(__kmp_init_parallel))
827  __kmp_parallel_initialize();
828 
829 #if USE_ITT_BUILD
830  __kmp_itt_ordered_prep(gtid);
831 // TODO: ordered_wait_id
832 #endif /* USE_ITT_BUILD */
833 
834  th = __kmp_threads[gtid];
835 
836 #if OMPT_SUPPORT && OMPT_OPTIONAL
837  kmp_team_t *team;
838  omp_wait_id_t lck;
839  void *codeptr_ra;
840  if (ompt_enabled.enabled) {
841  OMPT_STORE_RETURN_ADDRESS(gtid);
842  team = __kmp_team_from_gtid(gtid);
843  lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
844  /* OMPT state update */
845  th->th.ompt_thread_info.wait_id = lck;
846  th->th.ompt_thread_info.state = omp_state_wait_ordered;
847 
848  /* OMPT event callback */
849  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
850  if (ompt_enabled.ompt_callback_mutex_acquire) {
851  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
852  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
853  (omp_wait_id_t)lck, codeptr_ra);
854  }
855  }
856 #endif
857 
858  if (th->th.th_dispatch->th_deo_fcn != 0)
859  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
860  else
861  __kmp_parallel_deo(&gtid, &cid, loc);
862 
863 #if OMPT_SUPPORT && OMPT_OPTIONAL
864  if (ompt_enabled.enabled) {
865  /* OMPT state update */
866  th->th.ompt_thread_info.state = omp_state_work_parallel;
867  th->th.ompt_thread_info.wait_id = 0;
868 
869  /* OMPT event callback */
870  if (ompt_enabled.ompt_callback_mutex_acquired) {
871  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
872  ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
873  }
874  }
875 #endif
876 
877 #if USE_ITT_BUILD
878  __kmp_itt_ordered_start(gtid);
879 #endif /* USE_ITT_BUILD */
880 }
881 
889 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
890  int cid = 0;
891  kmp_info_t *th;
892 
893  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
894 
895 #if USE_ITT_BUILD
896  __kmp_itt_ordered_end(gtid);
897 // TODO: ordered_wait_id
898 #endif /* USE_ITT_BUILD */
899 
900  th = __kmp_threads[gtid];
901 
902  if (th->th.th_dispatch->th_dxo_fcn != 0)
903  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
904  else
905  __kmp_parallel_dxo(&gtid, &cid, loc);
906 
907 #if OMPT_SUPPORT && OMPT_OPTIONAL
908  OMPT_STORE_RETURN_ADDRESS(gtid);
909  if (ompt_enabled.ompt_callback_mutex_released) {
910  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
911  ompt_mutex_ordered,
912  (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
913  OMPT_LOAD_RETURN_ADDRESS(gtid));
914  }
915 #endif
916 }
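/* Illustrative sketch (assumption): inside a loop with an ordered clause,
       #pragma omp ordered
       { body(i); }
   becomes
       __kmpc_ordered(&loc, gtid);
       body(i);
       __kmpc_end_ordered(&loc, gtid);
   which serializes the enclosed code in iteration order via the dispatch
   deo/dxo hooks used above. */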
917 
918 #if KMP_USE_DYNAMIC_LOCK
919 
920 static __forceinline void
921 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
922  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
923  // Pointer to the allocated indirect lock is written to crit, while indexing
924  // is ignored.
925  void *idx;
926  kmp_indirect_lock_t **lck;
927  lck = (kmp_indirect_lock_t **)crit;
928  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
929  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
930  KMP_SET_I_LOCK_LOCATION(ilk, loc);
931  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
932  KA_TRACE(20,
933  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
934 #if USE_ITT_BUILD
935  __kmp_itt_critical_creating(ilk->lock, loc);
936 #endif
937  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
938  if (status == 0) {
939 #if USE_ITT_BUILD
940  __kmp_itt_critical_destroyed(ilk->lock);
941 #endif
942  // We don't really need to destroy the unclaimed lock here since it will be
943  // cleaned up at program exit.
944  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
945  }
946  KMP_DEBUG_ASSERT(*lck != NULL);
947 }
948 
949 // Fast-path acquire tas lock
950 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
951  { \
952  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
953  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
954  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
955  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
956  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
957  kmp_uint32 spins; \
958  KMP_FSYNC_PREPARE(l); \
959  KMP_INIT_YIELD(spins); \
960  if (TCR_4(__kmp_nth) > \
961  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
962  KMP_YIELD(TRUE); \
963  } else { \
964  KMP_YIELD_SPIN(spins); \
965  } \
966  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
967  while ( \
968  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
969  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
970  __kmp_spin_backoff(&backoff); \
971  if (TCR_4(__kmp_nth) > \
972  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
973  KMP_YIELD(TRUE); \
974  } else { \
975  KMP_YIELD_SPIN(spins); \
976  } \
977  } \
978  } \
979  KMP_FSYNC_ACQUIRED(l); \
980  }
981 
982 // Fast-path test tas lock
983 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
984  { \
985  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
986  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
987  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
988  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
989  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
990  }
991 
992 // Fast-path release tas lock
993 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
994  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
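/* Reading of the fast-path macros above (explanatory, not normative): a TAS
   lock is a single word, lk.poll, holding KMP_LOCK_FREE(tas) when unlocked and
   an encoding of gtid + 1 while held.  Acquisition is essentially
       while (poll != free || !compare_exchange_acquire(&poll, free, busy))
         spin_backoff_and_maybe_yield();
   with the relaxed load up front so the thread does not hammer the cache line
   with compare-and-swap attempts while the lock is busy. */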
995 
996 #if KMP_USE_FUTEX
997 
998 #include <sys/syscall.h>
999 #include <unistd.h>
1000 #ifndef FUTEX_WAIT
1001 #define FUTEX_WAIT 0
1002 #endif
1003 #ifndef FUTEX_WAKE
1004 #define FUTEX_WAKE 1
1005 #endif
1006 
1007 // Fast-path acquire futex lock
1008 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1009  { \
1010  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1011  kmp_int32 gtid_code = (gtid + 1) << 1; \
1012  KMP_MB(); \
1013  KMP_FSYNC_PREPARE(ftx); \
1014  kmp_int32 poll_val; \
1015  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1016  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1017  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1018  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1019  if (!cond) { \
1020  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1021  poll_val | \
1022  KMP_LOCK_BUSY(1, futex))) { \
1023  continue; \
1024  } \
1025  poll_val |= KMP_LOCK_BUSY(1, futex); \
1026  } \
1027  kmp_int32 rc; \
1028  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1029  NULL, NULL, 0)) != 0) { \
1030  continue; \
1031  } \
1032  gtid_code |= 1; \
1033  } \
1034  KMP_FSYNC_ACQUIRED(ftx); \
1035  }
1036 
1037 // Fast-path test futex lock
1038 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1039  { \
1040  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1041  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1042  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1043  KMP_FSYNC_ACQUIRED(ftx); \
1044  rc = TRUE; \
1045  } else { \
1046  rc = FALSE; \
1047  } \
1048  }
1049 
1050 // Fast-path release futex lock
1051 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1052  { \
1053  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1054  KMP_MB(); \
1055  KMP_FSYNC_RELEASING(ftx); \
1056  kmp_int32 poll_val = \
1057  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1058  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1059  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1060  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1061  } \
1062  KMP_MB(); \
1063  KMP_YIELD(TCR_4(__kmp_nth) > \
1064  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1065  }
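/* Reading of the futex fast path above (explanatory, not normative): lk.poll
   holds KMP_LOCK_FREE(futex) when free; an owner stores an encoding of
   (gtid + 1) << 1, and the low bit of the stripped value means "there may be
   waiters".  An acquirer that loses the race sets the waiter bit and then
   sleeps in the kernel with
       syscall(__NR_futex, &ftx->lk.poll, FUTEX_WAIT, observed_value, NULL, NULL, 0);
   Release exchanges the word back to the free value and, if the waiter bit was
   set, wakes sleepers with FUTEX_WAKE. */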
1066 
1067 #endif // KMP_USE_FUTEX
1068 
1069 #else // KMP_USE_DYNAMIC_LOCK
1070 
1071 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1072  ident_t const *loc,
1073  kmp_int32 gtid) {
1074  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1075 
1076  // Because of the double-check, the following load doesn't need to be volatile
1077  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1078 
1079  if (lck == NULL) {
1080  void *idx;
1081 
1082  // Allocate & initialize the lock.
1083  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1084  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1085  __kmp_init_user_lock_with_checks(lck);
1086  __kmp_set_user_lock_location(lck, loc);
1087 #if USE_ITT_BUILD
1088  __kmp_itt_critical_creating(lck);
1089 // __kmp_itt_critical_creating() should be called *before* the first usage
1090 // of the underlying lock. It is the only place where we can guarantee that.
1091 // There is a chance the lock will be destroyed without ever being used, but
1092 // that is not a problem: this is not a real event seen by the user, it just
1093 // sets a name for the object (lock). See kmp_itt.h for more details.
1094 #endif /* USE_ITT_BUILD */
1095 
1096  // Use a cmpxchg instruction to slam the start of the critical section with
1097  // the lock pointer. If another thread beat us to it, deallocate the lock,
1098  // and use the lock that the other thread allocated.
1099  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1100 
1101  if (status == 0) {
1102 // Deallocate the lock and reload the value.
1103 #if USE_ITT_BUILD
1104  __kmp_itt_critical_destroyed(lck);
1105 // Let ITT know the lock is destroyed and the same memory location may be reused
1106 // for another purpose.
1107 #endif /* USE_ITT_BUILD */
1108  __kmp_destroy_user_lock_with_checks(lck);
1109  __kmp_user_lock_free(&idx, gtid, lck);
1110  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1111  KMP_DEBUG_ASSERT(lck != NULL);
1112  }
1113  }
1114  return lck;
1115 }
1116 
1117 #endif // KMP_USE_DYNAMIC_LOCK
1118 
1129 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1130  kmp_critical_name *crit) {
1131 #if KMP_USE_DYNAMIC_LOCK
1132 #if OMPT_SUPPORT && OMPT_OPTIONAL
1133  OMPT_STORE_RETURN_ADDRESS(global_tid);
1134 #endif // OMPT_SUPPORT
1135  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1136 #else
1137  KMP_COUNT_BLOCK(OMP_CRITICAL);
1138 #if OMPT_SUPPORT && OMPT_OPTIONAL
1139  omp_state_t prev_state = omp_state_undefined;
1140  ompt_thread_info_t ti;
1141 #endif
1142  kmp_user_lock_p lck;
1143 
1144  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1145 
1146  // TODO: add THR_OVHD_STATE
1147 
1148  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1149  KMP_CHECK_USER_LOCK_INIT();
1150 
1151  if ((__kmp_user_lock_kind == lk_tas) &&
1152  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1153  lck = (kmp_user_lock_p)crit;
1154  }
1155 #if KMP_USE_FUTEX
1156  else if ((__kmp_user_lock_kind == lk_futex) &&
1157  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1158  lck = (kmp_user_lock_p)crit;
1159  }
1160 #endif
1161  else { // ticket, queuing or drdpa
1162  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1163  }
1164 
1165  if (__kmp_env_consistency_check)
1166  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1167 
1168 // since the critical directive binds to all threads, not just the current
1169 // team, we have to check this even if we are in a serialized team.
1170 // also, even if we are the uber thread, we still have to acquire the lock,
1171 // as we have to contend with sibling threads.
1172 
1173 #if USE_ITT_BUILD
1174  __kmp_itt_critical_acquiring(lck);
1175 #endif /* USE_ITT_BUILD */
1176 #if OMPT_SUPPORT && OMPT_OPTIONAL
1177  OMPT_STORE_RETURN_ADDRESS(global_tid);
1178  void *codeptr_ra = NULL;
1179  if (ompt_enabled.enabled) {
1180  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1181  /* OMPT state update */
1182  prev_state = ti.state;
1183  ti.wait_id = (omp_wait_id_t)lck;
1184  ti.state = omp_state_wait_critical;
1185 
1186  /* OMPT event callback */
1187  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1188  if (ompt_enabled.ompt_callback_mutex_acquire) {
1189  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1190  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1191  (omp_wait_id_t)crit, codeptr_ra);
1192  }
1193  }
1194 #endif
1195  // Value of 'crit' should be good for using as a critical_id of the critical
1196  // section directive.
1197  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1198 
1199 #if USE_ITT_BUILD
1200  __kmp_itt_critical_acquired(lck);
1201 #endif /* USE_ITT_BUILD */
1202 #if OMPT_SUPPORT && OMPT_OPTIONAL
1203  if (ompt_enabled.enabled) {
1204  /* OMPT state update */
1205  ti.state = prev_state;
1206  ti.wait_id = 0;
1207 
1208  /* OMPT event callback */
1209  if (ompt_enabled.ompt_callback_mutex_acquired) {
1210  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1211  ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
1212  }
1213  }
1214 #endif
1215  KMP_POP_PARTITIONED_TIMER();
1216 
1217  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1218  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1219 #endif // KMP_USE_DYNAMIC_LOCK
1220 }
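/* Illustrative sketch (assumption): a named critical section
       #pragma omp critical (name)
       { body(); }
   is typically lowered to
       static kmp_critical_name crit_name; // zero-initialized, shared per name
       __kmpc_critical(&loc, gtid, &crit_name);
       body();
       __kmpc_end_critical(&loc, gtid, &crit_name);
   The zeroed kmp_critical_name storage is turned into (or pointed at) the
   actual lock by the first thread to arrive, as in the code above. */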
1221 
1222 #if KMP_USE_DYNAMIC_LOCK
1223 
1224 // Converts the given hint to an internal lock implementation
1225 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1226 #if KMP_USE_TSX
1227 #define KMP_TSX_LOCK(seq) lockseq_##seq
1228 #else
1229 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1230 #endif
1231 
1232 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1233 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1234 #else
1235 #define KMP_CPUINFO_RTM 0
1236 #endif
1237 
1238  // Hints that do not require further logic
1239  if (hint & kmp_lock_hint_hle)
1240  return KMP_TSX_LOCK(hle);
1241  if (hint & kmp_lock_hint_rtm)
1242  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1243  if (hint & kmp_lock_hint_adaptive)
1244  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1245 
1246  // Rule out conflicting hints first by returning the default lock
1247  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1248  return __kmp_user_lock_seq;
1249  if ((hint & omp_lock_hint_speculative) &&
1250  (hint & omp_lock_hint_nonspeculative))
1251  return __kmp_user_lock_seq;
1252 
1253  // Do not even consider speculation when it appears to be contended
1254  if (hint & omp_lock_hint_contended)
1255  return lockseq_queuing;
1256 
1257  // Uncontended lock without speculation
1258  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1259  return lockseq_tas;
1260 
1261  // HLE lock for speculation
1262  if (hint & omp_lock_hint_speculative)
1263  return KMP_TSX_LOCK(hle);
1264 
1265  return __kmp_user_lock_seq;
1266 }
1267 
1268 #if OMPT_SUPPORT && OMPT_OPTIONAL
1269 #if KMP_USE_DYNAMIC_LOCK
1270 static kmp_mutex_impl_t
1271 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1272  if (user_lock) {
1273  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1274  case 0:
1275  break;
1276 #if KMP_USE_FUTEX
1277  case locktag_futex:
1278  return kmp_mutex_impl_queuing;
1279 #endif
1280  case locktag_tas:
1281  return kmp_mutex_impl_spin;
1282 #if KMP_USE_TSX
1283  case locktag_hle:
1284  return kmp_mutex_impl_speculative;
1285 #endif
1286  default:
1287  return ompt_mutex_impl_unknown;
1288  }
1289  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1290  }
1291  KMP_ASSERT(ilock);
1292  switch (ilock->type) {
1293 #if KMP_USE_TSX
1294  case locktag_adaptive:
1295  case locktag_rtm:
1296  return kmp_mutex_impl_speculative;
1297 #endif
1298  case locktag_nested_tas:
1299  return kmp_mutex_impl_spin;
1300 #if KMP_USE_FUTEX
1301  case locktag_nested_futex:
1302 #endif
1303  case locktag_ticket:
1304  case locktag_queuing:
1305  case locktag_drdpa:
1306  case locktag_nested_ticket:
1307  case locktag_nested_queuing:
1308  case locktag_nested_drdpa:
1309  return kmp_mutex_impl_queuing;
1310  default:
1311  return ompt_mutex_impl_unknown;
1312  }
1313 }
1314 #else
1315 // For locks without dynamic binding
1316 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1317  switch (__kmp_user_lock_kind) {
1318  case lk_tas:
1319  return kmp_mutex_impl_spin;
1320 #if KMP_USE_FUTEX
1321  case lk_futex:
1322 #endif
1323  case lk_ticket:
1324  case lk_queuing:
1325  case lk_drdpa:
1326  return kmp_mutex_impl_queuing;
1327 #if KMP_USE_TSX
1328  case lk_hle:
1329  case lk_rtm:
1330  case lk_adaptive:
1331  return kmp_mutex_impl_speculative;
1332 #endif
1333  default:
1334  return ompt_mutex_impl_unknown;
1335  }
1336 }
1337 #endif // KMP_USE_DYNAMIC_LOCK
1338 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1339 
1353 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1354  kmp_critical_name *crit, uint32_t hint) {
1355  KMP_COUNT_BLOCK(OMP_CRITICAL);
1356  kmp_user_lock_p lck;
1357 #if OMPT_SUPPORT && OMPT_OPTIONAL
1358  omp_state_t prev_state = omp_state_undefined;
1359  ompt_thread_info_t ti;
1360  // This is the case if called from __kmpc_critical:
1361  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1362  if (!codeptr)
1363  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1364 #endif
1365 
1366  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1367 
1368  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1369  // Check if it is initialized.
1370  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1371  if (*lk == 0) {
1372  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1373  if (KMP_IS_D_LOCK(lckseq)) {
1374  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1375  KMP_GET_D_TAG(lckseq));
1376  } else {
1377  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1378  }
1379  }
1380  // Branch for accessing the actual lock object and set operation. This
1381  // branching is inevitable since this lock initialization does not follow the
1382  // normal dispatch path (lock table is not used).
1383  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1384  lck = (kmp_user_lock_p)lk;
1385  if (__kmp_env_consistency_check) {
1386  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1387  __kmp_map_hint_to_lock(hint));
1388  }
1389 #if USE_ITT_BUILD
1390  __kmp_itt_critical_acquiring(lck);
1391 #endif
1392 #if OMPT_SUPPORT && OMPT_OPTIONAL
1393  if (ompt_enabled.enabled) {
1394  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1395  /* OMPT state update */
1396  prev_state = ti.state;
1397  ti.wait_id = (omp_wait_id_t)lck;
1398  ti.state = omp_state_wait_critical;
1399 
1400  /* OMPT event callback */
1401  if (ompt_enabled.ompt_callback_mutex_acquire) {
1402  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1403  ompt_mutex_critical, (unsigned int)hint,
1404  __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
1405  }
1406  }
1407 #endif
1408 #if KMP_USE_INLINED_TAS
1409  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1410  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1411  } else
1412 #elif KMP_USE_INLINED_FUTEX
1413  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1414  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1415  } else
1416 #endif
1417  {
1418  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1419  }
1420  } else {
1421  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1422  lck = ilk->lock;
1423  if (__kmp_env_consistency_check) {
1424  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1425  __kmp_map_hint_to_lock(hint));
1426  }
1427 #if USE_ITT_BUILD
1428  __kmp_itt_critical_acquiring(lck);
1429 #endif
1430 #if OMPT_SUPPORT && OMPT_OPTIONAL
1431  if (ompt_enabled.enabled) {
1432  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1433  /* OMPT state update */
1434  prev_state = ti.state;
1435  ti.wait_id = (omp_wait_id_t)lck;
1436  ti.state = omp_state_wait_critical;
1437 
1438  /* OMPT event callback */
1439  if (ompt_enabled.ompt_callback_mutex_acquire) {
1440  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1441  ompt_mutex_critical, (unsigned int)hint,
1442  __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
1443  }
1444  }
1445 #endif
1446  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1447  }
1448  KMP_POP_PARTITIONED_TIMER();
1449 
1450 #if USE_ITT_BUILD
1451  __kmp_itt_critical_acquired(lck);
1452 #endif /* USE_ITT_BUILD */
1453 #if OMPT_SUPPORT && OMPT_OPTIONAL
1454  if (ompt_enabled.enabled) {
1455  /* OMPT state update */
1456  ti.state = prev_state;
1457  ti.wait_id = 0;
1458 
1459  /* OMPT event callback */
1460  if (ompt_enabled.ompt_callback_mutex_acquired) {
1461  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1462  ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
1463  }
1464  }
1465 #endif
1466 
1467  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1468  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1469 } // __kmpc_critical_with_hint
1470 
1471 #endif // KMP_USE_DYNAMIC_LOCK
1472 
1482 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1483  kmp_critical_name *crit) {
1484  kmp_user_lock_p lck;
1485 
1486  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1487 
1488 #if KMP_USE_DYNAMIC_LOCK
1489  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1490  lck = (kmp_user_lock_p)crit;
1491  KMP_ASSERT(lck != NULL);
1492  if (__kmp_env_consistency_check) {
1493  __kmp_pop_sync(global_tid, ct_critical, loc);
1494  }
1495 #if USE_ITT_BUILD
1496  __kmp_itt_critical_releasing(lck);
1497 #endif
1498 #if KMP_USE_INLINED_TAS
1499  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1500  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1501  } else
1502 #elif KMP_USE_INLINED_FUTEX
1503  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1504  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1505  } else
1506 #endif
1507  {
1508  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1509  }
1510  } else {
1511  kmp_indirect_lock_t *ilk =
1512  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1513  KMP_ASSERT(ilk != NULL);
1514  lck = ilk->lock;
1515  if (__kmp_env_consistency_check) {
1516  __kmp_pop_sync(global_tid, ct_critical, loc);
1517  }
1518 #if USE_ITT_BUILD
1519  __kmp_itt_critical_releasing(lck);
1520 #endif
1521  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1522  }
1523 
1524 #else // KMP_USE_DYNAMIC_LOCK
1525 
1526  if ((__kmp_user_lock_kind == lk_tas) &&
1527  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1528  lck = (kmp_user_lock_p)crit;
1529  }
1530 #if KMP_USE_FUTEX
1531  else if ((__kmp_user_lock_kind == lk_futex) &&
1532  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1533  lck = (kmp_user_lock_p)crit;
1534  }
1535 #endif
1536  else { // ticket, queuing or drdpa
1537  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1538  }
1539 
1540  KMP_ASSERT(lck != NULL);
1541 
1542  if (__kmp_env_consistency_check)
1543  __kmp_pop_sync(global_tid, ct_critical, loc);
1544 
1545 #if USE_ITT_BUILD
1546  __kmp_itt_critical_releasing(lck);
1547 #endif /* USE_ITT_BUILD */
1548  // Value of 'crit' should be good for using as a critical_id of the critical
1549  // section directive.
1550  __kmp_release_user_lock_with_checks(lck, global_tid);
1551 
1552 #endif // KMP_USE_DYNAMIC_LOCK
1553 
1554 #if OMPT_SUPPORT && OMPT_OPTIONAL
1555  /* OMPT release event triggers after lock is released; place here to trigger
1556  * for all #if branches */
1557  OMPT_STORE_RETURN_ADDRESS(global_tid);
1558  if (ompt_enabled.ompt_callback_mutex_released) {
1559  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1560  ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1561  }
1562 #endif
1563 
1564  KMP_POP_PARTITIONED_TIMER();
1565  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1566 }
1567 
1577 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1578  int status;
1579 
1580  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1581 
1582  if (!TCR_4(__kmp_init_parallel))
1583  __kmp_parallel_initialize();
1584 
1585  if (__kmp_env_consistency_check)
1586  __kmp_check_barrier(global_tid, ct_barrier, loc);
1587 
1588 #if OMPT_SUPPORT
1589  omp_frame_t *ompt_frame;
1590  if (ompt_enabled.enabled) {
1591  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1592  if (ompt_frame->enter_frame == NULL)
1593  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1594  OMPT_STORE_RETURN_ADDRESS(global_tid);
1595  }
1596 #endif
1597 #if USE_ITT_NOTIFY
1598  __kmp_threads[global_tid]->th.th_ident = loc;
1599 #endif
1600  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1601 #if OMPT_SUPPORT && OMPT_OPTIONAL
1602  if (ompt_enabled.enabled) {
1603  ompt_frame->enter_frame = NULL;
1604  }
1605 #endif
1606 
1607  return (status != 0) ? 0 : 1;
1608 }
1609 
1619 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1620  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1621 
1622  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1623 }
1624 
1635 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1636  kmp_int32 ret;
1637 
1638  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1639 
1640  if (!TCR_4(__kmp_init_parallel))
1641  __kmp_parallel_initialize();
1642 
1643  if (__kmp_env_consistency_check) {
1644  if (loc == 0) {
1645  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1646  }
1647  __kmp_check_barrier(global_tid, ct_barrier, loc);
1648  }
1649 
1650 #if OMPT_SUPPORT
1651  omp_frame_t *ompt_frame;
1652  if (ompt_enabled.enabled) {
1653  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1654  if (ompt_frame->enter_frame == NULL)
1655  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1656  OMPT_STORE_RETURN_ADDRESS(global_tid);
1657  }
1658 #endif
1659 #if USE_ITT_NOTIFY
1660  __kmp_threads[global_tid]->th.th_ident = loc;
1661 #endif
1662  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1663 #if OMPT_SUPPORT && OMPT_OPTIONAL
1664  if (ompt_enabled.enabled) {
1665  ompt_frame->enter_frame = NULL;
1666  }
1667 #endif
1668 
1669  ret = __kmpc_master(loc, global_tid);
1670 
1671  if (__kmp_env_consistency_check) {
1672  /* there is no __kmpc_end_master() call here, so the (stats) */
1673  /* actions of __kmpc_end_master are done here instead */
1674 
1675  if (global_tid < 0) {
1676  KMP_WARNING(ThreadIdentInvalid);
1677  }
1678  if (ret) {
1679  /* only one thread should do the pop since only */
1680  /* one did the push (see __kmpc_master()) */
1681 
1682  __kmp_pop_sync(global_tid, ct_master, loc);
1683  }
1684  }
1685 
1686  return (ret);
1687 }
1688 
1689 /* The BARRIER for a SINGLE process section is always explicit */
1701 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1702  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1703 
1704  if (rc) {
1705  // We are going to execute the single statement, so we should count it.
1706  KMP_COUNT_BLOCK(OMP_SINGLE);
1707  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1708  }
1709 
1710 #if OMPT_SUPPORT && OMPT_OPTIONAL
1711  kmp_info_t *this_thr = __kmp_threads[global_tid];
1712  kmp_team_t *team = this_thr->th.th_team;
1713  int tid = __kmp_tid_from_gtid(global_tid);
1714 
1715  if (ompt_enabled.enabled) {
1716  if (rc) {
1717  if (ompt_enabled.ompt_callback_work) {
1718  ompt_callbacks.ompt_callback(ompt_callback_work)(
1719  ompt_work_single_executor, ompt_scope_begin,
1720  &(team->t.ompt_team_info.parallel_data),
1721  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1722  1, OMPT_GET_RETURN_ADDRESS(0));
1723  }
1724  } else {
1725  if (ompt_enabled.ompt_callback_work) {
1726  ompt_callbacks.ompt_callback(ompt_callback_work)(
1727  ompt_work_single_other, ompt_scope_begin,
1728  &(team->t.ompt_team_info.parallel_data),
1729  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1730  1, OMPT_GET_RETURN_ADDRESS(0));
1731  ompt_callbacks.ompt_callback(ompt_callback_work)(
1732  ompt_work_single_other, ompt_scope_end,
1733  &(team->t.ompt_team_info.parallel_data),
1734  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1735  1, OMPT_GET_RETURN_ADDRESS(0));
1736  }
1737  }
1738  }
1739 #endif
1740 
1741  return rc;
1742 }
1743 
1753 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1754  __kmp_exit_single(global_tid);
1755  KMP_POP_PARTITIONED_TIMER();
1756 
1757 #if OMPT_SUPPORT && OMPT_OPTIONAL
1758  kmp_info_t *this_thr = __kmp_threads[global_tid];
1759  kmp_team_t *team = this_thr->th.th_team;
1760  int tid = __kmp_tid_from_gtid(global_tid);
1761 
1762  if (ompt_enabled.ompt_callback_work) {
1763  ompt_callbacks.ompt_callback(ompt_callback_work)(
1764  ompt_work_single_executor, ompt_scope_end,
1765  &(team->t.ompt_team_info.parallel_data),
1766  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1767  OMPT_GET_RETURN_ADDRESS(0));
1768  }
1769 #endif
1770 }
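/* Illustrative sketch (assumption): the single construct
       #pragma omp single
       { body(); }
   becomes
       if (__kmpc_single(&loc, gtid)) {
         body();
         __kmpc_end_single(&loc, gtid);
       }
       __kmpc_barrier(&loc, gtid); // omitted when nowait is specified
   Only the thread that wins __kmpc_single() runs the body and calls
   __kmpc_end_single(). */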
1771 
1779 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1780  KMP_POP_PARTITIONED_TIMER();
1781  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1782 
1783 #if OMPT_SUPPORT && OMPT_OPTIONAL
1784  if (ompt_enabled.ompt_callback_work) {
1785  ompt_work_t ompt_work_type = ompt_work_loop;
1786  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1787  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1788  // Determine workshare type
1789  if (loc != NULL) {
1790  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1791  ompt_work_type = ompt_work_loop;
1792  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1793  ompt_work_type = ompt_work_sections;
1794  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1795  ompt_work_type = ompt_work_distribute;
1796  } else {
1797  // use default set above.
1798  // a warning about this case is provided in __kmpc_for_static_init
1799  }
1800  KMP_DEBUG_ASSERT(ompt_work_type);
1801  }
1802  ompt_callbacks.ompt_callback(ompt_callback_work)(
1803  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1804  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1805  }
1806 #endif
1807  if (__kmp_env_consistency_check)
1808  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1809 }
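/* Illustrative note (assumption): __kmpc_for_static_fini() closes a statically
   scheduled worksharing loop; the matching __kmpc_for_static_init_4/8/4u/8u
   entry points (defined in kmp_sched.cpp) compute this thread's chunk bounds
   before the loop runs, roughly:
       __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                                &upper, &stride, 1, 1);
       for (i = lower; i <= upper; i += stride)
         body(i);
       __kmpc_for_static_fini(&loc, gtid);
*/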
1810 
1811 // User routines which take C-style arguments (call by value)
1812 // different from the Fortran equivalent routines
1813 
1814 void ompc_set_num_threads(int arg) {
1815  // !!!!! TODO: check the per-task binding
1816  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1817 }
1818 
1819 void ompc_set_dynamic(int flag) {
1820  kmp_info_t *thread;
1821 
1822  /* For the thread-private implementation of the internal controls */
1823  thread = __kmp_entry_thread();
1824 
1825  __kmp_save_internal_controls(thread);
1826 
1827  set__dynamic(thread, flag ? TRUE : FALSE);
1828 }
1829 
1830 void ompc_set_nested(int flag) {
1831  kmp_info_t *thread;
1832 
1833  /* For the thread-private internal controls implementation */
1834  thread = __kmp_entry_thread();
1835 
1836  __kmp_save_internal_controls(thread);
1837 
1838  set__nested(thread, flag ? TRUE : FALSE);
1839 }
1840 
1841 void ompc_set_max_active_levels(int max_active_levels) {
1842  /* TO DO */
1843  /* we want per-task implementation of this internal control */
1844 
1845  /* For the per-thread internal controls implementation */
1846  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1847 }
1848 
1849 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1850  // !!!!! TODO: check the per-task binding
1851  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1852 }
1853 
1854 int ompc_get_ancestor_thread_num(int level) {
1855  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1856 }
1857 
1858 int ompc_get_team_size(int level) {
1859  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1860 }
1861 
1862 void kmpc_set_stacksize(int arg) {
1863  // __kmp_aux_set_stacksize initializes the library if needed
1864  __kmp_aux_set_stacksize(arg);
1865 }
1866 
1867 void kmpc_set_stacksize_s(size_t arg) {
1868  // __kmp_aux_set_stacksize initializes the library if needed
1869  __kmp_aux_set_stacksize(arg);
1870 }
1871 
1872 void kmpc_set_blocktime(int arg) {
1873  int gtid, tid;
1874  kmp_info_t *thread;
1875 
1876  gtid = __kmp_entry_gtid();
1877  tid = __kmp_tid_from_gtid(gtid);
1878  thread = __kmp_thread_from_gtid(gtid);
1879 
1880  __kmp_aux_set_blocktime(arg, thread, tid);
1881 }
1882 
1883 void kmpc_set_library(int arg) {
1884  // __kmp_user_set_library initializes the library if needed
1885  __kmp_user_set_library((enum library_type)arg);
1886 }
1887 
1888 void kmpc_set_defaults(char const *str) {
1889  // __kmp_aux_set_defaults initializes the library if needed
1890  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1891 }
1892 
1893 void kmpc_set_disp_num_buffers(int arg) {
1894  // ignore after initialization because some teams have already
1895  // allocated dispatch buffers
1896  if (__kmp_init_serial == 0 && arg > 0)
1897  __kmp_dispatch_num_buffers = arg;
1898 }
1899 
1900 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1901 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1902  return -1;
1903 #else
1904  if (!TCR_4(__kmp_init_middle)) {
1905  __kmp_middle_initialize();
1906  }
1907  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1908 #endif
1909 }
1910 
1911 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1912 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1913  return -1;
1914 #else
1915  if (!TCR_4(__kmp_init_middle)) {
1916  __kmp_middle_initialize();
1917  }
1918  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1919 #endif
1920 }
1921 
1922 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1923 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1924  return -1;
1925 #else
1926  if (!TCR_4(__kmp_init_middle)) {
1927  __kmp_middle_initialize();
1928  }
1929  return __kmp_aux_get_affinity_mask_proc(proc, mask);
1930 #endif
1931 }
1932 
1933 /* -------------------------------------------------------------------------- */
1978 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
1979  void *cpy_data, void (*cpy_func)(void *, void *),
1980  kmp_int32 didit) {
1981  void **data_ptr;
1982 
1983  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
1984 
1985  KMP_MB();
1986 
1987  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
1988 
1989  if (__kmp_env_consistency_check) {
1990  if (loc == 0) {
1991  KMP_WARNING(ConstructIdentInvalid);
1992  }
1993  }
1994 
1995  // ToDo: Optimize the following two barriers into some kind of split barrier
1996 
1997  if (didit)
1998  *data_ptr = cpy_data;
1999 
2000 #if OMPT_SUPPORT
2001  omp_frame_t *ompt_frame;
2002  if (ompt_enabled.enabled) {
2003  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2004  if (ompt_frame->enter_frame == NULL)
2005  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
2006  OMPT_STORE_RETURN_ADDRESS(gtid);
2007  }
2008 #endif
2009 /* This barrier is not a barrier region boundary */
2010 #if USE_ITT_NOTIFY
2011  __kmp_threads[gtid]->th.th_ident = loc;
2012 #endif
2013  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2014 
2015  if (!didit)
2016  (*cpy_func)(cpy_data, *data_ptr);
2017 
2018 // Consider next barrier a user-visible barrier for barrier region boundaries
2019 // Nesting checks are already handled by the single construct checks
2020 
2021 #if OMPT_SUPPORT
2022  if (ompt_enabled.enabled) {
2023  OMPT_STORE_RETURN_ADDRESS(gtid);
2024  }
2025 #endif
2026 #if USE_ITT_NOTIFY
2027  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2028 // tasks can overwrite the location)
2029 #endif
2030  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2031 #if OMPT_SUPPORT && OMPT_OPTIONAL
2032  if (ompt_enabled.enabled) {
2033  ompt_frame->enter_frame = NULL;
2034  }
2035 #endif
2036 }
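/* Lowering sketch (illustrative; the code an actual compiler emits for
   "#pragma omp single copyprivate(x)" may differ in detail):

     kmp_int32 didit = __kmpc_single(loc, gtid);
     if (didit) {
       x = compute();              // single region body
       __kmpc_end_single(loc, gtid);
     }
     __kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_x, didit);

   where copy_x(void *dst, void *src) copies the executing thread's value into
   each other thread's private x. The first barrier above publishes the
   executing thread's data pointer before anyone copies from it; the second
   keeps that data alive until all copies have completed.
*/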
2037 
2038 /* -------------------------------------------------------------------------- */
2039 
2040 #define INIT_LOCK __kmp_init_user_lock_with_checks
2041 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2042 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2043 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2044 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2045 #define ACQUIRE_NESTED_LOCK_TIMED \
2046  __kmp_acquire_nested_user_lock_with_checks_timed
2047 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2048 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2049 #define TEST_LOCK __kmp_test_user_lock_with_checks
2050 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2051 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2052 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2053 
2054 // TODO: Make check abort messages use location info & pass it into
2055 // with_checks routines
2056 
2057 #if KMP_USE_DYNAMIC_LOCK
2058 
2059 // internal lock initializer
2060 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2061  kmp_dyna_lockseq_t seq) {
2062  if (KMP_IS_D_LOCK(seq)) {
2063  KMP_INIT_D_LOCK(lock, seq);
2064 #if USE_ITT_BUILD
2065  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2066 #endif
2067  } else {
2068  KMP_INIT_I_LOCK(lock, seq);
2069 #if USE_ITT_BUILD
2070  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2071  __kmp_itt_lock_creating(ilk->lock, loc);
2072 #endif
2073  }
2074 }
2075 
2076 // internal nest lock initializer
2077 static __forceinline void
2078 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2079  kmp_dyna_lockseq_t seq) {
2080 #if KMP_USE_TSX
2081  // Don't have nested lock implementation for speculative locks
2082  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2083  seq = __kmp_user_lock_seq;
2084 #endif
2085  switch (seq) {
2086  case lockseq_tas:
2087  seq = lockseq_nested_tas;
2088  break;
2089 #if KMP_USE_FUTEX
2090  case lockseq_futex:
2091  seq = lockseq_nested_futex;
2092  break;
2093 #endif
2094  case lockseq_ticket:
2095  seq = lockseq_nested_ticket;
2096  break;
2097  case lockseq_queuing:
2098  seq = lockseq_nested_queuing;
2099  break;
2100  case lockseq_drdpa:
2101  seq = lockseq_nested_drdpa;
2102  break;
2103  default:
2104  seq = lockseq_nested_queuing;
2105  }
2106  KMP_INIT_I_LOCK(lock, seq);
2107 #if USE_ITT_BUILD
2108  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2109  __kmp_itt_lock_creating(ilk->lock, loc);
2110 #endif
2111 }
2112 
2113 /* initialize the lock with a hint */
2114 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2115  uintptr_t hint) {
2116  KMP_DEBUG_ASSERT(__kmp_init_serial);
2117  if (__kmp_env_consistency_check && user_lock == NULL) {
2118  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2119  }
2120 
2121  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2122 
2123 #if OMPT_SUPPORT && OMPT_OPTIONAL
2124  // This is the case, if called from omp_init_lock_with_hint:
2125  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2126  if (!codeptr)
2127  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2128  if (ompt_enabled.ompt_callback_lock_init) {
2129  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2130  ompt_mutex_lock, (omp_lock_hint_t)hint,
2131  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2132  codeptr);
2133  }
2134 #endif
2135 }
2136 
2137 /* initialize the lock with a hint */
2138 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2139  void **user_lock, uintptr_t hint) {
2140  KMP_DEBUG_ASSERT(__kmp_init_serial);
2141  if (__kmp_env_consistency_check && user_lock == NULL) {
2142  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2143  }
2144 
2145  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2146 
2147 #if OMPT_SUPPORT && OMPT_OPTIONAL
2148  // This is the case, if called from omp_init_lock_with_hint:
2149  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2150  if (!codeptr)
2151  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2152  if (ompt_enabled.ompt_callback_lock_init) {
2153  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2154  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2155  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2156  codeptr);
2157  }
2158 #endif
2159 }
2160 
2161 #endif // KMP_USE_DYNAMIC_LOCK
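/* Usage sketch (illustrative): user code reaches the two hinted initializers
   above through the OpenMP 4.5 API, e.g. inside some function:

     omp_lock_t l;
     omp_nest_lock_t nl;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
     omp_init_nest_lock_with_hint(&nl, omp_lock_hint_contended);

   The hint is advisory: __kmp_map_hint_to_lock() picks a concrete lock
   sequence, and, as __kmp_init_nest_lock_with_hint() shows, speculative hints
   fall back to a non-speculative nested lock because no nested speculative
   implementation exists. */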
2162 
2163 /* initialize the lock */
2164 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2165 #if KMP_USE_DYNAMIC_LOCK
2166 
2167  KMP_DEBUG_ASSERT(__kmp_init_serial);
2168  if (__kmp_env_consistency_check && user_lock == NULL) {
2169  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2170  }
2171  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2172 
2173 #if OMPT_SUPPORT && OMPT_OPTIONAL
2174  // This is the case, if called from omp_init_lock_with_hint:
2175  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2176  if (!codeptr)
2177  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2178  if (ompt_enabled.ompt_callback_lock_init) {
2179  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2180  ompt_mutex_lock, omp_lock_hint_none,
2181  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2182  codeptr);
2183  }
2184 #endif
2185 
2186 #else // KMP_USE_DYNAMIC_LOCK
2187 
2188  static char const *const func = "omp_init_lock";
2189  kmp_user_lock_p lck;
2190  KMP_DEBUG_ASSERT(__kmp_init_serial);
2191 
2192  if (__kmp_env_consistency_check) {
2193  if (user_lock == NULL) {
2194  KMP_FATAL(LockIsUninitialized, func);
2195  }
2196  }
2197 
2198  KMP_CHECK_USER_LOCK_INIT();
2199 
2200  if ((__kmp_user_lock_kind == lk_tas) &&
2201  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2202  lck = (kmp_user_lock_p)user_lock;
2203  }
2204 #if KMP_USE_FUTEX
2205  else if ((__kmp_user_lock_kind == lk_futex) &&
2206  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2207  lck = (kmp_user_lock_p)user_lock;
2208  }
2209 #endif
2210  else {
2211  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2212  }
2213  INIT_LOCK(lck);
2214  __kmp_set_user_lock_location(lck, loc);
2215 
2216 #if OMPT_SUPPORT && OMPT_OPTIONAL
2217  // This is the case, if called from omp_init_lock_with_hint:
2218  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2219  if (!codeptr)
2220  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2221  if (ompt_enabled.ompt_callback_lock_init) {
2222  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2223  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2224  (omp_wait_id_t)user_lock, codeptr);
2225  }
2226 #endif
2227 
2228 #if USE_ITT_BUILD
2229  __kmp_itt_lock_creating(lck);
2230 #endif /* USE_ITT_BUILD */
2231 
2232 #endif // KMP_USE_DYNAMIC_LOCK
2233 } // __kmpc_init_lock
2234 
2235 /* initialize the lock */
2236 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2237 #if KMP_USE_DYNAMIC_LOCK
2238 
2239  KMP_DEBUG_ASSERT(__kmp_init_serial);
2240  if (__kmp_env_consistency_check && user_lock == NULL) {
2241  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2242  }
2243  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2244 
2245 #if OMPT_SUPPORT && OMPT_OPTIONAL
2246  // This is the case, if called from omp_init_lock_with_hint:
2247  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2248  if (!codeptr)
2249  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2250  if (ompt_enabled.ompt_callback_lock_init) {
2251  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2252  ompt_mutex_nest_lock, omp_lock_hint_none,
2253  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2254  codeptr);
2255  }
2256 #endif
2257 
2258 #else // KMP_USE_DYNAMIC_LOCK
2259 
2260  static char const *const func = "omp_init_nest_lock";
2261  kmp_user_lock_p lck;
2262  KMP_DEBUG_ASSERT(__kmp_init_serial);
2263 
2264  if (__kmp_env_consistency_check) {
2265  if (user_lock == NULL) {
2266  KMP_FATAL(LockIsUninitialized, func);
2267  }
2268  }
2269 
2270  KMP_CHECK_USER_LOCK_INIT();
2271 
2272  if ((__kmp_user_lock_kind == lk_tas) &&
2273  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2274  OMP_NEST_LOCK_T_SIZE)) {
2275  lck = (kmp_user_lock_p)user_lock;
2276  }
2277 #if KMP_USE_FUTEX
2278  else if ((__kmp_user_lock_kind == lk_futex) &&
2279  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2280  OMP_NEST_LOCK_T_SIZE)) {
2281  lck = (kmp_user_lock_p)user_lock;
2282  }
2283 #endif
2284  else {
2285  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2286  }
2287 
2288  INIT_NESTED_LOCK(lck);
2289  __kmp_set_user_lock_location(lck, loc);
2290 
2291 #if OMPT_SUPPORT && OMPT_OPTIONAL
2292  // This is the case, if called from omp_init_lock_with_hint:
2293  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2294  if (!codeptr)
2295  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2296  if (ompt_enabled.ompt_callback_lock_init) {
2297  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2298  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2299  (omp_wait_id_t)user_lock, codeptr);
2300  }
2301 #endif
2302 
2303 #if USE_ITT_BUILD
2304  __kmp_itt_lock_creating(lck);
2305 #endif /* USE_ITT_BUILD */
2306 
2307 #endif // KMP_USE_DYNAMIC_LOCK
2308 } // __kmpc_init_nest_lock
2309 
2310 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2311 #if KMP_USE_DYNAMIC_LOCK
2312 
2313 #if USE_ITT_BUILD
2314  kmp_user_lock_p lck;
2315  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2316  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2317  } else {
2318  lck = (kmp_user_lock_p)user_lock;
2319  }
2320  __kmp_itt_lock_destroyed(lck);
2321 #endif
2322 #if OMPT_SUPPORT && OMPT_OPTIONAL
2323  // This is the case, if called from omp_init_lock_with_hint:
2324  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2325  if (!codeptr)
2326  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2327  if (ompt_enabled.ompt_callback_lock_destroy) {
2328  kmp_user_lock_p lck;
2329  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2330  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2331  } else {
2332  lck = (kmp_user_lock_p)user_lock;
2333  }
2334  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2335  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2336  }
2337 #endif
2338  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2339 #else
2340  kmp_user_lock_p lck;
2341 
2342  if ((__kmp_user_lock_kind == lk_tas) &&
2343  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2344  lck = (kmp_user_lock_p)user_lock;
2345  }
2346 #if KMP_USE_FUTEX
2347  else if ((__kmp_user_lock_kind == lk_futex) &&
2348  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2349  lck = (kmp_user_lock_p)user_lock;
2350  }
2351 #endif
2352  else {
2353  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2354  }
2355 
2356 #if OMPT_SUPPORT && OMPT_OPTIONAL
2357  // This is the case, if called from omp_init_lock_with_hint:
2358  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2359  if (!codeptr)
2360  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2361  if (ompt_enabled.ompt_callback_lock_destroy) {
2362  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2363  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2364  }
2365 #endif
2366 
2367 #if USE_ITT_BUILD
2368  __kmp_itt_lock_destroyed(lck);
2369 #endif /* USE_ITT_BUILD */
2370  DESTROY_LOCK(lck);
2371 
2372  if ((__kmp_user_lock_kind == lk_tas) &&
2373  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2374  ;
2375  }
2376 #if KMP_USE_FUTEX
2377  else if ((__kmp_user_lock_kind == lk_futex) &&
2378  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2379  ;
2380  }
2381 #endif
2382  else {
2383  __kmp_user_lock_free(user_lock, gtid, lck);
2384  }
2385 #endif // KMP_USE_DYNAMIC_LOCK
2386 } // __kmpc_destroy_lock
2387 
2388 /* destroy the lock */
2389 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2390 #if KMP_USE_DYNAMIC_LOCK
2391 
2392 #if USE_ITT_BUILD
2393  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2394  __kmp_itt_lock_destroyed(ilk->lock);
2395 #endif
2396 #if OMPT_SUPPORT && OMPT_OPTIONAL
2397  // This is the case, if called from omp_init_lock_with_hint:
2398  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2399  if (!codeptr)
2400  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2401  if (ompt_enabled.ompt_callback_lock_destroy) {
2402  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2403  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2404  }
2405 #endif
2406  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2407 
2408 #else // KMP_USE_DYNAMIC_LOCK
2409 
2410  kmp_user_lock_p lck;
2411 
2412  if ((__kmp_user_lock_kind == lk_tas) &&
2413  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2414  OMP_NEST_LOCK_T_SIZE)) {
2415  lck = (kmp_user_lock_p)user_lock;
2416  }
2417 #if KMP_USE_FUTEX
2418  else if ((__kmp_user_lock_kind == lk_futex) &&
2419  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2420  OMP_NEST_LOCK_T_SIZE)) {
2421  lck = (kmp_user_lock_p)user_lock;
2422  }
2423 #endif
2424  else {
2425  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2426  }
2427 
2428 #if OMPT_SUPPORT && OMPT_OPTIONAL
2429  // This is the case, if called from omp_init_lock_with_hint:
2430  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2431  if (!codeptr)
2432  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2433  if (ompt_enabled.ompt_callback_lock_destroy) {
2434  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2435  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2436  }
2437 #endif
2438 
2439 #if USE_ITT_BUILD
2440  __kmp_itt_lock_destroyed(lck);
2441 #endif /* USE_ITT_BUILD */
2442 
2443  DESTROY_NESTED_LOCK(lck);
2444 
2445  if ((__kmp_user_lock_kind == lk_tas) &&
2446  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2447  OMP_NEST_LOCK_T_SIZE)) {
2448  ;
2449  }
2450 #if KMP_USE_FUTEX
2451  else if ((__kmp_user_lock_kind == lk_futex) &&
2452  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2453  OMP_NEST_LOCK_T_SIZE)) {
2454  ;
2455  }
2456 #endif
2457  else {
2458  __kmp_user_lock_free(user_lock, gtid, lck);
2459  }
2460 #endif // KMP_USE_DYNAMIC_LOCK
2461 } // __kmpc_destroy_nest_lock
2462 
2463 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2464  KMP_COUNT_BLOCK(OMP_set_lock);
2465 #if KMP_USE_DYNAMIC_LOCK
2466  int tag = KMP_EXTRACT_D_TAG(user_lock);
2467 #if USE_ITT_BUILD
2468  __kmp_itt_lock_acquiring(
2469  (kmp_user_lock_p)
2470  user_lock); // itt function will get to the right lock object.
2471 #endif
2472 #if OMPT_SUPPORT && OMPT_OPTIONAL
2473  // This is the case, if called from omp_init_lock_with_hint:
2474  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2475  if (!codeptr)
2476  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2477  if (ompt_enabled.ompt_callback_mutex_acquire) {
2478  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2479  ompt_mutex_lock, omp_lock_hint_none,
2480  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2481  codeptr);
2482  }
2483 #endif
2484 #if KMP_USE_INLINED_TAS
2485  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2486  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2487  } else
2488 #elif KMP_USE_INLINED_FUTEX
2489  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2490  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2491  } else
2492 #endif
2493  {
2494  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2495  }
2496 #if USE_ITT_BUILD
2497  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2498 #endif
2499 #if OMPT_SUPPORT && OMPT_OPTIONAL
2500  if (ompt_enabled.ompt_callback_mutex_acquired) {
2501  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2502  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2503  }
2504 #endif
2505 
2506 #else // KMP_USE_DYNAMIC_LOCK
2507 
2508  kmp_user_lock_p lck;
2509 
2510  if ((__kmp_user_lock_kind == lk_tas) &&
2511  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2512  lck = (kmp_user_lock_p)user_lock;
2513  }
2514 #if KMP_USE_FUTEX
2515  else if ((__kmp_user_lock_kind == lk_futex) &&
2516  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2517  lck = (kmp_user_lock_p)user_lock;
2518  }
2519 #endif
2520  else {
2521  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2522  }
2523 
2524 #if USE_ITT_BUILD
2525  __kmp_itt_lock_acquiring(lck);
2526 #endif /* USE_ITT_BUILD */
2527 #if OMPT_SUPPORT && OMPT_OPTIONAL
2528  // This is the case, if called from omp_init_lock_with_hint:
2529  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2530  if (!codeptr)
2531  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2532  if (ompt_enabled.ompt_callback_mutex_acquire) {
2533  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2534  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2535  (omp_wait_id_t)lck, codeptr);
2536  }
2537 #endif
2538 
2539  ACQUIRE_LOCK(lck, gtid);
2540 
2541 #if USE_ITT_BUILD
2542  __kmp_itt_lock_acquired(lck);
2543 #endif /* USE_ITT_BUILD */
2544 
2545 #if OMPT_SUPPORT && OMPT_OPTIONAL
2546  if (ompt_enabled.ompt_callback_mutex_acquired) {
2547  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2548  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2549  }
2550 #endif
2551 
2552 #endif // KMP_USE_DYNAMIC_LOCK
2553 }
2554 
2555 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2556 #if KMP_USE_DYNAMIC_LOCK
2557 
2558 #if USE_ITT_BUILD
2559  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2560 #endif
2561 #if OMPT_SUPPORT && OMPT_OPTIONAL
2562  // This is the case, if called from omp_init_lock_with_hint:
2563  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2564  if (!codeptr)
2565  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2566  if (ompt_enabled.enabled) {
2567  if (ompt_enabled.ompt_callback_mutex_acquire) {
2568  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2569  ompt_mutex_nest_lock, omp_lock_hint_none,
2570  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2571  codeptr);
2572  }
2573  }
2574 #endif
2575  int acquire_status =
2576  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2577  (void) acquire_status;
2578 #if USE_ITT_BUILD
2579  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2580 #endif
2581 
2582 #if OMPT_SUPPORT && OMPT_OPTIONAL
2583  if (ompt_enabled.enabled) {
2584  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2585  if (ompt_enabled.ompt_callback_mutex_acquired) {
2586  // lock_first
2587  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2588  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2589  }
2590  } else {
2591  if (ompt_enabled.ompt_callback_nest_lock) {
2592  // lock_next
2593  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2594  ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
2595  }
2596  }
2597  }
2598 #endif
2599 
2600 #else // KMP_USE_DYNAMIC_LOCK
2601  int acquire_status;
2602  kmp_user_lock_p lck;
2603 
2604  if ((__kmp_user_lock_kind == lk_tas) &&
2605  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2606  OMP_NEST_LOCK_T_SIZE)) {
2607  lck = (kmp_user_lock_p)user_lock;
2608  }
2609 #if KMP_USE_FUTEX
2610  else if ((__kmp_user_lock_kind == lk_futex) &&
2611  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2612  OMP_NEST_LOCK_T_SIZE)) {
2613  lck = (kmp_user_lock_p)user_lock;
2614  }
2615 #endif
2616  else {
2617  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2618  }
2619 
2620 #if USE_ITT_BUILD
2621  __kmp_itt_lock_acquiring(lck);
2622 #endif /* USE_ITT_BUILD */
2623 #if OMPT_SUPPORT && OMPT_OPTIONAL
2624  // This is the case, if called from omp_init_lock_with_hint:
2625  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2626  if (!codeptr)
2627  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2628  if (ompt_enabled.enabled) {
2629  if (ompt_enabled.ompt_callback_mutex_acquire) {
2630  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2631  ompt_mutex_nest_lock, omp_lock_hint_none,
2632  __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
2633  }
2634  }
2635 #endif
2636 
2637  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2638 
2639 #if USE_ITT_BUILD
2640  __kmp_itt_lock_acquired(lck);
2641 #endif /* USE_ITT_BUILD */
2642 
2643 #if OMPT_SUPPORT && OMPT_OPTIONAL
2644  if (ompt_enabled.enabled) {
2645  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2646  if (ompt_enabled.ompt_callback_mutex_acquired) {
2647  // lock_first
2648  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2649  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2650  }
2651  } else {
2652  if (ompt_enabled.ompt_callback_nest_lock) {
2653  // lock_next
2654  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2655  ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
2656  }
2657  }
2658  }
2659 #endif
2660 
2661 #endif // KMP_USE_DYNAMIC_LOCK
2662 }
2663 
2664 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2665 #if KMP_USE_DYNAMIC_LOCK
2666 
2667  int tag = KMP_EXTRACT_D_TAG(user_lock);
2668 #if USE_ITT_BUILD
2669  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2670 #endif
2671 #if KMP_USE_INLINED_TAS
2672  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2673  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2674  } else
2675 #elif KMP_USE_INLINED_FUTEX
2676  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2677  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2678  } else
2679 #endif
2680  {
2681  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2682  }
2683 
2684 #if OMPT_SUPPORT && OMPT_OPTIONAL
2685  // This is the case, if called from omp_init_lock_with_hint:
2686  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2687  if (!codeptr)
2688  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2689  if (ompt_enabled.ompt_callback_mutex_released) {
2690  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2691  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2692  }
2693 #endif
2694 
2695 #else // KMP_USE_DYNAMIC_LOCK
2696 
2697  kmp_user_lock_p lck;
2698 
2699  /* Can't use serial interval since not block structured */
2700  /* release the lock */
2701 
2702  if ((__kmp_user_lock_kind == lk_tas) &&
2703  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2704 #if KMP_OS_LINUX && \
2705  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2706 // "fast" path implemented to fix customer performance issue
2707 #if USE_ITT_BUILD
2708  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2709 #endif /* USE_ITT_BUILD */
2710  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2711  KMP_MB();
2712 
2713 #if OMPT_SUPPORT && OMPT_OPTIONAL
2714  // This is the case, if called from omp_init_lock_with_hint:
2715  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2716  if (!codeptr)
2717  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2718  if (ompt_enabled.ompt_callback_mutex_released) {
2719  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2720  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2721  }
2722 #endif
2723 
2724  return;
2725 #else
2726  lck = (kmp_user_lock_p)user_lock;
2727 #endif
2728  }
2729 #if KMP_USE_FUTEX
2730  else if ((__kmp_user_lock_kind == lk_futex) &&
2731  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2732  lck = (kmp_user_lock_p)user_lock;
2733  }
2734 #endif
2735  else {
2736  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2737  }
2738 
2739 #if USE_ITT_BUILD
2740  __kmp_itt_lock_releasing(lck);
2741 #endif /* USE_ITT_BUILD */
2742 
2743  RELEASE_LOCK(lck, gtid);
2744 
2745 #if OMPT_SUPPORT && OMPT_OPTIONAL
2746  // This is the case, if called from omp_init_lock_with_hint:
2747  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2748  if (!codeptr)
2749  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2750  if (ompt_enabled.ompt_callback_mutex_released) {
2751  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2752  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2753  }
2754 #endif
2755 
2756 #endif // KMP_USE_DYNAMIC_LOCK
2757 }
2758 
2759 /* release the lock */
2760 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2761 #if KMP_USE_DYNAMIC_LOCK
2762 
2763 #if USE_ITT_BUILD
2764  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2765 #endif
2766  int release_status =
2767  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2768  (void) release_status;
2769 
2770 #if OMPT_SUPPORT && OMPT_OPTIONAL
2771  // This is the case, if called from omp_init_lock_with_hint:
2772  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2773  if (!codeptr)
2774  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2775  if (ompt_enabled.enabled) {
2776  if (release_status == KMP_LOCK_RELEASED) {
2777  if (ompt_enabled.ompt_callback_mutex_released) {
2778  // release_lock_last
2779  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2780  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2781  }
2782  } else if (ompt_enabled.ompt_callback_nest_lock) {
2783  // release_lock_prev
2784  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2785  ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
2786  }
2787  }
2788 #endif
2789 
2790 #else // KMP_USE_DYNAMIC_LOCK
2791 
2792  kmp_user_lock_p lck;
2793 
2794  /* Can't use serial interval since not block structured */
2795 
2796  if ((__kmp_user_lock_kind == lk_tas) &&
2797  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2798  OMP_NEST_LOCK_T_SIZE)) {
2799 #if KMP_OS_LINUX && \
2800  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2801  // "fast" path implemented to fix customer performance issue
2802  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2803 #if USE_ITT_BUILD
2804  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2805 #endif /* USE_ITT_BUILD */
2806 
2807 #if OMPT_SUPPORT && OMPT_OPTIONAL
2808  int release_status = KMP_LOCK_STILL_HELD;
2809 #endif
2810 
2811  if (--(tl->lk.depth_locked) == 0) {
2812  TCW_4(tl->lk.poll, 0);
2813 #if OMPT_SUPPORT && OMPT_OPTIONAL
2814  release_status = KMP_LOCK_RELEASED;
2815 #endif
2816  }
2817  KMP_MB();
2818 
2819 #if OMPT_SUPPORT && OMPT_OPTIONAL
2820  // This is the case, if called from omp_init_lock_with_hint:
2821  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2822  if (!codeptr)
2823  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2824  if (ompt_enabled.enabled) {
2825  if (release_status == KMP_LOCK_RELEASED) {
2826  if (ompt_enabled.ompt_callback_mutex_released) {
2827  // release_lock_last
2828  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2829  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2830  }
2831  } else if (ompt_enabled.ompt_callback_nest_lock) {
2832  // release_lock_previous
2833  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2834  ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
2835  }
2836  }
2837 #endif
2838 
2839  return;
2840 #else
2841  lck = (kmp_user_lock_p)user_lock;
2842 #endif
2843  }
2844 #if KMP_USE_FUTEX
2845  else if ((__kmp_user_lock_kind == lk_futex) &&
2846  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2847  OMP_NEST_LOCK_T_SIZE)) {
2848  lck = (kmp_user_lock_p)user_lock;
2849  }
2850 #endif
2851  else {
2852  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2853  }
2854 
2855 #if USE_ITT_BUILD
2856  __kmp_itt_lock_releasing(lck);
2857 #endif /* USE_ITT_BUILD */
2858 
2859  int release_status;
2860  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2861 #if OMPT_SUPPORT && OMPT_OPTIONAL
2862  // This is the case, if called from omp_init_lock_with_hint:
2863  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2864  if (!codeptr)
2865  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2866  if (ompt_enabled.enabled) {
2867  if (release_status == KMP_LOCK_RELEASED) {
2868  if (ompt_enabled.ompt_callback_mutex_released) {
2869  // release_lock_last
2870  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2871  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2872  }
2873  } else if (ompt_enabled.ompt_callback_nest_lock) {
2874  // release_lock_previous
2875  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2876  ompt_scope_end, (omp_wait_id_t)lck, codeptr);
2877  }
2878  }
2879 #endif
2880 
2881 #endif // KMP_USE_DYNAMIC_LOCK
2882 }
2883 
2884 /* try to acquire the lock */
2885 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2886  KMP_COUNT_BLOCK(OMP_test_lock);
2887 
2888 #if KMP_USE_DYNAMIC_LOCK
2889  int rc;
2890  int tag = KMP_EXTRACT_D_TAG(user_lock);
2891 #if USE_ITT_BUILD
2892  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2893 #endif
2894 #if OMPT_SUPPORT && OMPT_OPTIONAL
2895  // This is the case, if called from omp_init_lock_with_hint:
2896  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2897  if (!codeptr)
2898  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2899  if (ompt_enabled.ompt_callback_mutex_acquire) {
2900  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2901  ompt_mutex_lock, omp_lock_hint_none,
2902  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2903  codeptr);
2904  }
2905 #endif
2906 #if KMP_USE_INLINED_TAS
2907  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2908  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2909  } else
2910 #elif KMP_USE_INLINED_FUTEX
2911  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2912  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2913  } else
2914 #endif
2915  {
2916  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2917  }
2918  if (rc) {
2919 #if USE_ITT_BUILD
2920  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2921 #endif
2922 #if OMPT_SUPPORT && OMPT_OPTIONAL
2923  if (ompt_enabled.ompt_callback_mutex_acquired) {
2924  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2925  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2926  }
2927 #endif
2928  return FTN_TRUE;
2929  } else {
2930 #if USE_ITT_BUILD
2931  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2932 #endif
2933  return FTN_FALSE;
2934  }
2935 
2936 #else // KMP_USE_DYNAMIC_LOCK
2937 
2938  kmp_user_lock_p lck;
2939  int rc;
2940 
2941  if ((__kmp_user_lock_kind == lk_tas) &&
2942  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2943  lck = (kmp_user_lock_p)user_lock;
2944  }
2945 #if KMP_USE_FUTEX
2946  else if ((__kmp_user_lock_kind == lk_futex) &&
2947  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2948  lck = (kmp_user_lock_p)user_lock;
2949  }
2950 #endif
2951  else {
2952  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
2953  }
2954 
2955 #if USE_ITT_BUILD
2956  __kmp_itt_lock_acquiring(lck);
2957 #endif /* USE_ITT_BUILD */
2958 #if OMPT_SUPPORT && OMPT_OPTIONAL
2959  // This is the case, if called from omp_init_lock_with_hint:
2960  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2961  if (!codeptr)
2962  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2963  if (ompt_enabled.ompt_callback_mutex_acquire) {
2964  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2965  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2966  (omp_wait_id_t)lck, codeptr);
2967  }
2968 #endif
2969 
2970  rc = TEST_LOCK(lck, gtid);
2971 #if USE_ITT_BUILD
2972  if (rc) {
2973  __kmp_itt_lock_acquired(lck);
2974  } else {
2975  __kmp_itt_lock_cancelled(lck);
2976  }
2977 #endif /* USE_ITT_BUILD */
2978 #if OMPT_SUPPORT && OMPT_OPTIONAL
2979  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
2980  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2981  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2982  }
2983 #endif
2984 
2985  return (rc ? FTN_TRUE : FTN_FALSE);
2986 
2987 /* Can't use serial interval since not block structured */
2988 
2989 #endif // KMP_USE_DYNAMIC_LOCK
2990 }
2991 
2992 /* try to acquire the lock */
2993 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2994 #if KMP_USE_DYNAMIC_LOCK
2995  int rc;
2996 #if USE_ITT_BUILD
2997  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2998 #endif
2999 #if OMPT_SUPPORT && OMPT_OPTIONAL
3000  // This is the case, if called from omp_init_lock_with_hint:
3001  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3002  if (!codeptr)
3003  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3004  if (ompt_enabled.ompt_callback_mutex_acquire) {
3005  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3006  ompt_mutex_nest_lock, omp_lock_hint_none,
3007  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
3008  codeptr);
3009  }
3010 #endif
3011  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3012 #if USE_ITT_BUILD
3013  if (rc) {
3014  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3015  } else {
3016  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3017  }
3018 #endif
3019 #if OMPT_SUPPORT && OMPT_OPTIONAL
3020  if (ompt_enabled.enabled && rc) {
3021  if (rc == 1) {
3022  if (ompt_enabled.ompt_callback_mutex_acquired) {
3023  // lock_first
3024  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3025  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
3026  }
3027  } else {
3028  if (ompt_enabled.ompt_callback_nest_lock) {
3029  // lock_next
3030  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3031  ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
3032  }
3033  }
3034  }
3035 #endif
3036  return rc;
3037 
3038 #else // KMP_USE_DYNAMIC_LOCK
3039 
3040  kmp_user_lock_p lck;
3041  int rc;
3042 
3043  if ((__kmp_user_lock_kind == lk_tas) &&
3044  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3045  OMP_NEST_LOCK_T_SIZE)) {
3046  lck = (kmp_user_lock_p)user_lock;
3047  }
3048 #if KMP_USE_FUTEX
3049  else if ((__kmp_user_lock_kind == lk_futex) &&
3050  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3051  OMP_NEST_LOCK_T_SIZE)) {
3052  lck = (kmp_user_lock_p)user_lock;
3053  }
3054 #endif
3055  else {
3056  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3057  }
3058 
3059 #if USE_ITT_BUILD
3060  __kmp_itt_lock_acquiring(lck);
3061 #endif /* USE_ITT_BUILD */
3062 
3063 #if OMPT_SUPPORT && OMPT_OPTIONAL
3064  // This is the case, if called from omp_init_lock_with_hint:
3065  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3066  if (!codeptr)
3067  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3068  if (ompt_enabled.enabled &&
3069  ompt_enabled.ompt_callback_mutex_acquire) {
3070  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3071  ompt_mutex_nest_lock, omp_lock_hint_none,
3072  __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
3073  }
3074 #endif
3075 
3076  rc = TEST_NESTED_LOCK(lck, gtid);
3077 #if USE_ITT_BUILD
3078  if (rc) {
3079  __kmp_itt_lock_acquired(lck);
3080  } else {
3081  __kmp_itt_lock_cancelled(lck);
3082  }
3083 #endif /* USE_ITT_BUILD */
3084 #if OMPT_SUPPORT && OMPT_OPTIONAL
3085  if (ompt_enabled.enabled && rc) {
3086  if (rc == 1) {
3087  if (ompt_enabled.ompt_callback_mutex_acquired) {
3088  // lock_first
3089  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3090  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
3091  }
3092  } else {
3093  if (ompt_enabled.ompt_callback_nest_lock) {
3094  // lock_next
3095  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3096  ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
3097  }
3098  }
3099  }
3100 #endif
3101  return rc;
3102 
3103 /* Can't use serial interval since not block structured */
3104 
3105 #endif // KMP_USE_DYNAMIC_LOCK
3106 }
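/* Semantics note (illustrative usage): on success the return value is the
   resulting nesting count, which is why the OMPT code above reports rc == 1 as
   a first acquisition and rc > 1 as a re-acquisition by the owning thread. At
   the user level this is omp_test_nest_lock():

     omp_nest_lock_t nl; // assume it was initialized elsewhere
     int depth = omp_test_nest_lock(&nl);
     if (depth > 0) {
       // lock is held; depth is how many times this thread now owns it
       omp_unset_nest_lock(&nl);
     }
*/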
3107 
3108 // Interface to fast scalable reduce methods routines
3109 
3110 // keep the selected method in a thread local structure for cross-function
3111 // usage: will be used in __kmpc_end_reduce* functions;
3112 // another solution: to re-determine the method one more time in
3113 // __kmpc_end_reduce* functions (new prototype required then)
3114 // AT: which solution is better?
3115 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3116  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3117 
3118 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3119  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3120 
3121 // description of the packed_reduction_method variable: look at the macros in
3122 // kmp.h
3123 
3124 // used in a critical section reduce block
3125 static __forceinline void
3126 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3127  kmp_critical_name *crit) {
3128 
3129  // this lock was visible to a customer and to the threading profile tool as a
3130  // serial overhead span (although it's used for an internal purpose only)
3131  // why was it visible in previous implementation?
3132  // should we keep it visible in new reduce block?
3133  kmp_user_lock_p lck;
3134 
3135 #if KMP_USE_DYNAMIC_LOCK
3136 
3137  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3138  // Check if it is initialized.
3139  if (*lk == 0) {
3140  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3141  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3142  KMP_GET_D_TAG(__kmp_user_lock_seq));
3143  } else {
3144  __kmp_init_indirect_csptr(crit, loc, global_tid,
3145  KMP_GET_I_TAG(__kmp_user_lock_seq));
3146  }
3147  }
3148  // Branch for accessing the actual lock object and set operation. This
3149  // branching is inevitable since this lock initialization does not follow the
3150  // normal dispatch path (lock table is not used).
3151  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3152  lck = (kmp_user_lock_p)lk;
3153  KMP_DEBUG_ASSERT(lck != NULL);
3154  if (__kmp_env_consistency_check) {
3155  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3156  }
3157  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3158  } else {
3159  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3160  lck = ilk->lock;
3161  KMP_DEBUG_ASSERT(lck != NULL);
3162  if (__kmp_env_consistency_check) {
3163  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3164  }
3165  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3166  }
3167 
3168 #else // KMP_USE_DYNAMIC_LOCK
3169 
3170  // We know that the fast reduction code is only emitted by Intel compilers
3171  // with 32 byte critical sections. If there isn't enough space, then we
3172  // have to use a pointer.
3173  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3174  lck = (kmp_user_lock_p)crit;
3175  } else {
3176  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3177  }
3178  KMP_DEBUG_ASSERT(lck != NULL);
3179 
3180  if (__kmp_env_consistency_check)
3181  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3182 
3183  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3184 
3185 #endif // KMP_USE_DYNAMIC_LOCK
3186 }
3187 
3188 // used in a critical section reduce block
3189 static __forceinline void
3190 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3191  kmp_critical_name *crit) {
3192 
3193  kmp_user_lock_p lck;
3194 
3195 #if KMP_USE_DYNAMIC_LOCK
3196 
3197  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3198  lck = (kmp_user_lock_p)crit;
3199  if (__kmp_env_consistency_check)
3200  __kmp_pop_sync(global_tid, ct_critical, loc);
3201  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3202  } else {
3203  kmp_indirect_lock_t *ilk =
3204  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3205  if (__kmp_env_consistency_check)
3206  __kmp_pop_sync(global_tid, ct_critical, loc);
3207  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3208  }
3209 
3210 #else // KMP_USE_DYNAMIC_LOCK
3211 
3212  // We know that the fast reduction code is only emitted by Intel compilers
3213  // with 32 byte critical sections. If there isn't enough space, then we have
3214  // to use a pointer.
3215  if (__kmp_base_user_lock_size > 32) {
3216  lck = *((kmp_user_lock_p *)crit);
3217  KMP_ASSERT(lck != NULL);
3218  } else {
3219  lck = (kmp_user_lock_p)crit;
3220  }
3221 
3222  if (__kmp_env_consistency_check)
3223  __kmp_pop_sync(global_tid, ct_critical, loc);
3224 
3225  __kmp_release_user_lock_with_checks(lck, global_tid);
3226 
3227 #endif // KMP_USE_DYNAMIC_LOCK
3228 } // __kmp_end_critical_section_reduce_block
3229 
3230 #if OMP_40_ENABLED
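// Helpers for a reduction that occurs directly inside a teams construct: the
// first routine temporarily re-points the thread at the parent team, so the
// reduction synchronizes across the team masters of the league, and the second
// routine restores the saved thread state afterwards.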
3231 static __forceinline int
3232 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3233  int *task_state) {
3234  kmp_team_t *team;
3235 
3236  // Check whether we are inside a teams construct.
3237  if (th->th.th_teams_microtask) {
3238  *team_p = team = th->th.th_team;
3239  if (team->t.t_level == th->th.th_teams_level) {
3240  // This is a reduction at the teams construct.
3241  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3242  // Let's swap teams temporarily for the reduction.
3243  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3244  th->th.th_team = team->t.t_parent;
3245  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3246  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3247  *task_state = th->th.th_task_state;
3248  th->th.th_task_state = 0;
3249 
3250  return 1;
3251  }
3252  }
3253  return 0;
3254 }
3255 
3256 static __forceinline void
3257 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3258  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3259  th->th.th_info.ds.ds_tid = 0;
3260  th->th.th_team = team;
3261  th->th.th_team_nproc = team->t.t_nproc;
3262  th->th.th_task_team = team->t.t_task_team[task_state];
3263  th->th.th_task_state = task_state;
3264 }
3265 #endif
3266 
3267 /* 2.a.i. Reduce Block without a terminating barrier */
3283 kmp_int32
3284 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3285  size_t reduce_size, void *reduce_data,
3286  void (*reduce_func)(void *lhs_data, void *rhs_data),
3287  kmp_critical_name *lck) {
3288 
3289  KMP_COUNT_BLOCK(REDUCE_nowait);
3290  int retval = 0;
3291  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3292 #if OMP_40_ENABLED
3293  kmp_info_t *th;
3294  kmp_team_t *team;
3295  int teams_swapped = 0, task_state;
3296 #endif
3297  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3298 
3299  // why do we need this initialization here at all?
3300  // The reduction clause cannot be used as a stand-alone directive.
3301 
3302  // do not call __kmp_serial_initialize(), it will be called by
3303  // __kmp_parallel_initialize() if needed
3304  // possible detection of false-positive race by the threadchecker ???
3305  if (!TCR_4(__kmp_init_parallel))
3306  __kmp_parallel_initialize();
3307 
3308 // check correctness of reduce block nesting
3309 #if KMP_USE_DYNAMIC_LOCK
3310  if (__kmp_env_consistency_check)
3311  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3312 #else
3313  if (__kmp_env_consistency_check)
3314  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3315 #endif
3316 
3317 #if OMP_40_ENABLED
3318  th = __kmp_thread_from_gtid(global_tid);
3319  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3320 #endif // OMP_40_ENABLED
3321 
3322  // the packed_reduction_method value will be reused by the __kmpc_end_reduce*
3323  // functions, so the value should be kept in a variable
3324  // the variable should be either a construct-specific or thread-specific
3325  // property, not a team specific property
3326  // (a thread can reach the next reduce block on the next construct, reduce
3327  // method may differ on the next construct)
3328  // an ident_t "loc" parameter could be used as a construct-specific property
3329  // (what if loc == 0?)
3330  // (if both construct-specific and team-specific variables were shared,
3331  // then unnecessary extra syncs would be needed)
3332  // a thread-specific variable is better regarding the two issues above (next
3333  // construct and extra syncs)
3334  // a thread-specific "th_local.reduction_method" variable is used currently
3335  // each thread executes the 'determine' and 'set' lines (no need to restrict
3336  // this to one thread, which would only add unnecessary extra syncs)
3337 
3338  packed_reduction_method = __kmp_determine_reduction_method(
3339  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3340  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3341 
3342  if (packed_reduction_method == critical_reduce_block) {
3343 
3344  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3345  retval = 1;
3346 
3347  } else if (packed_reduction_method == empty_reduce_block) {
3348 
3349  // usage: if team size == 1, no synchronization is required ( Intel
3350  // platforms only )
3351  retval = 1;
3352 
3353  } else if (packed_reduction_method == atomic_reduce_block) {
3354 
3355  retval = 2;
3356 
3357  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3358  // won't be called by the code gen)
3359  // (this is not ideal: the checking block has already been closed by
3360  // this 'pop',
3361  // but the atomic operation has not been executed yet; it will execute
3362  // slightly later, literally on the next instruction)
3363  if (__kmp_env_consistency_check)
3364  __kmp_pop_sync(global_tid, ct_reduce, loc);
3365 
3366  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3367  tree_reduce_block)) {
3368 
3369 // AT: performance issue: a real barrier here
3370 // AT: (if master goes slow, other threads are blocked here waiting for the
3371 // master to come and release them)
3372 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3373 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3374 // be confusing to a customer)
3375 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3376 // might go faster and be more in line with the sense of NOWAIT
3377 // AT: TODO: run the EPCC test and compare times
3378 
3379 // this barrier should be invisible to a customer and to the threading profile
3380 // tool (it's neither a terminating barrier nor customer's code, it's
3381 // used for an internal purpose)
3382 #if OMPT_SUPPORT
3383  // JP: can this barrier potentially lead to task scheduling?
3384  // JP: as long as there is a barrier in the implementation, OMPT should and
3385  // will provide the barrier events
3386  // so we set-up the necessary frame/return addresses.
3387  omp_frame_t *ompt_frame;
3388  if (ompt_enabled.enabled) {
3389  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3390  if (ompt_frame->enter_frame == NULL)
3391  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3392  OMPT_STORE_RETURN_ADDRESS(global_tid);
3393  }
3394 #endif
3395 #if USE_ITT_NOTIFY
3396  __kmp_threads[global_tid]->th.th_ident = loc;
3397 #endif
3398  retval =
3399  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3400  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3401  retval = (retval != 0) ? (0) : (1);
3402 #if OMPT_SUPPORT && OMPT_OPTIONAL
3403  if (ompt_enabled.enabled) {
3404  ompt_frame->enter_frame = NULL;
3405  }
3406 #endif
3407 
3408  // all other workers except master should do this pop here
3409  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3410  if (__kmp_env_consistency_check) {
3411  if (retval == 0) {
3412  __kmp_pop_sync(global_tid, ct_reduce, loc);
3413  }
3414  }
3415 
3416  } else {
3417 
3418  // should never reach this block
3419  KMP_ASSERT(0); // "unexpected method"
3420  }
3421 #if OMP_40_ENABLED
3422  if (teams_swapped) {
3423  __kmp_restore_swapped_teams(th, team, task_state);
3424  }
3425 #endif
3426  KA_TRACE(
3427  10,
3428  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3429  global_tid, packed_reduction_method, retval));
3430 
3431  return retval;
3432 }
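/* Codegen sketch (illustrative; actual compiler output differs in detail):

     switch (__kmpc_reduce_nowait(loc, gtid, n, size, data, reduce_func, &crit)) {
     case 1: // critical / empty / tree(master) path
       // combine the private copies into the shared result here
       __kmpc_end_reduce_nowait(loc, gtid, &crit);
       break;
     case 2: // atomic path: one atomic update per reduction variable;
             // __kmpc_end_reduce_nowait() is not called in this case
       break;
     default: // 0: tree path, non-master threads -- their contribution was
              // already combined by reduce_func inside the barrier
       break;
     }

   The blocking __kmpc_reduce()/__kmpc_end_reduce() pair below follows the same
   pattern, with a terminating barrier added in __kmpc_end_reduce(). */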
3433 
3442 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3443  kmp_critical_name *lck) {
3444 
3445  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3446 
3447  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3448 
3449  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3450 
3451  if (packed_reduction_method == critical_reduce_block) {
3452 
3453  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3454 
3455  } else if (packed_reduction_method == empty_reduce_block) {
3456 
3457  // usage: if team size == 1, no synchronization is required ( on Intel
3458  // platforms only )
3459 
3460  } else if (packed_reduction_method == atomic_reduce_block) {
3461 
3462  // neither master nor other workers should get here
3463  // (code gen does not generate this call in case 2: atomic reduce block)
3464  // actually it's better to remove this elseif at all;
3465  // after removal this value will checked by the 'else' and will assert
3466 
3467  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3468  tree_reduce_block)) {
3469 
3470  // only master gets here
3471 
3472  } else {
3473 
3474  // should never reach this block
3475  KMP_ASSERT(0); // "unexpected method"
3476  }
3477 
3478  if (__kmp_env_consistency_check)
3479  __kmp_pop_sync(global_tid, ct_reduce, loc);
3480 
3481  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3482  global_tid, packed_reduction_method));
3483 
3484  return;
3485 }
3486 
3487 /* 2.a.ii. Reduce Block with a terminating barrier */
3488 
3504 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3505  size_t reduce_size, void *reduce_data,
3506  void (*reduce_func)(void *lhs_data, void *rhs_data),
3507  kmp_critical_name *lck) {
3508  KMP_COUNT_BLOCK(REDUCE_wait);
3509  int retval = 0;
3510  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3511 #if OMP_40_ENABLED
3512  kmp_info_t *th;
3513  kmp_team_t *team;
3514  int teams_swapped = 0, task_state;
3515 #endif
3516 
3517  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3518 
3519  // why do we need this initialization here at all?
3520  // The reduction clause cannot be used as a stand-alone directive.
3521 
3522  // do not call __kmp_serial_initialize(), it will be called by
3523  // __kmp_parallel_initialize() if needed
3524  // possible detection of false-positive race by the threadchecker ???
3525  if (!TCR_4(__kmp_init_parallel))
3526  __kmp_parallel_initialize();
3527 
3528 // check correctness of reduce block nesting
3529 #if KMP_USE_DYNAMIC_LOCK
3530  if (__kmp_env_consistency_check)
3531  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3532 #else
3533  if (__kmp_env_consistency_check)
3534  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3535 #endif
3536 
3537 #if OMP_40_ENABLED
3538  th = __kmp_thread_from_gtid(global_tid);
3539  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3540 #endif // OMP_40_ENABLED
3541 
3542  packed_reduction_method = __kmp_determine_reduction_method(
3543  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3544  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3545 
3546  if (packed_reduction_method == critical_reduce_block) {
3547 
3548  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3549  retval = 1;
3550 
3551  } else if (packed_reduction_method == empty_reduce_block) {
3552 
3553  // usage: if team size == 1, no synchronization is required ( Intel
3554  // platforms only )
3555  retval = 1;
3556 
3557  } else if (packed_reduction_method == atomic_reduce_block) {
3558 
3559  retval = 2;
3560 
3561  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3562  tree_reduce_block)) {
3563 
3564 // case tree_reduce_block:
3565 // this barrier should be visible to a customer and to the threading profile
3566 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3567 #if OMPT_SUPPORT
3568  omp_frame_t *ompt_frame;
3569  if (ompt_enabled.enabled) {
3570  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3571  if (ompt_frame->enter_frame == NULL)
3572  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3573  OMPT_STORE_RETURN_ADDRESS(global_tid);
3574  }
3575 #endif
3576 #if USE_ITT_NOTIFY
3577  __kmp_threads[global_tid]->th.th_ident =
3578  loc; // needed for correct notification of frames
3579 #endif
3580  retval =
3581  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3582  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3583  retval = (retval != 0) ? (0) : (1);
3584 #if OMPT_SUPPORT && OMPT_OPTIONAL
3585  if (ompt_enabled.enabled) {
3586  ompt_frame->enter_frame = NULL;
3587  }
3588 #endif
3589 
3590  // all other workers except master should do this pop here
3591  // ( none of other workers except master will enter __kmpc_end_reduce() )
3592  if (__kmp_env_consistency_check) {
3593  if (retval == 0) { // 0: all other workers; 1: master
3594  __kmp_pop_sync(global_tid, ct_reduce, loc);
3595  }
3596  }
3597 
3598  } else {
3599 
3600  // should never reach this block
3601  KMP_ASSERT(0); // "unexpected method"
3602  }
3603 #if OMP_40_ENABLED
3604  if (teams_swapped) {
3605  __kmp_restore_swapped_teams(th, team, task_state);
3606  }
3607 #endif
3608 
3609  KA_TRACE(10,
3610  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3611  global_tid, packed_reduction_method, retval));
3612 
3613  return retval;
3614 }
3615 
3626 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3627  kmp_critical_name *lck) {
3628 
3629  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3630 #if OMP_40_ENABLED
3631  kmp_info_t *th;
3632  kmp_team_t *team;
3633  int teams_swapped = 0, task_state;
3634 #endif
3635 
3636  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3637 
3638 #if OMP_40_ENABLED
3639  th = __kmp_thread_from_gtid(global_tid);
3640  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3641 #endif // OMP_40_ENABLED
3642 
3643  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3644 
3645  // this barrier should be visible to a customer and to the threading profile
3646  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3647 
3648  if (packed_reduction_method == critical_reduce_block) {
3649 
3650  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3651 
3652 // TODO: implicit barrier: should be exposed
3653 #if OMPT_SUPPORT
3654  omp_frame_t *ompt_frame;
3655  if (ompt_enabled.enabled) {
3656  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3657  if (ompt_frame->enter_frame == NULL)
3658  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3659  OMPT_STORE_RETURN_ADDRESS(global_tid);
3660  }
3661 #endif
3662 #if USE_ITT_NOTIFY
3663  __kmp_threads[global_tid]->th.th_ident = loc;
3664 #endif
3665  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3666 #if OMPT_SUPPORT && OMPT_OPTIONAL
3667  if (ompt_enabled.enabled) {
3668  ompt_frame->enter_frame = NULL;
3669  }
3670 #endif
3671 
3672  } else if (packed_reduction_method == empty_reduce_block) {
3673 
3674 // usage: if team size==1, no synchronization is required (Intel platforms only)
3675 
3676 // TODO: implicit barrier: should be exposed
3677 #if OMPT_SUPPORT
3678  omp_frame_t *ompt_frame;
3679  if (ompt_enabled.enabled) {
3680  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3681  if (ompt_frame->enter_frame == NULL)
3682  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3683  OMPT_STORE_RETURN_ADDRESS(global_tid);
3684  }
3685 #endif
3686 #if USE_ITT_NOTIFY
3687  __kmp_threads[global_tid]->th.th_ident = loc;
3688 #endif
3689  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3690 #if OMPT_SUPPORT && OMPT_OPTIONAL
3691  if (ompt_enabled.enabled) {
3692  ompt_frame->enter_frame = NULL;
3693  }
3694 #endif
3695 
3696  } else if (packed_reduction_method == atomic_reduce_block) {
3697 
3698 #if OMPT_SUPPORT
3699  omp_frame_t *ompt_frame;
3700  if (ompt_enabled.enabled) {
3701  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3702  if (ompt_frame->enter_frame == NULL)
3703  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3704  OMPT_STORE_RETURN_ADDRESS(global_tid);
3705  }
3706 #endif
3707 // TODO: implicit barrier: should be exposed
3708 #if USE_ITT_NOTIFY
3709  __kmp_threads[global_tid]->th.th_ident = loc;
3710 #endif
3711  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3712 #if OMPT_SUPPORT && OMPT_OPTIONAL
3713  if (ompt_enabled.enabled) {
3714  ompt_frame->enter_frame = NULL;
3715  }
3716 #endif
3717 
3718  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3719  tree_reduce_block)) {
3720 
3721  // only master executes here (master releases all other workers)
3722  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3723  global_tid);
3724 
3725  } else {
3726 
3727  // should never reach this block
3728  KMP_ASSERT(0); // "unexpected method"
3729  }
3730 #if OMP_40_ENABLED
3731  if (teams_swapped) {
3732  __kmp_restore_swapped_teams(th, team, task_state);
3733  }
3734 #endif
3735 
3736  if (__kmp_env_consistency_check)
3737  __kmp_pop_sync(global_tid, ct_reduce, loc);
3738 
3739  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3740  global_tid, packed_reduction_method));
3741 
3742  return;
3743 }
3744 
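A hedged illustration may help here: compilers targeting this runtime lower a reduction clause into a call to __kmpc_reduce (or __kmpc_reduce_nowait), the generated combining code, and then __kmpc_end_reduce (or __kmpc_end_reduce_nowait), with the runtime choosing among the critical-section, atomic, and tree-barrier methods dispatched above. The sketch below is plain user-level OpenMP in a separate translation unit; the exact lowering is compiler-dependent and is shown only to indicate which source construct exercises these entry points.

/* sketch: a reduction whose lowering is expected to bracket the combining
   code with __kmpc_reduce / __kmpc_end_reduce (exact codegen is up to the
   compiler); build with an OpenMP-enabled compiler, e.g. -fopenmp */
#include <stdio.h>

int main(void) {
  double sum = 0.0;
#pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += 0.001 * i;
  printf("sum = %f\n", sum); /* 0.001 * (999 * 1000 / 2) = 499.5 */
  return 0;
}
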
3745 #undef __KMP_GET_REDUCTION_METHOD
3746 #undef __KMP_SET_REDUCTION_METHOD
3747 
3748 /* end of interface to fast scalable reduce routines */
3749 
3750 kmp_uint64 __kmpc_get_taskid() {
3751 
3752  kmp_int32 gtid;
3753  kmp_info_t *thread;
3754 
3755  gtid = __kmp_get_gtid();
3756  if (gtid < 0) {
3757  return 0;
3758  }
3759  thread = __kmp_thread_from_gtid(gtid);
3760  return thread->th.th_current_task->td_task_id;
3761 
3762 } // __kmpc_get_taskid
3763 
3764 kmp_uint64 __kmpc_get_parent_taskid() {
3765 
3766  kmp_int32 gtid;
3767  kmp_info_t *thread;
3768  kmp_taskdata_t *parent_task;
3769 
3770  gtid = __kmp_get_gtid();
3771  if (gtid < 0) {
3772  return 0;
3773  }
3774  thread = __kmp_thread_from_gtid(gtid);
3775  parent_task = thread->th.th_current_task->td_parent;
3776  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3777 
3778 } // __kmpc_get_parent_taskid
3779 
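Both queries return 0 when the calling thread has no gtid registered with the runtime, or (for the parent variant) when the current task has no parent. The sketch below is a separate program linked against libomp that calls them directly to show that convention; repeating the prototypes with unsigned long long standing in for kmp_uint64 is an assumption for illustration, and applications normally have no reason to call these internal entry points.

/* sketch only: not part of this file; assumes kmp_uint64 == unsigned long long */
#include <stdio.h>

extern unsigned long long __kmpc_get_taskid(void);
extern unsigned long long __kmpc_get_parent_taskid(void);

int main(void) {
#pragma omp parallel num_threads(2)
  {
#pragma omp single
    printf("implicit task %llu, parent %llu\n", __kmpc_get_taskid(),
           __kmpc_get_parent_taskid());
  }
  return 0;
}
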
3780 #if OMP_45_ENABLED
3781 
3792 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3793  const struct kmp_dim *dims) {
3794  int j, idx;
3795  kmp_int64 last, trace_count;
3796  kmp_info_t *th = __kmp_threads[gtid];
3797  kmp_team_t *team = th->th.th_team;
3798  kmp_uint32 *flags;
3799  kmp_disp_t *pr_buf = th->th.th_dispatch;
3800  dispatch_shared_info_t *sh_buf;
3801 
3802  KA_TRACE(
3803  20,
3804  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3805  gtid, num_dims, !team->t.t_serialized));
3806  KMP_DEBUG_ASSERT(dims != NULL);
3807  KMP_DEBUG_ASSERT(num_dims > 0);
3808 
3809  if (team->t.t_serialized) {
3810  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3811  return; // no dependencies if team is serialized
3812  }
3813  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3814  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3815  // the next loop
3816  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3817 
3818  // Save bounds info into allocated private buffer
3819  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3820  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3821  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3822  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3823  pr_buf->th_doacross_info[0] =
3824  (kmp_int64)num_dims; // first element is number of dimensions
3825  // Also save the address of num_done so it can be accessed later without
3826  // knowing the buffer index
3827  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3828  pr_buf->th_doacross_info[2] = dims[0].lo;
3829  pr_buf->th_doacross_info[3] = dims[0].up;
3830  pr_buf->th_doacross_info[4] = dims[0].st;
3831  last = 5;
3832  for (j = 1; j < num_dims; ++j) {
3833  kmp_int64 range_length; // iteration count of dimension j (the count
3834  // for dims[0] is not stored in the buffer)
3835  if (dims[j].st == 1) { // most common case
3836  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3837  range_length = dims[j].up - dims[j].lo + 1;
3838  } else {
3839  if (dims[j].st > 0) {
3840  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3841  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3842  } else { // negative increment
3843  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3844  range_length =
3845  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3846  }
3847  }
3848  pr_buf->th_doacross_info[last++] = range_length;
3849  pr_buf->th_doacross_info[last++] = dims[j].lo;
3850  pr_buf->th_doacross_info[last++] = dims[j].up;
3851  pr_buf->th_doacross_info[last++] = dims[j].st;
3852  }
3853 
3854  // Compute total trip count.
3855  // Start with range of dims[0] which we don't need to keep in the buffer.
3856  if (dims[0].st == 1) { // most common case
3857  trace_count = dims[0].up - dims[0].lo + 1;
3858  } else if (dims[0].st > 0) {
3859  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3860  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3861  } else { // negative increment
3862  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3863  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3864  }
3865  for (j = 1; j < num_dims; ++j) {
3866  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3867  }
3868  KMP_DEBUG_ASSERT(trace_count > 0);
3869 
3870  // Check whether the shared buffer is still occupied by a previous loop
3871  // (the one that used index idx - __kmp_dispatch_num_buffers)
3872  if (idx != sh_buf->doacross_buf_idx) {
3873  // Shared buffer is occupied, wait for it to be free
3874  __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3875  __kmp_eq_4, NULL);
3876  }
3877 #if KMP_32_BIT_ARCH
3878  // Check if we are the first thread. After the CAS the first thread gets 0,
3879  // others get 1 if initialization is in progress, allocated pointer otherwise.
3880  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3881  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3882  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3883 #else
3884  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3885  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3886 #endif
3887  if (flags == NULL) {
3888  // we are the first thread, allocate the array of flags
3889  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3890  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3891  KMP_MB();
3892  sh_buf->doacross_flags = flags;
3893  } else if (flags == (kmp_uint32 *)1) {
3894 #if KMP_32_BIT_ARCH
3895  // initialization is still in progress, need to wait
3896  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3897 #else
3898  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3899 #endif
3900  KMP_YIELD(TRUE);
3901  KMP_MB();
3902  } else {
3903  KMP_MB();
3904  }
3905  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3906  pr_buf->th_doacross_flags =
3907  sh_buf->doacross_flags; // keep a private copy so that the shared
3908  // buffer is not touched on each iteration
3909  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3910 }
3911 
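These entry points back OpenMP 4.5 doacross loops. The hedged sketch below shows the user-level form that is expected to be lowered onto them: __kmpc_doacross_init receives one {lo, up, st} triple per dimension, __kmpc_doacross_wait runs before the dependent work for each sink, __kmpc_doacross_post after the iteration's work, and __kmpc_doacross_fini at the end of the loop. The exact lowering is compiler-dependent.

/* sketch (separate translation unit): a 2-D wavefront where iteration (i,j)
   waits for (i-1,j) and (i,j-1); the compiler is expected to emit
   __kmpc_doacross_init(..., num_dims = 2, dims = {{1, N-1, 1}, {1, N-1, 1}}),
   then _wait for each sink, _post for the source, and _fini at loop end */
#define N 256
static double a[N][N];

void wavefront(void) {
#pragma omp parallel for ordered(2)
  for (int i = 1; i < N; ++i) {
    for (int j = 1; j < N; ++j) {
#pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
      a[i][j] = 0.5 * (a[i - 1][j] + a[i][j - 1]);
#pragma omp ordered depend(source)
    }
  }
}
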
3912 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3913  kmp_int32 shft, num_dims, i;
3914  kmp_uint32 flag;
3915  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3916  kmp_info_t *th = __kmp_threads[gtid];
3917  kmp_team_t *team = th->th.th_team;
3918  kmp_disp_t *pr_buf;
3919  kmp_int64 lo, up, st;
3920 
3921  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3922  if (team->t.t_serialized) {
3923  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3924  return; // no dependencies if team is serialized
3925  }
3926 
3927  // calculate sequential iteration number and check out-of-bounds condition
3928  pr_buf = th->th.th_dispatch;
3929  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3930  num_dims = pr_buf->th_doacross_info[0];
3931  lo = pr_buf->th_doacross_info[2];
3932  up = pr_buf->th_doacross_info[3];
3933  st = pr_buf->th_doacross_info[4];
3934  if (st == 1) { // most common case
3935  if (vec[0] < lo || vec[0] > up) {
3936  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3937  "bounds [%lld,%lld]\n",
3938  gtid, vec[0], lo, up));
3939  return;
3940  }
3941  iter_number = vec[0] - lo;
3942  } else if (st > 0) {
3943  if (vec[0] < lo || vec[0] > up) {
3944  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3945  "bounds [%lld,%lld]\n",
3946  gtid, vec[0], lo, up));
3947  return;
3948  }
3949  iter_number = (kmp_uint64)(vec[0] - lo) / st;
3950  } else { // negative increment
3951  if (vec[0] > lo || vec[0] < up) {
3952  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3953  "bounds [%lld,%lld]\n",
3954  gtid, vec[0], lo, up));
3955  return;
3956  }
3957  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3958  }
3959  for (i = 1; i < num_dims; ++i) {
3960  kmp_int64 iter, ln;
3961  kmp_int32 j = i * 4;
3962  ln = pr_buf->th_doacross_info[j + 1];
3963  lo = pr_buf->th_doacross_info[j + 2];
3964  up = pr_buf->th_doacross_info[j + 3];
3965  st = pr_buf->th_doacross_info[j + 4];
3966  if (st == 1) {
3967  if (vec[i] < lo || vec[i] > up) {
3968  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3969  "bounds [%lld,%lld]\n",
3970  gtid, vec[i], lo, up));
3971  return;
3972  }
3973  iter = vec[i] - lo;
3974  } else if (st > 0) {
3975  if (vec[i] < lo || vec[i] > up) {
3976  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3977  "bounds [%lld,%lld]\n",
3978  gtid, vec[i], lo, up));
3979  return;
3980  }
3981  iter = (kmp_uint64)(vec[i] - lo) / st;
3982  } else { // st < 0
3983  if (vec[i] > lo || vec[i] < up) {
3984  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3985  "bounds [%lld,%lld]\n",
3986  gtid, vec[i], lo, up));
3987  return;
3988  }
3989  iter = (kmp_uint64)(lo - vec[i]) / (-st);
3990  }
3991  iter_number = iter + ln * iter_number;
3992  }
3993  shft = iter_number % 32; // use 32-bit granularity
3994  iter_number >>= 5; // divided by 32
3995  flag = 1 << shft;
3996  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
3997  KMP_YIELD(TRUE);
3998  }
3999  KMP_MB();
4000  KA_TRACE(20,
4001  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4002  gtid, (iter_number << 5) + shft));
4003 }
4004 
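Both the wait and post paths linearize the iteration vector with dimension 0 as the most significant digit (iter_number = iter + ln * iter_number applied over dimensions 1..num_dims-1) and then address one bit per iteration: word index iter_number >> 5, bit position iter_number % 32. A small standalone sketch of that arithmetic for a unit-stride 2-D nest follows; the helper name and parameters are illustrative only.

/* sketch: the linearization and bit addressing used above, for 2-D, st == 1 */
#include <assert.h>
#include <stdint.h>

static void doacross_bit_index(int64_t i, int64_t j, int64_t lo0, int64_t lo1,
                               int64_t range1, /* up1 - lo1 + 1 */
                               uint64_t *word, uint32_t *bit) {
  uint64_t linear =
      (uint64_t)(i - lo0) * (uint64_t)range1 + (uint64_t)(j - lo1);
  *word = linear >> 5; /* which 32-bit word of the flag array */
  *bit = linear % 32;  /* which bit inside that word          */
}

int main(void) {
  uint64_t word;
  uint32_t bit;
  /* i in [1,4], j in [1,8]: iteration (2,3) -> linear (2-1)*8 + (3-1) = 10 */
  doacross_bit_index(2, 3, 1, 1, 8, &word, &bit);
  assert(word == 0 && bit == 10);
  return 0;
}
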
4005 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4006  kmp_int32 shft, num_dims, i;
4007  kmp_uint32 flag;
4008  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4009  kmp_info_t *th = __kmp_threads[gtid];
4010  kmp_team_t *team = th->th.th_team;
4011  kmp_disp_t *pr_buf;
4012  kmp_int64 lo, st;
4013 
4014  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4015  if (team->t.t_serialized) {
4016  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4017  return; // no dependencies if team is serialized
4018  }
4019 
4020  // calculate sequential iteration number (same as in "wait" but no
4021  // out-of-bounds checks)
4022  pr_buf = th->th.th_dispatch;
4023  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4024  num_dims = pr_buf->th_doacross_info[0];
4025  lo = pr_buf->th_doacross_info[2];
4026  st = pr_buf->th_doacross_info[4];
4027  if (st == 1) { // most common case
4028  iter_number = vec[0] - lo;
4029  } else if (st > 0) {
4030  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4031  } else { // negative increment
4032  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4033  }
4034  for (i = 1; i < num_dims; ++i) {
4035  kmp_int64 iter, ln;
4036  kmp_int32 j = i * 4;
4037  ln = pr_buf->th_doacross_info[j + 1];
4038  lo = pr_buf->th_doacross_info[j + 2];
4039  st = pr_buf->th_doacross_info[j + 4];
4040  if (st == 1) {
4041  iter = vec[i] - lo;
4042  } else if (st > 0) {
4043  iter = (kmp_uint64)(vec[i] - lo) / st;
4044  } else { // st < 0
4045  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4046  }
4047  iter_number = iter + ln * iter_number;
4048  }
4049  shft = iter_number % 32; // use 32-bit granularity
4050  iter_number >>= 5; // divided by 32
4051  flag = 1 << shft;
4052  KMP_MB();
4053  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4054  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4055  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4056  (iter_number << 5) + shft));
4057 }
4058 
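The pairing of __kmpc_doacross_post with __kmpc_doacross_wait amounts to a publish/consume handshake on a single bit: the poster sets the iteration's bit after its stores (the KMP_MB() before the atomic OR), and the waiter spins until it observes the bit and then issues its own fence. One hedged way to express the same pattern with C11 atomics, rather than the runtime's own KMP_MB()/KMP_TEST_THEN_OR32 primitives, is sketched below.

/* sketch: release/acquire re-expression of the post/wait handshake; this is
   not the runtime's actual implementation */
#include <stdatomic.h>
#include <stdint.h>

static void post_iteration(_Atomic uint32_t *flags, uint64_t linear) {
  atomic_fetch_or_explicit(&flags[linear >> 5], 1u << (linear % 32),
                           memory_order_release);
}

static void wait_for_iteration(_Atomic uint32_t *flags, uint64_t linear) {
  uint32_t bit = 1u << (linear % 32);
  while ((atomic_load_explicit(&flags[linear >> 5], memory_order_acquire) &
          bit) == 0) {
    /* spin; the runtime yields here via KMP_YIELD(TRUE) */
  }
}

int main(void) {
  static _Atomic uint32_t flags[2]; /* zero-initialized flag words */
  post_iteration(flags, 10);
  wait_for_iteration(flags, 10); /* bit already set, returns immediately */
  return 0;
}
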
4059 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4060  kmp_int32 num_done;
4061  kmp_info_t *th = __kmp_threads[gtid];
4062  kmp_team_t *team = th->th.th_team;
4063  kmp_disp_t *pr_buf = th->th.th_dispatch;
4064 
4065  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4066  if (team->t.t_serialized) {
4067  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4068  return; // nothing to do
4069  }
4070  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4071  if (num_done == th->th.th_team_nproc) {
4072  // we are the last thread, need to free shared resources
4073  int idx = pr_buf->th_doacross_buf_idx - 1;
4074  dispatch_shared_info_t *sh_buf =
4075  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4076  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4077  (kmp_int64)&sh_buf->doacross_num_done);
4078  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4079  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4080  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4081  sh_buf->doacross_flags = NULL;
4082  sh_buf->doacross_num_done = 0;
4083  sh_buf->doacross_buf_idx +=
4084  __kmp_dispatch_num_buffers; // free buffer for future re-use
4085  }
4086  // free private resources (th_doacross_buf_idx must persist across loops, so it is not reset)
4087  pr_buf->th_doacross_flags = NULL;
4088  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4089  pr_buf->th_doacross_info = NULL;
4090  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4091 }
4092 #endif
4093 
4094 #if OMP_50_ENABLED
4095 int __kmpc_get_target_offload(void) {
4096  if (!__kmp_init_serial) {
4097  __kmp_serial_initialize();
4098  }
4099  return __kmp_target_offload;
4100 }
4101 #endif // OMP_50_ENABLED
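
__kmpc_get_target_offload forces serial initialization so that the OMP_TARGET_OFFLOAD environment variable has been parsed before the stored policy is returned. The sketch below simply calls it from a program linked against the runtime and prints the raw value; repeating the prototype and calling this internal entry point from user code are illustrative assumptions only.

/* sketch only: prints the raw offload-policy value chosen via OMP_TARGET_OFFLOAD */
#include <stdio.h>

extern int __kmpc_get_target_offload(void);

int main(void) {
  printf("target offload policy = %d\n", __kmpc_get_target_offload());
  return 0;
}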
4102 
4103 // end of file //