#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }

  return __kmp_entry_thread()->th.th_root->r.r_active;
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
#endif

  va_start(ap, microtask);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
    if (lwt)
      ompt_frame = &(lwt->ompt_task_info.frame);
    else {
      int tid = __kmp_tid_from_gtid(gtid);
      ompt_frame = &(
          parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
    }
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif

#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED

void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level;

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams was called; set default number of teams otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmp_serialized_parallel(loc, global_tid);
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
     values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  // mfence is an SSE2 instruction; do not execute it if the CPU lacks SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
  // Nothing additional needed here.
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;

  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

  __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer for critical sections
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
  __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
    __kmp_itt_critical_destroyed(ilk->lock);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}
// Fast-path acquire/test/release of the test-and-set (TAS) user lock.
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      if (TCR_4(__kmp_nth) > \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
        KMP_YIELD(TRUE); \
      } else { \
        KMP_YIELD_SPIN(spins); \
      } \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
        __kmp_spin_backoff(&backoff); \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
      } \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | \
                                             KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD(TCR_4(__kmp_nth) > \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock; remember it so it can be freed later
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
    __kmp_itt_critical_creating(lck);

    // Use a cmpxchg to install the lock pointer; if another thread beat us to
    // it, deallocate our lock and use the one the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      __kmp_itt_critical_destroyed(lck);
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation.
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
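
// Illustrative sketch (not part of the original source): how the hint mapping
// above is reached from user code. omp_init_lock_with_hint() funnels into
// __kmp_init_lock_with_hint(), which calls __kmp_map_hint_to_lock(); with
// omp_lock_hint_contended the lock is backed by lockseq_queuing. The example
// is compiled out, and the lock variable and function name are hypothetical.
#if 0
#include <omp.h>

static void hint_mapping_example(void) {
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_contended); // maps to a queuing lock
  omp_set_lock(&l);
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
}
#endif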
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_rtm:
  case lk_hle:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);

  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) actions of
       __kmpc_end_master are done here */
    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only one did the push */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;

  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the location flags
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
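
// Illustrative sketch (not part of the original source): ompc_capture_affinity
// follows a "query size, then fill" pattern -- a NULL buffer (or zero size)
// skips the copy but still returns the number of characters required. The
// format string, buffer, and function name below are hypothetical examples;
// %{thread_num} is one of the standard OpenMP affinity-format field names.
#if 0
#include <stdlib.h>

static void capture_affinity_example(void) {
  size_t needed = ompc_capture_affinity(NULL, 0, "tid=%{thread_num}"); // size query
  char *buf = (char *)malloc(needed + 1);
  if (buf) {
    ompc_capture_affinity(buf, needed + 1, "tid=%{thread_num}"); // fill buffer
    free(buf);
  }
}
#endif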
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  /* This barrier is not a barrier region boundary */
  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  // Consider next barrier a user-visible barrier for barrier region boundaries
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
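
// Illustrative sketch (not part of the original source): the calling pattern a
// compiler might emit for "#pragma omp single copyprivate(x)". Every thread
// passes the address of its own copy of x; the thread that executed the single
// region passes didit=1 so its pointer is published, and the other threads run
// the copy helper with (their copy, published copy) between the two barriers
// in __kmpc_copyprivate above. The helper and variable names are hypothetical.
#if 0
static void copy_int(void *dst, void *src) { *(int *)dst = *(int *)src; }

static void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = __kmpc_single(loc, gtid);
  if (didit) {
    *x = 42; // body of the single construct
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, copy_int, didit);
}
#endif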
/* these macros mean the same thing independent of whether lock checking is
   enabled */
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer for *_with_hint entries
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  } else {
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
  }
}
// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
}
/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

/* initialize the nested lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock

/* initialize the nested lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_destroyed(lck);
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the nested lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)user_lock); // itt function will get to the right lock object.

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* release the nested lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2828 #if KMP_USE_DYNAMIC_LOCK 2831 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2833 int release_status =
2834 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2835 (void) release_status;
2837 #if OMPT_SUPPORT && OMPT_OPTIONAL 2839 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2841 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2842 if (ompt_enabled.enabled) {
2843 if (release_status == KMP_LOCK_RELEASED) {
2844 if (ompt_enabled.ompt_callback_mutex_released) {
2846 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
          codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif
    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
2893 if (release_status == KMP_LOCK_RELEASED) {
2894 if (ompt_enabled.ompt_callback_mutex_released) {
2896 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2897 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif
    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) +
                  sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }
2924 __kmp_itt_lock_releasing(lck);
  int release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
2934 if (ompt_enabled.enabled) {
2935 if (release_status == KMP_LOCK_RELEASED) {
2936 if (ompt_enabled.ompt_callback_mutex_released) {
2938 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2939 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif
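/* Illustrative sketch (not part of this file): nested-lock usage driving the
   release-status logic above. The owning thread may re-acquire the lock; only
   the release that drops the nesting depth to zero reports KMP_LOCK_RELEASED
   (and, under OMPT, ompt_callback_mutex_released), while inner releases report
   the nest-lock scope end instead.

     #include <omp.h>

     static omp_nest_lock_t nl;

     void outer(void) {
       omp_set_nest_lock(&nl);   // depth 1
       omp_set_nest_lock(&nl);   // depth 2, same thread, no deadlock
       omp_unset_nest_lock(&nl); // back to depth 1: lock still held
       omp_unset_nest_lock(&nl); // depth 0: lock actually released
     }
*/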
#endif // KMP_USE_DYNAMIC_LOCK
}

int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }
#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
3031 if (ompt_enabled.ompt_callback_mutex_acquire) {
3032 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3033 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif
  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3049 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3053 return (rc ? FTN_TRUE : FTN_FALSE);
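/* Illustrative sketch (not part of this file): a non-blocking acquire with
   omp_test_lock, the user-level entry that a compiler may route to
   __kmpc_test_lock above; FTN_TRUE/FTN_FALSE correspond to a nonzero/zero
   result seen by the caller.

     #include <omp.h>

     int try_do_work(omp_lock_t *l) {
       if (omp_test_lock(l)) { // nonzero only if the lock was taken
         // ... critical work ...
         omp_unset_lock(l);
         return 1;
       }
       return 0; // lock busy; caller can do other work and retry later
     }
*/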
#endif // KMP_USE_DYNAMIC_LOCK
}

int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
3073 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3074 ompt_mutex_nest_lock, omp_lock_hint_none,
3075 __ompt_get_mutex_impl_type(user_lock),
3076 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
3090 if (ompt_enabled.ompt_callback_mutex_acquired) {
3092 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
          codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
3099 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3100 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) +
                  sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }
3129 __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif
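/* Illustrative sketch (not part of this file): a minimal OMPT tool that
   registers the mutex-acquired callback fired by the lock entry points above.
   Assumes the OpenMP 5.0 <omp-tools.h> declarations; the my_* names are
   hypothetical.

     #include <omp-tools.h>
     #include <stdio.h>

     static void my_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                   const void *codeptr_ra) {
       printf("acquired mutex kind=%d\n", (int)kind);
     }

     static int my_init(ompt_function_lookup_t lookup, int initial_device_num,
                        ompt_data_t *tool_data) {
       ompt_set_callback_t set_cb =
           (ompt_set_callback_t)lookup("ompt_set_callback");
       set_cb(ompt_callback_mutex_acquired, (ompt_callback_t)my_mutex_acquired);
       return 1; // keep the tool active
     }

     static void my_finalize(ompt_data_t *tool_data) {}

     ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                               const char *runtime_version) {
       static ompt_start_tool_result_t result = {my_init, my_finalize, {0}};
       return &result;
     }
*/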
#endif // KMP_USE_DYNAMIC_LOCK
}

#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {
3203 kmp_user_lock_p lck;
#if KMP_USE_DYNAMIC_LOCK
  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3210 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(__kmp_user_lock_seq));
  } else {
3214 __kmp_init_indirect_csptr(crit, loc, global_tid,
3215 KMP_GET_I_TAG(__kmp_user_lock_seq));
3221 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3222 lck = (kmp_user_lock_p)lk;
3223 KMP_DEBUG_ASSERT(lck != NULL);
3224 if (__kmp_env_consistency_check) {
3225 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
3232 if (__kmp_env_consistency_check) {
3233 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
#else // KMP_USE_DYNAMIC_LOCK
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
3248 KMP_DEBUG_ASSERT(lck != NULL);
3250 if (__kmp_env_consistency_check)
3251 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3253 __kmp_acquire_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
}

static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {
3263 kmp_user_lock_p lck;
#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3268 lck = (kmp_user_lock_p)crit;
3269 if (__kmp_env_consistency_check)
3270 __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3275 if (__kmp_env_consistency_check)
3276 __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }
#else // KMP_USE_DYNAMIC_LOCK
  if (__kmp_base_user_lock_size > 32) {
3286 lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }
3292 if (__kmp_env_consistency_check)
3293 __kmp_pop_sync(global_tid, ct_critical, loc);
3295 __kmp_release_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline int __kmp_swap_teams_for_teams_reduction(
    kmp_info_t *th, kmp_team_t **team_p, int *task_state) {
  kmp_team_t *team;
  if (th->th.th_teams_microtask) {
3308 *team_p = team = th->th.th_team;
3309 if (team->t.t_level == th->th.th_teams_level) {
3311 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
3313 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3314 th->th.th_team = team->t.t_parent;
3315 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3316 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3317 *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3329 th->th.th_info.ds.ds_tid = 0;
3330 th->th.th_team = team;
3331 th->th.th_team_nproc = team->t.t_nproc;
3332 th->th.th_task_team = team->t.t_task_team[task_state];
3333 th->th.th_task_state = task_state;
}

kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_vars, size_t reduce_size,
                               void *reduce_data,
                               void (*reduce_func)(void *lhs_data,
                                                   void *rhs_data),
                               kmp_critical_name *lck) {
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3375 if (!TCR_4(__kmp_init_parallel))
3376 __kmp_parallel_initialize();
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif
3388 th = __kmp_thread_from_gtid(global_tid);
3389 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED
  packed_reduction_method = __kmp_determine_reduction_method(
3409 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3410 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3412 if (packed_reduction_method == critical_reduce_block) {
3414 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
  } else if (packed_reduction_method == empty_reduce_block) {
  } else if (packed_reduction_method == atomic_reduce_block) {
3433 if (__kmp_env_consistency_check)
3434 __kmp_pop_sync(global_tid, ct_reduce, loc);
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
3457 ompt_frame_t *ompt_frame;
3458 if (ompt_enabled.enabled) {
3459 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3460 if (ompt_frame->enter_frame.ptr == NULL)
3461 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3462 OMPT_STORE_RETURN_ADDRESS(global_tid);
3466 __kmp_threads[global_tid]->th.th_ident = loc;
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
3474 ompt_frame->enter_frame = ompt_data_none;
3480 if (__kmp_env_consistency_check) {
3482 __kmp_pop_sync(global_tid, ct_reduce, loc);
3492 if (teams_swapped) {
3493 __kmp_restore_swapped_teams(th, team, task_state);
  KA_TRACE(10,
           ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
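/* Illustrative sketch (not part of this file): roughly how a compiler may use
   the reduce-nowait entry points for
       #pragma omp for reduction(+ : sum) nowait
   The exact lowering is compiler-specific; the reduce_data/reduce_func shapes
   below are hypothetical.

     static void my_plus_reduce(void *lhs, void *rhs) {
       *(double *)lhs += *(double *)rhs;
     }

     // inside the outlined parallel region, with private partial sum psum:
     //   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(double), &psum,
     //                                my_plus_reduce, &crit_name)) {
     //   case 1: // this thread combines into the shared variable
     //     sum += psum;
     //     __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
     //     break;
     //   case 2: // runtime chose the atomic method
     //     #pragma omp atomic
     //     sum += psum;
     //     break;
     //   default: // 0: nothing to do on this thread
     //     break;
     //   }
*/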
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {
3515 PACKED_REDUCTION_METHOD_T packed_reduction_method;
  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3519 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3521 if (packed_reduction_method == critical_reduce_block) {
3523 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
  } else if (packed_reduction_method == empty_reduce_block) {
  } else if (packed_reduction_method == atomic_reduce_block) {
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
3548 if (__kmp_env_consistency_check)
3549 __kmp_pop_sync(global_tid, ct_reduce, loc);
  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3595 if (!TCR_4(__kmp_init_parallel))
3596 __kmp_parallel_initialize();
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif
3608 th = __kmp_thread_from_gtid(global_tid);
3609 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED
  packed_reduction_method = __kmp_determine_reduction_method(
3613 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3614 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3616 if (packed_reduction_method == critical_reduce_block) {
3618 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
  } else if (packed_reduction_method == empty_reduce_block) {
  } else if (packed_reduction_method == atomic_reduce_block) {
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
3638 ompt_frame_t *ompt_frame;
3639 if (ompt_enabled.enabled) {
3640 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3641 if (ompt_frame->enter_frame.ptr == NULL)
3642 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3643 OMPT_STORE_RETURN_ADDRESS(global_tid);
    __kmp_threads[global_tid]->th.th_ident = loc;

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
3656 ompt_frame->enter_frame = ompt_data_none;
3662 if (__kmp_env_consistency_check) {
3664 __kmp_pop_sync(global_tid, ct_reduce, loc);
3674 if (teams_swapped) {
3675 __kmp_restore_swapped_teams(th, team, task_state);
  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3709 th = __kmp_thread_from_gtid(global_tid);
3710 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED
  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3718 if (packed_reduction_method == critical_reduce_block) {
3720 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3724 ompt_frame_t *ompt_frame;
3725 if (ompt_enabled.enabled) {
3726 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3727 if (ompt_frame->enter_frame.ptr == NULL)
3728 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3729 OMPT_STORE_RETURN_ADDRESS(global_tid);
3733 __kmp_threads[global_tid]->th.th_ident = loc;
3735 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
3738 ompt_frame->enter_frame = ompt_data_none;
  } else if (packed_reduction_method == empty_reduce_block) {
3748 ompt_frame_t *ompt_frame;
3749 if (ompt_enabled.enabled) {
3750 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3751 if (ompt_frame->enter_frame.ptr == NULL)
3752 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3753 OMPT_STORE_RETURN_ADDRESS(global_tid);
3757 __kmp_threads[global_tid]->th.th_ident = loc;
3759 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
3762 ompt_frame->enter_frame = ompt_data_none;
  } else if (packed_reduction_method == atomic_reduce_block) {
3769 ompt_frame_t *ompt_frame;
3770 if (ompt_enabled.enabled) {
3771 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3772 if (ompt_frame->enter_frame.ptr == NULL)
3773 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3774 OMPT_STORE_RETURN_ADDRESS(global_tid);
3779 __kmp_threads[global_tid]->th.th_ident = loc;
3781 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
3784 ompt_frame->enter_frame = ompt_data_none;
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);
3801 if (teams_swapped) {
3802 __kmp_restore_swapped_teams(th, team, task_state);
3806 if (__kmp_env_consistency_check)
3807 __kmp_pop_sync(global_tid, ct_reduce, loc);
  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
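/* Illustrative sketch (not part of this file): the user-level construct that
   typically ends up in __kmpc_reduce/__kmpc_end_reduce (the blocking variant,
   i.e. without a nowait clause). Plain OpenMP C code; nothing here is specific
   to this runtime.

     #include <omp.h>

     double dot(const double *a, const double *b, int n) {
       double sum = 0.0;
       #pragma omp parallel for reduction(+ : sum)
       for (int i = 0; i < n; ++i)
         sum += a[i] * b[i];
       return sum; // partial sums combined, followed by a barrier
     }
*/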
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
}
3834 kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
}
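/* Illustrative sketch (not part of this file): __kmpc_get_taskid and
   __kmpc_get_parent_taskid are internal entry points; a debugger or an
   instrumented build could call them directly to label tasks. The prototypes
   below mirror the definitions above (kmp_uint64 is the runtime's 64-bit
   unsigned typedef; an equivalent stand-in is used here).

     #include <stdio.h>

     extern unsigned long long __kmpc_get_taskid(void);
     extern unsigned long long __kmpc_get_parent_taskid(void);

     void log_current_task(void) {
       // 0 is reported when the current task has no parent
       printf("task %llu (parent %llu)\n", __kmpc_get_taskid(),
              __kmpc_get_parent_taskid());
     }
*/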
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(20,
           ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
            gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
3883 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3884 idx = pr_buf->th_doacross_buf_idx++;
3886 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3889 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
3892 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  // the first element is the number of dimensions
  pr_buf->th_doacross_info[0] = (kmp_int64)num_dims;
3897 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3898 pr_buf->th_doacross_info[2] = dims[0].lo;
3899 pr_buf->th_doacross_info[3] = dims[0].up;
3900 pr_buf->th_doacross_info[4] = dims[0].st;
3902 for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length;
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
3919 pr_buf->th_doacross_info[last++] = dims[j].lo;
3920 pr_buf->th_doacross_info[last++] = dims[j].up;
3921 pr_buf->th_doacross_info[last++] = dims[j].st;
3926 if (dims[0].st == 1) {
3927 trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
3929 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3930 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3933 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
3936 trace_count *= pr_buf->th_doacross_info[4 * j + 1];
  }
  KMP_DEBUG_ASSERT(trace_count > 0);
3942 if (idx != sh_buf->doacross_buf_idx) {
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // the first thread to arrive gets 0 from the CAS and allocates the flags
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
3957 if (flags == NULL) {
3959 size_t size = trace_count / 8 + 8;
3960 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3962 sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) { // initialization is in progress
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
      KMP_YIELD(TRUE);
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
      KMP_YIELD(TRUE);
#endif
  }
3975 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1);
  pr_buf->th_doacross_flags = sh_buf->doacross_flags;
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
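/* Illustrative sketch (not part of this file): the OpenMP 4.5 doacross loop
   that a compiler typically lowers onto __kmpc_doacross_init / _wait / _post /
   _fini. Each cross-iteration dependence becomes a wait on the sink vector and
   a post of the current iteration.

     void wavefront(double *a, int n) {
     #pragma omp parallel for ordered(1)
       for (int i = 1; i < n; ++i) {
     #pragma omp ordered depend(sink : i - 1)   // -> __kmpc_doacross_wait
         a[i] += a[i - 1];
     #pragma omp ordered depend(source)         // -> __kmpc_doacross_post
       }
     }
*/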
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
3998 pr_buf = th->th.th_dispatch;
3999 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4000 num_dims = pr_buf->th_doacross_info[0];
4001 lo = pr_buf->th_doacross_info[2];
4002 up = pr_buf->th_doacross_info[3];
4003 st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
4029 for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
4032 ln = pr_buf->th_doacross_info[j + 1];
4033 lo = pr_buf->th_doacross_info[j + 2];
4034 up = pr_buf->th_doacross_info[j + 3];
4035 st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) { // most common case
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // negative increment
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
4061 iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
                gtid, (iter_number << 5) + shft));
}
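/* The wait/post pair above index a shared bit vector: the linearized iteration
   number selects bit (iter % 32) of 32-bit word (iter / 32). A minimal sketch
   of the same indexing, independent of the runtime's types:

     static inline void mark_done(unsigned int *flags, long long iter) {
       unsigned int bit = 1u << (iter % 32);
       flags[iter >> 5] |= bit; // the runtime uses an atomic OR here
     }

     static inline int is_done(const unsigned int *flags, long long iter) {
       return (flags[iter >> 5] & (1u << (iter % 32))) != 0;
     }
*/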
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
4092 pr_buf = th->th.th_dispatch;
4093 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4094 num_dims = pr_buf->th_doacross_info[0];
4095 lo = pr_buf->th_doacross_info[2];
4096 st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
4100 iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
4107 ln = pr_buf->th_doacross_info[j + 1];
4108 lo = pr_buf->th_doacross_info[j + 2];
4109 st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) { // most common case
      iter = vec[i] - lo;
    } else if (st > 0) {
4113 iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // negative increment
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
4131 kmp_info_t *th = __kmp_threads[gtid];
4132 kmp_team_t *team = th->th.th_team;
4133 kmp_disp_t *pr_buf = th->th.th_dispatch;
  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4136 if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
4140 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4141 if (num_done == th->th.th_team_nproc) {
4143 int idx = pr_buf->th_doacross_buf_idx - 1;
4144 dispatch_shared_info_t *sh_buf =
4145 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4146 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4147 (kmp_int64)&sh_buf->doacross_num_done);
4148 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4149 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4150 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4151 sh_buf->doacross_flags = NULL;
4152 sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers;
  }
  // free private resources (need to keep the buffer index)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4159 pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
int __kmpc_get_target_offload(void) {
4166 if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}
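/* Illustrative sketch (not part of this file): __kmpc_get_target_offload
   reports the OMP_TARGET_OFFLOAD policy (DEFAULT / MANDATORY / DISABLED) after
   making sure the serial part of the runtime is initialized, so it is safe to
   call very early. A hypothetical caller would compare the result against the
   runtime's own offload constants; no values are hard-coded here.

     #include <stdio.h>

     extern int __kmpc_get_target_offload(void);

     void report_offload_policy(void) {
       printf("OMP_TARGET_OFFLOAD policy id: %d\n", __kmpc_get_target_offload());
     }
*/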
#endif // OMP_50_ENABLED