Actual source code: tcpthread.c

petsc-3.4.2 2013-07-02
  1: /* Define feature test macros to make sure CPU_SET and other functions are available
  2:  */
  3: #define PETSC_DESIRE_FEATURE_TEST_MACROS

  5: #include <../src/sys/threadcomm/impls/pthread/tcpthreadimpl.h>

  7: #if defined(PETSC_PTHREAD_LOCAL)
  8: PETSC_PTHREAD_LOCAL PetscInt PetscPThreadRank; /* Per-thread rank; read by PetscThreadCommGetRank_PThread() */
  9: #else
 10: pthread_key_t PetscPThreadRankkey; /* Key whose per-thread value points at a PetscInt holding that thread's rank */
 11: #endif
/* Set to PETSC_TRUE by the first PetscThreadCommCreate_PThread() call and reset to
   PETSC_FALSE by PetscThreadCommDestroy_PThread() once the creation count drops to zero */
 13: static PetscBool PetscPThreadCommInitializeCalled = PETSC_FALSE;
/* Name tables handed to PetscOptionsEnum() in PetscThreadCommCreate_PThread(). Each lists the
   selectable value names, followed by the enum type name, the enum constant prefix, and a 0 terminator */
 15: const char *const PetscPThreadCommSynchronizationTypes[] = {"LOCKFREE","PetscPThreadCommSynchronizationType","PTHREADSYNC_",0};
 16: const char *const PetscPThreadCommAffinityPolicyTypes[] = {"ALL","ONECORE","NONE","PetscPThreadCommAffinityPolicyType","PTHREADAFFPOLICY_",0};
 17: const char *const PetscPThreadCommPoolSparkTypes[] = {"SELF","PetscPThreadCommPoolSparkType","PTHREADPOOLSPARK_",0};

 19: static PetscInt ptcommcrtct = 0; /* PThread communicator creation count. Incremented whenever a pthread
 20:                                     communicator is created and decremented when it is destroyed. On the
 21:                                     last pthread communicator destruction, the thread pool is also terminated
 22:                                   */

 24: PetscErrorCode PetscThreadCommGetRank_PThread(PetscInt *trank)
 25: {
 26: #if defined(PETSC_PTHREAD_LOCAL)
 27:   *trank = PetscPThreadRank;
 28: #else
 29:   *trank = *((PetscInt*)pthread_getspecific(PetscPThreadRankkey));
 30: #endif
 31:   return 0;
 32: }


 35: /* Sets the attributes for threads */
 38: PetscErrorCode PetscThreadCommSetPThreadAttributes(PetscThreadComm tcomm)
 39: {
 40:   PetscErrorCode          ierr;
 41:   PetscThreadComm_PThread ptcomm=(PetscThreadComm_PThread)tcomm->data;
 42:   pthread_attr_t          *attr =ptcomm->attr;
 43: #if defined(PETSC_HAVE_SCHED_CPU_SET_T)
 44:   PetscInt                ncores;
 45:   cpu_set_t               *cpuset;
 46: #endif
 47:   PetscInt                i;

 50: #if defined(PETSC_HAVE_SCHED_CPU_SET_T)
 51:   PetscMalloc(tcomm->nworkThreads*sizeof(cpu_set_t),&cpuset);
 52:   ptcomm->cpuset = cpuset;
 53:   PetscGetNCores(&ncores);
 54: #endif

 56:   for (i=ptcomm->thread_num_start; i < tcomm->nworkThreads; i++) {
 57:     pthread_attr_init(&attr[i]);
 58:     /* CPU affinity */
 59: #if defined(PETSC_HAVE_SCHED_CPU_SET_T)
 60:     PetscInt j;
 61:     switch (ptcomm->aff) {
 62:     case PTHREADAFFPOLICY_ONECORE:
 63:       CPU_ZERO(&cpuset[i]);
 64:       CPU_SET(tcomm->affinities[i]%ncores,&cpuset[i]);
 65:       pthread_attr_setaffinity_np(&attr[i],sizeof(cpu_set_t),&cpuset[i]);
 66:       break;
 67:     case PTHREADAFFPOLICY_ALL:
 68:       CPU_ZERO(&cpuset[i]);
 69:       for (j=0; j<ncores; j++) CPU_SET(j,&cpuset[i]);
 70:       pthread_attr_setaffinity_np(&attr[i],sizeof(cpu_set_t),&cpuset[i]);
 71:       break;
 72:     case PTHREADAFFPOLICY_NONE:
 73:       break;
 74:     }
 75: #endif
 76:   }
 77:   return(0);
 78: }

 82: PetscErrorCode PetscThreadCommDestroy_PThread(PetscThreadComm tcomm)
 83: {
 84:   PetscThreadComm_PThread ptcomm=(PetscThreadComm_PThread)tcomm->data;
 85:   PetscErrorCode          ierr;

 88:   if (!ptcomm) return(0);
 89:   ptcommcrtct--;
 90:   if (!ptcommcrtct) {
 91:     /* Terminate the thread pool */
 92:     (*ptcomm->finalize)(tcomm);
 93:     PetscFree(ptcomm->tid);
 94: #if defined(PETSC_HAVE_SCHED_CPU_SET_T)
 95:     PetscFree(ptcomm->cpuset);
 96: #endif
 97:     PetscFree(ptcomm->attr);
 98:     PetscPThreadCommInitializeCalled = PETSC_FALSE;
 99:   }
100:   PetscFree(ptcomm->granks);
101:   PetscFree(ptcomm);
102:   return(0);
103: }

107: PETSC_EXTERN PetscErrorCode PetscThreadCommCreate_PThread(PetscThreadComm tcomm)
108: {
109:   PetscThreadComm_PThread ptcomm;
110:   PetscErrorCode          ierr;
111:   PetscInt                i;

114:   ptcommcrtct++;
115:   PetscStrcpy(tcomm->type,PTHREAD);
116:   PetscNew(struct _p_PetscThreadComm_PThread,&ptcomm);

118:   tcomm->data              = (void*)ptcomm;
119:   ptcomm->nthreads         = 0;
120:   ptcomm->sync             = PTHREADSYNC_LOCKFREE;
121:   ptcomm->aff              = PTHREADAFFPOLICY_ONECORE;
122:   ptcomm->spark            = PTHREADPOOLSPARK_SELF;
123:   ptcomm->ismainworker     = PETSC_TRUE;
124:   ptcomm->synchronizeafter = PETSC_TRUE;
125:   tcomm->ops->destroy      = PetscThreadCommDestroy_PThread;
126:   tcomm->ops->runkernel    = PetscThreadCommRunKernel_PThread_LockFree;
127:   tcomm->ops->barrier      = PetscThreadCommBarrier_PThread_LockFree;
128:   tcomm->ops->getrank      = PetscThreadCommGetRank_PThread;

130:   PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&ptcomm->granks);

132:   if (!PetscPThreadCommInitializeCalled) { /* Only done for PETSC_THREAD_COMM_WORLD */
133:     PetscBool flg1,flg2,flg3,flg4;
134:     PetscPThreadCommInitializeCalled = PETSC_TRUE;

136:     PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"PThread communicator options",NULL);
137:     PetscOptionsBool("-threadcomm_pthread_main_is_worker","Main thread is also a worker thread",NULL,PETSC_TRUE,&ptcomm->ismainworker,&flg1);
138:     PetscOptionsEnum("-threadcomm_pthread_affpolicy","Thread affinity policy"," ",PetscPThreadCommAffinityPolicyTypes,(PetscEnum)ptcomm->aff,(PetscEnum*)&ptcomm->aff,&flg2);
139:     PetscOptionsEnum("-threadcomm_pthread_type","Thread pool type"," ",PetscPThreadCommSynchronizationTypes,(PetscEnum)ptcomm->sync,(PetscEnum*)&ptcomm->sync,&flg3);
140:     PetscOptionsEnum("-threadcomm_pthread_spark","Thread pool spark type"," ",PetscPThreadCommPoolSparkTypes,(PetscEnum)ptcomm->spark,(PetscEnum*)&ptcomm->spark,&flg4);
141:     PetscOptionsBool("-threadcomm_pthread_synchronizeafter","Puts a barrier after every kernel call",NULL,PETSC_TRUE,&ptcomm->synchronizeafter,&flg1);
142:     PetscOptionsEnd();

144:     if (ptcomm->ismainworker) {
145:       ptcomm->nthreads         = tcomm->nworkThreads-1;
146:       ptcomm->thread_num_start = 1;
147:     } else {
148:       ptcomm->nthreads         = tcomm->nworkThreads;
149:       ptcomm->thread_num_start = 0;
150:     }

152:     switch (ptcomm->sync) {
153:     case PTHREADSYNC_LOCKFREE:
154:       ptcomm->initialize    = PetscPThreadCommInitialize_LockFree;
155:       ptcomm->finalize      = PetscPThreadCommFinalize_LockFree;
156:       tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
157:       tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
158:       break;
159:     default:
160:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only Lock-free synchronization scheme supported currently");
161:     }
162:     /* Set up thread ranks */
163:     for (i=0; i< tcomm->nworkThreads; i++) ptcomm->granks[i] = i;

165:     if (ptcomm->ismainworker) {
166: #if defined(PETSC_PTHREAD_LOCAL)
167:       PetscPThreadRank=0; /* Main thread rank */
168: #else
169:       pthread_key_create(&PetscPThreadRankkey,NULL);
170:       pthread_setspecific(PetscPThreadRankkey,&ptcomm->granks[0]);
171: #endif
172:     }
173:     /* Set the leader thread rank */
174:     if (ptcomm->nthreads) {
175:       if (ptcomm->ismainworker) tcomm->leader = ptcomm->granks[1];
176:       else tcomm->leader = ptcomm->granks[0];
177:     }

179:     /* Create array holding pthread ids */
180:     PetscMalloc(tcomm->nworkThreads*sizeof(pthread_t),&ptcomm->tid);
181:     /* Create thread attributes */
182:     PetscMalloc(tcomm->nworkThreads*sizeof(pthread_attr_t),&ptcomm->attr);
183:     PetscThreadCommSetPThreadAttributes(tcomm);
184:     if (ptcomm->ismainworker) {
185:       /* Pin main thread */
186: #if defined(PETSC_HAVE_SCHED_CPU_SET_T)
187:       cpu_set_t mset;
188:       PetscInt  ncores,icorr;

190:       PetscGetNCores(&ncores);
191:       CPU_ZERO(&mset);
192:       icorr = tcomm->affinities[0]%ncores;
193:       CPU_SET(icorr,&mset);
194:       sched_setaffinity(0,sizeof(cpu_set_t),&mset);
195: #endif
196:     }
197:     /* Initialize thread pool */
198:     (*ptcomm->initialize)(tcomm);

200:   } else {
201:     PetscThreadComm         gtcomm;
202:     PetscThreadComm_PThread gptcomm;
203:     PetscInt                *granks,j,*gaffinities;

205:     PetscCommGetThreadComm(PETSC_COMM_WORLD,&gtcomm);
206:     gaffinities = gtcomm->affinities;
207:     gptcomm     = (PetscThreadComm_PThread)tcomm->data;
208:     granks      = gptcomm->granks;
209:     /* Copy over the data from the global thread communicator structure */
210:     ptcomm->ismainworker     = gptcomm->ismainworker;
211:     ptcomm->thread_num_start = gptcomm->thread_num_start;
212:     ptcomm->sync             = gptcomm->sync;
213:     ptcomm->aff              = gptcomm->aff;
214:     tcomm->ops->runkernel    = gtcomm->ops->runkernel;
215:     tcomm->ops->barrier      = gtcomm->ops->barrier;

217:     for (i=0; i < tcomm->nworkThreads; i++) {
218:       for (j=0; j < gtcomm->nworkThreads; j++) {
219:         if (tcomm->affinities[i] == gaffinities[j]) ptcomm->granks[i] = granks[j];
220:       }
221:     }
222:   }
223:   return(0);
224: }