Package astLib :: Module astStats
[hide private]
[frames] | [no frames]

Source Code for Module astLib.astStats

  1  """module for performing statistical calculations. 
  2   
  3  (c) 2007-2012 Matt Hilton  
  4   
  5  (c) 2013-2014 Matt Hilton & Steven Boada 
  6   
  7  U{http://astlib.sourceforge.net} 
  8   
  9  This module (as you may notice) provides very few statistical routines. It does, however, provide 
 10  biweight (robust) estimators of location and scale, as described in Beers et al. 1990 (AJ, 100, 
 11  32), in addition to a robust least squares fitting routine that uses the biweight transform. 
 12   
 13  Some routines may fail if they are passed lists with few items and encounter a 'divide by zero' 
 14  error. Where this occurs, the function will return None. An error message will be printed to the 
 15  console when this happens if astStats.REPORT_ERRORS=True (the default). Testing if an 
 16  astStats function returns None can be used to handle errors in scripts.  
 17   
 18  For extensive statistics modules, the Python bindings for GNU R (U{http://rpy.sourceforge.net}), or 
 19  SciPy (U{http://www.scipy.org}) are suggested. 
 20   
 21  """ 
 22   
 23  import math 
 24  import numpy 
 25  import sys 
 26   
 27  REPORT_ERRORS=True 
 28   
 29  #--------------------------------------------------------------------------------------------------- 
def mean(dataList):
    """Returns the arithmetic mean of a sequence of numbers.

    @type dataList: list or numpy array
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: mean average, as computed by numpy.mean

    """
    average = numpy.mean(dataList)
    return average
40 41 #---------------------------------------------------------------------------------------------------
def weightedMean(dataList):
    """Returns the weighted mean of a list of [value, weight] pairs.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [value, weight]
    @rtype: float
    @return: weighted mean average (0 if dataList is empty)

    """
    weightedTotal = 0
    totalWeight = 0
    # Explicit accumulation keeps the floating point summation order fixed
    for row in dataList:
        weightedTotal += float(row[0]*row[1])
        totalWeight += row[1]

    if len(dataList) == 0:
        return 0

    return weightedTotal/totalWeight
62 63 #---------------------------------------------------------------------------------------------------
def stdev(dataList):
    """Calculates the standard deviation of a list of numbers.

    This is a thin wrapper around numpy.std called with its default arguments, i.e. the
    population standard deviation (normalised by N, ddof=0), not the sample standard
    deviation (normalised by N-1).

    @type dataList: list or numpy array
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: standard deviation

    """
    return numpy.std(dataList)
74 75 #---------------------------------------------------------------------------------------------------
def rms(dataList):
    """Returns the root mean square of a list of numbers.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: root mean square

    """
    squares = [value*value for value in dataList]

    return math.sqrt(numpy.mean(squares))
92 93 #---------------------------------------------------------------------------------------------------
def weightedStdev(dataList):
    """Calculates the weighted (sample) standard deviation of a list of numbers.

    Only rows whose second column is > 0 contribute to the sums. Each deviation from the
    weighted mean is divided by the second column before being squared, and the result is
    scaled by N/(N-1), where N is the total number of rows in dataList.

    NOTE(review): weightedMean() multiplies each value by the second column, whereas here
    the deviations are divided by it - confirm that both are meant to treat that column
    in the same way.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [value, weight]
    @rtype: float
    @return: weighted standard deviation

    @note: Returns None if an error occurs (fewer than 2 items, or a divide by zero).

    """
    if len(dataList) < 2:
        if REPORT_ERRORS:
            print("""ERROR: astStats.weightedStdev() : dataList contains < 2 items.""")
        return None

    # weightedMean() divides by the sum of the weights, which can be zero
    try:
        listMean = weightedMean(dataList)
    except ZeroDivisionError:
        if REPORT_ERRORS:
            print("""ERROR: astStats.weightedStdev() : divide by zero error.""")
        return None

    sumSq = 0
    wSum = 0
    for item in dataList:
        if item[1] > 0.0:
            scaledDev = float((item[0]-listMean)/item[1])
            invWeight = float(1.0/item[1])
            sumSq = sumSq+scaledDev*scaledDev
            wSum = wSum+invWeight*invWeight

    # No row had a positive second column, so there is nothing to normalise by
    if wSum == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats.weightedStdev() : divide by zero error.""")
        return None

    nFactor = float(len(dataList))/float(len(dataList)-1)

    return math.sqrt(nFactor*(sumSq/wSum))
122 123 #---------------------------------------------------------------------------------------------------
def median(dataList):
    """Returns the median of a sequence of numbers.

    @type dataList: list or numpy array
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: median average, as computed by numpy.median

    """
    middle = numpy.median(dataList)
    return middle
134 135 #---------------------------------------------------------------------------------------------------
def modeEstimate(dataList):
    """Estimates the mode of a set of values using mode=(3*median)-(2*mean).

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: estimate of mode average

    """
    medianTerm = 3*numpy.median(dataList)
    meanTerm = 2*numpy.mean(dataList)

    return medianTerm-meanTerm
148 149 #---------------------------------------------------------------------------------------------------
def MAD(dataList):
    """Returns the Median Absolute Deviation of a list of numbers, i.e. the median of
    |x-M|, where M is the median of the input.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @rtype: float
    @return: median absolute deviation

    """
    centre = numpy.median(dataList)
    absDeviations = [math.fabs(value-centre) for value in dataList]

    return numpy.median(absDeviations)
169 170 #---------------------------------------------------------------------------------------------------
def biweightLocation(dataList, tuningConstant):
    """Calculates the biweight location estimator (like a robust average) of a list of
    numbers.

    Each point is assigned u=(x-M)/(tuningConstant*MAD), where M is the median of the data;
    only points with |u|<=1 contribute to the estimate.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended.
    @rtype: float
    @return: biweight location

    @note: Returns None if an error occurs (MAD is zero, or no point has |u|<=1).

    """
    listMedian = median(dataList)
    listMAD = MAD(dataList)
    if listMAD == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats: biweightLocation() : MAD() returned 0.""")
        return None

    scale = tuningConstant*listMAD

    top = 0     # numerator equation (5) Beers et al if you like
    bottom = 0  # denominator
    for item in dataList:
        u = (item-listMedian)/scale
        if math.fabs(u) <= 1.0:
            oneMinusU2 = 1.0-(u*u)
            top = top+((item-listMedian)*oneMinusU2*oneMinusU2)
            bottom = bottom+(oneMinusU2*oneMinusU2)

    # Nothing inside the tuning window (e.g. tuningConstant of zero or much less than 1):
    # report an error rather than dividing by zero
    if bottom == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats: biweightLocation() : divide by zero error.""")
        return None

    return listMedian+(top/bottom)
212 213 #---------------------------------------------------------------------------------------------------
def biweightScale(dataList, tuningConstant):
    """Calculates the biweight scale estimator (like a robust standard deviation) of a list
    of numbers.

    Each point is assigned u=(x-M)/(tuningConstant*MAD), where M is the median of the data;
    only points with |u|<=1 contribute to the estimate.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 9.0 is recommended.
    @rtype: float
    @return: biweight scale

    @note: Returns None if an error occurs (tuningConstant*MAD is zero, or the denominator
    of the estimator is zero).

    """
    listMedian = median(dataList)
    listMAD = MAD(dataList)

    # The median and MAD are numpy floats, so dividing by a zero scale yields inf/nan plus
    # a warning rather than raising ZeroDivisionError - check for it explicitly instead of
    # relying on an exception handler.
    scale = tuningConstant*listMAD
    if scale == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats.biweightScale() : divide by zero error.""")
        return None

    top = 0       # numerator equation (9) Beers et al
    bottom = 0
    valCount = 0  # Count values where |u|<=1 only
    for item in dataList:
        diffModulus = math.fabs(item-listMedian)
        u = (item-listMedian)/scale
        # Skip u values >1
        if math.fabs(u) <= 1.0:
            u2Term = 1.0-(u*u)
            u4Term = math.pow(u2Term, 4)
            top = top+((diffModulus*diffModulus)*u4Term)
            bottom = bottom+(u2Term*(1.0-(5.0*(u*u))))
            valCount = valCount+1

    top = math.sqrt(top)
    bottom = math.fabs(bottom)
    if bottom == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats.biweightScale() : divide by zero error.""")
        return None

    return math.pow(float(valCount), 0.5)*(top/bottom)
263 264 #---------------------------------------------------------------------------------------------------
def biweightClipped(dataList, tuningConstant, sigmaCut):
    """Iteratively calculates biweight location and scale, using sigma clipping, for a list
    of values. The calculation is performed on the first column of a multi-dimensional
    list; other columns are ignored.

    Up to 11 iterations are performed, stopping early if 5 or fewer values survive the
    clipping. The returned location and scale are those of the last iteration performed;
    the returned data are the rows lying within sigmaCut*scale of that location.

    @type dataList: list
    @param dataList: input data
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @type sigmaCut: float
    @param sigmaCut: sigma clipping to apply
    @rtype: dictionary
    @return: estimate of biweight location, scale, and list of non-clipped data, in the format
    {'biweightLocation', 'biweightScale', 'dataList'}

    @note: Returns None if an error occurs (fewer than 6 values, or either biweight
    estimator failing).

    """
    rows = list(dataList)

    # For multi-column rows only the first column takes part in the statistics
    values = []
    for row in rows:
        if isinstance(row, (list, tuple, numpy.ndarray)):
            values.append(row[0])
        else:
            values.append(row)

    # With too few values the loop below would never run and there would be no result
    if len(values) <= 5:
        if REPORT_ERRORS:
            print("""ERROR: astStats : biweightClipped() : dataList contains < 6 items.""")
        return None

    cbi = None
    sbi = None
    clippedValues = values
    clippedData = rows
    iterations = 0
    while iterations < 11 and len(clippedValues) > 5:

        cbi = biweightLocation(clippedValues, tuningConstant)
        sbi = biweightScale(clippedValues, tuningConstant)

        # check for either biweight routine falling over
        # happens when feed in lots of similar numbers
        # e.g. when bootstrapping with a small sample
        if cbi is None or sbi is None:
            if REPORT_ERRORS:
                print("""ERROR: astStats : biweightClipped() : divide by zero error.""")
            return None

        # Clipping is always applied to the full input, not to the previous survivors
        lower = cbi-(sigmaCut*sbi)
        upper = cbi+(sigmaCut*sbi)
        clippedValues = []
        clippedData = []
        for row, value in zip(rows, values):
            if lower < value < upper:
                clippedValues.append(value)
                clippedData.append(row)

        iterations = iterations+1

    return {'biweightLocation': cbi, 'biweightScale': sbi, 'dataList': clippedData}
328 329 #---------------------------------------------------------------------------------------------------
def biweightTransform(dataList, tuningConstant):
    """Returns the biweight transform of a set of values. Useful for using as weights in
    robust line fitting.

    The cutoff is tuningConstant*|median(dataList)|. A value x with |x| below the cutoff
    gets the weight (1-(x/cutoff)^2)^2; all other values get a weight of 0.0.

    @type dataList: list
    @param dataList: input data, must be a one dimensional list
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @rtype: list
    @return: list of biweights, in format [value, weight]

    """
    cutoff = tuningConstant*abs(numpy.median(dataList))

    weighted = []
    for value in dataList:
        weight = 0.0
        if abs(value) < cutoff:
            ratioSq = (value/cutoff)*(value/cutoff)
            weight = (1.0-ratioSq)*(1.0-ratioSq)
        weighted.append([value, weight])

    return weighted
358 359 #---------------------------------------------------------------------------------------------------
def OLSFit(dataList):
    """Performs an ordinary least squares fit on a two dimensional list of numbers.
    Minimum number of data points is 3.

    @type dataList: list
    @param dataList: input data, must be a two dimensional list in format [x, y]
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @note: Returns None if an error occurs (fewer than 3 points, or all x values being
    identical so that the slope is undefined).

    """
    n = float(len(dataList))
    if n <= 2:
        if REPORT_ERRORS:
            print("""ERROR: astStats.OLSFit() : dataList contains < 3 items.""")
        return None

    sumX = 0
    sumY = 0
    sumXY = 0
    sumXX = 0
    for item in dataList:
        sumX = sumX+item[0]
        sumY = sumY+item[1]
        sumXY = sumXY+(item[0]*item[1])
        sumXX = sumXX+(item[0]*item[0])

    # Shared denominator of the slope and intercept; zero when every x is the same
    denom = (n*sumXX)-(sumX*sumX)
    if denom == 0:
        if REPORT_ERRORS:
            print("""ERROR: astStats.OLSFit() : divide by zero error.""")
        return None

    m = ((n*sumXY)-(sumX*sumY))/denom
    c = ((sumXX*sumY)-(sumX*sumXY))/denom

    sumRes = 0
    for item in dataList:
        residual = item[1]-(m*item[0])-c
        sumRes = sumRes+(residual*residual)

    sigma = math.sqrt((1.0/(n-2))*sumRes)

    # Rounding can push the denominator marginally below zero; the fit itself is still
    # returned in that case, but the errors are reported as NaN
    try:
        rootDenom = math.sqrt(denom)
    except ValueError:
        mSigma = numpy.nan
        cSigma = numpy.nan
    else:
        mSigma = (sigma*math.sqrt(n))/rootDenom
        cSigma = (sigma*math.sqrt(sumXX))/rootDenom

    return {'slope': m,
            'intercept': c,
            'slopeError': mSigma,
            'interceptError': cSigma}
413 414 #---------------------------------------------------------------------------------------------------
def clippedMeanStdev(dataList, sigmaCut = 3.0, maxIterations = 10.0):
    """Calculates the clipped mean and stdev of a list of numbers.

    On each iteration the mean and (population) standard deviation of the surviving points
    are calculated, and points lying sigmaCut*stdev or more away from the mean are
    discarded. Iteration stops when no point is discarded, when maxIterations is reached,
    or when 4 or fewer points remain.

    The returned mean and stdev are those calculated at the start of the last iteration
    performed, i.e. before the final clip; numPoints is counted after it. These agree when
    the clipping has converged.

    @type dataList: list
    @param dataList: input data, one dimensional list of numbers
    @type sigmaCut: float
    @param sigmaCut: clipping in Gaussian sigma to apply
    @type maxIterations: int
    @param maxIterations: maximum number of iterations
    @rtype: dictionary
    @return: format {'clippedMean', 'clippedStdev', 'numPoints'}; the mean and stdev are
    NaN if dataList is empty

    """
    listCopy = numpy.array(list(dataList))

    if len(listCopy) == 0:
        return {'clippedMean': numpy.nan, 'clippedStdev': numpy.nan, 'numPoints': 0}

    # Calculated up front so that inputs too short to enter the loop still give a result
    m = listCopy.mean()
    s = listCopy.std()

    iterations = 0
    while iterations < maxIterations and len(listCopy) > 4:

        m = listCopy.mean()
        s = listCopy.std()

        # Symmetric clip about the mean, so that low outliers are removed as well as high
        kept = listCopy[numpy.less(abs(listCopy-m), sigmaCut*s)]

        iterations = iterations+1

        # Nothing removed: further iterations would not change the result
        if len(kept) == len(listCopy):
            break
        listCopy = kept

    return {'clippedMean': m, 'clippedStdev': s, 'numPoints': listCopy.shape[0]}
445 446 #---------------------------------------------------------------------------------------------------
def clippedWeightedLSFit(dataList, sigmaCut):
    """Performs a weighted least squares fit on a list of numbers with sigma clipping. Minimum
    number of data points is 5.

    Up to 11 iterations are performed, stopping early if 4 or fewer points survive the
    clipping. On each iteration the surviving points are fitted with weightedLSFit() in
    "errors" mode, and the full input is then re-clipped, keeping the points whose residual
    from the fitted line, divided by their y error, is below sigmaCut.

    The returned fit is that of the last iteration performed; 'numDataPoints' is the
    number of points surviving the clip made after that fit.

    @type dataList: list
    @param dataList: input data, must be a three dimensional list in format [x, y, y error]
    @type sigmaCut: float
    @param sigmaCut: sigma clipping to apply
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError', 'numDataPoints'}

    @note: Returns None if an error occurs (fewer than 5 points, or the fit failing).

    """
    rows = list(dataList)

    # With too few points the loop below would never run and there would be no fit
    if len(rows) <= 4:
        if REPORT_ERRORS:
            print("""ERROR: astStats : clippedWeightedLSFit() : dataList contains < 5 items.""")
        return None

    fitResults = None
    clippedValues = rows
    iterations = 0
    while iterations < 11 and len(clippedValues) > 4:

        fitResults = weightedLSFit(clippedValues, "errors")

        # The fit reports failure by returning None in place of the results dictionary
        if fitResults is None or fitResults['slope'] is None:
            if REPORT_ERRORS:
                print("""ERROR: astStats : clippedWeightedLSFit() : divide by zero error.""")
            return None

        # Trim points more than sigmaCut*sigma away from the fitted line
        slope = fitResults['slope']
        intercept = fitResults['intercept']
        clippedValues = []
        for row in rows:
            fit = slope*row[0]+intercept
            res = row[1]-fit
            if abs(res)/row[2] < sigmaCut:
                clippedValues.append(row)

        iterations = iterations+1

    # store the number of values that made it through the clipping process
    fitResults['numDataPoints'] = len(clippedValues)

    return fitResults
495 496 #---------------------------------------------------------------------------------------------------
def weightedLSFit(dataList, weightType):
    """Performs a weighted least squares fit on a three dimensional list of numbers [x, y, y error].

    In "weights" mode at least 5 data points are required, and the quoted errors are
    derived from the scatter of the (unweighted) residuals about the fitted line. In
    "errors" mode each point is weighted by 1/error^2.

    @type dataList: list
    @param dataList: input data, must be a three dimensional list in format [x, y, y error]
    @type weightType: string
    @param weightType: if "errors", weights are calculated assuming the input data is in the
    format [x, y, error on y]; if "weights", the weights are assumed to be already calculated and
    stored in a fourth column [x, y, error on y, weight] (as used by e.g. L{astStats.biweightLSFit})
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @note: Returns None if an error occurs (too few points, a zero y error, a degenerate
    fit that would divide by zero, or an unrecognised weightType).

    """
    if weightType == "weights":
        n = float(len(dataList))
        if n <= 4:
            if REPORT_ERRORS:
                print("""ERROR: astStats.weightedLSFit() : dataList contains < 5 items.""")
            return None

        sumW = 0
        sumWX = 0
        sumWY = 0
        sumWXY = 0
        sumWXX = 0
        for item in dataList:
            W = item[3]
            sumWX = sumWX+(W*item[0])
            sumWY = sumWY+(W*item[1])
            sumWXY = sumWXY+(W*item[0]*item[1])
            sumWXX = sumWXX+(W*item[0]*item[0])
            sumW = sumW+W

        # Tested explicitly: with numpy inputs a zero denominator gives inf/nan rather
        # than raising ZeroDivisionError
        denom = (sumW*sumWXX)-(sumWX*sumWX)
        if denom == 0:
            if REPORT_ERRORS:
                print("ERROR: astStats.weightedLSFit() : divide by zero error.")
            return None

        m = ((sumW*sumWXY)-(sumWX*sumWY))/denom
        c = ((sumWXX*sumWY)-(sumWX*sumWXY))/denom

        sumRes = 0
        for item in dataList:
            residual = item[1]-(m*item[0])-c
            sumRes = sumRes+(residual*residual)

        sigma = math.sqrt((1.0/(n-2))*sumRes)

        # Can get div0 errors here so check
        # When biweight fitting converges this shouldn't happen
        errDenom = (n*sumWXX)-(sumWX*sumWX)
        if not errDenom > 0.0:
            if REPORT_ERRORS:
                print("ERROR: astStats.weightedLSFit() : divide by zero error.")
            return None

        mSigma = (sigma*math.sqrt(n))/math.sqrt(errDenom)
        cSigma = (sigma*math.sqrt(sumWXX))/math.sqrt(errDenom)

    elif weightType == "errors":
        sumX = 0
        sumY = 0
        sumXY = 0
        sumXX = 0
        sumSigma = 0
        for item in dataList:
            variance = item[2]*item[2]
            # A point with zero error would carry infinite weight
            if variance == 0:
                if REPORT_ERRORS:
                    print("ERROR: astStats.weightedLSFit() : divide by zero error.")
                return None
            sumX = sumX+(item[0]/variance)
            sumY = sumY+(item[1]/variance)
            sumXY = sumXY+((item[0]*item[1])/variance)
            sumXX = sumXX+((item[0]*item[0])/variance)
            sumSigma = sumSigma+(1.0/variance)

        # delta is zero for empty input or when every x is the same; it must also be
        # positive for the square roots below
        delta = (sumSigma*sumXX)-(sumX*sumX)
        if not delta > 0:
            if REPORT_ERRORS:
                print("ERROR: astStats.weightedLSFit() : divide by zero error.")
            return None

        m = ((sumSigma*sumXY)-(sumX*sumY))/delta
        c = ((sumXX*sumY)-(sumX*sumXY))/delta
        mSigma = math.sqrt(sumSigma/delta)
        cSigma = math.sqrt(sumXX/delta)

    else:
        if REPORT_ERRORS:
            print("ERROR: astStats.weightedLSFit() : weightType must be 'weights' or 'errors'.")
        return None

    return {'slope': m,
            'intercept': c,
            'slopeError': mSigma,
            'interceptError': cSigma}
600 601 #---------------------------------------------------------------------------------------------------
def biweightLSFit(dataList, tuningConstant, sigmaCut = None):
    """Performs a weighted least squares fit, where the weights used are the biweight
    transforms of the residuals to the previous best fit .i.e. the procedure is iterative,
    and converges very quickly (the number of iterations is fixed at 10).

    The first fit is an ordinary least squares fit. The reweighted fit is only performed
    while more than 5 data points remain; it goes through weightedLSFit() in "weights"
    mode, which itself needs at least 5 points.

    This seems to give slightly different results to the equivalent R routine, so use at your
    own risk!

    @type dataList: list
    @param dataList: input data, must be a three dimensional list in format [x, y, y weight]
    (the third column is passed through to the weighted fit but the weights actually used
    are the biweight transforms of the residuals)
    @type tuningConstant: float
    @param tuningConstant: 6.0 is recommended for location estimates, 9.0 is recommended for
    scale estimates
    @type sigmaCut: float
    @param sigmaCut: sigma clipping to apply (set to None if not required)
    @rtype: dictionary
    @return: slope and intercept on y-axis, with associated errors, in the format
    {'slope', 'intercept', 'slopeError', 'interceptError'}

    @note: Returns None if an error occurs (i.e. if any of the fits fails).

    """
    dataCopy = list(dataList)

    # First perform unweighted fit, then calculate residuals
    results = OLSFit(dataCopy)
    for k in range(10):

        # A failed fit leaves nothing to iterate on
        if results is None:
            return None

        m = results['slope']
        c = results['intercept']
        res = [(m*item[0]+c)-item[1] for item in dataCopy]

        # NOTE(review): the reweighting step is taken to be nested inside this length
        # check - confirm against the original layout of the file
        if len(res) > 5:
            # For clipping, trim away points whose absolute residual exceeds
            # sigmaCut times the standard deviation of the absolute residuals
            if sigmaCut is not None:
                sigma = stdev([abs(r) for r in res])
                keptData = []
                keptRes = []
                remaining = len(dataCopy)
                for item, r in zip(dataCopy, res):
                    # Never trim below 2 data points
                    if abs(r) > (sigmaCut*sigma) and remaining > 2:
                        remaining = remaining-1
                    else:
                        keptData.append(item)
                        keptRes.append(r)
                dataCopy = keptData
                res = keptRes

            # Biweight transform residuals
            weights = biweightTransform(res, tuningConstant)

            # Perform weighted fit, using biweight transforms
            # of residuals as weight
            wData = []
            for item, weight in zip(dataCopy, weights):
                wData.append([item[0], item[1], item[2], weight[1]])

            results = weightedLSFit(wData, "weights")

    return results
674 675 #---------------------------------------------------------------------------------------------------
def cumulativeBinner(data, binMin, binMax, binTotal):
    """Bins the input data cumulatively: the value recorded for each bin is the fraction of
    all items that are greater than the lower edge of that bin.

    @param data: input data, must be a one dimensional list
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax-binMin)/binTotal
    totalItems = len(data)

    coords = []
    for index in range(binTotal):
        lowerEdge = binMin+(index*binStep)
        fraction = 0
        for value in data:
            if value > lowerEdge:
                fraction = fraction+1.0/totalItems
        # Gnuplot requires points at bin midpoints
        coords.append([binMin+(float(index+0.5)*binStep), fraction])

    return coords
706 707 #---------------------------------------------------------------------------------------------------
def binner(data, binMin, binMax, binTotal):
    """Bins the input data. An item is counted in a bin if it is greater than the lower edge
    of the bin and less than or equal to the upper edge.

    @param data: input data, must be a one dimensional list
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax-binMin)/binTotal

    coords = []
    for index in range(binTotal):
        lowerEdge = binMin+(index*binStep)
        upperEdge = binMin+((index+1)*binStep)
        count = 0
        for value in data:
            if lowerEdge < value <= upperEdge:
                count = count+1
        # Gnuplot requires points at bin midpoints
        coords.append([binMin+(float(index+0.5)*binStep), count])

    return coords
738 739 #---------------------------------------------------------------------------------------------------
def weightedBinner(data, weights, binMin, binMax, binTotal):
    """Bins the input data, recorded frequency is sum of weights in bin. An item falls in a
    bin if it is greater than the lower edge of the bin and less than or equal to the upper
    edge.

    @param data: input data, must be a one dimensional list
    @param weights: weight of each item in data, paired with data by position
    @type binMin: float
    @param binMin: minimum value from which to bin data
    @type binMax: float
    @param binMax: maximum value from which to bin data
    @type binTotal: int
    @param binTotal: number of bins
    @rtype: list
    @return: binned data, in format [bin centre, frequency]

    """
    binStep = float(binMax-binMin)/binTotal

    coords = []
    for index in range(binTotal):
        lowerEdge = binMin+(index*binStep)
        upperEdge = binMin+((index+1)*binStep)
        weightTotal = 0.0
        for value, weight in zip(data, weights):
            if lowerEdge < value <= upperEdge:
                weightTotal = weightTotal+weight
        # Gnuplot requires points at bin midpoints
        coords.append([binMin+(float(index+0.5)*binStep), weightTotal])

    return coords
770 771 #--------------------------------------------------------------------------------------------------- 772