acgc.stats.bivariate

Bivariate statistics

Statistical measures of relationships between two populations

  1#!/usr/bin/env python3
  2# -*- coding: utf-8 -*-
  3""" Bivariate statistics
  4
  5Statistical measures of relationships between two populations
  6"""
  7
  8import numpy as np
  9from scipy import stats
 10from .bivariate_lines import sma
 11# import xarray as xr
 12
 13__all__ = [
 14    "BivariateStatistics",
 15    "nmb",
 16    "nmae",
 17    "nmbf",
 18    "nmaef"
 19]
 20
 21def nmb( x0, x1 ):
 22    '''Compute Normalized Mean Bias (NMB)
 23
 24    NMB = ( mean(x1) - mean(x0) ) / mean(x0)
 25
 26    Parameters
 27    ----------
 28    x0 : array_like
 29        reference values
 30    x1 : array_like
 31        experiment values
 32    '''
 33
 34    assert (len(x0) == len(x1)), \
 35        "Parameters x0 and x1 must have the same length"
 36
 37    # Mean values
 38    x0_mean = np.mean(x0)
 39    x1_mean = np.mean(x1)
 40
 41    # Metric value
 42    return x1_mean / x0_mean - 1
 43
 44def nmae( x0, x1 ):
 45    '''Compute Normalized Mean Absolute Error (NMAE)
 46
 47    NMAE = mean(abs(x1 - x0)) / abs(mean(x0))
 48
 49    Parameters
 50    ---------
 51    x0 : array_like
 52        reference values
 53    x1 : array_like
 54        experiment values
 55    '''
 56
 57     # Mean values
 58    x0_mean = np.mean(x0)
 59
 60    # Mean absolute difference
 61    abs_diff = np.mean( np.abs(x1 - x0) )
 62
 63    # Metric value
 64    return abs_diff / np.abs( x0_mean )
 65
 66
 67def nmbf( x0, x1 ):
 68    '''Compute Normalized Mean Bias Factor (NMBF)
 69
 70    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125
 71
 72    Parameters
 73    ----------
 74    x0 : array_like
 75        reference values
 76    x1 : array_like
 77        experiment values
 78    '''
 79
 80    # Ensure that arguments have the same length
 81    assert (len(x0) == len(x1)), \
 82        "Parameters x0 and x1 must have the same length"
 83
 84    # Mean values
 85    x0_mean = np.mean(x0)
 86    x1_mean = np.mean(x1)
 87
 88    # Metric value
 89    if x1_mean >= x0_mean:
 90        result = x1_mean / x0_mean - 1
 91    else:
 92        result= 1 - x0_mean / x1_mean
 93    # Equivalent (faster?) implementation
 94    #S = (mMean - oMean) / np.abs(mMean - oMean)
 95    #result = S * ( np.exp( np.abs( mMean / oMean )) - 1 )
 96
 97    return result
 98
 99def nmaef( x0, x1 ):
100    '''Compute Normalized Mean Absolute Error Factor (NMAEF)
101
102    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125
103    
104    Parameters
105    ----------
106    x0 : array_like
107        reference values
108    x1 : array_like
109        experiment values
110    '''
111
112    # Ensure that arguments have the same length
113    assert (len(x0) == len(x1)), \
114        "Parameters x0 and x1 must have the same length"
115
116    # Mean values
117    x0_mean = np.mean(x0)
118    x1_mean = np.mean(x1)
119
120    # Mean absolute difference
121    abs_diff = np.mean( np.abs(x1 - x0))
122
123    # Metric value
124    if x1_mean >= x0_mean:
125        result = abs_diff / x0_mean 
126    else:
127        result = abs_diff / x1_mean
128    # Equivalent (faster?) implementation
129    #S = (exp_mean - ref_mean) / np.abs(exp_mean - ref_mean)
130    #result = abs_diff / ( oMean**((1+S)/2) * mMean**((1-S)/2) )
131
132    return result
133
134def _texify_name(name):
135    '''Return a LaTex formatted string for some variables
136    
137    Parameter
138    ---------
139    name : str
140    
141    Returns
142    -------
143    pretty_name : str
144    '''
145    if name=='R2':
146        pretty_name = f'$R^2$'
147    elif name=='r2':
148        pretty_name = f'$r^2$'
149    else:
150        pretty_name = name
151    return pretty_name
152
class BivariateStatistics:
    '''A suite of common statistics to quantify bivariate relationships

    Class method 'summary' provides a formatted summary of these statistics

    Attributes
    ----------
    xmean, ymean : float
        mean of x and y variables
    xmedian, ymedian : float
        median of x and y variables
    xstd, ystd : float
        standard deviation of x and y variables
    mean_difference, md : float
        ymean - xmean
    mean_absolute_difference, mad : float
        mean( |y-x| )
    relative_mean_difference, rmd : float
        md / xmean
    relative_mean_absolute_difference, rmad : float
        mad / xmean
    standardized_mean_difference, smd : float
        md / xstd
    standardized_mean_absolute_difference, smad : float
        mad / xstd
    mean_relative_difference, mrd : float
        mean(y/x) - 1
    median_difference, medd : float
        median(y-x)
    median_absolute_difference, medad : float
        median(|y-x|)
    relative_median_difference, rmedd : float
        median(y-x) / xmedian
    relative_median_absolute_difference, rmedad : float
        median(|y-x|) / xmedian
    median_relative_difference, medianrd, medrd : float
        median(y/x)-1
    normalized_mean_bias_factor, nmbf : float
        see `nmbf`
    normalized_mean_absolute_error_factor, nmaef : float
        see `nmaef`
    root_mean_square_difference, rmsd : float
        $\\sqrt{ \\langle (y - x)^2 \\rangle }$
    covariance : float
        cov(x,y)
    correlation_pearson, correlation, pearsonr, R, r : float
        Pearson linear correlation coefficient
    correlation_spearman, spearmanr : float
        Spearman, non-parametric rank correlation coefficient
    R2, r2 : float
        Linear coefficient of determination, $R^2$
    '''

    def __init__(self,x,y,w=None):
        '''Compute suite of bivariate statistics during initialization

        Statistic values are saved in attributes.
        CAUTION: Weights w are ignored except in SMA fit

        Parameters
        ----------
        x : ndarray
            independent variable values
        y : ndarray
            dependent variable values, same size as x
        w : ndarray, optional
            weights for points (x,y), same size as x and y

        Raises
        ------
        ValueError
            if x and y, or w and x, have different lengths
        '''

        # Ensure that x, y (and w, when given) have the same length
        if len(x) != len(y):
            raise ValueError( 'Arguments x and y must have the same length' )
        if (w is not None) and (len(w) != len(x)):
            raise ValueError( 'Argument w (if present) must have the same length as x' )

        diff = y - x
        absdiff = np.abs( y - x )
        ratio = y/x

        # Means, medians, and standard deviations
        self.xmean = np.mean(x)
        self.ymean = np.mean(y)
        self.xmedian = np.median(x)
        self.ymedian = np.median(y)
        self.xstd   = np.std(x)
        self.ystd   = np.std(y)

        # Keep the inputs; line fits are computed on demand in fitline()
        self._x = x
        self._y = y
        self._w = w

        # Mean and mean absolute differences
        self.mean_difference            = self.md   = self.ymean - self.xmean
        self.mean_absolute_difference   = self.mad  = np.mean( absdiff )

        # Relative and standardized differences
        self.relative_mean_difference           = self.rmd  = self.mean_difference / self.xmean
        self.relative_mean_absolute_difference  = self.rmad = self.mean_absolute_difference / self.xmean
        self.standardized_mean_difference       = self.smd  = self.mean_difference / self.xstd
        self.standardized_mean_absolute_difference  = self.smad = self.mean_absolute_difference / self.xstd

        # Mean and median relative differences
        self.mean_relative_difference   = self.mrd  = np.mean( ratio - 1 )
        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )

        # Median and median absolute differences
        self.median_difference          = self.medd  = np.median( diff )
        self.median_absolute_difference = self.medad = np.median( absdiff )

        # Relative median differences
        self.relative_median_difference          = self.rmedd  = self.median_difference / self.xmedian
        self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian

        # Normalized mean bias and error factors (module-level functions)
        self.normalized_mean_bias_factor            = self.nmbf  = nmbf(x,y)
        self.normalized_mean_absolute_error_factor  = self.nmaef = nmaef(x,y)

        # RMS difference
        self.root_mean_square_difference    = self.rmsd     = np.sqrt( np.mean( np.power( diff, 2) ) )

        # Covariance, correlation
        self.covariance = np.cov(x,y)[0][1]
        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
            np.corrcoef(x,y)[0][1]
        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
        self.R2 = self.r2 = self.R**2

    def __getitem__(self,key):
        '''Accesses attribute values via object['key']'''
        return getattr(self,key)

    def fitline(self,method='sma',intercept=True,**kwargs):
        '''Compute bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method (case insensitive):
            sma (default), ols, sen (also theil, theilsen), siegel.
            wls and York are recognized but not implemented.
        intercept : bool
            defines whether non-zero intercept should be fitted.
            Only used by the sma and ols methods; sen and siegel
            always fit an intercept.
        **kwargs
            passed to `acgc.stats.sma` (e.g. robust=True)

        Returns
        -------
        result : dict
            dictionary with keys:
            - slope (float)
                slope of fitted line
            - intercept (float)
                intercept of fitted line
            - fittedvalues (array (N,))
                values on fit line
            - residuals (array (N,))
                residual from fit line

        Raises
        ------
        NotImplementedError
            for method wls or york
        ValueError
            for any other unrecognized method
        '''

        method_name = method.lower()

        if method_name=='sma':
            fit = sma(  self._x,
                        self._y,
                        self._w,
                        intercept=intercept,
                        **kwargs)
            slope = fit['slope']
            intercept = fit['intercept']

        elif method_name=='ols':
            if intercept:
                # Design matrix with a column of ones for the intercept
                design = np.vstack([self._x,np.ones(len(self._x))]).T
                slope, intercept = np.linalg.lstsq( design, self._y, rcond=None )[0]
            else:
                # Line through the origin: one coefficient only, so the
                # intercept is zero by construction
                design = np.vstack([self._x]).T
                slope = np.linalg.lstsq( design, self._y, rcond=None )[0][0]
                intercept = 0.0

        elif method_name in ['theil','sen','theilsen']:
            # scipy signature is theilslopes(y, x)
            sen = stats.theilslopes( self._y,
                                     self._x )
            slope = sen.slope
            intercept = sen.intercept

        elif method_name=='siegel':
            # scipy signature is siegelslopes(y, x): dependent variable first
            siegel = stats.siegelslopes( self._y,
                                         self._x )
            slope = siegel.slope
            intercept = siegel.intercept

        elif method_name=='wls':
            raise NotImplementedError('WLS regression not implemented yet')

        elif method_name=='york':
            raise NotImplementedError('York regression not implemented yet')

        else:
            raise ValueError('Undefined method '+method)

        fittedvalues = slope * self._x + intercept

        line = dict( slope          = slope,
                     intercept      = intercept,
                     fittedvalues   = fittedvalues,
                     residuals      = self._y - fittedvalues )

        return line

    def slope(self,method='sma',intercept=True,**kwargs):
        '''Compute slope of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline` (default sma)
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        slope : float
            slope of the fitted line
        '''
        return self.fitline(method,intercept,**kwargs)['slope']

    def intercept(self,method='sma',intercept=True,**kwargs):
        '''Compute intercept of bivariate line fit

        Parameters
        ----------
        method : str
            line fitting method, see `fitline` (default sma)
        intercept : bool
            defines whether non-zero intercept should be fitted
        **kwargs
            passed to `fitline`

        Returns
        -------
        intercept : float
            value of y intercept
        '''
        return self.fitline(method,intercept,**kwargs)['intercept']

    def _expand_variables(self,variables):
        '''Expand special strings into a list of variables

        Parameter
        ---------
        variables : list or str, default='common'
            Special strings ("all","common") will be expanded to a list of variables
            list arguments will not be modified
            None is treated as "common"

        Returns
        -------
        list
            variable names

        Raises
        ------
        ValueError
            if variables is neither a list, None, "all", nor "common"
        '''
        if variables is None:
            variables='common'
        if variables=='all':
            variables=['MD','MAD','RMD','RMAD','MRD','SMD','SMAD',
                       'MedD','MedAD','RMedD','RMedAD','MedRD',
                       'NMBF','NMAEF','RMSD',
                       'R','R2','spearmanr','slope','intercept']
        elif variables=='common':
            variables=['MD','MAD','RMD','RMAD','MRD','R2','slope']
        if not isinstance(variables,list):
            raise ValueError(
                'variables must be a list, None, or one of these strings: "all","common"')

        return variables

    def summary_dict(self, variables=None, fitline_kw=None ):
        '''Summarize bivariate statistics into a dict

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
                "all" (displays all variables)
                "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to self.fitline()
            None selects method='sma' with intercept=True

        Returns
        -------
        summary : dict
            names (as given in `variables`) and values of variables
        '''

        # List of variables
        variables = self._expand_variables(variables)

        if fitline_kw is None:
            fitline_kw = {'method':'sma',
                          'intercept':True}

        # Construct the dict
        summary = {}
        for v in variables:
            # Attributes and methods are stored under lower-case names
            name = v.lower()
            if name in ('slope','intercept'):
                # These variables are object methods, not stored values
                value = getattr(self,name)(**fitline_kw)
            else:
                # Retrieve values
                value = getattr(self,name)

            # Key preserves the caller's spelling of the name
            summary[v] = value

        return summary

    def summary(self, variables=None, fitline_kw=None,
                floatformat='{:.4f}', stringlength=None ):
        '''Summarize bivariate statistics

        Parameters
        ----------
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
                "all" (displays all variables)
                "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.4f}'
            format specifier for floating point values
        stringlength : int, default=None
            length of the variables on output
            default (None) is to use the length of the longest variable name

        Returns
        -------
        summary : str
            names and values of variables, one "name = value" per line
        '''
        # List of variables
        variables = self._expand_variables(variables)

        if stringlength is None:
            # default=0 keeps an empty variable list from raising
            stringlength = max( (len(v) for v in variables), default=0 )
        stringformat = '{:'+str(stringlength)+'s}'

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        # One line per variable: padded name, then formatted value
        lineformat = stringformat+' = '+floatformat+'\n'
        return ''.join( lineformat.format(k,v) for k,v in summarydict.items() )

    def summary_fig_table(self, ax, variables=None, fitline_kw=None,
                          floatformat='{:.3f}',
                          loc=None, loc_units='axes',
                          **kwargs):
        '''Display bivariate statistics as a table on a plot axis

        The table is drawn as two adjacent text boxes: one column of
        variable names and one column of values.

        Parameters
        ----------
        ax : matplotlib.Figure.Axis
            axis where the table will be displayed
        variables : list or str, default='common'
            names of attribute variables to include in summary
            names are case insensitive
            The following strings are also accepted in place of a list
                "all" (displays all variables)
                "common" (displays all measures of mean difference)
        fitline_kw : dict, default=None
            keywords passed to `fitline`
        floatformat : str, default='{:.3f}'
            format specifier for floating point values
        loc : tuple (x0,y0), default=(0.8, 0.05)
            location on the axis where the table will be drawn
            can be in data units or axes units [0-1]
        loc_units : {'axes' (default), 'data'}
            specifies whether loc has 'data' units or 'axes' units [0-1]
        **kwargs
            passed to `ax.text` for both text boxes

        Returns
        -------
        text1, text2 : matplotlib text object
            Artist for the two text boxes

        Raises
        ------
        ValueError
            if loc_units is not 'data', 'axes', or 'axis'
        '''
        # List of variables
        variables = self._expand_variables(variables)

        # Default location in lower right corner
        if loc is None:
            loc = (0.8,0.05)

        # Coordinates for loc
        units = loc_units.lower()
        if units=='data':
            coord=ax.transData
        elif units in ['axes','axis']:
            coord=ax.transAxes
        else:
            raise ValueError('Display units should be "Data" or "Axes"')

        # Get a dict containing the needed variables
        summarydict = self.summary_dict( variables, fitline_kw )

        # Column of label text
        label_text = '\n'.join([_texify_name(key) for key in summarydict])
        # Column of value text
        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])

        # Horizontal alignment keyword, if used;
        # 'horizontalalignment' takes precedence over its alias 'ha'
        ha = kwargs.get('horizontalalignment', kwargs.get('ha',''))

        # For right alignment, align on values first
        # Otherwise, align on labels
        if ha=='right':
            first_text = value_text
            second_text = label_text
            sign = -1
        else:
            first_text = label_text
            second_text = value_text
            sign = +1

        # Add first column of text
        t1 = ax.text(loc[0],loc[1],
                     first_text,
                     transform=coord,
                     **kwargs
                     )

        # Get width of first text column, in the same units as loc
        bbox = t1.get_window_extent().transformed(coord.inverted())
        width = bbox.x1-bbox.x0

        # Add second column of text, offset by the width of the first
        t2 = ax.text(loc[0]+width*sign,loc[1],
                     second_text,
                     transform=coord,
                     **kwargs
                     )

        return [t1,t2]
class BivariateStatistics:
154class BivariateStatistics:
155    '''A suite of common statistics to quantify bivariate relationships
156
157    Class method 'summary' provides a formatted summary of these statistics
158    
159    Attributes
160    ----------
161    xmean, ymean : float
162        mean of x and y variables
163    xmedian, ymedian :float
164        median of x and y variables
165    xstd, ystd : float
166        standard deviation of x and y variables
167    mean_difference, md : float
168        ymean - xmean
169    mean_absolute_difference, mad : float
170        mean( |y-x| )
171    relative_mean_difference, rmd : float
172        md / xmean
173    relative_mean_absolute_difference, rmad :float
174        mad / xmean
175    standardized_mean_difference, smd : float
176        md / xstd
177    standardized_mean_absolute_difference, smad : float
178        mad /xstd
179    mean_relative_difference, mrd : float
180        mean(y/x) - 1
181    median_difference, medd : float
182        median(y-x)
183    median_absolute_difference, medad : float
184        median(|y-x|)
185    relative_median_difference, rmedd : float
186        median(y-x) / xmedian
187    relative_median_absolute_difference, rmedad : float
188        median(|y-x|) / xmedian
189    median_relative_difference, medianrd, medrd : float
190        median(y/x)-1
191    normalized_mean_bias_factor, nmbf : float
192        see `nmbf` 
193    normalized_mean_absolute_error_factor, nmaef : float
194        see `nmaef`
195    root_mean_square_difference, rmsd : float
196        $\\sqrt{ \\langle (y - x)^2 \\rangle }$
197    covariance : float
198        cov(x,y)
199    correlation_pearson, correlation, pearsonr, R, r : float
200        Pearson linear correlation coefficient 
201    correlation_spearman, spearmanr : float
202        Spearman, non-parametric rank correlation coefficient
203    R2, r2 : float
204        Linear coefficient of determination, $R^2$
205    '''
206
207    def __init__(self,x,y,w=None):
208        '''Compute suite of bivariate statistics during initialization
209        
210        Statistic values are save in attributes.
211        CAUTION: Weights w are ignored except in SMA fit
212
213        Parameters
214        ----------
215        x : ndarray
216            independent variable values
217        y : ndarray
218            dependent variable values, same size as x
219        w : ndarray, optional
220            weights for points (x,y), same size as x and y
221        '''
222
223        #Ensure that x and y have same length
224        if len(x) != len(y):
225            raise ValueError( 'Arguments x and y must have the same length' )
226        if (w is not None) and (len(w) != len(x)):
227            raise ValueError( 'Argument w (if present) must have the same length as x' )
228
229        diff = y - x
230        absdiff = np.abs( y - x )
231        ratio = y/x
232
233        # Means, medians, and standard deviations
234        self.xmean = np.mean(x)
235        self.ymean = np.mean(y)
236        self.xmedian = np.median(x)
237        self.ymedian = np.median(y)
238        self.xstd   = np.std(x)
239        self.ystd   = np.std(y)
240
241        self._x = x
242        self._y = y
243        self._w = w
244
245        # Mean and mean absolute differences
246        self.mean_difference            = self.md   = self.ymean - self.xmean
247        self.mean_absolute_difference   = self.mad  = np.mean( absdiff )
248
249        # Relative and standardized differences
250        self.relative_mean_difference           = self.rmd  = self.mean_difference / self.xmean
251        self.relative_mean_absolute_difference  = self.rmad = self.mean_absolute_difference / self.xmean
252        self.standardized_mean_difference       = self.smd  = self.mean_difference / self.xstd
253        self.standardized_mean_absolute_difference  = self.smad = self.mean_absolute_difference / self.xstd
254
255        # Mean and median relative differences
256        self.mean_relative_difference   = self.mrd  = np.mean( ratio - 1 )
257        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )
258
259        # Median and median absolute differences
260        self.median_difference          = self.medd  = np.median( diff )
261        self.median_absolute_difference = self.medad = np.median( absdiff )
262
263        # Relative median differences
264        self.relative_median_difference          = self.rmedd  = self.median_difference / self.xmedian
265        self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian
266
267        self.normalized_mean_bias_factor            = self.nmbf  = nmbf(x,y)
268        self.normalized_mean_absolute_error_factor  = self.nmaef = nmaef(x,y)
269
270        # RMS difference
271        self.root_mean_square_difference    = self.rmsd     = np.sqrt( np.mean( np.power( diff, 2) ) )
272
273        # Covariance, correlation
274        self.covariance = np.cov(x,y)[0][1]
275        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
276            np.corrcoef(x,y)[0][1]
277        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
278        self.R2 = self.r2 = self.R**2
279
280    def __getitem__(self,key):
281        '''Accesses attribute values via object['key']'''
282        return getattr(self,key)
283
284    def fitline(self,method='sma',intercept=True,**kwargs):
285        '''Compute bivariate line fit
286        
287        Parameters
288        ----------
289        method : str
290            line fitting method: sma (default), ols, wls, York, sen, siegel
291        intercept : bool
292            defines whether non-zero intercept should be fitted
293        **kwargs 
294            passed to `acgc.stats.sma` (e.g. robust=True)
295
296        Returns
297        -------
298        result : dict
299            dictionary with keys:
300            - slope (float)
301                slope of fitted line
302            - intercept (float)
303                intercept of fitted line
304            - fittedvalues (array (N,))
305                values on fit line
306            - residuals (array (N,))
307                residual from fit line
308        '''
309
310        if method.lower()=='sma':
311            fit = sma(  self._x,
312                        self._y,
313                        self._w,
314                        intercept=intercept,
315                        **kwargs)
316            slope = fit['slope']
317            intercept= fit['intercept']
318
319        elif method.lower()=='ols':
320            if intercept:
321                ols = np.linalg.lstsq( np.vstack([self._x,np.ones(len(self._x))]).T, 
322                                      self._y, rcond=None )
323            else:
324                ols = np.linalg.lstsq( np.vstack([self._x]).T, self._y, rcond=None )
325            slope = ols[0][0]
326            intercept = ols[0][1]
327
328        elif method.lower() in ['theil','sen','theilsen']:
329            sen = stats.theilslopes( self._y,
330                                     self._x )
331            slope = sen.slope
332            intercept = sen.intercept
333
334        elif method.lower()=='siegel':
335            siegel = stats.siegelslopes( self._x,
336                                         self._y )
337            slope = siegel.slope
338            intercept = siegel.intercept
339
340        elif method.lower()=='wls':
341            raise NotImplementedError('WLS regression not implemented yet')
342
343        elif method.lower()=='york':
344            raise NotImplementedError('York regression not implemented yet')
345
346        else:
347            raise ValueError('Undefined method '+method)
348
349        line = dict( slope          = slope,
350                     intercept      = intercept,
351                     fittedvalues   = slope * self._x + intercept,
352                     residuals      = self._y - ( slope * self._x + intercept ) )
353
354        return line
355
356    def slope(self,method='sma',intercept=True,**kwargs):
357        '''Compute slope of bivariate line fit
358        
359        Parameters
360        ----------
361        method : str
362            line fitting method: sma (default), ols, wls
363        intercept : bool
364            defines whether non-zero intercept should be fitted
365        **kwargs 
366            passed to `fitline`
367
368        Returns
369        -------
370        slope : float
371            value of y intercept
372        '''
373        return self.fitline(method,intercept,**kwargs)['slope']
374
375    def intercept(self,method='sma',intercept=True,**kwargs):
376        '''Compute intercept of bivariate line fit
377        
378        Parameters
379        ----------
380        method : str
381            line fitting method: sma (default) or ols
382        intercept : bool
383            defines whether non-zero intercept should be fitted
384        **kwargs 
385            passed to `fitline`
386
387        Returns
388        -------
389        intercept : float
390            value of y intercept
391        '''
392        return self.fitline(method,intercept,**kwargs)['intercept']
393
394    def _expand_variables(self,variables):
395        '''Expand special strings into a list of variables
396        
397        Parameter
398        ---------
399        variables : list or str, default='common'
400            Special strings ("all","common") will be expanded to a list of variables
401            list arguments will not be modified
402
403        Returns
404        -------
405        list 
406            variable names
407        '''
408        if variables is None:
409            variables='common'
410        if variables=='all':
411            variables=['MD','MAD','RMD','RMAD','MRD','SMD','SMAD',
412                       'MedD','MedAD','RMedD','RMedAD','MedRD',
413                       'NMBF','NMAEF','RMSD',
414                       'R','R2','spearmanr','slope','intercept']
415        elif variables=='common':
416            variables=['MD','MAD','RMD','RMAD','MRD','R2','slope']
417        if not isinstance(variables,list):
418            raise ValueError(
419                'variables must be a list, None, or one of these strings: "all","common"')
420
421        return variables
422
423    def summary_dict(self, variables=None, fitline_kw=None ):
424        '''Summarize bivariate statistics into a dict
425
426        Parameters
427        ----------
428        vars : list or str, default='common'
429            names of attribute variables to include in summary
430            names are case insensitive            
431            The following strings are also accepted in place of a list 
432                "all" (displays all variables)
433                "common" (displays all measures of mean difference)
434        fitline_kw : dict, default=None)
435            keywords passed to self.fitline()
436        
437        Returns
438        -------
439        summary : dict
440            names and values of variables
441        '''
442
443        # List of variables
444        variables = self._expand_variables(variables)
445
446        if fitline_kw is None:
447            fitline_kw = {'method':'sma',
448                          'intercept':True}
449
450        # Construct the dict
451        summary = {}
452        for v in variables:
453            if v in ['slope','intercept']:
454                # These variables are object methods
455                func = getattr(self,v)
456                value = func(**fitline_kw)
457            else:
458                # Retrieve values
459                value = getattr(self,v.lower())
460
461            # summary += (stringformat+'='+floatformat+'\n').format(v,value)
462            summary[v] = value
463
464        return summary
465
466    def summary(self, variables=None, fitline_kw=None, 
467                floatformat='{:.4f}', stringlength=None ):
468        '''Summarize bivariate statistics
469
470        Parameters
471        ----------
472        vars : list or str, default='common'
473            names of attribute variables to include in summary
474            names are case insensitive            
475            The following strings are also accepted in place of a list 
476                "all" (displays all variables)
477                "common" (displays all measures of mean difference)
478        floatformat : str, default='{:.4f}'
479            format specifier for floating point values
480        stringlength : int, default=None
481            length of the variables on output
482            default (None) is to use the length of the longest variable name
483        fitline_kw : dict, default=None
484            keywords passed to `fitline`
485        
486        Returns
487        -------
488        summary : str
489            names and values of variables
490        '''
491        # List of variables
492        variables = self._expand_variables(variables)
493
494        if stringlength is None:
495            stringlength = np.max([len(v) for v in variables])
496        stringformat = '{:'+str(stringlength)+'s}'
497
498        # Get a dict containing the needed variables
499        summarydict = self.summary_dict( variables, fitline_kw )
500
501        # Extract length of the float numbers from floatformat
502        # import re
503        # floatlength = np.floor( float( re.findall("[-+]?(?:\d*\.*\d+)",
504        #       floatformat )[0] ) ).astype(int)
505
506        # summary = (stringformat+'{:>10s}').format('Variable','Value')
507        summarytext = ''
508        for k,v in summarydict.items():
509            summarytext += (stringformat+' = '+floatformat+'\n').format(k,v)
510
511        return summarytext
512
513    def summary_fig_table(self, ax, variables=None, fitline_kw=None,
514                          floatformat='{:.3f}',
515                          loc=None, loc_units='axes',
516                          **kwargs):
517        '''Display bivariate statistics as a table on a plot axis
518
519        Parameters
520        ----------
521        ax : matplotlib.Figure.Axis 
522            axis where the table will be displayed
523        variables : list or str, default='common'
524            names of attribute variables to include in summary
525            names are case insensitive            
526            The following strings are also accepted in place of a list 
527                "all" (displays all variables)
528                "common" (displays all measures of mean difference)
529        fitline_kw : dict, default=None
530            keywords passed to `fitline`
531        floatformat : str, default='{:.3f}'
532            format specifier for floating point values
533        loc : tuple (x0,y0), default=(0.85, 0.05)
534            location on the axis where the table will be drawn
535            can be in data units or axes units [0-1]
536        loc_units : {'axes' (default), 'data'}
537            specifies whether loc has 'data' units or 'axes' units [0-1]
538                    
539        Returns
540        -------
541        text1, text2 : matplotlib text object
542            Artist for the two text boxes        
543        '''
544        # List of variables
545        variables = self._expand_variables(variables)
546
547        # Default location in lower right corner
548        if loc is None:
549            loc = (0.8,0.05)
550
551        # Coordinates for loc
552        if loc_units.lower()=='data':
553            coord=ax.transData
554        elif loc_units.lower() in ['axes','axis']:
555            coord=ax.transAxes
556        else:
557            raise ValueError('Display units should be "Data" or "Axes"')
558
559        # Get a dict containing the needed variables
560        summarydict = self.summary_dict( variables, fitline_kw )
561
562        # Column of label text
563        label_text = '\n'.join([_texify_name(key) for key in summarydict])
564        # Column of value text
565        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])
566
567        # Check if horizontal alignment keyword is used
568        ha=''
569        try:
570            ha = kwargs['ha']
571        except KeyError:
572            pass
573        try:
574            ha = kwargs['horizontalalignment']
575        except KeyError:
576            pass
577
578        # For right alignment, align on values first
579        # Otherwise, align on labels
580        if ha=='right':
581            first_text = value_text
582            second_text = label_text
583            sign = -1
584        else:
585            first_text = label_text
586            second_text = value_text
587            sign = +1
588
589        # Add first column of text
590        t1=ax.text(loc[0],loc[1],
591                first_text,
592                transform=coord,
593                **kwargs
594                )
595
596        # Get width of first text column
597        bbox = t1.get_window_extent().transformed(coord.inverted())
598        width = bbox.x1-bbox.x0
599
600        # Add second column of text
601        t2 = ax.text(loc[0]+width*sign,loc[1],
602                     second_text,
603                     transform=coord,
604                     **kwargs
605                     )
606
607        ##################################
608        # Early version of this function using matplotlib.table.table()
609
610        # if isinstance(loc,(tuple,list)):
611        #     # Create an inset axis to contain the table
612        #     tableaxis = ax.inset_axes(loc)
613        #     table_width=1
614        # else:
615        #     tableaxis = ax
616
617        # # Display the table on the axis
618        # return mtable.table(
619        #     tableaxis,
620        #     cellText=[[floatformat.format(value)] for value in summarydict.values()],
621        #     rowLabels=[texify_name(key) for key in summarydict],
622        #     colWidths=[table_width/2]*2,
623        #     edges=edges,
624        #     loc=loc, bbox=bbox
625        #     )
626
627        return [t1,t2]

A suite of common statistics to quantify bivariate relationships

Class method 'summary' provides a formatted summary of these statistics

Attributes
  • xmean, ymean (float): mean of x and y variables
  • xmedian, ymedian (float): median of x and y variables
  • xstd, ystd (float): standard deviation of x and y variables
  • mean_difference, md (float): ymean - xmean
  • mean_absolute_difference, mad (float): mean( |y-x| )
  • relative_mean_difference, rmd (float): md / xmean
  • relative_mean_absolute_difference, rmad (float): mad / xmean
  • standardized_mean_difference, smd (float): md / xstd
  • standardized_mean_absolute_difference, smad (float): mad /xstd
  • mean_relative_difference, mrd (float): mean(y/x) - 1
  • median_difference, medd (float): median(y-x)
  • median_absolute_difference, medad (float): median(|y-x|)
  • relative_median_difference, rmedd (float): median(y-x) / xmedian
  • relative_median_absolute_difference, rmedad (float): median(|y-x|) / xmedian
  • median_relative_difference, medianrd, medrd (float): median(y/x)-1
  • normalized_mean_bias_factor, nmbf (float): see nmbf
  • normalized_mean_absolute_error_factor, nmaef (float): see nmaef
  • root_mean_square_difference, rmsd (float): $\sqrt{ \langle (y - x)^2 \rangle }$
  • covariance (float): cov(x,y)
  • correlation_pearson, correlation, pearsonr, R, r (float): Pearson linear correlation coefficient
  • correlation_spearman, spearmanr (float): Spearman, non-parametric rank correlation coefficient
  • R2, r2 (float): Linear coefficient of determination, $R^2$
BivariateStatistics(x, y, w=None)
207    def __init__(self,x,y,w=None):
208        '''Compute suite of bivariate statistics during initialization
209        
210        Statistic values are save in attributes.
211        CAUTION: Weights w are ignored except in SMA fit
212
213        Parameters
214        ----------
215        x : ndarray
216            independent variable values
217        y : ndarray
218            dependent variable values, same size as x
219        w : ndarray, optional
220            weights for points (x,y), same size as x and y
221        '''
222
223        #Ensure that x and y have same length
224        if len(x) != len(y):
225            raise ValueError( 'Arguments x and y must have the same length' )
226        if (w is not None) and (len(w) != len(x)):
227            raise ValueError( 'Argument w (if present) must have the same length as x' )
228
229        diff = y - x
230        absdiff = np.abs( y - x )
231        ratio = y/x
232
233        # Means, medians, and standard deviations
234        self.xmean = np.mean(x)
235        self.ymean = np.mean(y)
236        self.xmedian = np.median(x)
237        self.ymedian = np.median(y)
238        self.xstd   = np.std(x)
239        self.ystd   = np.std(y)
240
241        self._x = x
242        self._y = y
243        self._w = w
244
245        # Mean and mean absolute differences
246        self.mean_difference            = self.md   = self.ymean - self.xmean
247        self.mean_absolute_difference   = self.mad  = np.mean( absdiff )
248
249        # Relative and standardized differences
250        self.relative_mean_difference           = self.rmd  = self.mean_difference / self.xmean
251        self.relative_mean_absolute_difference  = self.rmad = self.mean_absolute_difference / self.xmean
252        self.standardized_mean_difference       = self.smd  = self.mean_difference / self.xstd
253        self.standardized_mean_absolute_difference  = self.smad = self.mean_absolute_difference / self.xstd
254
255        # Mean and median relative differences
256        self.mean_relative_difference   = self.mrd  = np.mean( ratio - 1 )
257        self.median_relative_difference = self.medianrd = self.medrd = np.median( ratio - 1 )
258
259        # Median and median absolute differences
260        self.median_difference          = self.medd  = np.median( diff )
261        self.median_absolute_difference = self.medad = np.median( absdiff )
262
263        # Relative median differences
264        self.relative_median_difference          = self.rmedd  = self.median_difference / self.xmedian
265        self.relative_median_absolute_difference = self.rmedad = self.median_absolute_difference / self.xmedian
266
267        self.normalized_mean_bias_factor            = self.nmbf  = nmbf(x,y)
268        self.normalized_mean_absolute_error_factor  = self.nmaef = nmaef(x,y)
269
270        # RMS difference
271        self.root_mean_square_difference    = self.rmsd     = np.sqrt( np.mean( np.power( diff, 2) ) )
272
273        # Covariance, correlation
274        self.covariance = np.cov(x,y)[0][1]
275        self.correlation = self.correlation_pearson = self.R = self.r = self.pearsonr = \
276            np.corrcoef(x,y)[0][1]
277        self.correlation_spearman = self.spearmanr = stats.spearmanr(x,y).statistic
278        self.R2 = self.r2 = self.R**2

Compute suite of bivariate statistics during initialization

Statistic values are saved in attributes. CAUTION: Weights w are ignored except in SMA fit

Parameters
  • x (ndarray): independent variable values
  • y (ndarray): dependent variable values, same size as x
  • w (ndarray, optional): weights for points (x,y), same size as x and y
xmean
ymean
xmedian
ymedian
xstd
ystd
covariance
def fitline(self, method='sma', intercept=True, **kwargs):
284    def fitline(self,method='sma',intercept=True,**kwargs):
285        '''Compute bivariate line fit
286        
287        Parameters
288        ----------
289        method : str
290            line fitting method: sma (default), ols, wls, York, sen, siegel
291        intercept : bool
292            defines whether non-zero intercept should be fitted
293        **kwargs 
294            passed to `acgc.stats.sma` (e.g. robust=True)
295
296        Returns
297        -------
298        result : dict
299            dictionary with keys:
300            - slope (float)
301                slope of fitted line
302            - intercept (float)
303                intercept of fitted line
304            - fittedvalues (array (N,))
305                values on fit line
306            - residuals (array (N,))
307                residual from fit line
308        '''
309
310        if method.lower()=='sma':
311            fit = sma(  self._x,
312                        self._y,
313                        self._w,
314                        intercept=intercept,
315                        **kwargs)
316            slope = fit['slope']
317            intercept= fit['intercept']
318
319        elif method.lower()=='ols':
320            if intercept:
321                ols = np.linalg.lstsq( np.vstack([self._x,np.ones(len(self._x))]).T, 
322                                      self._y, rcond=None )
323            else:
324                ols = np.linalg.lstsq( np.vstack([self._x]).T, self._y, rcond=None )
325            slope = ols[0][0]
326            intercept = ols[0][1]
327
328        elif method.lower() in ['theil','sen','theilsen']:
329            sen = stats.theilslopes( self._y,
330                                     self._x )
331            slope = sen.slope
332            intercept = sen.intercept
333
334        elif method.lower()=='siegel':
335            siegel = stats.siegelslopes( self._x,
336                                         self._y )
337            slope = siegel.slope
338            intercept = siegel.intercept
339
340        elif method.lower()=='wls':
341            raise NotImplementedError('WLS regression not implemented yet')
342
343        elif method.lower()=='york':
344            raise NotImplementedError('York regression not implemented yet')
345
346        else:
347            raise ValueError('Undefined method '+method)
348
349        line = dict( slope          = slope,
350                     intercept      = intercept,
351                     fittedvalues   = slope * self._x + intercept,
352                     residuals      = self._y - ( slope * self._x + intercept ) )
353
354        return line

Compute bivariate line fit

Parameters
  • method (str): line fitting method: sma (default), ols, wls, York, sen, siegel
  • intercept (bool): defines whether non-zero intercept should be fitted
  • **kwargs: passed to acgc.stats.sma (e.g. robust=True)
Returns
  • result (dict): dictionary with keys:
    • slope (float) slope of fitted line
    • intercept (float) intercept of fitted line
    • fittedvalues (array (N,)) values on fit line
    • residuals (array (N,)) residual from fit line
def slope(self, method='sma', intercept=True, **kwargs):
356    def slope(self,method='sma',intercept=True,**kwargs):
357        '''Compute slope of bivariate line fit
358        
359        Parameters
360        ----------
361        method : str
362            line fitting method: sma (default), ols, wls
363        intercept : bool
364            defines whether non-zero intercept should be fitted
365        **kwargs 
366            passed to `fitline`
367
368        Returns
369        -------
370        slope : float
371            value of y intercept
372        '''
373        return self.fitline(method,intercept,**kwargs)['slope']

Compute slope of bivariate line fit

Parameters
  • method (str): line fitting method: sma (default), ols, wls
  • intercept (bool): defines whether non-zero intercept should be fitted
  • **kwargs: passed to fitline
Returns
  • slope (float): slope of the fitted line
def intercept(self, method='sma', intercept=True, **kwargs):
375    def intercept(self,method='sma',intercept=True,**kwargs):
376        '''Compute intercept of bivariate line fit
377        
378        Parameters
379        ----------
380        method : str
381            line fitting method: sma (default) or ols
382        intercept : bool
383            defines whether non-zero intercept should be fitted
384        **kwargs 
385            passed to `fitline`
386
387        Returns
388        -------
389        intercept : float
390            value of y intercept
391        '''
392        return self.fitline(method,intercept,**kwargs)['intercept']

Compute intercept of bivariate line fit

Parameters
  • method (str): line fitting method: sma (default) or ols
  • intercept (bool): defines whether non-zero intercept should be fitted
  • **kwargs: passed to fitline
Returns
  • intercept (float): value of y intercept
def summary_dict(self, variables=None, fitline_kw=None):
423    def summary_dict(self, variables=None, fitline_kw=None ):
424        '''Summarize bivariate statistics into a dict
425
426        Parameters
427        ----------
428        vars : list or str, default='common'
429            names of attribute variables to include in summary
430            names are case insensitive            
431            The following strings are also accepted in place of a list 
432                "all" (displays all variables)
433                "common" (displays all measures of mean difference)
434        fitline_kw : dict, default=None)
435            keywords passed to self.fitline()
436        
437        Returns
438        -------
439        summary : dict
440            names and values of variables
441        '''
442
443        # List of variables
444        variables = self._expand_variables(variables)
445
446        if fitline_kw is None:
447            fitline_kw = {'method':'sma',
448                          'intercept':True}
449
450        # Construct the dict
451        summary = {}
452        for v in variables:
453            if v in ['slope','intercept']:
454                # These variables are object methods
455                func = getattr(self,v)
456                value = func(**fitline_kw)
457            else:
458                # Retrieve values
459                value = getattr(self,v.lower())
460
461            # summary += (stringformat+'='+floatformat+'\n').format(v,value)
462            summary[v] = value
463
464        return summary

Summarize bivariate statistics into a dict

Parameters
  • variables (list or str, default='common'): names of attribute variables to include in summary; names are case insensitive
    The following strings are also accepted in place of a list "all" (displays all variables) "common" (displays all measures of mean difference)
  • fitline_kw (dict, default=None): keywords passed to self.fitline()
Returns
  • summary (dict): names and values of variables
def summary( self, variables=None, fitline_kw=None, floatformat='{:.4f}', stringlength=None):
466    def summary(self, variables=None, fitline_kw=None, 
467                floatformat='{:.4f}', stringlength=None ):
468        '''Summarize bivariate statistics
469
470        Parameters
471        ----------
472        vars : list or str, default='common'
473            names of attribute variables to include in summary
474            names are case insensitive            
475            The following strings are also accepted in place of a list 
476                "all" (displays all variables)
477                "common" (displays all measures of mean difference)
478        floatformat : str, default='{:.4f}'
479            format specifier for floating point values
480        stringlength : int, default=None
481            length of the variables on output
482            default (None) is to use the length of the longest variable name
483        fitline_kw : dict, default=None
484            keywords passed to `fitline`
485        
486        Returns
487        -------
488        summary : str
489            names and values of variables
490        '''
491        # List of variables
492        variables = self._expand_variables(variables)
493
494        if stringlength is None:
495            stringlength = np.max([len(v) for v in variables])
496        stringformat = '{:'+str(stringlength)+'s}'
497
498        # Get a dict containing the needed variables
499        summarydict = self.summary_dict( variables, fitline_kw )
500
501        # Extract length of the float numbers from floatformat
502        # import re
503        # floatlength = np.floor( float( re.findall("[-+]?(?:\d*\.*\d+)",
504        #       floatformat )[0] ) ).astype(int)
505
506        # summary = (stringformat+'{:>10s}').format('Variable','Value')
507        summarytext = ''
508        for k,v in summarydict.items():
509            summarytext += (stringformat+' = '+floatformat+'\n').format(k,v)
510
511        return summarytext

Summarize bivariate statistics

Parameters
  • variables (list or str, default='common'): names of attribute variables to include in summary; names are case insensitive
    The following strings are also accepted in place of a list "all" (displays all variables) "common" (displays all measures of mean difference)
  • floatformat (str, default='{:.4f}'): format specifier for floating point values
  • stringlength (int, default=None): length of the variables on output default (None) is to use the length of the longest variable name
  • fitline_kw (dict, default=None): keywords passed to fitline
Returns
  • summary (str): names and values of variables
def summary_fig_table( self, ax, variables=None, fitline_kw=None, floatformat='{:.3f}', loc=None, loc_units='axes', **kwargs):
513    def summary_fig_table(self, ax, variables=None, fitline_kw=None,
514                          floatformat='{:.3f}',
515                          loc=None, loc_units='axes',
516                          **kwargs):
517        '''Display bivariate statistics as a table on a plot axis
518
519        Parameters
520        ----------
521        ax : matplotlib.Figure.Axis 
522            axis where the table will be displayed
523        variables : list or str, default='common'
524            names of attribute variables to include in summary
525            names are case insensitive            
526            The following strings are also accepted in place of a list 
527                "all" (displays all variables)
528                "common" (displays all measures of mean difference)
529        fitline_kw : dict, default=None
530            keywords passed to `fitline`
531        floatformat : str, default='{:.3f}'
532            format specifier for floating point values
533        loc : tuple (x0,y0), default=(0.85, 0.05)
534            location on the axis where the table will be drawn
535            can be in data units or axes units [0-1]
536        loc_units : {'axes' (default), 'data'}
537            specifies whether loc has 'data' units or 'axes' units [0-1]
538                    
539        Returns
540        -------
541        text1, text2 : matplotlib text object
542            Artist for the two text boxes        
543        '''
544        # List of variables
545        variables = self._expand_variables(variables)
546
547        # Default location in lower right corner
548        if loc is None:
549            loc = (0.8,0.05)
550
551        # Coordinates for loc
552        if loc_units.lower()=='data':
553            coord=ax.transData
554        elif loc_units.lower() in ['axes','axis']:
555            coord=ax.transAxes
556        else:
557            raise ValueError('Display units should be "Data" or "Axes"')
558
559        # Get a dict containing the needed variables
560        summarydict = self.summary_dict( variables, fitline_kw )
561
562        # Column of label text
563        label_text = '\n'.join([_texify_name(key) for key in summarydict])
564        # Column of value text
565        value_text = '\n'.join([floatformat.format(value) for value in summarydict.values()])
566
567        # Check if horizontal alignment keyword is used
568        ha=''
569        try:
570            ha = kwargs['ha']
571        except KeyError:
572            pass
573        try:
574            ha = kwargs['horizontalalignment']
575        except KeyError:
576            pass
577
578        # For right alignment, align on values first
579        # Otherwise, align on labels
580        if ha=='right':
581            first_text = value_text
582            second_text = label_text
583            sign = -1
584        else:
585            first_text = label_text
586            second_text = value_text
587            sign = +1
588
589        # Add first column of text
590        t1=ax.text(loc[0],loc[1],
591                first_text,
592                transform=coord,
593                **kwargs
594                )
595
596        # Get width of first text column
597        bbox = t1.get_window_extent().transformed(coord.inverted())
598        width = bbox.x1-bbox.x0
599
600        # Add second column of text
601        t2 = ax.text(loc[0]+width*sign,loc[1],
602                     second_text,
603                     transform=coord,
604                     **kwargs
605                     )
606
607        ##################################
608        # Early version of this function using matplotlib.table.table()
609
610        # if isinstance(loc,(tuple,list)):
611        #     # Create an inset axis to contain the table
612        #     tableaxis = ax.inset_axes(loc)
613        #     table_width=1
614        # else:
615        #     tableaxis = ax
616
617        # # Display the table on the axis
618        # return mtable.table(
619        #     tableaxis,
620        #     cellText=[[floatformat.format(value)] for value in summarydict.values()],
621        #     rowLabels=[texify_name(key) for key in summarydict],
622        #     colWidths=[table_width/2]*2,
623        #     edges=edges,
624        #     loc=loc, bbox=bbox
625        #     )
626
627        return [t1,t2]

Display bivariate statistics as a table on a plot axis

Parameters
  • ax (matplotlib.Figure.Axis): axis where the table will be displayed
  • variables (list or str, default='common'): names of attribute variables to include in summary names are case insensitive
    The following strings are also accepted in place of a list "all" (displays all variables) "common" (displays all measures of mean difference)
  • fitline_kw (dict, default=None): keywords passed to fitline
  • floatformat (str, default='{:.3f}'): format specifier for floating point values
  • loc (tuple (x0,y0), default=(0.8, 0.05)): location on the axis where the table will be drawn; can be in data units or axes units [0-1]
  • loc_units ({'axes' (default), 'data'}): specifies whether loc has 'data' units or 'axes' units [0-1]
Returns
  • text1, text2 (matplotlib text object): Artist for the two text boxes
def nmb(x0, x1):
def nmb( x0, x1 ):
    '''Compute Normalized Mean Bias (NMB)

    NMB = ( mean(x1) - mean(x0) ) / mean(x0)

    The value is evaluated in the equivalent form mean(x1) / mean(x0) - 1.

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean bias of x1 relative to x0
    '''

    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Ratio of experiment mean to reference mean, expressed as a bias
    ratio_of_means = np.mean(x1) / np.mean(x0)
    return ratio_of_means - 1

Compute Normalized Mean Bias (NMB)

NMB = ( mean(x1) - mean(x0) ) / mean(x0)

Parameters
  • x0 (array_like): reference values
  • x1 (array_like): experiment values
def nmae(x0, x1):
def nmae( x0, x1 ):
    '''Compute Normalized Mean Absolute Error (NMAE)

    NMAE = mean(abs(x1 - x0)) / abs(mean(x0))

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        mean absolute difference normalized by the magnitude of the reference mean
    '''

    # Mean of reference values
    x0_mean = np.mean(x0)

    # Mean absolute difference
    # np.subtract (rather than the - operator) also accepts plain
    # Python sequences such as lists
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value
    return abs_diff / np.abs( x0_mean )

Compute Normalized Mean Absolute Error (NMAE)

NMAE = mean(abs(x1 - x0)) / abs(mean(x0))

Parameters
  • x0 (array_like): reference values
  • x1 (array_like): experiment values
def nmbf(x0, x1):
def nmbf( x0, x1 ):
    '''Compute Normalized Mean Bias Factor (NMBF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    As implemented here:
        mean(x1) / mean(x0) - 1   when mean(x1) >= mean(x0)
        1 - mean(x0) / mean(x1)   otherwise

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean bias factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    ref_mean = np.mean(x0)
    exp_mean = np.mean(x1)

    # The branch depends on which mean is larger
    return (exp_mean / ref_mean - 1) if exp_mean >= ref_mean \
        else (1 - ref_mean / exp_mean)

Compute Normalized Mean Bias Factor (NMBF)

Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

Parameters
  • x0 (array_like): reference values
  • x1 (array_like): experiment values
def nmaef(x0, x1):
def nmaef( x0, x1 ):
    '''Compute Normalized Mean Absolute Error Factor (NMAEF)

    Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

    As implemented here, the mean absolute difference is divided by
    mean(x0) when mean(x1) >= mean(x0), and by mean(x1) otherwise.

    Parameters
    ----------
    x0 : array_like
        reference values
    x1 : array_like
        experiment values

    Returns
    -------
    float
        normalized mean absolute error factor
    '''

    # Ensure that arguments have the same length
    assert (len(x0) == len(x1)), \
        "Parameters x0 and x1 must have the same length"

    # Mean values
    x0_mean = np.mean(x0)
    x1_mean = np.mean(x1)

    # Mean absolute difference
    # np.subtract (rather than the - operator) also accepts plain
    # Python sequences such as lists
    abs_diff = np.mean( np.abs( np.subtract(x1, x0) ) )

    # Metric value: normalize by the smaller of the two means
    if x1_mean >= x0_mean:
        result = abs_diff / x0_mean
    else:
        result = abs_diff / x1_mean

    return result

Compute Normalized Mean Absolute Error Factor (NMAEF)

Definition from Yu et al. (2006) https://doi.org/10.1002/asl.125

Parameters
  • x0 (array_like): reference values
  • x1 (array_like): experiment values