;NOTE: SAVE THIS FILE AS IDL PROCEDURE CALLED prob_scores.pro ;pro prob_scores,show=show,h48=h48 ; ;Read from Pertti's POP_3 file and compute a variety of probabilistic ;verification scores and diagrams: ; Brier score ; Brier skill score ; Ranked probability score ; Ranked probability skill score ; Reliability diagram ; ROC diagram ; ROC area ; Value score ;This is to put on the web page as an example ;To show a plot, set show='reliability', 'ROC', or 'value' ;To compute results for 48h forecasts set /h48 (default=24h fcsts) ; ;Example: prob_scores,show='reliability' makes reliability plots for ; 24h forecasts ;Example: prob_scores,show='ROC',/h48 makes ROC plots for 48h fcsts ;----------------------------------------------------------------------- pro pstats,obsocc,pop,refpop,ncat, $ pod=pod,pofd=pofd, show=show ;Compute probabilistic verification statistics, including skill scores, on ;pooled data. ; ;Input: obsocc observed occurrence [npts] ; pop forecast probability of precipitation [npts] ; Note: negative values of obs or prob treated as bad data! ; refpop reference probability of precipitation (climatology) [npts] ; ncat number of probability categories (generally equal to ; number of ensemble members + 1 ) ;Keywords: ; pod, pofd output probability of detection, probability of false ; detection [ncat,nthresh] ; show show='reliability', 'ROC', or 'value' if n_elements(show) eq 0 then show='' probcat=1./(ncat-1.)*findgen(ncat) & print,'probcat=',probcat binsize=probcat[1]-probcat[0] halfbinsize=binsize/2. ;Check compatibility of array sizes npts=n_elements(obsocc) if n_elements(pop) ne npts or n_elements(refpop) ne npts $ then begin print,'pstats -- size mismatch between obsocc, pop, refpop, thresh -- quit' goto, done endif ;Keep only good observations (i.e., >=0; masked values=-1) ss=where(obsocc ge 0,npts) obs=obsocc[ss] prob=pop[ss] refprob=refpop[ss] ;Output POD and POFD pod=fltarr(ncat+1) pofd=fltarr(ncat+1) ;Working arrays for probabilistic verification n =make_array(ncat,/float,value=0.) o =make_array(ncat,/float,value=0.) nref=make_array(ncat,/float,value=0.) oref=make_array(ncat,/float,value=0.) ;Bin by forecast probability category for k=0,ncat-1 do begin ;Which forecasts fall into probability category k ss=where(abs(prob-probcat[k]) lt halfbinsize,num) if num gt 0 then begin n[k]=num ;# fcsts in category k o[k]=total(obs[ss]) ;# observed occurrences in category k endif sr=where(abs(refprob-probcat[k]) lt halfbinsize,num) if num gt 0 then begin nref[k]=num oref[k]=total(obs[sr]) endif endfor ;Compute Brier score manually BS=total((prob-obs)^2.)/npts ;Brier score decomposition into reliability, resolution, uncertainty reliability,probcat,n,o,BSd,decomp=decomp,show=(show eq 'reliability') rel=decomp[1] res=decomp[2] unc=decomp[3] print,'BS-BSd=',BS-BSd ;Brier score for ref fcst (climatology), Brier skill score BSref=total((refprob-obs)^2.)/npts BSS=(BSref-BS)/BSref ;Area under ROC curve roc,n,o,h,f,rocarea,/debug,show=(show eq 'ROC') ;roc,n,o,h,f,rocarea,/debug,/binormal,show=(show eq 'ROC') pod=[1.,h] pofd=[1.,f] ;Value plot pclim=total(oref)/total(nref) value,probcat,n,o,pclim,show=(show eq 'value') print,'n=',n print,'BS=',BS print,'reliability=',rel print,'resolution=',res print,'uncertainty=',unc print,'BSS=',BSS print,'ROCarea=',rocarea done: end ;----------------------------------------------------------------------- pro reliability, probcat,n,o,BS, $ title=title,decomp=decomp,show=show,debug=debug ;Compute Brier score, and components: reliability, resolution, uncertainty ;Optional: make reliability diagram for probabilistic forecast of ;value at each gridpoint exceeding threshold ;Input: probcat probability categories ; n # forecasts in each fcst probability category ; o # occurrences in each fcst probability category ;Output: BS Brier Score computed as rel-res+unc (Wilks, 1995) ;Keywds: title optional title for plot ; decomp [obar, reliability, resolution, uncertainty] ; show show reliability diagram (default=don't) ; debug print,n,o, components BS=-999. decomp=[-999.,-999.,-999.,-999.] ;Get rid of empty bins ss=where(n gt 0.,ng) if ng eq 0 then goto, done nn=n[ss] oo=o[ss] pcat=probcat[ss] ;Compute observed probability in each fcst prob. category oo=oo/nn ntot=total(nn) ;Compute reliability, resolution, and uncertainty obar=total(nn*oo)/ntot rel =total(nn*(pcat-oo)^2.)/ntot res =total(nn*(oo-obar)^2.)/ntot unc =obar*(1.-obar) BS=rel-res+unc if n_elements(debug) eq 0 then debug=0 if (debug) then begin print,'obar,rel,res,unc=',obar,rel,res,unc print,'BS=',BS endif decomp=[obar,rel,res,unc] if n_elements(show) eq 0 then show=0 if (show) then begin if n_elements(title) eq 0 then title='' ;Plot diagonal line plot,[0,1],[0,1],title=title, $ xtitle='Forecast Probability',ytitle='Observed Frequency', $ /xstyle,/ystyle,linestyle=0,thick=1 ;Plot reliability oplot,pcat,oo,max_value=1.,thick=2 ;Plot climatological frequency oplot,[0,1],[obar,obar],linestyle=2,thick=1 ;Plot no-skill line oplot,[0,1],[obar/2,obar+(1.-obar)/2.],linestyle=1,thick=1 plot,[0,1],[0,max(nn)],/nodata,/noerase,position=[.35,.7,.60,.90], $ xtitle='Fcst Probability',ytitle='# Times Fcst', $ charsize=0.8 xbox=[-1,1,1,-1,-1] & ybox=[0,0,1,1,0] for i=0,ng-1 do polyfill,pcat[i]+0.02*xbox,nn[i]*ybox,color=255,/noclip endif done: end ;----------------------------------------------------------------------- pro roc, n,o,h,f,area, $ binormal=binormal,title=title,show=show,debug=debug ;Compute area under ROC curve. ;Optional: make relative operating characteristic (ROC) diagram for ;probabilistic forecast of value at each gridpoint exceeding threshold ;Input: n # forecasts in each fcst probability category ; o # occurrences in each fcst probability category ; title optional title for plot ;Output: h hit rate as a function of probability threshold ; f false alarm rate (POFD) " " ; area area under ROC curve ;Keywds: binormal use binormal fit to estimate ROC area (default=not) ; title title of plot ; show show plot (default=don't) ; debug print values of n,o,n,f if n_elements(binormal) eq 0 then binormal=0 if n_elements(title) eq 0 then title='Relative Operating Characteristic' if n_elements(show) eq 0 then show=0 if n_elements(debug) eq 0 then debug=0 ;Number of probability categories ncat=n_elements(n) ;Compute hit rates and false alarm rates by probability category non=n-o h=make_array(ncat,/float,value=-999.) f=make_array(ncat,/float,value=-999.) for k=0,ncat-2 do begin W=total(non[0:k]) Y=total( o[0:k]) Z=total(non[k+1:ncat-1]) X=total( o[k+1:ncat-1]) if (X+Y) gt 0. then h[k]=X/(X+Y) if (Z+W) gt 0. then f[k]=Z/(Z+W) endfor ;Add endpoints at [0,0] and [1,1] h[ncat-1]=0. f[ncat-1]=0. hs=[1.,h] fs=[1.,f] ;Get rid of empty bins ss=where(hs ge 0. and fs ge 0.,nok) hs=hs[ss] fs=fs[ss] if (debug) then begin print,format='(" f=",16f8.4)',f print,format='(" h=",16f8.4)',h endif if (binormal) then begin print,'Using binormal estimation of ROC area' ;Compute area under the curve using binormal curve fitting (Ian Mason's notes) if nok ge 3 then begin ;Transform hit rate and false alarm rate to Z ;but don't transform values of 0 or fit won't work ss=where(h gt 0. and f gt 0.,npos) nz=npos-1 zh=fltarr(nz) zf=fltarr(nz) for k=0,nz-1 do zh[k]=gauss_cvf(h[ss[k]]) for k=0,nz-1 do zf[k]=gauss_cvf(f[ss[k]]) ;Fit line to zh vs. zf if nz eq 1 then begin slope=1. ;in 2-category case assume equal variances of PDFs intcpt=zh[0]-zf[0] endif if nz eq 2 then begin slope=(zh[1]-zh[0])/(zf[1]-zf[0]) ;2 points define line intcpt=zh[0]-slope*zf[0] endif if nz ge 3 then begin coeff= $ ;Regression fit regress(reform(zf,1,nz),zh,replicate(1.,nz),zhfit,const=intcpt) slope=coeff[0] print,'Binormal fit: slope, intercept=',slope,intcpt endif ;For integration take finer intervals in Z space zffit=-3.0+0.1*findgen(61) zhfit=intcpt+slope*zffit ;Now transform back to probability space before integrating hfit=fltarr(61) ffit=fltarr(61) for k=0,60 do hfit[k]=1.-gauss_pdf(zhfit[k]) for k=0,60 do ffit[k]=1.-gauss_pdf(zffit[k]) hfit=[1.,hfit,0.] ffit=[1.,ffit,0.] ;Now integrate under curve using trapezoid rule area=0. for k=0,61 do area=area+abs(ffit[k]-ffit[k+1])*(hfit[k]+hfit[k+1])/2. endif else begin area=0.5 endelse print,'ROC area (binormal fit)=',area endif else begin ;Compute area under the curve from line segments area=0. if nok ge 3 then for k=0,nok-2 do $ area=area+abs(fs[k]-fs[k+1])*(hs[k]+hs[k+1])/2. $ else area=0.5 print,'ROC area (line segments)=',area endelse if (show) then begin plot,[0,1],[0,1],title=title, $ xtitle='False Alarm Rate',ytitle='Hit Rate',/xstyle,/ystyle,linestyle=2 if (binormal) then oplot,ffit,hfit,max_value=1., thick=2 $ else oplot,fs,hs,max_value=1.,thick=2 ;oplot,fs,hs,max_value=1.,psym=7 endif done: end ;----------------------------------------------------------------------- function rps,prob,obsocc ;Computes the Ranked Probability Score RPS, which is ~the mean squared ;difference between the observed and forecast CDFs. ;Reference: Stanski et al, 1989 ; ;Input: prob forecast probability in each category ; dimensions (nc,nr,nbins) or (npts,nbins) ; obsocc observed occurrence in each category ; dimensions (nc,nr,nbins) or (npts,nbins) ;Output: rps ranked probability score sz=size(obsocc) ndim=sz[0] nbins=sz[ndim] CDFo=fltarr(nbins) CDFf=fltarr(nbins) ;Want to work with arrays of dimension (npts,nbins) if ndim eq 2 then begin npts=sz[1] ob=obsocc prb=prob endif else begin npts=sz[1]*sz[2] ob=reform(obsocc,npts,nbins) prb=reform(prob,npts,nbins) endelse ;Loop on data rps=0. n=0. CDFo=fltarr(nbins) CDFf=fltarr(nbins) for i=0,npts-1 do begin ;Define the CDF for this observation o=reform(ob[i,*],nbins) CDFo[0]=o[0] for j=1,nbins-1 do CDFo[j]=CDFo[j-1]+o[j] ;Invert the fcst prob array to create CDF p=reform(prb[i,*],nbins) CDFf[0]=p[0] for j=1,nbins-1 do CDFf[j]=CDFf[j-1]+p[j] ;Compute mean squared difference (nearly) msd=total((CDFf-CDFo)^2) / (nbins-1.) ;print,'o =',o,' CDFo=',CDFo,' p =',p,' CDFf=',CDFf rps=rps+msd n=n+1 continue: endfor ;Get mean RPS over all data rps=rps/n return,rps end ;----------------------------------------------------------------------- pro value,probcat,n,o,pclim,show=show ;Plot relative value as a function of cost/loss ratio ;For a probability forecast plots the envelope of curve maxima ;Input: probcat probability categories ; n # forecasts in each fcst probability category ; o # occurrences in each fcst probability category ; pclim climatological probability (base rate) ;Keywds: show show plot (default=don't) if n_elements(show) eq 0 then show=0 non=n-o npts=total(n) ncat=n_elements(probcat) ;Compute value as a function of cost/loss ratio between 0 and 1 nb=50 ;number of increments between 0 and 1 CLratio=1./(nb-1.)*findgen(nb) value=fltarr(nb) maxvalue=make_array(nb,/float,value=-999.) ;Envelope of value curves if (show) then plot,[0,1],[0,0],title=title,yrange=[0,1], $ xtitle='Cost/Loss Ratio',ytitle='Relative Value',linestyle=2 for icat=0,ncat-2 do begin p=probcat(icat+1) ;;;;p=pclim ;Used for deterministic forecast value assessment k=max([0,where(probcat lt p)]) Y=total( o[0:k])/npts ;misses Z=total(non[k+1:ncat-1])/npts ;false alarms X=total( o[k+1:ncat-1])/npts ;hits for ib=0,nb-1 do begin if CLratio[ib] lt pclim then $ value[ib]=(Clratio[ib]*(X+Z-1.)+Y) / (CLratio[ib]*(pclim-1.)) if CLratio[ib] ge pclim then $ value[ib]=(Clratio[ib]*(X+Z)+Y-pclim) / (pclim*(CLratio[ib]-1.)) if value[ib] gt maxvalue[ib] then maxvalue[ib]=value[ib] endfor if (show) then oplot,CLratio,value,linestyle=0,thick=0.5 endfor if (show) then oplot,CLratio,maxvalue,thick=2 end ;----------------------------------------------------------------------- pro prob_scores,show=show,h48=h48 ;Read from Pertti's POP_3 file and compute a variety of probabilistic ;verification scores and diagrams: ; Brier score ; Brier skill score ; Ranked probability score ; Ranked probability skill score ; Reliability diagram ; ROC diagram ; ROC area ; Value score ;This is to put on the web page as an example ;To show a plot, set show='reliability', 'ROC', or 'value' ;To compute results for 48h forecasts set /h48 (default=24h fcsts) ;Notes from Pertti: ;------------------ ;OBS is daily rainfall in mm ;P's are probabilities adding up to 1.0 ;h0: No rain (precip le 0.2 mm) ;h1: precip between 0.3 and 4.4 mm ;h2: precip ge 4.5 mm yyyy=0L & mm=0L & dd=0L RR=0. P1d=fltarr(3) P2d=fltarr(3) P24=fltarr(365,3) P48=fltarr(365,3) O24=fltarr(365,3) O48=fltarr(365,3) R24=fltarr(365) R48=fltarr(365) line='' ;Read the data from file ;Throw out bad data n24=0 n48=0 openr,unit,'POP_3cat_2003.txt',/get_lun readf,unit,line for id=0,364 do begin readf,unit,yyyy,mm,dd,RR,P1d,P2d if RR lt 999 and P1d[0] ge 0. then begin P24[n24,*]=P1d O24[n24,0]=(RR le 0.2) O24[n24,1]=(RR ge 0.3 and RR le 4.4) O24[n24,2]=(RR ge 4.5) R24[n24]=RR n24=n24+1 endif if RR lt 999 and P2d[0] ge 0. then begin P48[n48,*]=P2d O48[n48,0]=(RR le 0.2) O48[n48,1]=(RR ge 0.3 and RR le 4.4) O48[n48,2]=(RR ge 4.5) R48[n48]=RR n48=n48+1 endif endfor close,unit & free_lun,unit print,'Number of good samples n24,n48=',n24,n48 n=n24 P24=P24[0:n-1,*] P48=P48[0:n-1,*] O24=O24[0:n-1,*] O48=O48[0:n-1,*] R24=R24[0:n-1,*] R48=R48[0:n-1,*] ;Plot rain amount to see whether there is a seasonal signal ;If there is a strong seasonal cycle then the sample climatology ;for the whole year might not be an appropriate reference plot,R24,ytitle='Rainfall (mm)',xtitle='Day',xrange=[0,n],/xstyle ;Get sample climatological probability P24clim=fltarr(3) P48clim=fltarr(3) for icat=0,2 do begin P24clim[icat]=mean(O24[*,icat]) P48clim[icat]=mean(O48[*,icat]) endfor print,'P24clim=',P24clim print,'P48clim=',P48clim P24clim=[[replicate(P24clim[0],n)],[replicate(P24clim[1],n)],[replicate(P24clim[2],n)]] P48clim=[[replicate(P48clim[0],n)],[replicate(P48clim[1],n)],[replicate(P48clim[2],n)]] ;------------------------------------------------------------------- if n_elements(show) eq 0 then show='' ;Compute statistics using pstats.pro ;Start with probability of rain (cats 1 + 2) obs=O24[*,1]+O24[*,2] pop=P24[*,1]+P24[*,2] refpop=P24clim[*,1]+P24clim[*,2] obshi=O24[*,2] pophi=P24[*,2] refpophi=P24clim[*,2] RR=R24 RaPS=rps(P24,O24) RPSref=rps(P24clim,O24) if n_elements(h48) gt 0 then begin print,'Verifying 48h forecasts' obs=O48[*,1]+O48[*,2] pop=P48[*,1]+P48[*,2] refpop=P48clim[*,1]+P48clim[*,2] obshi=O48[*,2] pophi=P48[*,2] refpophi=P48clim[*,2] RR=R48 RaPS=rps(P48,O48) RPSref=rps(P48clim,O48) endif ncat=11 print,'' & print,'Statistics for POP (2nd and 3rd categories)' if show ne '' then window,0,xsize=400,ysize=380,title='POP' pstats,obs,pop,refpop,ncat,show=show print,'' & print,'Statistics for POP for highest rain category' ;If plotting results then make another window the same size as the first if show ne '' then window,1,xsize=!d.x_size,ysize=!d.y_size,title='POPhi' pstats,obshi,pophi,refpophi,ncat,show=show RPSS=(RPSref-RaPS)/RPSref print,'RPS=',RaPS print,'RPSS=',RPSS end