climateComparison.f
********************************************************************************
* This file collects several functions and subroutines for climate
* comparison
* TODO: to be extended
5: ********************************************************************************
SUBROUTINE tempDist ( p_x, p_y, p_a,
c p_temp1, p_x1, p_y1, p_temp2, p_x2, p_y2, o_tempDist )
************************************************************************
10: * Calculates the temperature distance between 2 data points by calculating
* the averages over the entire timespan given and then the temperature
* distance sum over all months
* for THEORY: see Hallegatte et al. [2006]: Using climate analogues...
* Input:
15: * - p_x (Integer): grid size of data arrays
* - p_y (Integer): grid size of data arrays
* - p_a (Integer): number of years of data
* - p_temp1 (Real[p_x, p_y, p_a, 12]): monthly temperatures in which point 1 is contained
* - p_x1 (Integer): location of data point1 in temperatures array 1
20: * - p_y1 (Integer): location of data point1 in temperatures array 1
* - p_temp2 (Real[p_x, p_y, p_a, 12]): monthly temperatures in which point 2 is contained
* - p_x2 (Integer): location of data point2 in temperatures array 2
* - p_y2 (Integer): location of data point2 in temperatures array 2
* Output:
25: * - o_tempDist (Real): temperature distance between the two points
************************************************************************
IMPLICIT NONE
* Constants
INTEGER p_x, p_y, p_a ! grid size x/y and number of years p_a
30:
* Vars
REAL p_temp1(p_x, p_y, p_a, 12) !months assumed to be 12 ;)
REAL p_temp2(p_x, p_y, p_a, 12)
INTEGER p_x1, p_y1, p_x2, p_y2, m, a
35: REAL temp1Avg(12), temp2Avg(12)
REAL o_tempDist ! output var
! calculate averages
o_tempDist = 0
40: do m=1, 12
temp1Avg(m)=0
temp2Avg(m)=0
do a=1, p_a
temp1Avg(m) = temp1Avg(m) +
45: c p_temp1(p_x1, p_y1, a, m) / real(p_a)
temp2Avg(m) = temp2Avg(m) +
c p_temp2(p_x2, p_y2, a, m) / real(p_a)
enddo
! and from there the distance
50: o_tempDist = o_tempDist +
c sqrt(((temp1Avg(m)-temp2Avg(m)))**2)/12.
enddo
END
55:
SUBROUTINE precDist ( p_x, p_y, p_a,
c p_prec1, p_x1, p_y1, p_prec2, p_x2, p_y2,
c o_precMDist, o_precADist )
************************************************************************
60: * Calculates the relative monthly and annual average precipitation distance
* between 2 data points by first calculating the averages over the entire
* timespan given and then calculating the distances
* for THEORY: see Hallegatte et al. [2006]: Using climate analogues...
* Input:
65: * - p_x (Integer): grid size of data arrays
* - p_y (Integer): grid size of data arrays
* - p_a (Integer): number of years of data
* - p_prec1 (Real[p_x, p_y, p_a, 12]): monthly precipitation in which point 1 is contained
* - p_x1 (Integer): location of data point1 in precipitation array 1
70: * - p_y1 (Integer): location of data point1 in precipitation array 1
* - p_prec2 (Real[p_x, p_y, p_a, 12]): monthly precipitation in which point 2 is contained
* - p_x2 (Integer): location of data point2 in precipitation array 2
* - p_y2 (Integer): location of data point2 in precipitation array 2
* Output:
75: * - o_precMDist (Real): relative monthly distance
* - o_precADist (Real): relative yearly distance
************************************************************************
IMPLICIT NONE
* Constants
80: INTEGER p_x, p_y, p_a ! grid size x/y and number of years p_a
* Vars
REAL p_prec1(p_x, p_y, p_a, 12) !months assumed to be 12 ;)
REAL p_prec2(p_x, p_y, p_a, 12)
85: INTEGER p_x1, p_y1, p_x2, p_y2, m, a
REAL prec1Avg(12), prec2Avg(12), prec1Moy, prec2Moy
REAL o_precMDist, o_precADist ! outpu vars
! calculate averages
90: o_precMDist = 0
prec1Moy = 0
prec2Moy = 0
do m=1, 12
prec1Avg(m)=0
95: prec2Avg(m)=0
do a=1, p_a
prec1Avg(m) = prec1Avg(m) +
c p_prec1(p_x1, p_y1, a, m)/real(p_a)
prec2Avg(m) = prec2Avg(m) +
100: c p_prec2(p_x2, p_y2, a, m)/real(p_a)
enddo
! and from there the distance
o_precMDist = o_precMDist+
c sqrt(((prec2Avg(m)/prec1Avg(m)) - 1)**2)/12.
105: prec1Moy = prec1Moy + prec1Avg(m)/12.
prec2Moy = prec2Moy + prec2Avg(m)/12.
enddo
o_precADist = abs(prec1Moy - prec2Moy)/prec2Moy !FIXME: is this actually correct?
END
110:
SUBROUTINE ks3d2s(x1,y1,z1,n1,x2,y2,z2,n2,d,prob,rr)
************************************************************************
* Two-sample three-dimensional Kolmogorov-Smirnov test
115: * THEORY described by Fasano and Franceschini (1987): A multidimensional
* version of the Kolmogorov-SmirnovA multidimensional version of the Kolmogorov-Smirnov
*
* Three-dimensional Kolmogorov-Smirnov test on two samples. Given the x, y and z coordinates
* of the first sample as n1 values in arrays x1(1:n1), y1(1:n1) and z1(1:n1), and likewise for the
120: * second sample, n2 values in arrays x2, y2 and z2, this routine returns the three-dimensional,
* two-sample K-S statistic as d, and its p-value as prob. Small values of prob show that the
* two samples are significantly different. Prob is inferred from interpolated polynomials
* and thus only an estimate (and only available for limited parameters, see ksDist2Prob)
*
125: * Input:
* - x1 (Real[n1]): x coordinates of first sample
* - y1 (Real[n1]): y coordinates of first sample
* - z1 (Real[n1]): z coordinates of first sample
* - n1 (Integer): size of the first sample
130: * - x2 (Real[n2]): x coordinates of second sample
* - y2 (Real[n2]): y coordinates of second sample
* - z2 (Real[n2]): z coordinates of second sample
* - n2 (Integer): size of the second sample
* Output:
135: * - d (Real): K-D test distance
* - prob (Real): p-value
* - rr (Real): average linear correlation coefficient
************************************************************************
INTEGER n1,n2
140: REAL d,x1(n1),x2(n2),y1(n1),y2(n2),z1(n1),z2(n2)
INTEGER j
REAL d1,d2
REAL fa,fb,fc,fd,fe,ff,fg,fh
REAL ga,gb,gc,gd,ge,gf,gg,gh
145: REAL r1, r2, rxy, ryz, rzx, rr, prob, ksDist2Prob
d1=0.0
! First, use points in the first sample as origins.
do j=1,n1
150: call threedQuadCalc(x1(j),y1(j),z1(j),x1,y1,z1,n1,
c fa,fb,fc,fd,fe,ff,fg,fh)
call threedQuadCalc(x1(j),y1(j),z1(j),x2,y2,z2,n2,
c ga,gb,gc,gd,ge,gf,gg,gh)
d1=max(d1,abs(fa-ga),abs(fb-gb),abs(fc-gc),abs(fd-gd),
155: c abs(fe-ge),abs(ff-gf),abs(fg-gg),abs(fh-gh))
enddo
d2=0.0
! Then, use points in the second sample as origins.
do j=1,n2
160: call threedQuadCalc(x2(j),y2(j),z2(j),x1,y1,z1,n1,
c fa,fb,fc,fd,fe,ff,fg,fh)
call threedQuadCalc(x2(j),y2(j),z2(j),x2,y2,z2,n2,
c ga,gb,gc,gd,ge,gf,gg,gh)
d2=max(d2,abs(fa-ga),abs(fb-gb),abs(fc-gc),abs(fd-gd),
165: c abs(fe-ge),abs(ff-gf),abs(fg-gg),abs(fh-gh))
enddo
d=0.5*(d1+d2) ! average K-S statistic
170: ! correlation coefficient
call corCoef(n1,x1,y1,r1)
call corCoef(n2,x2,y2,r2)
rxy=0.5*(abs(r1)+abs(r2))
175: call corCoef(n1,y1,z1,r1)
call corCoef(n2,y2,z2,r2)
ryz=0.5*(abs(r1)+abs(r2))
call corCoef(n1,z1,x1,r1)
180: call corCoef(n2,z2,x2,r2)
rzx=0.5*(abs(r1)+abs(r2))
rr = (rxy + ryz + rzx)/3.
prob = ksDist2Prob (3, n1, d, rr) !assumes n1 the same as n2
185:
return
END
190: SUBROUTINE threedQuadCalc(x,y,z,xx,yy,zz,nn,
c fa,fb,fc,fd,fe,ff,fg,fh)
************************************************************************
* 3D spatial distribution
* Given an origin (x,y,z), and an array of nn points with coordinates xx,
195: * yy and zz, count how many of them are in each of the 8 quadrants around
* the origin, and return the normalized fractions.
* Quadrants are labeled alphabetically, counterclockwise, top-to-bottom from
* the top upper right quadrant.
* Input:
200: * - x (Real): x-coordinate of the origin
* - y (Real): x-coordinate of the origin
* - z (Real): x-coordinate of the origin
* - xx (Real[nn]): x coordinates of all data points
* - yy (Real[nn]): y coordinates of all data points
205: * - zz (Real[nn]): z coordinates of all data points
* - nn (Integer): number of data points
* Output:
* - fa (Real): normalized fraction of data points in Q1 (top upper right)
* - fb (Real): normalized fraction of data points in Q2 (top upper left)
210: * - fc (Real): normalized fraction of data points in Q3 (top lower left)
* - fd (Real): normalized fraction of data points in Q4 (top lower right)
* - fe (Real): normalized fraction of data points in Q5 (bottom upper right)
* - ff (Real): normalized fraction of data points in Q6 (bottom upper left)
* - fg (Real): normalized fraction of data points in Q7 (bottom lower left)
215: * - fh (Real): normalized fraction of data points in Q8 (bottom lower right)
************************************************************************
IMPLICIT none
INTEGER nn
REAL fa,fb,fc,fd, fe, ff, fg, fh
220: REAL x,y,z,xx(nn),yy(nn),zz(nn)
INTEGER k,na,nb,nc,nd,ne,nf,ng,nh
REAL fnorm
na=0
nb=0
225: nc=0
nd=0
ne=0
nf=0
ng=0
230: nh=0
do k=1,nn
if (zz(k) > z) then
if(yy(k) > y)then
if(xx(k) > x)then
235: na=na+1
else
nb=nb+1
endif
else
240: if(xx(k) > x)then
nd=nd+1
else
nc=nc+1
endif
245: endif
else
if(yy(k) > y)then
if(xx(k) > x)then
ne=ne+1
250: else
nf=nf+1
endif
else
if(xx(k) > x)then
255: nh=nh+1
else
ng=ng+1
endif
endif
260: endif
enddo
fnorm=1.0/nn
fa=fnorm*na
fb=fnorm*nb
265: fc=fnorm*nc
fd=fnorm*nd
fe=fnorm*ne
ff=fnorm*nf
fg=fnorm*ng
270: fh=fnorm*nh
return
END
SUBROUTINE ks2d2s(x1,y1,n1,x2,y2,n2,d,prob,rr)
275: ************************************************************************
* Two-sample two-dimensional K-S test
* SOURCE (slightly modified, prob calculation differs):
* Numerical Recipes in Fortran 77 Chapter 14.7
*
280: * Two-dimensional Kolmogorov-Smirnov test on two samples. Given the x and y coordinates
* of the first sample as n1 values in arrays x1(1:n1) and y1(1:n1), and likewise for the
* second sample, n2 values in arrays x2 and y2, this routine returns the two-dimensional, two-
* sample K-S statistic as d, and its p-value level as prob. Small values of prob show
* that the two samples are significantly different. Note that the test is slightly distribution-
285: * dependent, so prob is only an estimate.
*
* Input:
* - x1 (Real[n1]): x coordinates of first sample
* - y1 (Real[n1]): y coordinates of first sample
290: * - n1 (Integer): size of the first sample
* - x2 (Real[n2]): x coordinates of second sample
* - y2 (Real[n2]): y coordinates of second sample
* - n2 (Integer): size of the second sample
* Output:
295: * - d (Real): K-D test distance
* - prob (Real): p-value
* - rr (Real): average linear correlation coefficient
************************************************************************
INTEGER n1,n2
300: REAL d,prob,x1(n1),x2(n2),y1(n1),y2(n2)
INTEGER j
REAL d1,d2,fa,fb,fc,fd,ga,gb,gc,gd,r1,r2,rr,ksDist2Prob
d1=0.0
305: ! First, use points in the first sample as origins.
do j=1,n1
call quadct(x1(j),y1(j),x1,y1,n1,fa,fb,fc,fd)
call quadct(x1(j),y1(j),x2,y2,n2,ga,gb,gc,gd)
d1=max(d1,abs(fa-ga),abs(fb-gb),abs(fc-gc),abs(fd-gd))
310: enddo
d2=0.0
! Then, use points in the second sample as origins.
do j=1,n2
call quadct(x2(j),y2(j),x1,y1,n1,fa,fb,fc,fd)
315: call quadct(x2(j),y2(j),x2,y2,n2,ga,gb,gc,gd)
d2=max(d2,abs(fa-ga),abs(fb-gb),abs(fc-gc),abs(fd-gd))
enddo
d=0.5*(d1+d2) ! average K-S statistic
320: call corCoef(n1,x1,y1,r1)
call corCoef(n2,x2,y2,r2)
rr = 0.5*(abs(r1)+abs(r2))
prob = ksDist2Prob (2, n1, d, rr) !assumes n1 the same as n2
325: return
END
SUBROUTINE quadct(x,y,xx,yy,nn,fa,fb,fc,fd)
330: ************************************************************************
* 2D spatial distribution
* SOURCE: Numerical Recipes in Fortran 77 Chapter 14.7
*
* Given an origin (x, y), and an array of nn points with coordinates xx and yy, count how
335: * many of them are in each quadrant around the origin, and return the normalized frac-
* tions. Quadrants are labeled alphabetically, counterclockwise from the upper right.
*
* Input:
* - x (Real): x-coordinate of the origin
340: * - y (Real): x-coordinate of the origin
* - xx (Real[nn]): x coordinates of all data points
* - yy (Real[nn]): y coordinates of all data points
* - nn (Integer): number of data points
* Output:
345: * - fa (Real): normalized fraction of data points in Q1 (upper right)
* - fb (Real): normalized fraction of data points in Q2 (upper left)
* - fc (Real): normalized fraction of data points in Q3 (lower left)
* - fd (Real): normalized fraction of data points in Q4 (lower right)
************************************************************************
350: INTEGER nn
REAL fa,fb,fc,fd,x,y,xx(nn),yy(nn)
INTEGER k,na,nb,nc,nd
REAL ff
na=0
355: nb=0
nc=0
nd=0
do k=1,nn
if(yy(k).gt.y)then
360: if(xx(k).gt.x)then
na=na+1
else
nb=nb+1
endif
365: else
if(xx(k).gt.x)then
nd=nd+1
else
nc=nc+1
370: endif
endif
enddo
ff=1.0/nn
fa=ff*na
375: fb=ff*nb
fc=ff*nc
fd=ff*nd
return
END
380:
SUBROUTINE kstwo(data1, n1, data2, n2, d, prob)
************************************************************************
* Two-sample 1-dimensional K-S test
385: * SOURCE (slightly modified): Numerical Recipes in Fortran 77 Chapter 14.3
* WARNING: the input arrays are sorted after use of this routine!!
*
* Input:
* - data1 (Real[n1]): first sample
390: * - n1 (Integer): size of the first sample
* - data2 (Real[n2]): second sample
* - n2 (Integer): size of the second sample
* Output:
* - d (Real): K-D test distance
395: * - prob (Real): p-value (calculated via the function probks)
************************************************************************
INTEGER n1, n2
REAL d, prob, data1(n1), data2(n2)
INTEGER j1,j2
400: REAL d1,d2,dt,en1,en2,en,fn1,fn2,probks
call shell(n1,data1)
call shell(n2,data2)
en1=n1
en2=n2
405: j1=1 !Next value of data1 to be processed.
j2=1
fn1=0.
fn2=0.
d=0.
410: 1 if (j1.le.n1.and.j2.le.n2) then
d1=data1(j1)
d2=data2(j2)
if(d1.le.d2)then
fn1=j1/en1
415: j1=j1+1
endif
if(d2.le.d1)then
fn2=j2/en2
j2=j2+1
420: endif
dt=abs(fn2-fn1)
if(dt.gt.d) d=dt
goto 1
endif
425: en=sqrt(en1*en2/(en1+en2))
prob=probks((en+0.12+0.11/en)*d)
return
END
430:
FUNCTION probks(alam)
************************************************************************
* Kolmogorov-Smirnov probability function for 1D K-S tests
* SOURCE: Numerical Recipes in Fortran 77 Chapter 14.3
435: * Input: - alam(Real): K-S distance derivate (see Numerical Recipes)
* Return (Real): p-value
************************************************************************
REAL probks,alam,EPS1,EPS2
PARAMETER (EPS1=0.001, EPS2=1.e-8)
440: INTEGER j
REAL a2,fac,term,termbf
a2=-2.*alam**2
fac=2.
probks=0.
445: ! Previous term in sum.
termbf=0.
do j=1,100
term=fac*exp(a2*j**2)
probks=probks+term
450: if( (abs(term).le.EPS1*termbf).or.
c (abs(term).le.EPS2*probks) )return
fac=-fac !Alternating signs in sum.
termbf=abs(term)
enddo
455: ! Get here only by failing to converge.
probks=1.
return
END
460:
********************************************************************************
* Distance to pvalue convertion for multidimensional K-S tests
*
REAL FUNCTION ksDist2Prob (p_dim, p_n, p_d, p_cc)
465: ************************************************************************
* K-S distance to pvalue translation for multidimensional K-S tests
* Derived from interpolated polynomials that fit data and functions
* reported by Fasano and Franceschini (1987): A multidimensional
* version of the Kolmogorov-SmirnovA multidimensional version of the
470: * Kolmogorov-Smirnov
*
* Input:
* - p_dim (Integer): n-dimensional test (e.g. 2 or 3)
* - p_n (Integer): sample size
475: * - p_d (Real): K-S distance
* - p_cc (Real): average correlation coefficient
* Return (Real): corresponding p-value
************************************************************************
implicit none
480: INTEGER p_dim !dimension of test (currently 2D and 3D supported)
INTEGER p_n !size of samples (currently 20 and 30 supported)
REAL p_d, p_cc ! distance and correlation coefficient (avg of the 3 in 3D case!)
INTEGER samplesN, ccsN, coefsN
485: PARAMETER(samplesN=2, ccsN=6, coefsN=4)
INTEGER samples(samplesN)
REAL ccs(ccsN)
REAL maxD(ccsN,2:3,samplesN)
490: REAL polCoefs(coefsN,ccsN,2:3,samplesN) ! coefficients stored such that y = sum(1;j) coef_j x^(j-1)
INTEGER sampleI, ccI
REAL bestDist !to find right cc
495: INTEGER i ! looping
DATA samples/20, 30/
DATA ccs/0.0, 0.5, 0.6, 0.7, 0.8, 0.9/
500:
DATA maxD/
c 0.556, 0.554, 0.552, 0.550, 0.544, 0.530, ! 20 samples, 2D
c 0.569, 0.563, 0.563, 0.557, 0.550, 0.538, ! 20 samples, 3D
c 0.462, 0.459, 0.457, 0.454, 0.448, 0.436, ! 30 samples, 2D
505: c 0.468, 0.467, 0.464, 0.458, 0.454, 0.443 ! 30 samples, 3D
c /
DATA polCoefs/
c 4.902, -23.582, 37.477, -19.586, ! 20 samples, 2D, cc 0.0
510: c 4.735, -22.750, 36.042, -18.724, ! 20 samples, 2D, cc 0.5
c 4.710, -22.857, 36.684, -19.385, ! 20 samples, 2D, cc 0.6
c 4.540, -22.139, 35.709, -18.962, ! 20 samples, 2D, cc 0.7
c 4.296, -21.294, 34.968, -18.948, ! 20 samples, 2D, cc 0.8
c 3.835, -19.593, 33.252, -18.680, ! 20 samples, 2D, cc 0.9
515: c
c 5.427, -24.300, 35.040, -15.932, ! 20 samples, 3D, cc 0.0
c 5.520, -26.422, 41.843, -21.837, ! 20 samples, 3D, cc 0.5
c 5.350, -25.864, 41.345, -21.765, ! 20 samples, 3D, cc 0.6
c 5.075, -25.044, 41.055, -22.296, ! 20 samples, 3D, cc 0.7
520: c 5.185, -27.005, 47.183, -27.622, ! 20 samples, 3D, cc 0.8
c 4.898, -26.546, 48.488, -29.801, ! 20 samples, 3D, cc 0.9
c
c 4.825, -27.686, 52.213, -32.134, ! 30 samples, 2D, cc 0.0
c 4.733, -27.338, 51.894, -32.138, ! 30 samples, 2D, cc 0.5
525: c 4.563, -26.191, 49.164, -29.879, ! 30 samples, 2D, cc 0.6
c 4.417, -25.569, 48.448, -29.759, ! 30 samples, 2D, cc 0.7
c 4.202, -24.890, 48.514, -30.900, ! 30 samples, 2D, cc 0.8
c 3.766, -23.101, 46.797, -31.123, ! 30 samples, 2D, cc 0.9
c
530: c 5.334, -28.606, 49.131, -26.314, ! 30 samples, 3D, cc 0.0
c 5.450, -31.334, 59.458, -37.054, ! 30 samples, 3D, cc 0.5
c 5.240, -30.257, 57.477, -35.685, ! 30 samples, 3D, cc 0.6
c 4.984, -29.311, 56.879, -36.224, ! 30 samples, 3D, cc 0.7
c 5.239, -33.069, 69.975, -49.571, ! 30 samples, 3D, cc 0.8
535: c 5.023, -33.367, 74.752, -56.376 ! 30 samples, 3D, cc 0.9
c /
sampleI = 0
do i=1,samplesN
540: if (p_n == samples(i)) sampleI = i
enddo
if (sampleI == 0) then
print "no pvalue coefficients for data of sample size", p_n,
c " available!"
545: STOP
endif
bestDist=1.
do i=1, ccsN
550: if (abs(p_cc-ccs(i)) < bestDist ) then
bestDist = abs(p_cc-ccs(i))
ccI = i
endif
enddo
555:
if (p_d > maxD(ccI,p_dim,sampleI)) then
ksDist2Prob = 0. !distance larger than max, thus probability is 0%
return
endif
560:
ksDist2Prob=polCoefs(coefsN,ccI,p_dim,sampleI)
do i=(coefsN-1),1,-1
ksDist2Prob=ksDist2Prob*p_d + polCoefs(i,ccI,p_dim,sampleI)
enddo
565:
! 100% is the max
ksDist2Prob=min(1.0, ksDist2Prob)
END
Info Section
Warning: externals (function calls) may not be acurate
calls: corcoef, quadct, shell, threedquadcalc
back to top
f2html v0.3 (C) 1997,98 Beroud Jean-Marc. Fri Aug 11 17:54:58 CEST 2006