****The lines below constitute the fortran source files and model data used in compiling pds ****Direct compilation of the combined files below is unlikely to be successful as some of the files refer to the extension to fortran by the Leahy compiler to enable a windows input and output and would require the original compiler and some files in non text format. However, the source code for most of the subroutines will still be useful and it would not be much work to use them for a non-windows calling program to do all the statistical tests that the original program can do. The individual files are combined but headings have been added at the start of most of these individual files to give some (limited) explanation. ANOVA Subroutine !c Prog to read in columns of unequal length for independent t test and anova !c and to then perform one way anova subroutine anova(lc,nr,nc) ! ************ Warning David - xx is only 1-d in calling prog ********** Dimension x(nr,nc),lc(nc),inec(nc),tdj(nc),xx(nr),xt(nr,nc)& ,ave(nc) character*80 rc common/m/rc,cisizp !c x array of data !c xx 1 col of this array !c xt & xxt temporary storage for abs(x-xbar) for Levene's test !c nc number of columns !c ml max length or length of longest column !c inec index of non blank columns at any particular row !c nbc number of non blank cols at any particular row !c Assumes data consists of a row containing number of cols !c followed by a row containing length of each col !c followed by the cols of data write(12,*)'****ANOVA****' call imprint() cisize=cisizp/100. ! rewind 20 ! read(20,*) ! read(20,*)(lc(i),i=1,nc) !c Proceed row by row reading in data Do 2 m=1,nr !c find out how many non-blank cols in the mth row nbc=0 Do 3 i=1,nc if(m.le.lc(i))nbc=nbc+1 3 Continue !c Make an array containing the indices of the non-blank cols at level m ll=1 Do 4 j=1,nbc Do 5 i=ll,nc if(m.gt.lc(i))goto 5 inec(j)=i goto 8 5 Continue 8 Continue ll=i+1 4 Continue read(20,*)(x(m,inec(j)),j=1,nbc) 2 Continue !c Do 9 j=1,nc !c write(*,101)(x(i,j),i=1,lc(j)) !c101 format(10(f5.1,1x)) !c9 Continue !c !c write out data in form for ususal stats package analysis !c ie. with 2 cols 1st data and 2nd group number Open(11,file='data.txt') Do 31 j=1,nc Do 32 i=1,lc(j) write(11,*)x(i,j),j 32 Continue 31 Continue !c !c Prog to run one way anova !c Uses algorithm from p278-283 of Daniel !c tdj T-dot-j =sum of jth col !c tdd T-dot-dot = sum of all x values !c xssq = sum of squares of all x values !c sst = total sum of squares !c ssa = sum of squares among groups !c ssw = sum of squares within groups !c ntot total number of data values ntot=0 Do 16 j=1,nc ntot=ntot+lc(j) 16 Continue tdd=0. xssq=0. Do 12 j=1,nc tdj(j)=0. Do 14 i=1,lc(j) tdj(j)=tdj(j)+x(i,j) tdd=tdd+x(i,j) xssq=xssq+x(i,j)**2 14 Continue 12 Continue sst=xssq-(tdd**2)/float(ntot) gave=tdd/float(ntot) gstd=sqrt(sst/float(ntot-1)) gsem=gstd/sqrt(float(ntot)) df=ntot-1 call invts(cisize,df,cit) gavem=gave-cit*gsem gavep=gave+cit*gsem 121 format(a16,f9.3,a2,f10.3) 122 format(a16,f9.3,a2,2f10.3,a1,f10.3) ssa=0. Do 15 j=1,nc ssa=ssa+(tdj(j)**2)/float(lc(j)) 15 Continue ssa=ssa-(tdd**2)/float(ntot) ssw=sst-ssa ! write(*,105)' Data source * Mean * std dev' ! write(*,106)' ****************************************' write(12,115)' Data source * Mean *',cisizp,'%conf int* *& &std dev' write(12,116)' ***************************************************& &************' 115 format(a27,f6.3,a23) 116 format(a64) 114 format(a10,i2,a3,f10.3,a2,2f10.3,a1,f10.3) Do 18 i=1,nc Do 17 j=1,lc(i) xx(j)=x(j,i) 17 Continue if(lc(i).ne.1)then call mstdss(xx,lc(i),ave(i),std) sem=std/sqrt(float(lc(i))) df=lc(i)-1 call invts(cisizp/100.,df,cit) ! write(*,104)' column',i,' * ',ave(i),' *',std avem=ave(i)-cit*sem avep=ave(i)+cit*sem else ave(i)=xx(1) std=0. avem=0. avep=0. endif write(12,114)' column',i,' * ',ave(i),' *',avem,avep,'*',std 18 continue ! write(*,121)' overall * ',gave,' *',gstd 105 format(a39) 104 format(a10,i2,a3,2(f10.3,a2)) 106 format(a39) write(12,116)' ***************************************************& &************' write(12,*)' *Confidence intervals above calculated assuming unequ& &ual variances' write(12,*)' *Confidence interval below calculated assuming equal& &variances' write(12,122)' overall * ',gave,' *',gavem,gavep,'*',gstd write(12,116)' ***************************************************& &************' write(12,*) !c !c Levene's test !c !c Prog to run one way anova !c Uses algorithm from p278-283 of Daniel !c tdj T-dot-j =sum of jth col !c tdd T-dot-dot = sum of all x values !c xssq = sum of squares of all x values !c sst = total sum of squares !c ssa = sum of squares among groups !c ssw = sum of squares within groups tdd=0. xssq=0. Do 212 j=1,nc tdj(j)=0. Do 214 i=1,lc(j) xt(i,j)=abs(x(i,j)-ave(j)) tdj(j)=tdj(j)+xt(i,j) tdd=tdd+xt(i,j) xssq=xssq+xt(i,j)**2 214 Continue 212 Continue sstt=xssq-(tdd**2)/float(ntot) ssat=0. Do 215 j=1,nc ssat=ssat+(tdj(j)**2)/float(lc(j)) 215 Continue ssat=ssat-(tdd**2)/float(ntot) sswt=sstt-ssat df1=(float(nc)-1.) df2=float(ntot-nc) ft=ssat*df2/(sswt*df1) pvalut=betai(0.5*df2,0.5*df1,df2/(df2+df1*ft)) write(12,120)' Preliminary Levene test for equality of variances gi& &ves p-value = ',pvalut 120 format(a67,f11.7) write(12,*) !c write(12,*)' Intermediate results:-' write(12,117)' between group and within group sum of squares = '& ,ssa,ssw 117 format(a51,2f10.3) df1=(float(nc)-1.) df2=float(ntot-nc) f=ssa*df2/(ssw*df1) pvalue=betai(0.5*df2,0.5*df1,df2/(df2+df1*f)) write(12,*) write(12,118)' f = ',f,' p-value = ',pvalue ! write(*,*) ! write(*,119)' p-value = ',pvalue 118 format(a5,f10.6,a11,f12.9) 119 format(a11,f12.9) ! write(*,*) call pwrite(' ANOVA: p-value = ',pvalue,0,rc) call mwrite() return end Two library subroutines used in beta function/distribution calculations FUNCTION BETACF(A,B,X) PARAMETER (ITMAX=100,EPS=3.E-7) AM=1. BM=1. AZ=1. QAB=A+B QAP=A+1. QAM=A-1. BZ=1.-QAB*X/QAP DO 11 M=1,ITMAX EM=M TEM=EM+EM D=EM*(B-M)*X/((QAM+TEM)*(A+TEM)) AP=AZ+D*AM BP=BZ+D*BM D=-(A+EM)*(QAB+EM)*X/((A+TEM)*(QAP+TEM)) APP=AP+D*AZ BPP=BP+D*BZ AOLD=AZ AM=AP/BPP BM=BP/BPP AZ=APP/BPP BZ=1. IF(ABS(AZ-AOLD).LT.EPS*ABS(AZ))GO TO 1 11 CONTINUE PAUSE 'A or B too big, or ITMAX too small' 1 BETACF=AZ RETURN END FUNCTION BETAI(A,B,X) IF(X.LT.0..OR.X.GT.1.)PAUSE 'bad argument X in BETAI' IF(X.EQ.0..OR.X.EQ.1.)THEN BT=0. ELSE BT=EXP(GAMMLN(A+B)-GAMMLN(A)-GAMMLN(B)& +A*ALOG(X)+B*ALOG(1.-X)) ENDIF IF(X.LT.(A+1.)/(A+B+2.))THEN BETAI=BT*BETACF(A,B,X)/A RETURN ELSE BETAI=1.-BT*BETACF(B,A,1.-X)/B RETURN ENDIF END format change subroutines ! Subroutines to change data format from that of conventional stats prog to pds subroutine cformat(j,k) dimension b(100),idcol(100),ind(100) real, allocatable, dimension (:,:) :: xx character*6, allocatable, dimension (:) :: aa character*80 fmtspec ! b - dummy for number of figures in one row of data of conventional format ! the following line whould be commented out in running this as a subroutine Open(21,file='import.dat') Open(16,file='newdata.dat') ! Find how many data values there are n=0 1 continue read(21,*,end=2)a n=n+1 goto 1 2 continue rewind 21 ! Use a dialogue to find which column contains the value and which the groupid ! j=1 ! k=2 ! End of dialogue - assumes that column j contains values and k contains groupid ! Find out how many groups there are ncols=1 jk=max(j,k) Do i=1,n read(21,*)(b(m),m=1,jk) mmm=b(k) ncols=max(ncols,mmm) end do rewind 21 ! Count the number in each future column idcol=0 Do i=1,n read(21,*)(b(m),m=1,jk) mmm=b(k) idcol(mmm)=idcol(mmm)+1 end do ntot=maxval(idcol) rewind 21 ! Now that future columns are counted, can allocate array and go back and ! read in values allocate (xx(ntot,ncols)) idcol=0 Do i=1,n read(21,*)(b(m),m=1,jk) mmm=b(k) idcol(mmm)=idcol(mmm)+1 xx(idcol(mmm),mmm)=b(j) ! write(*,*)' xx(',idcol(mmm),' , ',mmm,' ) = ',xx(idcol(mmm),mmm) end do if(ncols.gt.2)write(16,*)ncols,' = number of columns' write(16,*)(idcol(i),i=1,ncols),' = number of values in each column' ! Start arranging format of columns ncolsp1=ncols+1 allocate (aa(ncolsp1)) Do i=1,ntot nrow=0 ind=0 aa=' 11x ,' aa(ncolsp1)='2X' Do j=1,ncols if(idcol(j).ge.i)then aa(j)='f0,2x,' nrow=nrow+1 ind(nrow)=j end if end do fmtspec=aa(1) Do j=2,ncolsp1 fmtspec=trim(fmtspec)//aa(j) end do fmtspec='('//trim(fmtspec)//')' ! write(*,*)' fmtspec = ',fmtspec write(16,fmt=fmtspec)(xx(i,ind(j)),j=1,nrow) end do ! stop close (16) return end ! Subroutine to change data format from that of conventional stats prog to pds subroutine cformati(j,k) dimension b(100),idcol(100),ind(100) real, allocatable, dimension (:,:) :: xx character*6, allocatable, dimension (:) :: aa character*80 fmtspec ! b - dummy for number of figures in one row of data of conventional format ! the following line whould be commented out in running this as a subroutine Open(21,file='import.dat') Open(16,file='newdata.dat',status='REPLACE') ! Find how many data values there are n=0 1 continue read(21,*,end=2)a n=n+1 goto 1 2 continue rewind 21 ! Use a dialogue to find which column contains the value and which the groupid ! j=1 ! k=2 ! End of dialogue - assumes that column j contains values and k contains groupid ! Find out how many groups there are ncols=1 jk=max(j,k) Do i=1,n read(21,*)(b(m),m=1,jk) mmm=b(k) ncols=max(ncols,mmm) end do rewind 21 ! Count the number in each future column idcol=0 Do i=1,n read(21,*)(b(m),m=1,jk) mmm=b(k) idcol(mmm)=idcol(mmm)+1 end do ntot=maxval(idcol) rewind 21 ! Now that future columns are counted, can allocate array and go back and ! read in values allocate (xx(ntot,ncols)) idcol=0 Do i=1,n read(21,*)(b(m),m=1,jk) mmm=b(k) idcol(mmm)=idcol(mmm)+1 xx(idcol(mmm),mmm)=b(j) ! write(*,*)' xx(',idcol(mmm),' , ',mmm,' ) = ',xx(idcol(mmm),mmm) end do if(ncols.gt.2)write(16,*)ncols,' = number of columns' write(16,*)(idcol(i),i=1,ncols),' = number of values in each column' ! Start arranging format of columns ncolsp1=ncols+1 allocate (aa(ncolsp1)) Do i=1,ntot nrow=0 ind=0 aa=' 11x ,' aa(ncolsp1)='2X' Do j=1,ncols if(idcol(j).ge.i)then aa(j)='f0,2x,' nrow=nrow+1 ind(nrow)=j end if end do fmtspec=aa(1) Do j=2,ncolsp1 fmtspec=trim(fmtspec)//aa(j) end do fmtspec='('//trim(fmtspec)//')' ! write(*,*)' fmtspec = ',fmtspec write(16,fmt=fmtspec)(xx(i,ind(j)),j=1,nrow) end do ! stop rewind 21 close (16) return end ! Subroutine to change data format from that of conventional stats prog to pds subroutine cformatp(j,k) dimension b(100) ! b - dummy for number of figures in one row of data of conventional format ! the following line whould be commented out in running this as a subroutine Open(21,file='import.dat') Open(16,file='newdata.dat',status='REPLACE') ! Find how many data values there are n=0 1 continue read(21,*,end=2)a n=n+1 goto 1 2 continue rewind 21 ! Use a dialogue to find which column contains the value and which the groupid ! j=1 ! k=2 ! End of dialogue - assumes that column j contains values and k contains groupid ! Find out how many groups there are jk=max(j,k) ! read in values write(16,*)n,' = number of pairs of values in the columns' Do i=1,n read(21,*)(b(m),m=1,jk) write(16,*)b(j),b(k) end do rewind 21 close (16) return end Chi square subroutine subroutine chisq(xx,m,n,pval) Dimension xx(m,n),rsum(m),csum(n),ex(m,n) character*80 rc,rc2 common/m/rc,cisizp write(12,*)' *** CHI-SQUARE TEST OF ASSOCIATION ***' call imprint() Do i=1,m read(20,*)(xx(i,j),j=1,n) end do gtot=0. DO i=1,m rsum(i)=0. DO j=1,n rsum(i)=rsum(i)+xx(i,j) END DO gtot=gtot+rsum(I) END DO DO j=1,n csum(j)=0. DO i=1,m csum(j)=csum(j)+xx(i,j) END DO end do chsq=0. iflag=0 DO i=1,m Do j=1,n ex(i,j)=rsum(i)*csum(j)/gtot if((ex(i,j).lt.5.).and.(iflag.eq.0))then write(12,*)' Some expected vals < 5 so chisq approx unsatisfactory' iflag=1 endif chsq=chsq+(ex(i,j)-xx(i,j))**2/ex(i,j) end do end do Do i=1,m write(12,*)'Obs',(xx(i,j),j=1,n) write(12,*)'Exp',(ex(i,j),j=1,n) write(12,*) end do idf=(m-1)*(n-1) df=idf write(12,*)' Degrees of Freedom = ',idf call pwrite(' Chi-square = ',chsq,12,rc2) pval=gammq(df/2.,chsq/2.) call pwrite(' Chi-square test of association: p-value = ',pval,12,rc) call pwrite(' Chi-square test of association: p-value = ',pval,0,rc) call mwrite() return end subroutine Program to enumerate conbinatons nCk recursive subroutine combmw2(n,k) COMMON j(50),kount,nn,kk,maxmw,ncku,mu(200),nmw,mp(200),iflg21 & ,nit000 character*256 rc, string2 IF(k.eq.0)then DO i=n,1,-1 j(i)=0 END DO ELSEIF((n-k).eq.0)then DO i=n,1,-1 j(i)=1 END DO else DO i=0,1 j(n)=i k1=k-i n1=n-1 ! write(*,*)' n1 & k1 = ',n1,k1 call combmw2(n1,k1) end do return endif 1 continue kount=kount+1 ! Iteration counter if(kount.lt.1000)goto 6 k1000=kount/1000 if(((k1000)*1000).eq.kount)then call windowclear() write(rc,*)nit000,' k comparisons required ',k1000& ,' k comparisons completed' call windowoutstring(500,1000,rc) string2=' To abort the program press ctrl-alt-del' call windowoutstring(1000,2000,string2) endif 6 Continue ! End iteration counter mw=0 Do 3 kk=2,nn Do 4 l=1,kk-1 if(j(kk).eq.0)mw=mw+j(l) 4 Continue 3 Continue if(iflg21.eq.1)mw=maxmw-mw ! mw1=mw ! mw2=maxmw-mw ! mw=min(mw1,mw2) Do mmm=1,nmw if(mw.le.mu(mmm))then ncku=ncku+1 mp(mmm)=mp(mmm)+1 end if end do !c write(*,*)mw,(ii(j),j=1,nn) ! PRINT*,(j(i),i=1,nn),kount return end Program to deal with ties ! prog to rank pairs of arrays with ties possible in each array ! subroutine gets data & outputs array of possible ssq rank differences subroutine esprdat(nn,xx,xy,mmm,id2,d2av) Dimension xx(nn),indxx(19),iaix(19),inarayx(19),kkx(19),irankx(19) Dimension iarray(19),ifcx(19),ifcy(19),id2(10000) Dimension xy(nn),indxy(19),iaiy(19),inarayy(19),kky(19),iranky(19) ! spacex spacey - minimal space between numbers in 1st & 2nd col to ensure ! that numbers are truly different, not just different ! computer representations of the same number ! eg. 1.0 might be represented by 1.0000000 but ! 1. might be represented by 0.9999999 ! Open(20,file='tiedar.dat') d2av=0. spacex=0. spacey=0. ! read(20,*)nn Do i=1,nn ! read(20,*)xx(i),xy(i) spacex=max(spacing(xx(i)),spacex) spacey=max(spacing(xy(i)),spacey) inarayx(i)=i inarayy(i)=i end do spacex=2.5*spacex spacey=2.5*spacey call indexx(nn,xx,indxx) call indexx(nn,xy,indxy) ! Start of modification for ties ! initialise array of ties Do i=1,nn iaix(i)=1 iaiy(i)=1 end do ! Generate an array iaix listing number of ties so that iaix(jj) is the ! number of ties in the jjth group of ties. ia=1 jjx=1 Do ik=2,nn if(abs(xx(indxx(ia))-xx(indxx(ik))).lt.spacex)then iaix(jjx)=iaix(jjx)+1 else jjx=jjx+1 ia=ik endif end do ia=1 jjy=1 Do ik=2,nn if(abs(xy(indxy(ia))-xy(indxy(ik))).lt.spacey)then iaiy(jjy)=iaiy(jjy)+1 else jjy=jjy+1 ia=ik endif end do ! jjx is now the number of groups of ties (Counting unique values as a tie ! of length 1.) write(12,*)' Additional information is available in file xsinfo.dat' write(13,*)' The groupings of tied values rearranged in ascending order' write(13,*)' in the first column are:-' write(13,*)(iaix(i),i=1,jjx) write(13,*)' The groupings of tied values rearranged in ascending order' write(13,*)' independently in the second column are:-' write(13,*)(iaiy(i),i=1,jjy) write(13,*) write(13,*)' All possible arrangements of rankings breaking ties follow' write(13,*)' Ranks of the two columns are written row-wise' ! write(*,*)(iaix(i),i=1,jjx) ! Calculate number of permutations in which a tie can be broken mmmx=1 Do i=1,jjx ifcx(i)=ifac(iaix(i)) mmmx=mmmx*ifcx(i) End do mmmy=1 Do i=1,jjy ifcy(i)=ifac(iaiy(i)) mmmy=mmmy*ifcy(i) End do write(12,*)' Number permutations of ties in cols 1&2 = ',mmmx,'*',mmmy ! write(*,*)' Number permutations = ',mmmx,mmmy ! Formulate & Step through a method of counting all these permutations ! and for each possible permutation break the ties mmm=mmmx*mmmy if(mmm.gt.10000)then write(12,*)' Too many ties for program to evaluate' mmm=0 return end if mmmm1=mmm-1 Do 26 i=0,mmmm1 ip1=i+1 itmp=i itmpx=mod(itmp,mmmx) itmpy=itmp/mmmx Do 33 ik=1,jjx kkx(ik)=mod(itmpx,ifcx(ik)) itmpx=itmpx/ifcx(ik) 33 Continue Do 933 ik=1,jjy kky(ik)=mod(itmpy,ifcy(ik)) itmpy=itmpy/ifcy(ik) 933 Continue ! kkx now consists of an array the length of the number of ties with each ! element of the array being a number between (1 less than) 1 and the ! relevant n!. This number is used to select a particular permutation ! for each group of ties ! write(*,*)' kkx = ',(kkx(ik),ik=1,jjx) nnn=0 Do 27 ik=1,jjx if(iaix(ik).gt.1)then knt=kkx(ik)+1 call fct(knt,iaix(ik),iarray) Do 28 j=1,iaix(ik) m=nnn+j inarayx(m)=nnn+iarray(j) 28 Continue endif nnn=nnn+iaix(ik) 27 Continue ! write(*,*)' inarayx ',(inarayx(j),j=1,nn) nnn=0 Do 927 ik=1,jjy if(iaiy(ik).gt.1)then knt=kky(ik)+1 call fct(knt,iaiy(ik),iarray) Do 928 j=1,iaiy(ik) m=nnn+j inarayy(m)=nnn+iarray(j) 928 Continue endif nnn=nnn+iaiy(ik) 927 Continue ! write(*,*)' inarayy ',(inarayy(j),j=1,nn) Do 11 j=1,nn irankx(indxx(inarayx(j)))=j iranky(indxy(inarayy(j)))=j 11 Continue ! Do calculations on rank here write(13,*)(irankx(j),j=1,nn) write(13,*)(iranky(j),j=1,nn) id2(ip1)=0. do j=1,nn id2(ip1)=id2(ip1)+(irankx(j)-iranky(j))**2 end do write(13,*)' sum of square rank differences for this arrangement:- ',& id2(ip1) d2av=d2av+real(id2(ip1)) 26 Continue d2av=d2av/real(mmm) return end function ifac(m) ifac=1 Do i=1,m ifac=ifac*i end do return end windows call to Chisquare distribution subroutine cdistcal() ! routine for chi-square distribution use winteracter use resource type(win_message) :: message integer :: itype, check_state = 1 character(80) :: pstring,cstring ! Common/m/rc ! Display the input dialog open(40,file='mserror.dat') call WDialogLoad(IDD_cdist) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported 1 continue ! p=0. iflag=0 call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing a real value if(message%value1 == IDF_cstring) then call read_realval(t,IDF_cstring, check_state) iflag=1 else if(message%value1 == IDF_dfstring) then call read_realval(df,IDF_dfstring, check_state) iflag=1 else if(message%value1 == IDF_pstring) then iflag=2 call read_realval(prob,IDF_pstring, check_state) end if ! write(40,*)' message=',message%value1 end if end do ! Read the current values of the input fields ! write(40,*)' mes later =',message%value1 if(iflag.eq.1)then call read_realval(chisq,IDF_cstring, check_state) call read_realval(df,IDF_dfstring, check_state) if(df.lt.0.999)then call WMessageBox(0,3,1,'Degrees of Freedom must be 1 or more',' ') else prob=gammq(0.5*df,0.5*chisq) prob=1.-prob call fwrite(prob,pstring) call WDialogPutString(IDF_pstring,pstring) end if else if(iflag.eq.2)then call read_realval(prob,IDF_pstring, check_state) if((prob.le.0.).or.(prob.ge.1.))then call WMessageBox(0,3,1,'Probability must be between 0 & 1',' ') else if(df.lt.0.999)then call WMessageBox(0,3,1,'Degrees of Freedom must be 1 or more',' ') else iflg=0 prob=1.-prob call invc(prob,df,chisq,iflg) call fwrite(chisq,cstring) call WDialogPutString(IDF_cstring,cstring) if(iflg.eq.1)then call fwrite(prob,pstring) call WDialogPutString(IDF_pstring,pstring) end if end if end if ! Hide the dialog instead of unloading, so that previously entered values are preserved ! call WdialogHide() goto 1 return end windows call to f distribution subroutine fdist() ! Integer input routine with error checking for sign test use winteracter use resource type(win_message) :: message integer :: itype, check_state = 1 character(80) :: pstring,fstring ! Common/m/rc ! Display the input dialog open(40,file='mserror.dat') call WDialogLoad(IDD_fdist) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported 1 continue iflag=0 call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing a real value if(message%value1 == IDF_fstring) then call read_realval(f,IDF_fstring, check_state) iflag=1 else if(message%value1 == IDF_df1string) then call read_realval(df1,IDF_df1string, check_state) iflag=1 else if(message%value1 == IDF_df2string) then call read_realval(df2,IDF_df2string, check_state) iflag=1 else if(message%value1 == IDF_pstring) then iflag=2 call read_realval(p,IDF_pstring, check_state) end if ! write(40,*)' message=',message%value1 end if end do ! Read the current values of the input fields ! write(40,*)' mes later =',message%value1 if(iflag.eq.1)then call read_realval(f,IDF_fstring, check_state) call read_realval(df1,IDF_df1string, check_state) call read_realval(df2,IDF_df2string, check_state) ! write(40,*)' f,df1,df2 = ',f,df1,df2 if((df1.lt.0.999).or.(df2.lt.0.999))then call WMessageBox(0,3,1,'Degrees of Freedom must be 1 or more',' ') else p=betai(0.5*df2,0.5*df1,df2/(df2+df1*f)) ! prob=1.-0.5*p p=1.-p call fwrite(p,pstring) ! write(40,*)'p,pstring = ',p,pstring call WDialogPutString(IDF_pstring,pstring) end if else if(iflag.eq.2)then call read_realval(p,IDF_pstring, check_state) if((p.le.0.).or.(p.ge.1.))then call WMessageBox(0,3,1,'Probability must be between 0 & 1',' ') else if((df1.lt.0.999).or.(df2.lt.0.999))then call WMessageBox(0,3,1,'Degrees of Freedom must be 1 or more',' ') else ! write(40,*)' prob,df1,df2 at invfs call:',prob,df1,df2 p=1.-p call invfs(p,df1,df2,reqf) call fwrite(reqf,fstring) call WDialogPutString(IDF_fstring,fstring) end if end if ! Hide the dialog instead of unloading, so that previously entered values are preserved ! call WdialogHide() goto 1 return end windows call to normal distribution subroutine normlcal() ! Integer input routine with error checking for sign test use winteracter use resource type(win_message) :: message integer :: itype, check_state = 1 character(80) :: pstring,zstring ! Common/m/rc ! Display the input dialog open(40,file='mserror.dat') call WDialogLoad(IDD_normal) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported 1 continue ! p=0. iflag=0 call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing a real value if(message%value1 == IDF_zstring) then call read_realval(z,IDF_zstring, check_state) iflag=1 else if(message%value1 == IDF_pstring) then iflag=2 call read_realval(p,IDF_pstring, check_state) end if ! write(40,*)' message=',message%value1 end if end do ! Read the current values of the input fields write(40,*)' mes later =',message%value1 if(iflag.eq.1)then call read_realval(z,IDF_zstring, check_state) ! write(40,*)' mes later =',message%value1 ! write(40,*)' p = ',p,' z = ',z p=gn(z,0.,1.) call fwrite(p,pstring) ! write(pstring,*)p call WDialogPutString(IDF_pstring,pstring) else if(iflag.eq.2)then call read_realval(p,IDF_pstring, check_state) if((p.le.0.).or.(p.ge.1.))then call WMessageBox(0,3,1,'Probability must be between 0 & 1',' ') else z=xinvgn(p) call fwrite(z,zstring) ! write(zstring,*)z call WDialogPutString(IDF_zstring,zstring) end if end if ! Hide the dialog instead of unloading, so that previously entered values are preserved ! call WdialogHide() goto 1 return end ! contains ! subroutine read_intval(i1, field_id, check_state) ! ! integer, intent(in out) :: i1 ! integer, intent(in) :: field_id, check_state ! integer :: stati ! character(20) :: intval_chars ! character(80) :: bigstr ! call WDialogGetString(field_id,intval_chars) ! if(len_trim(intval_chars) > 0) then !! open(40,file='mserror.dat') ! write(40,*)'intvalchars =',intval_chars ! Use an internal read to decode the integer value ! ! read(intval_chars,*,iostat=stati) i1 ! if (stati /= 0) then ! i1 = 0 ! if(check_state == 1) then ! call WDialogPutString(field_id,' ') ! bigstr = 'The input value is not a valid integer number.' ! call WMessageBox( OKOnly, & ! NoIcon, & ! CommonOK, & ! bigstr,& ! 'Error') ! end if ! end if ! end if ! end subroutine read_intval subroutine read_realval(r1, field_id, check_state) real, intent(in out) :: r1 integer, intent(in) :: field_id, check_state integer :: statr character(20) :: realval_chars character(80) :: bigstr call WDialogGetString(field_id, realval_chars) if(len_trim(realval_chars) > 0) then ! Use internal read to decode the real value read(realval_chars,*,iostat=statr) r1 if (statr /= 0) then r1 = 0. if(check_state == 1) then call WDialogPutString(field_id, ' ') bigstr = 'The input value is not a valid real number.' call WMessageBox( OKOnly,& NoIcon,& CommonOK,& bigstr,& 'Error') end if end if end if end subroutine read_realval windows call to fisher's exact test subroutine fishtcal() ! Integer input routine with error checking for fishers exact test use winteracter use resource type(win_message) :: message integer :: itype, check_state = 1 character(80) :: rc common/m/rc ! Character(80) moreinfostring1,moreinfostring2 !moreinfostring1=' For more information about the results of this test' !moreinfostring2=' press MoreInfo on the main menu' ! Display the input dialog call WDialogLoad(IDD_Fisher) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing an integer value if(message%value1 == IDF_string1) then call read_intval(i1,IDF_string1, check_state) else if(message%value1 == IDF_string2) then call read_intval(i2,IDF_string2, check_state) else if(message%value1 == IDF_string3) then call read_intval(i2,IDF_string3, check_state) else if(message%value1 == IDF_string4) then call read_intval(i2,IDF_string4, check_state) end if end if end do ! Read the current values of the input fields call read_intval(i,IDF_string1, check_state) call read_intval(k,IDF_string2, check_state) call read_intval(j,IDF_string3, check_state) call read_intval(m,IDF_string4, check_state) ! Hide the dialog instead of unloading, so that previously entered values are preserved call WdialogHide() ! Turn fieldchange reporting off call WMessageEnable(FieldChanged,0) ! open(40,file='mserror.dat') ! write(40,*)'i, j, k, m = ',i,j,k,m call fisht(i,j,k,m,pval2) ! write(rc,101)' Fishers exact test: p-value = ',pval2 !101 format(a31,f11.9) call pwrite(' Fishers exact test: p-value = ',pval2,0,rc) call mwrite() ! call windowclear() ! call windowoutstring(3000,1000,rc) ! call windowoutstring(100,2000,moreinfostring1) ! call windowoutstring(500,2400,moreinfostring2) return end Fishers exact test subroutine ! Last change: D 19 Dec 1999 7:43 am subroutine fisht(io,jo,ko,mo,pvalu2) ! Program to do Fisher's exact test real (kind=16)::comb,c,d,d2,sm,sum2 integer (4)::io,jo,ko,mo,i,j,k,m character*80 rc,rc2 common/m/rc,cisizp ! READ (*,*)io,jo,ko,mo ! work out which of these 4 should have the role of i & j write(12,*)' *** FISHER''S EXACT TEST ***' write(12,101)' Values imput:- ',io,ko write(12,101)' ',jo,mo 101 format(a16,10x,2i10) IF(io*mo.eq.jo*ko)then i=io;j=jo;k=ko;m=mo ! pvalu=0.5 ! pvalu2=1.0 ! GOTO 2 ELSEIF(io*mo.lt.jo*ko)then IF(io.le.mo)then i=io;m=mo else i=mo;m=io endif IF(jo.le.ko)then j=jo;k=ko else j=ko;k=jo endif ELSEIF(io*mo.gt.jo*ko)then IF(jo.le.ko)then i=jo;m=ko else i=ko;m=jo endif IF(io.le.mo)then j=io;k=mo else j=mo;k=io endif ENDIF ! If overflows are possible, use chisq approx ovfli=(i+j)*log10(real(i+j+k+m)) if(ovfli.lt.4000)then c=comb(i+k,i)*comb(j+m,j)/comb(i+j+k+m,i+j) IF(i==0)then pvalu=c else sm=1 d=1 DO ii=0,i-1 d=d*REAL((i-ii)*(m-ii))/REAL((j+ii+1)*(k+ii+1)) sm=sm+d END DO pvalu=c*sm endif ! work out 2nd tail prob d2=1 sum2=0 do ii=0,j-1 d2=d2*real((j-ii)*(k-ii))/real((i+ii+1)*(m+ii+1)) IF(d2.gt.1.)GOTO 1 sum2=sum2+d2 1 continue END do pvalu2=pvalu+sum2*c ! WRITE(12,103)' p-value (1-tail) = ',pvalu call pwrite(' p-value (1-tail) = ',pvalu,12,rc) else write(12,*)' Numbers too large for Fisher''s exact test' write(12,*)' - used chi square approximation' call chi2x2(i,j,k,m,pvalu2) endif ! WRITE(12,103)' p-value (2-tail) = ',pvalu2 call pwrite(' p-value (2-tail) = ',pvalu2,12,rc) ! Work out odds ratio & confidence intervals if((jo*ko.eq.0).or.(io*mo.eq.0))then write(12,*)' Since at least one cell = 0, odds ratio is not calculated' else oddsr=real(io*mo)/real(jo*ko) call pwrite(' Odds ratio = ',oddsr,12,rc2) cizp=(1.-cisizp/100.)/2. ciz=xinvgn(cizp) write(10,*)' ciz = ',ciz ciexp=ciz*sqrt(1./real(i)+1./real(j)+1./real(k)+1./real(m)) ciu=exp(log(oddsr)+ciexp) cil=exp(log(oddsr)-ciexp) write(12,103)cisizp,'% confidence interval on odds ratio',ciu,cil 103 format(f9.3,a,2f10.5) endif return end real (kind=16) function comb(nnn,kkk) integer (4)::nnn,kkk,n,k ! real (kind=16)::comb ! prog to calculate nCk ! make nnn and kkk intent in n=nnn;k=kkk comb=1. IF((k.gt.n).OR.(k.lt.0).OR.(n.lt.0))then WRITE(12,*)'error input comb' GOTO 1 endif IF((n.eq.0).OR.(k.eq.0).OR.(k.eq.n))GOTO 1 nmk=n-k IF(nmk.gt.k)then k=nmk nmk=n-k endif do i=0,nmk-1 comb=comb*REAL(n-i)/(REAL((nmk-i)*(k-i))) END do IF(k.eq.nmk)GOTO 2 do i=nmk,k-1 comb=comb*REAL(n-i)/REAL(k-i) end do 2 continue do i=k,n-1 comb=comb*REAL(n-i) end do 1 continue ! WRITE(*,*)' n k comb = ',nnn,kkk,comb return end subroutine chi2x2(i,j,k,m,p) ! character*80 rc,rc2 ! common/m/rc,cisizp character*80 rc2 ei=REAL((i+j)*(i+k))/REAL(i+j+k+m) ej=i+j-ei ek=i+k-ei em=k+m-ek if((ei.lt.5.).or.(ej.lt.5.).or.(ek.lt.5.).or.(em.lt.5.))& write(12,*)'Warning some expected values too small: chisq approx inadequate' chi2=(ei-i)**2/ei+(ej-j)**2/ej+(ek-k)**2/ek+(em-m)**2/em WRITE(12,101)' Actual value ',i,', correponding expected value ',ei WRITE(12,101)' Actual value ',j,', correponding expected value ',ej WRITE(12,101)' Actual value ',k,', correponding expected value ',ek WRITE(12,101)' Actual value ',m,', correponding expected value ',em call pwrite(' Chi square value:- ',chi2,12,rc2) ! write(12,*)' Chi square value:- ',chi2 101 format(a14,i7,a30,f11.3) p=gammq(0.5,0.5*chi2) return end windows call to t test subroutine tdistcal() ! Integer input routine with error checking for sign test use winteracter use resource type(win_message) :: message integer :: itype, check_state = 1 character(80) :: pstring,tstring ! Display the input dialog open(40,file='mserror.dat') call WDialogLoad(IDD_tdist) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported 1 continue ! p=0. iflag=0 call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing a real value if(message%value1 == IDF_tstring) then call read_realval(t,IDF_tstring, check_state) iflag=1 else if(message%value1 == IDF_dfstring) then call read_realval(df,IDF_dfstring, check_state) iflag=1 else if(message%value1 == IDF_pstring) then iflag=2 call read_realval(p,IDF_pstring, check_state) end if ! write(40,*)' message=',message%value1 end if end do ! Read the current values of the input fields write(40,*)' mes later =',message%value1 if(iflag.eq.1)then call read_realval(t,IDF_tstring, check_state) call read_realval(df,IDF_dfstring, check_state) if(df.lt.0.999)then call WMessageBox(0,3,1,'Degrees of Freedom must be 1 or more',' ') elseif(df.gt.900)then call WMessageBox(0,3,1,'Degrees of Freedom too large, & & approximate using normal distribution',' ') call WdialogHide() call normlcal() return else p=betai(0.5*df,0.5,df/(df+t*t)) if(t.gt.0.)prob=1.-0.5*p if(t.lt.0.)prob=0.5*p call fwrite(prob,pstring) call WDialogPutString(IDF_pstring,pstring) end if else if(iflag.eq.2)then call read_realval(p,IDF_pstring, check_state) if((p.le.0.).or.(p.ge.1.))then call WMessageBox(0,3,1,'Probability must be between 0 & 1',' ') else if(df.lt.0.999)then call WMessageBox(0,3,1,'Degrees of Freedom must be 1 or more',' ') elseif(df.gt.900)then call WMessageBox(0,3,1,'Degrees of Freedom too large, & & approximate using normal distribution',' ') call WdialogHide() call normlcal() return else prob=abs(2.*p-1.) if((2.*p-1.).gt.0.)signt=1. if((2.*p-1.).lt.0.)signt=-1. if((2.*p-1.).eq.0.)t=0. if((2.*p-1.).ne.0.)call invts(prob,df,t) t=t*signt call fwrite(t,tstring) call WDialogPutString(IDF_tstring,tstring) end if end if ! Hide the dialog instead of unloading, so that previously entered values are preserved ! call WdialogHide() goto 1 return end Output formatting program for small probabilities subroutine fwrite(p,pc) character*80 fmtspec,pc if(p.eq.0.)then write(pc,fmt='(a)')' < 0.0000001 ' return elseif((p.gt.-1.).and.(p.lt.1.))then i=log10(abs(p)) j=-i+10 k=-i+7 else i=log10(abs(p)) if(i.le.6)then k=7-i j=10 else k=1 j=i+7 endif endif write(fmtspec,*)'(f',j,'.',k,')' write(pc,fmt=fmtspec)p return end library function for gamma distribution FUNCTION GAMMQ(A,X) IF(X.LT.0..OR.A.LE.0.)PAUSE IF(X.LT.A+1.)THEN CALL GSER(GAMSER,A,X,GLN) GAMMQ=1.-GAMSER ELSE CALL GCF(GAMMCF,A,X,GLN) GAMMQ=GAMMCF ENDIF RETURN END Gaussian normal subroutine !c function to calculate int -inf to y 1/root(2pi)exp(-0.5x^2) dx !c ie cumulative Gauss Normal, using Numerical Recipes erf functions etc !c standard Gauss if xmu=0. sigma =1. function gn(y,xmu,sigma) yy=(y-xmu)/sigma yonr2=yy/sqrt(2.) gn=erf1(yonr2)/2.+0.5 return end FUNCTION ERF1(X) IF(X.LT.0.)THEN ERF1=-GAMMP(.5,X**2) ELSE ERF1=GAMMP(.5,X**2) ENDIF RETURN END FUNCTION GAMMP(A,X) IF(X.LT.0..OR.A.LE.0.)PAUSE IF(X.LT.A+1.)THEN CALL GSER(GAMSER,A,X,GLN) GAMMP=GAMSER ELSE CALL GCF(GAMMCF,A,X,GLN) GAMMP=1.-GAMMCF ENDIF RETURN END SUBROUTINE GSER(GAMSER,A,X,GLN) PARAMETER (ITMAX=1000,EPS=3.E-5) GLN=GAMMLN(A) IF(X.LE.0.)THEN IF(X.LT.0.)PAUSE GAMSER=0. RETURN ENDIF AP=A SUM=1./A DEL=SUM DO 11 N=1,ITMAX AP=AP+1. DEL=DEL*X/AP SUM=SUM+DEL IF(ABS(DEL).LT.ABS(SUM)*EPS)GO TO 1 11 CONTINUE PAUSE 'A too large, ITMAX too small' 1 GAMSER=SUM*EXP(-X+A*LOG(X)-GLN) RETURN END SUBROUTINE GCF(GAMMCF,A,X,GLN) PARAMETER (ITMAX=1000,EPS=3.E-5) GLN=GAMMLN(A) GOLD=0. A0=1. A1=X B0=0. B1=1. FAC=1. DO 11 N=1,ITMAX AN=FLOAT(N) ANA=AN-A A0=(A1+A0*ANA)*FAC B0=(B1+B0*ANA)*FAC ANF=AN*FAC A1=X*A0+ANF*A1 B1=X*B0+ANF*B1 IF(A1.NE.0.)THEN FAC=1./A1 G=B1*FAC IF(ABS((G-GOLD)/G).LT.EPS)GO TO 1 GOLD=G ENDIF 11 CONTINUE PAUSE 'A too large, ITMAX too small' 1 GAMMCF=EXP(-X+A*ALOG(X)-GLN)*G RETURN END FUNCTION GAMMLN(XX) REAL*8 COF(6),STP,HALF,ONE,FPF,X,TMP,SER DATA COF,STP/76.18009173D0,-86.50532033D0,24.01409822D0,& -1.231739516D0,.120858003D-2,-.536382D-5,2.50662827465D0/ DATA HALF,ONE,FPF/0.5D0,1.0D0,5.5D0/ X=XX-ONE TMP=X+FPF TMP=(X+HALF)*LOG(TMP)-TMP SER=ONE DO 11 J=1,6 X=X+ONE SER=SER+COF(J)/X 11 CONTINUE GAMMLN=TMP+LOG(STP*SER) RETURN end Windows program for choice of method subroutine icform() use winteracter use resource type(win_message):: message ! Prog to choose type of import data ! Making the decision between exact & approximate methods call wdialogload(IDD_icform) call wdialogshow(-1,-1,0,SemiModeless) call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select endif end do call wdialoggetradiobutton(IDF_RADIOp,iset) call WDialogHide() ! Choice of paired or independent samples complete if(iset==1)then ! Paired call formatcalp() else ! Independent samples call formatcali() endif return end Formatting program subroutine imprint() Dimension ivalues(8) Character (80) filenm character*12 date,time,zone character*2 chh,chm,chs,chd,chmth,chy inquire(20,name=filenm) call date_and_time(date,time,zone,ivalues) ! call subroutine to translate ivalues containing date/time specs into ! characters - for ease of better formatting with eg. 07 in place of 7. call form(ivalues(5),chh) call form(ivalues(6),chm) call form(ivalues(7),chs) call form(ivalues(3),chd) call form(ivalues(2),chmth) call form(ivalues(1),chy) write(12,101)' Using data file ',trim(filenm),' at ',chh,':',chm,& ':',chs,' on ',chd,'/',chmth,'/',chy write(13,101)' Using data file ',trim(filenm),' at ',chh,':',chm,& ':',chs,' on ',chd,'/',chmth,'/',chy 101 format(14a) return end subroutine form(ival,ch) character*1 c1,c2 character*2 ch ival=mod(ival,100) if(ival.lt.10)then c1='0' c2=char(ival+48) ch=c1//c2 else i=mod(ival,10) c2=char(i+48) j=ival/10 c1=char(j+48) ch=c1//c2 end if ! write(*,101)'/',ch,'/' 101 format(3a) return end library sorting/indexing program SUBROUTINE INDEXX(N,ARRIN,INDX) DIMENSION ARRIN(N),INDX(N) DO 11 J=1,N INDX(J)=J 11 CONTINUE L=N/2+1 IR=N 10 CONTINUE IF(L.GT.1)THEN L=L-1 INDXT=INDX(L) Q=ARRIN(INDXT) ELSE INDXT=INDX(IR) Q=ARRIN(INDXT) INDX(IR)=INDX(1) IR=IR-1 IF(IR.EQ.1)THEN INDX(1)=INDXT RETURN ENDIF ENDIF I=L J=L+L 20 IF(J.LE.IR)THEN IF(J.LT.IR)THEN IF(ARRIN(INDX(J)).LT.ARRIN(INDX(J+1)))J=J+1 ENDIF IF(Q.LT.ARRIN(INDX(J)))THEN INDX(I)=INDX(J) I=J J=J+J ELSE J=IR+1 ENDIF GO TO 20 ENDIF INDX(I)=INDXT GO TO 10 END Chi square calculation (?inverse calculation) ! subroutine to calculate chi-square values subroutine invc(yr,df,chsq,iflg) iflg=0 xold=df factor=1. gapold=1000. Do 1 i=1,10000 df2=df/2.;xold2=xold/2. ! write(*,*)df,zdf2,xold,zxold2 y=gammq(df2,xold2) ! write(*,*)' zz= ',zz aldxdy=algama(1.+df/2.)+xold/2.-(df/2.)*log(xold/2.) ymyr=y-yr if(ymyr.eq.0.)ymyr=tiny(y) xincl=aldxdy+log(abs(ymyr)) xinc=2.*sign(1.,ymyr)*exp(xincl) xnew=xold+xinc/factor ! write(*,*)' y xold factor aldxdy= ',y,xold,factor,aldxdy ! gap=(xold-xnew) gap=ymyr if((abs(gap).ge.(0.95*abs(gapold))).and.((gap/gapold).lt.0.))factor=factor+1. gapold=gap 2 continue if(xnew.le.0.)then xinc=xinc/2. xnew=xold+xinc/factor goto 2 end if ! xold=xnew if(abs(ymyr/yr).lt.0.000001)goto 3 xold=xnew 1 Continue ! write(*,*)' Convergence failed' iflg=1 3 continue chsq=xold ! write(*,*)' xold y = ',xold,y return end Inverse F test calculation !c Subroutine to find the f value corresponding to a given prob of F>f !c for confidence interval calculation ! reqf required value of f !c df1 numerator dof df2 denominator degrees of freedom !c prob required probability ! p1 prob at initial guess !c f2=f1+(prob-p1)df/dy, is Newtons method here; f1=2 is arbitrary start pt. subroutine invfs(prob,df1,df2,reqf) bt=exp(gammln(df2/2.)+gammln(df1/2.)-gammln(df2/2.+df1/2.)) eps=0.00001 f1=2.0 p1=betai(0.5*df2,df1/2.,df2/(df2+df1*f1)) !c Fiddle to ensure step size is contracting if necessary !c (Geometric gap between prob and trial y is decreasing) !c multiply correction by factor cf which decreases prn rat=0.5 cf=1. gapo=10. gapn=abs(log(prob)-log(p1)) Do 1 i=1,200 2 Continue denom=(df2**(df2/2.))*((df1*f1)**(df1/2.)) f2=f1-cf*(prob-p1)*bt*f1*((df2+df1*f1)**((df2+df1)/2.))/denom !c Algorithm doesn't seem to converge always for high probs of F>f ! convergence is obtained by following loop to prevent negative "f"s but ! allow appropriate small positive "f"s. 4 continue if(f2.lt.0.)then f2=(f2+f1)/2. goto 4 endif p2=betai(0.5*df2,df1/2.,df2/(df2+df1*f2)) gapn=abs(log(prob)-log(p2)) if(gapn.lt.eps)goto 3 rat=gapn/gapo if(rat.ge.1.)then cf=0.5*cf goto 2 endif ! write(*,*)' f2, p2 = ',f2,p2 f1=f2 p1=p2 gapo=gapn 1 Continue 3 Continue ! write(*,*)' final f2, p2 = ',f2,p2 reqf=abs(f2) return end inverse t value calculation !c Subroutine to find the t value corresponding to a given prob of T>t !c for confidence interval calculation !c cisize - size of confidence interval !c cit t value corresponding to confidence interval size !c df degrees of freedom !c yinf required probability spread out over both tails !c for some strange reason, this algorithm based on !c t2=t1+(yinf-y1)dt/dy, gives the negative t value subroutine invts(cisize,df,cit) ! The exponential on line 31 overflows for large df so use normal approx ! if(df.gt.40.)then ! write(12,*)' Degrees of Freedom > 40 so Normal approximation used' ! prob=(1.-cisize)/2. ! cit=xinvgn(prob) ! return ! endif bt=exp(gammln(df/2.)+gammln(0.5)-gammln(df/2.+0.5)) yinf=1.-cisize eps=0.00001 t1=-2.0 y1=betai(0.5*df,0.5,df/(df+t1**2)) !c Fiddle to ensure step size is contracting if necessary !c (Geometric gap between yinf and trial y is decreasing) !c multiply correction by factor cf which decreases prn rat=0.5 cf=1. gapo=10. gapn=abs(log(yinf)-log(y1)) Do 1 i=1,200 2 Continue ! ((df+t1**2)**((df+1.)/2.))/(2.*df**(df/2.)) is a factor that overflows ! for df >200 (?>50) Therefore use a log work around factor=((df+1.)/2.)*log(df+t1**2)-(df/2.)*log(df) factor=exp(factor)/2. t2=t1+cf*(yinf-y1)*bt*factor !c Algorithm doesn't seem to converge always for positive t if(t2.gt.0.)t2=0.5*t1 y2=betai(0.5*df,0.5,df/(df+t2**2)) gapn=abs(log(yinf)-log(y2)) if(gapn.lt.eps)goto 3 rat=gapn/gapo if(rat.ge.1.)then cf=0.5*cf goto 2 endif !c write(*,*)' t2, y2 = ',t2,y2 t1=t2 y1=y2 gapo=gapn 1 Continue 3 Continue !c write(*,*)' final t2, y2 = ',t2,y2 cit=abs(t2) return end Independent samples t-test subroutine !c Prog to read in columns of unequal length for independent samples t test subroutine ist(x,m,n,mx,pval) !c x array of data Character (80) rc,rc2 common/m/rc,cisizp Dimension x(mx,2),lc(2),inec(2),x1(mx),x2(mx) !c x array of data !c mx max length or length of longest column !c inec index of non blank columns at any particular row !c nbc number of non blank cols at any particular row !c Assumes data consists of a row containing number of cols !c followed by a row containing length of each col !c followed by the cols of data !c Proceed row by row reading in data lc(1)=m lc(2)=n Do 2 m=1,mx !c find out how many non-blank cols in the mth row nbc=0 Do 3 i=1,2 if(m.le.lc(i))nbc=nbc+1 3 Continue !c Make an array containing the indices of the non-blank cols at level m ll=1 Do 4 j=1,nbc Do 5 i=ll,2 if(m.gt.lc(i))goto 5 inec(j)=i goto 8 5 Continue 8 Continue ll=i+1 4 Continue read(20,*)(x(m,inec(j)),j=1,nbc) 2 Continue !c Do 9 j=1,2 !c write(*,101)(x(i,j),i=1,lc(j)) !c101 format(10(f5.1,1x)) !c9 Continue !c !c write out data in form for ususal stats package analysis !c ie. with 2 cols 1st data and 2nd group number Open(13,file='data.txt') Do 31 j=1,2 Do 32 i=1,lc(j) write(13,*)x(i,j),j 32 Continue 31 Continue !c write(12,*)' *** INDEPENDENT SAMPLES t-TEST ***' call imprint() Do 10 i=1,lc(1) x1(i)=x(i,1) 10 Continue Do 11 i=1,lc(2) x2(i)=x(i,2) 11 Continue !c Calculate mean and standard deviation call mstdss(x1,lc(1),ave1,std1) call mstdss(x2,lc(2),ave2,std2) df1=lc(1)-1 df2=lc(2)-1 call invts(cisizp/100.,df1,cit1) call invts(cisizp/100.,df2,cit2) ave1m=ave1-cit1*std1/sqrt(float(lc(1))) ave1p=ave1+cit1*std1/sqrt(float(lc(1))) ave2m=ave2-cit2*std2/sqrt(float(lc(2))) ave2p=ave2+cit2*std2/sqrt(float(lc(2))) write(12,*)' Col 1: no. of vals ',lc(1),' mean ',ave1,' std dev ',std1,& cisizp,'% conf int ',ave1m,ave1p write(12,*)' Col 2: no. of vals ',lc(2),' mean ',ave2,' std dev ',std2,& cisizp,'% conf int ',ave2m,ave2p !107 format(a,f9.4,a,f9.4,f8.3,a,2f9.4) !c Do preliminary f test to see if it is reasonable to believe variances !c are the same. if(std1.le.std2)then f=(std2/std1) df1=lc(2)-1 df2=lc(1)-1 else f=(std1/std2) df1=lc(1)-1 df2=lc(2)-1 endif write(12,*)' CONVENTIONAL ANALYSIS - consists of three parts' write(12,*)' Part 1) - test to see if samples differ in their spread' call pwrite(' Ratio of standard deviations: ',f,12,rc2) f=f*f !c A 2-tail test is double the chance of being in the more unlikely side of !c the f distribution from the given value. Because of assymetry, !c it cannot be assumed that the more unlikely end is the larger values !c than the given value, even though this is certain to be > 1. pval=2.*betai(0.5*df2,0.5*df1,df2/(df2+df1*f)) if(pval.gt.1)pval=2.-pval pvalsd=pval call pwrite(' p- value for test of equality of variances = ',pval,12,rc) write(12,*)' Part 2) - assuming that samples have identical spread' write(12,*)' [reasonableness judged by Part 1) result]' write(12,*)' calculate p-value and confidence intervals' write(12,*)' for the difference in average of the two samples' !c Calculation assuming that the variances are the same df=lc(1)+lc(2)-2 avedif=ave2-ave1 !c Calculating pooled variance var=(float(lc(1)-1)*std1*std1+float(lc(2)-1)*std2*std2)/df stdm=sqrt(var*( 1./float(lc(1)) + 1./float(lc(2)) ) ) ts=avedif/stdm !c Calculating confidence interval for difference between means call invts(cisizp/100.,df,cit) avedfm=avedif-cit*stdm avedfp=avedif+cit*stdm pvalue=betai(0.5*df,0.5,df/(df+ts*ts)) pvaldf=pvalue call pwrite(' Difference between averages of the two groups = ',avedif,12,rc2) call pwrite(' Standard deviation of this average difference = ',stdm,12,rc2) call pwrite(' t value for hypothesis difference is 0 = ',ts,12,rc2) write(12,101)cisizp,'% conf int for difference',avedfm,avedfp 101 format(f7.3,a,2f9.4) call pwrite(' p-value (2-tail) = ',pvalue,12,rc) call pwrite(' Independent samples t test: p-value (2-tail) = ',pvalue,0,rc) call mwrite() !c Calculation assuming that the variances aren't the same write(12,*)' Part 3) - assuming that samples haven''t identical spread' write(12,*)' [reasonableness judged by Part 1) result]' write(12,*)' calculate p-value and confidence intervals' write(12,*)' for the difference in average of the two samples' var1=std1*std1 var2=std2*std2 vard=var1/float(lc(1))+var2/float(lc(2)) stdd=sqrt(vard) td=avedif/stdd varden=(var1/float(lc(1)))**2/float(lc(1)-1) varden=varden+(var2/float(lc(2)))**2/float(lc(2)-1) dfd=vard**2/varden pvalue=betai(0.5*dfd,0.5,dfd/(dfd+td*td)) call invts(cisizp/100.,dfd,citd) avedfm=avedif-citd*stdd avedfp=avedif+citd*stdd call pwrite(' Difference between averages of the two groups = ',avedif,12,rc2) call pwrite(' Standard deviation of this average difference = ',stdd,12,rc2) call pwrite(' t value for hypothesis difference is 0 = ',td,12,rc2) write(12,101)cisizp,'% conf int for difference',avedfm,avedfp call pwrite(' p-value (2-tail) = ',pvalue,12,rc2) write(12,*) ' COMMONSENSE ANALYSIS ' write(12,*)' Overall p-value for a difference in either average or spread& & of the two groups.' write(12,*)' Uses the idea that if two groups are different in spread they are& & bound to be' write(12,*)'not identical in average and vice versa. Under the null& & hypothesis then' write(12,*)'the p-value obtained for the two standard deviations coming from& & the same population' write(12,*)'and the p-value obtained for the differences in means assuming equal& & standard deviations' write(12,*)'are two independent selections of numbers equally likely to be& & anywhere between 0 and 1.' write(12,*)'The commonsense p-value is the probability that the minimum& & of two such numbers' write(12,*)'would be at least as small as the minimum of the two numbers& & obtained here.' opval=min(pvaldf,pvalsd) opval=2.*opval-opval**2 call pwrite(' p-value analysis based on commonsense (2-tail) = ',opval,12,rc2) return end ?Kruskall-Wallis test SUBROUTINE KSONE(DATA,N,xmu,sigma,gn,D,PROB) DIMENSION DATA(N) ! CALL SORT(N,DATA) EN=N D=0. FO=0. DO 11 J=1,N FN=J/EN FF=gn(DATA(J),xmu,sigma) DT=AMAX1(ABS(FO-FF),ABS(FN-FF)) IF(DT.GT.D)D=DT FO=FN 11 CONTINUE PROB=PROBKS(SQRT(EN)*D) RETURN END nCk calculation subroutine kwknck(n,k,nck) ! Simple non-recursive approximate subroutine for n choose k kk=k if(kk.gt.(n/2))kk=n-k f=1. Do i=0,k-1 f=f*real(n-i)/real(k-i) end do nck=f return end windows calling program for KW test (note I think there is an error here in the case of calculations that take into account ties) ! Last change: DLL 3 Sep 2000 6:54 pm !c Prog to read in columns of unequal length for independent t test and anova !c and to then perform one way non-parametric anova (Kruskal-Wallis) subroutine kwr(lc,nr,nc,pval) use winteracter use resource type(win_message):: message Dimension x(nr,nc),lc(nc),inec(nc),k(10,2),xx(nr*nc),kkct(99) Dimension iai(99),iaii(99,nc),iak(99),nck(99),iarray(50) common iind(99),kount,ntot,kk(10,2),xkwi(999),kntkw,nkw,itreq character*80 rc character*256 string1 common/m/rc,cisizp !c x array of data ! k, kk, k1 k?(i,1)=i (the group number) k?(i,2)= number in the group !c nc number of columns nr number of rows in longest column !c ml max length or length of longest column !c inec index of non blank columns at any particular row !c nbc number of non blank cols at any particular row !c Assumes data consists of a row containing number of cols !c followed by a row containing length of each col !c followed by the cols of data ! Space will be quantity giving maximum spacing between adjacent real numbers ! in order to distinguish genuine ties misread by computer due to floating ! point notation from genuine differences kount=0 space=0. if(nc.gt.10)then call WMessageBox(0,3,1,' Data set too large, ? try ANOVA',' ') return end if write(12,*)' ****KRUSKAL-WALLIS TEST**** ' call imprint() ! Proceed row by row reading in data Do 2 m=1,nr ! find out how many non-blank cols in the mth row nbc=0 Do 3 i=1,nc if(m.le.lc(i))nbc=nbc+1 3 Continue !c Make an array containing the indices of the non-blank cols at level m ll=1 Do 4 j=1,nbc Do 5 i=ll,nc if(m.gt.lc(i))goto 5 inec(j)=i goto 8 5 Continue 8 Continue ll=i+1 4 Continue read(20,*)(x(m,inec(j)),j=1,nbc) Do j=1,nbc space=max(space,spacing(x(m,inec(j)))) end do 2 Continue space=2.5*space !c !c write out data in form for ususal stats package analysis !c ie. with 2 cols 1st data and 2nd group number ! also make two 1-D arrays one for data & other for group number for sorting iii=0 Open(11,file='data.txt') Do 31 j=1,nc Do 32 i=1,lc(j) write(11,*)x(i,j),j iii=iii+1 xx(iii)=x(i,j) iind(iii)=j 32 Continue 31 Continue ntot=iii ! Work out approx how many iterations required itreq=amulti(nc,nr,ntot,lc) ! Arrange data in ascending order with i marking ith group call sort2i(ntot,xx,iind) ! write(12,104)(xx(i),i=1,ntot) ! write(12,103)(iind(i),i=1,ntot) 103 format(39i2) 104 format(39f2.0) Do i=1,nc k(i,1)=i k(i,2)=lc(i) end do ! Start of modification for ties ! initialise array of ties Do 14 i=1,ntot iai(i)=1 Do 141 j=1,nc iaii(i,j)=0 141 Continue 14 continue ! Generate an array iai listing number of ties so that iai(jj) is the ! number of ties in the jjth group of ties. ! If iai(i) > 1, iaii(i,j) is number in this group of ties from jth col of ! data. ia=1 jj=1 iaii(1,iind(1))=1 Do 22 ii=2,ntot if(abs(xx(ia)-xx(ii)).lt.space)then iai(jj)=iai(jj)+1 iaii(jj,iind(ii))=iaii(jj,iind(ii))+1 else jj=jj+1 ia=ii iaii(jj,iind(ii))=1 endif ! write(12,*)' ii jj xx(ii) iai iaii',ii,jj,xx(ii),iai(jj),(iaii(jj,mm),mm=1,3) 22 Continue if(jj.gt.99)then call WMessageBox(0,3,1,' Data set too large or too many ties, ? try ANOVA',' ') return end if ! jj is now the number of groups of ties (Counting unique values as a tie ! of length 1.) ! Calculate number of combinations where how a tie is broken makes a difference ! This depends on a product of all n-Choosemulti-k (nck) possible terms where n is ! number in a tie and k is the number of each type. Initialise this repeated ! multiplication with nkw nkw=1 Do 23 i=1,jj if(iai(i).eq.1)then nck(i)=1 else Do ii=1,nc iak(ii)=iaii(i,ii) end do nck(i)=nmulti(nc,iak) ! write(12,*)'iak ',(iak(ii),ii=1,nc),' nck(',i,')',nck(i) endif nkw=nkw*nck(i) ! write(12,*)' nkw = ',nkw 23 Continue if(nkw.ne.1)write(12,*)' The ties can be broken in ',nkw,' equally likely ways' ! Making the decision between exact & approximate methods if(ntot.le.10)then iset=1 elseif((ntot.ge.30).or.(nkw.gt.999))then iset=0 else call wdialogload(IDD_MW) call wdialogshow(-1,-1,0,SemiModeless) ! nit=nkw*(2**ntot)/100000 ! write(string1,201)' The exact method requires ',itreq,' & ! & iterations. Choose preferred method.' !201 format(a27,i6,a43) write(string1,*)' The exact method requires ',itreq,' & & iterations. Choose preferred method.' call WDialogPutString(IDF_Noofits,string1) call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select endif end do call wdialoggetradiobutton(IDF_RADIOe,iset) call WDialogHide() endif ! Choice of exact or approximate method complete ! ******************************************************************** ! Formulate & Step through a method of counting all these combinations ! and for each possible combination break the ties xkwiav=0. nkwm1=nkw-1 Do 26 ikt=0,nkwm1 iktp1=ikt+1 itmp=ikt Do 33 ii=1,jj kkct(ii)=mod(itmp,nck(ii)) itmp=itmp/nck(ii) 33 Continue ! kkct now consists of an array the length of the number of ties with each ! element of the array being a number between (1 less than) 1 and the ! relevant n-Choosemulti-k. This number is used to select a particular combination ! of ways each particular collection of ties between group 1 and 2 can be ! broken - using the subroutine nmultin. ! write(*,*)' kkct = ',(kkct(ii),ii=1,jj) n=0 Do 27 ii=1,jj if(nck(ii).gt.1)then Do j=1,nc iak(j)=iaii(ii,j) end do call nmultin(nc,iak,kkct(ii),iarray) Do 28 j=1,iai(ii) m=n+j iind(m)=iarray(j) 28 Continue endif n=n+iai(ii) 27 Continue ! Call MW calculation here ! write(12,104)(xx(ii),ii=1,ntot) ! write(12,103)(iind(ii),ii=1,ntot) ! Start of Kruskal-Wallis calculation - 1st find sum of ranks of each type sum=0. Do 12 i=1,nc ip=0 Do 13 l=1,ntot if(iind(l).eq.i)ip=ip+l 13 Continue sum=sum+real(ip**2)/real(k(i,2)) 12 Continue xkwi(iktp1)=12.*sum/real(ntot*(ntot+1))-real(3*(ntot+1)) write(12,*)' Order = ',(iind(ii),ii=1,ntot),' Kruskal-Wallis k = ',xkwi(iktp1) xkwiav=xkwiav+xkwi(iktp1) 26 continue xkwiav=xkwiav/real(nkw) ! Exact Calculation if(iset==1)then kntkw=0 n=ntot Do i=1,nc Do ii=1,2 kk(i,ii)=k(i,ii) end do end do call multi(n,k,nc) pval=real(kntkw)/real(nkw*kount) write(12,*)' pval = ',kntkw,'/',nkw*kount,' or ',pval,' or' call pwrite(' Kruskal-Wallis test: p-value = ',pval,12,rc) call pwrite(' Kruskal-Wallis test: p-value = ',pval,0,rc) call mwrite() else ! Note (from Rice p454) k is approx chisq with nc-1 dof pvala=gammq((nc-1)/2.,xkwiav/2.) write(12,*)' Using average Kruskal-Wallis k = ',xkwiav write(12,*)' pval (approx method) = ',pvala call pwrite(' Kruskal-Wallis test: p-value = ',pvala,0,rc) call mwrite() end if return end Combinatoric calculation recursive subroutine multi(n,k,nc) dimension k(10,2),k1(10,2) common iind(99),kount,ntot,kk(10,2),xkwi(999),kntkw,nkw,itreq ! nc number of k's into which n is to be divided ! first component of k is its group marker 'i' ! 2nd conponent of k is the number remaining to be distributed ! first test to see how many k's are left non-zero if only one is left ! then the remaining n's are that sort of k ! inz = index of the non-zero k ! write(*,*)n,(k(i,2),i=1,nc) ! First test to see if recursive iterations have eliminated all but one type itest=0 inz=0 do mt=1,nc if(k(mt,2).ne.0)then itest=itest+1 inz=mt end if end do ! if recursive iterations have eliminated all but one type remaining places ! are all of that type so - ! start of main if if(itest.eq.1)then Do i=1,n iind(i)=k(inz,1) end do kount=kount+1 ! Kilo iteration counter if( ((kount/1000)*1000).eq.kount)call kit(kount,itreq) ! do any calculations on the arrangements here ! write(*,*)' kount',kount,' iind ',(iind(i),i=1,ntot) ! Start of Kruskal-Wallis calculation - 1st find sum of ranks of each type sum=0. Do 2 i=1,nc ip=0 Do 3 l=1,ntot if(iind(l).eq.i)ip=ip+l 3 Continue sum=sum+real(ip**2)/real(kk(i,2)) 2 Continue xkw=12.*sum/real(ntot*(ntot+1))-real(3*(ntot+1)) ! write(*,*)' kount',kount,' iind ',(iind(i),i=1,ntot),' kw ',xkw Do ikt=1,nkw if(xkw.ge.xkwi(ikt))kntkw=kntkw+1 end do ! End of Kruskal-Wallis calc else ! Use each type of k for which there is more than 0 left, in turn to ! form this nth place of array iind Do 1 i=1,nc Do ii=1,nc Do jj=1,2 k1(ii,jj)=k(ii,jj) end do end do if(k(i,2).eq.0)goto 1 iind(n)=k(i,1) k1(i,2)=k(i,2)-1 n1=n-1 ! write(*,*)' Should (re)go into multi here' call multi(n1,k1,nc) ! write(*,*)' Have (re)exited multi here' 1 continue end if return end ! ! function to work out approx number of comparisons required function amulti(nc,nr,ntot,lc) dimension lc(nc),llc(nc) llc=lc nntot=ntot aaa=1. Do 2 j=1,nr Do 1 i=1,nc if(llc(i).lt.1)goto 1 aaa=aaa*real(nntot)/real(llc(i)) nntot=nntot-1 1 Continue llc=llc-1 2 Continue amulti=aaa return end ! ! subroutine to put out iteration/comparison counter subroutine kit(kount,kreq) character*256 rc,string2 string2=' To abort the program press ctrl-alt-del' call windowclear() write(rc,*)kreq,' comparisons required; ',kount& ,' comparisons completed.' call windowoutstring(500,1000,rc) call windowoutstring(1000,2000,string2) return end Two subroutines to describe data ! subroutine to calculate mean & interquartile range subroutine miqr(x,n) dimension x(n) rn=n Do i=1,n ri=i frac=ri/rn frac1=(rn-ri+1.)/rn frac2=(rn-ri)/rn if((frac.ge.nearest(0.25,-1.)).and.(frac1.ge.nearest(0.75,-1.)))then if((frac2.ge.nearest(0.75,-1.)))then q25=(x(i)+x(i+1))/2. else q25=x(i) end if exit end if end do Do i=1,n ri=i frac=ri/rn frac1=(rn-ri+1.)/rn frac2=(rn-ri)/rn if((frac.ge.nearest(0.5,-1.)).and.(frac1.ge.nearest(0.5,-1.)))then if((frac2.ge.nearest(0.5,-1.)))then q50=(x(i)+x(i+1))/2. else q50=x(i) end if exit end if end do Do i=1,n ri=i frac=ri/rn frac1=(rn-ri+1.)/rn frac2=(rn-ri)/rn if((frac.ge.nearest(0.75,-1.)).and.(frac1.ge.nearest(0.25,-1.)))then if((frac2.ge.nearest(0.25,-1.)))then q75=(x(i)+x(i+1))/2. else q75=x(i) end if exit end if end do xiqr=q75-q25 write(12,*)' Median ',q50,' Inter-quartile range ',xiqr write(12,*)' 1st quartile ',q25,' 3rd quartile ',q75 return end ! subroutine to calculate mean and std dev subroutine mstds(x,n,ave,std) dimension x(n) ! open(12,'moreinfo') sum=0. sum2=0. Do 1 i=1,n read(20,*)x(i) sum=sum+x(i) sum2=sum2+x(i)*x(i) 1 Continue ave=sum/real(n) var=(sum2-real(n)*ave**2)/real(n-1) std=sqrt(var) write(12,*)' mean = ',ave,' std = ',std rewind 20 return end ! subroutine to calculate mean and std dev subroutine mstdss(x,n,ave,std) dimension x(n) sum=0. sum2=0. Do 1 i=1,n sum=sum+x(i) sum2=sum2+x(i)*x(i) 1 Continue ave=sum/real(n) var=(sum2-real(n)*ave**2)/real(n-1) std=sqrt(var) ! write(12,*)x return end Windows calling program for data description subroutine mstdcall() use winteracter use resource type(win_message):: message ! Prog to read in columns of unequal length for Mann Whitney test ! Mann-Whitney test with ties Real, Dimension (:), allocatable::x Integer, Dimension (:), allocatable:: lc,inec Real, Dimension (:,:), allocatable::xx character*256 string3 Character(80) rc,moreinfostring Character*5 c(30),blk(15) character*160 fmtspec common/m/rc external gn ! x array of data ! ml max length or length of longest column ! inec index of non blank columns at any particular row ! nbc number of non blank cols at any particular row ! Assumes data consists of a row containing number of cols ! followed by a row containing length of each col ! followed by the cols of data moreinfostring=' For more information about this data press MoreInfo on the main menu' write(12,*)' *** DATA DESCRIPTION ***' call imprint() ! Select Data Format call wdialogload(IDD_Describe) call wdialogshow(-1,-1,0,SemiModeless) call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select endif end do call wdialoggetradiobutton(IDF_Radio1,iset) call WDialogHide() ! Select Case(iset) case(1) Open(22,file='sorted.dat') read(20,*)n allocate (x(n)) Do i=1,n read(20,*)x(i) end do call mstdss(x,n,ave,sd) sem=sd/sqrt(real(n)) write(string3,*)'mean = ',ave,' standard deviation = ',sd write(12,*)'No. of vals = ',n,' mean = ',ave,& ' standard deviation = ',sd,'Standard error of mean = ',sem call windowoutstring(300,1200,string3) call windowoutstring(300,1800,moreinfostring) call sort(n,x) write(22,*)n,' = number of data values below' do i=1,n write(22,*)x(i) end do call miqr(x,n) call ksone(x,n,ave,sd,gn,d,pval) write(12,*)' The approximate p-value for the hypothesis that& & the data is normally distributed is:-' write(12,*)' ',pval,'(Approximation reasonable for n>20)' write(12,*)' The sorted values are stored& & in file sorted.dat' rewind 20;deallocate (x);close(20);close(22) case(2) Open(21,file='diffs.dat') Open(22,file='sorted.dat') read(20,*)n allocate (xx(n,2)) allocate (x(n)) Do i=1,n read(20,*)xx(i,1),xx(i,2) end do Do j=1,2 x=xx(:,j) call mstdss(x,n,ave,sd) sem=sd/sqrt(real(n)) write(string3,*)'Column',j,'mean = ',ave,' standard deviation = ',sd write(12,*)'Column ',j,' no. of vals = ',n,' mean = ',ave,& ' standard deviation = ',sd,' Standard error of mean = ',sem call windowoutstring(300,600+600*j,string3) end do call windowoutstring(300,2400,moreinfostring) x=xx(:,2)-xx(:,1) call mstdss(x,n,ave,sd) write(12,*)'Differences:-' write(12,*)' Mean = ',ave,' standard deviation = ',sd write(21,*)n,' = number of data values below' Do i=1,n write(21,*)x(i) end do call sort(n,x) write(22,*)n,' = number of data values below' do i=1,n write(22,*)x(i) end do call miqr(x,n) call ksone(x,n,ave,sd,gn,d,pval) write(12,*)' The approximate p-value for the hypothesis that& & the differences are normally distributed is:-' write(12,*)' ',pval,'(Approximation reasonable for n>20)' write(12,*)' The differences column 2 - column 1 are stored& & in file diffs.dat' write(12,*)' The sorted values of the differences are stored& & in file sorted.dat' rewind 20;deallocate (xx);deallocate (x);close(20) close(21);close(22) case(3) Open(22,file='sorted.dat') read(20,*)mmm,nnn nr=max(mmm,nnn) allocate (xx(nr,2));allocate (lc(2));allocate (inec(2)) allocate (x(nr)) lc(1)=mmm lc(2)=nnn ! Proceed row by row reading in data Do 2 m=1,nr ! find out how many non-blank cols in the mth row nbc=0 Do i=1,2 if(m.le.lc(i))nbc=nbc+1 End do ! Make an array containing indices of the non-blank cols at level m ! Array is called inec = index non-empty column ll=1 Do 4 j=1,nbc Do 5 i=ll,2 if(m.gt.lc(i))goto 5 inec(j)=i goto 8 5 Continue 8 Continue ll=i+1 4 Continue read(20,*)(xx(m,inec(j)),j=1,nbc) 2 Continue Do j=1,2 Do i=1,lc(j) x(i)=xx(i,j) end do n=lc(j) call mstdss(x,n,ave,sd) write(string3,*)'Column',j,'mean = ',ave,' standard deviation = ',sd ! write(12,*)'Column',j sem=sd/sqrt(real(n)) write(12,*)'Column ',j,' no. of vals = ',n,' mean = ',ave,& ' standard deviation = ',sd,' Standard error of mean = ',sem call windowoutstring(300,600+600*j,string3) call sort(n,x) Do i=1,lc(j) xx(i,j)=x(i) end do call windowoutstring(300,2400,moreinfostring) call miqr(x,n) call ksone(x,n,ave,sd,gn,d,pval) write(12,*)' The approximate p-value for the hypothesis that& & the data in column',j,' is normally distributed is:-' write(12,*)' ',pval,'(Approximation reasonable for n>20)' end do nc=2 write(22,*)(lc(i),i=1,nc),' =number in each column' Do 42 m=1,nr ! find out how many non-blank cols in the mth row nbc=0 Do i=1,nc if(m.le.lc(i))nbc=nbc+1 End do ! Make an array containing indices of the non-blank cols at level m ! Array is called inec = index non-empty column ll=1 Do 44 j=1,nbc Do 45 i=ll,nc if(m.gt.lc(i))goto 45 inec(j)=i goto 48 45 Continue 48 Continue ll=i+1 44 Continue blk='f0,2x' Do 51 k=1,nc Do 52 j=1,nbc if(inec(j).eq.k)goto 51 52 Continue blk(k)='11x ' 51 Continue c(1)=' (' Do i=1,nc c(2*i)=blk(i) if(i.ne.nc)c(2*i+1)=' , ' end do c(2*nc+1)=' ) ' write(fmtspec,*)(c(i),i=1,2*nc+1) write(22,fmt=fmtspec)(xx(m,inec(j)),j=1,nbc) 42 Continue write(12,*)' The sorted values are stored& & in file sorted.dat' rewind 20;deallocate (xx);deallocate (x);close(20) deallocate (lc);deallocate (inec);close(22) Case (4) Open(22,file='sorted.dat') read(20,*)nc allocate (lc(nc));allocate (inec(nc)) read(20,*)(lc(i),i=1,nc) nr=maxval(lc) allocate (xx(nr,nc));allocate (x(nr)) ! Proceed row by row reading in data Do 12 m=1,nr ! find out how many non-blank cols in the mth row nbc=0 Do i=1,nc if(m.le.lc(i))nbc=nbc+1 End do ! Make an array containing indices of the non-blank cols at level m ! Array is called inec = index non-empty column ll=1 Do 14 j=1,nbc Do 15 i=ll,nc if(m.gt.lc(i))goto 15 inec(j)=i goto 18 15 Continue 18 Continue ll=i+1 14 Continue read(20,*)(xx(m,inec(j)),j=1,nbc) 12 Continue Do j=1,nc Do i=1,lc(j) x(i)=xx(i,j) end do n=lc(j) call mstdss(x,n,ave,sd) sem=sd/sqrt(real(n)) write(string3,*)'Column',j,'mean = ',ave,' standard deviation = ',sd write(12,*)'Column',j,' no. of vals = ',n,' mean = ',ave,& ' standard deviation = ',sd,' Standard error of mean = ',sem call windowoutstring(300,600+600*j,string3) call sort(n,x) Do i=1,n xx(i,j)=x(i) end do call miqr(x,n) call ksone(x,n,ave,sd,gn,d,pval) write(12,*)' The approximate p-value for the hypothesis that& & the data in column',j,' is normally distributed is:-' write(12,*)' ',pval,'(Approximation reasonable for n>20)' end do call windowoutstring(300,1200+nc*600,moreinfostring) write(22,*)nc,' =number of columns' write(22,*)(lc(i),i=1,nc),' =number in each column' Do 32 m=1,nr ! find out how many non-blank cols in the mth row nbc=0 Do i=1,nc if(m.le.lc(i))nbc=nbc+1 End do ! Make an array containing indices of the non-blank cols at level m ! Array is called inec = index non-empty column ll=1 Do 34 j=1,nbc Do 35 i=ll,nc if(m.gt.lc(i))goto 35 inec(j)=i goto 38 35 Continue 38 Continue ll=i+1 34 Continue blk='f0,2x' Do 21 k=1,nc Do 22 j=1,nbc if(inec(j).eq.k)goto 21 22 Continue blk(k)='11x ' 21 Continue c(1)=' (' Do i=1,nc c(2*i)=blk(i) if(i.ne.nc)c(2*i+1)=' , ' end do c(2*nc+1)=' ) ' ! write(10,*)' c= ',(c(i),i=1,2*nc+1) write(fmtspec,*)(c(i),i=1,2*nc+1) write(22,fmt=fmtspec)(xx(m,inec(j)),j=1,nbc) 32 Continue write(12,*)' The sorted values are stored& & in file sorted.dat' rewind 20;deallocate (xx);deallocate (lc);close(20) deallocate (inec);close(22) End Select write(12,*) return end windows calling program for outpput subroutine mwrite() Character(80) rc,moreinfostring1,moreinfostring2 Common/m/rc moreinfostring1=' For more information about the results of this test' moreinfostring2=' press MoreInfo on the main menu' call windowclear() ! call windowselect(0) call windowoutstring(100,1000,rc) call windowoutstring(100,2000,moreinfostring1) call windowoutstring(500,2400,moreinfostring2) return end Windows calling program for Mann-Whitney test subroutine mwtt(x,mmm,nnn,ml,pvalu) use winteracter use resource type(win_message):: message ! Prog to read in columns of unequal length for Mann Whitney test ! Mann-Whitney test with ties Dimension x(ml,2),lc(2),inec(2),xx(ml*2),iind(ml*2) Dimension iai(ml*2),iaii(ml*2),nck(ml*2),kk(ml*2),iarray(50),xmp(200) CHARACTER*12 fmtspec character*256 string1 Character(80) rc common/m/rc COMMON jjj(50),kount,nntot,nkk,maxmw,ncku,mww(200),nmw,mwp(200),iflg21 & ,nit000 ! x array of data ! ml max length or length of longest column ! inec index of non blank columns at any particular row ! nbc number of non blank cols at any particular row ! Assumes data consists of a row containing number of cols ! followed by a row containing length of each col ! followed by the cols of data ! mww - array of possible values for Mann-Whitney U for each way breaking ties ! mwav - counter for average Mann_Whitney U ! mwp - counter used in combmw2 for counts of combinations matching each ! possible U after arranging for each possible way of breaking ties ! xmp - ratio of mwp to total number of combinations write(12,*)' *** MANN-WHITNEY TEST ***' write(13,*)' *** MANN-WHITNEY TEST ***' call imprint() mwp=0 kount=0 lc(1)=mmm lc(2)=nnn ntot=lc(1)+lc(2) nntot=ntot nkk=nnn maxmw=lc(1)*lc(2) ncku=0 space=0. ! Proceed row by row reading in data Do 2 m=1,ml ! find out how many non-blank cols in the mth row nbc=0 Do 3 i=1,2 if(m.le.lc(i))nbc=nbc+1 3 Continue ! Make an array containing the indices of the non-blank cols at level m ! Array is called inec = index non-empty column ll=1 Do 4 j=1,nbc Do 5 i=ll,2 if(m.gt.lc(i))goto 5 inec(j)=i goto 8 5 Continue 8 Continue ll=i+1 4 Continue read(20,*)(x(m,inec(j)),j=1,nbc) space=max(space,spacing(x(m,1)),spacing(x(m,nbc))) 2 Continue space=2.5*space !c ! write out data in form for usual stats package analysis ! ie. with 2 cols 1st data and 2nd group number ! Also store all data in xx for sorting and iind to memorize group id write(12,*)'File data.txt stores data in format for other& & stats packages using columns for value & group_id.' write(12,*)' Additional information is available in file xsinfo.dat' write(12,*)' Number of values in the two columns are ',mmm,' & ',nnn Open(14,file='data.txt') Do 31 j=1,2 Do 32 i=1,lc(j) write(14,*)x(i,j),j if(j.eq.1)ind=i if(j.eq.2)ind=i+lc(1) jj=j-1 xx(ind)=x(i,j) iind(ind)=j-1 32 Continue 31 Continue ! Arrange data in ascending order with a 0 marking first group and 1 2nd. call sort2i(ntot,xx,iind) ! write(12,103)(iind(i),i=1,ntot) 103 format(39i2) ! Start of modification for ties ! initialise array of ties Do 14 i=1,ntot iai(i)=1 iaii(i)=0 14 continue ! Generate an array iai listing number of ties so that iai(jj) is the ! number of ties in the jjth group of ties. ! If iai(jj) > 1, iaii(jj) is number in this group of ties from 2nd col of ! data. ia=1 jj=1 Do 22 ii=2,ntot if(abs(xx(ia)-xx(ii)).lt.space)then ! if(xx(ia).eq.xx(ii))then iai(jj)=iai(jj)+1 if((ia.eq.ii-1).and.(iind(ia).eq.1))iaii(jj)=1 iaii(jj)=iaii(jj)+iind(ii) else jj=jj+1 ia=ii endif 22 Continue ! jj is now the number of groups of ties (Counting unique values as a tie ! of length 1.) write(13,*)' Number in each group of tied differences, value 1 means& & no tie' write(13,*)'(If number in group >1 bracketed number is the number& & from 2nd col)' write(13,*)(iai(i),'(',iaii(i),')',i=1,jj) ! write(*,*)(iai(i),i=1,jj) ! write(*,*)(iaii(i),i=1,jj) ! Calculate number of combinations where how a tie is broken makes a difference ! This depends on a product of all n-Choose-k (nck) possible terms where n is ! number in a tie and k is the number of type 2. Initialise this repeated ! multiplication with nmw nmw=1 Do 23 i=1,jj if(iai(i).eq.1)then nck(i)=1 else nck(i)=ncomb2(iai(i),iaii(i)) endif nmw=nmw*nck(i) 23 Continue ! Making the decision between exact & approximate methods if(ntot.le.10)then iset=1 elseif(ntot.ge.30)then iset=0 else call wdialogload(IDD_MW) call wdialogshow(-1,-1,0,SemiModeless) ! nit=nmw*(2**ntot)/100000 call kwknck(ntot,nnn,nit) nit000=nit/1000 write(string1,201)' The exact method requires ',nit000,' k& & comparisons. Choose preferred method.' 201 format(a27,i6,a43) call WDialogPutString(IDF_Noofits,string1) call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select endif end do call wdialoggetradiobutton(IDF_RADIOe,iset) call WDialogHide() endif ! Choice of exact or approximate method complete write(12,*)' Total number of rearrangements of ties = ',nmw if(iset==1)then ! ******************************************************************** ! Exact calculation ! Formulate & Step through a method of counting all these combinations ! and for each possible combination break the ties ! First:- headings lcc=max(ntot-6,1) write(fmtspec,112)' (',lcc,'x,a)' 112 format(a,i2,a) write(13,fmt=fmtspec)'Order Mann-Whitney U'! p-value' pvalu=0. nmwm1=nmw-1 mwav=0 Do 26 i=0,nmwm1 itmp=i Do 33 ii=1,jj kk(ii)=mod(itmp,nck(ii)) itmp=itmp/nck(ii) 33 Continue ! kk now consists of an array the length of the number of ties with each ! element of the array being a number between (1 less than) 1 and the ! relevant n-Choose-k. This number is used to select a particular combination ! of ways each particular collection of ties between group 1 and 2 can be ! broken - using the subroutine ncombn. ! write(*,*)' kk = ',(kk(ii),ii=1,jj) n=0 Do 27 ii=1,jj if(nck(ii).gt.1)then call ncombn2(iai(ii),iaii(ii),kk(ii),iarray) Do 28 j=1,iai(ii) m=n+j iind(m)=iarray(j) 28 Continue endif n=n+iai(ii) 27 Continue ! Call MW calculation here mw=0 Do 39 i1=2,ntot Do 40 j1=1,i1-1 if(iind(i1).eq.0)mw=mw+iind(j1) 40 Continue 39 Continue ! write(*,*)(iind(n1),n1=1,ntot),mw mwav=mwav+mw ! mw1=mw ! mw2=maxmw-mw1 ! mww(i+1)=min(mw1,mw2) mww(i+1)=mw ! Attempt to output results in the form order U pval write(rc,110)(iind(i1),i1=1,ntot) 110 format(80i1) write(13,113)trim(rc),mw 113 format(1x,a,i12,11x,f10.7) 26 continue xmwav=real(mwav)/real(nmw) write(13,*)' Average Mann-Whitney U+ = ',xmwav ! iflg21 -flag set to 1 if U+ larger than its mean iflg21=0 if(xmwav.gt.0.5*real(maxmw))then xmwav=real(maxmw)-xmwav mww=real(maxmw)-mww iflg21=1 end if write(12,*)' Average Mann-Whitney U value = ',xmwav call combmw2(ntot,lc(1)) xmp=real(mwp)/real(kount) write(13,*)'U vals & corresponding 1-sided p-vals ',(mww(i),xmp(i),i=1,nmw) kount=nmw*kount pval=2.*real(ncku)/real(kount) write(12,*)' p-value (2-tail) = ',ncku,'/',kount,' or ',pval,' or ' ! pvalu=pvalu+pval !26 Continue ! **************************************** ! Average the pvalues over all possible combinations ! pval=pvalu/float(nmw) call pwrite(' p-value (2-tail) exactly corrected for ties = ',pval,12,rc) call pwrite(' p-value (2-tail) exactly corrected for ties = ',pval,0,rc) ! ************************************************************************ ! Approximate Calculation else ! Approximate correction for ties (Reference R. Meddis: Stats using ranks) ! also ref Rice MW U derivation & my own elaboration for ties. it=0 Do 9 i=1,jj it=it+iai(i)**3-iai(i) 9 Continue ! Calculate MannWhitney U = fmw -floating point MannWhitney U with draws ! first find sum of ranks fmw=0 n=0 Do 42 ii=1,jj Do 41 j=1,iai(ii) m=n+j ! The term (iai(ii)+1)/2 averages out all the ranks belonging to the ties fmw=fmw+float(iind(m))*(float(n)+float(iai(ii)+1)/2.) 41 Continue n=n+iai(ii) 42 Continue fmw=fmw-float(lc(2)*(lc(2)+1)/2) fmw=min(fmw,(real(maxmw)-fmw)) write(12,*)' Average Mann-Whitney U value = ',fmw tcor=1.-float(it)/float(ntot**3-ntot) xmu=float(maxmw)/2. sigma=sqrt(float(maxmw*(ntot+1))/12.)*sqrt(tcor) ! sigmaa=sqrt(float(maxmw*(ntot+1))/12.) pval= gn(fmw,xmu,sigma) pval1=1.-pval pval=2.*min(pval,pval1) call pwrite(' Approximate p-value (2-tail), corrected for ties',pval,12,rc) ! write(12,105)' approx p-value = ',pval ! pval= gn(fmw,xmu,sigmaa) ! pval1=1.-pval ! pvalu=2.*min(pval,pval1) ! write(12,106)' approx p-value (no correction)= ',pval 105 format(a18,f10.7) 106 format(a34,f10.7) endif ! write(string1,111)' Mann-Whitney test: p-value = ',pval call pwrite(' Mann-Whitney test: p-value = ',pval,0,rc) 111 format(a38,f11.9) call mwrite() return end More combinatoric programs function ncomb2(n,k) COMMON/ncomb/ j(50),kount kount=0 call ncmb2(n,k) ncomb2=kount return end recursive subroutine ncmb2(n,k) COMMON/ncomb/ j(50),kount IF(k.eq.0)then DO i=n,1,-1 j(i)=0 END DO ELSEIF((n-k).eq.0)then DO i=n,1,-1 j(i)=1 END DO else DO i=0,1 j(n)=i k1=k-i n1=n-1 ! write(*,*)' n1 & k1 = ',n1,k1 call ncmb2(n1,k1) end do return endif 1 continue kount=kount+1 ! PRINT*,(j(i),i=1,3),kount return end subroutine ncombn2(n,k,jkount,iarray) dimension iarray(50) COMMON/ncombn/ j(50),kount ! COMMON j(50),kount kount=0 jktp1=jkount+1 call ncmbn2(n,k,jktp1,iarray) ! write(*,*)' jkount= ',jkount,' iarray ',(iarray(i),i=1,n) return end recursive subroutine ncmbn2(n,k,jktp1,iarray) dimension iarray(50) COMMON/ncombn/ j(50),kount ! COMMON j(50),kount if(kount.eq.jktp1)return IF(k.eq.0)then DO i=n,1,-1 j(i)=0 END DO ELSEIF((n-k).eq.0)then DO i=n,1,-1 j(i)=1 END DO else DO i=0,1 j(n)=i k1=k-i n1=n-1 ! write(*,*)' n1 & k1 = ',n1,k1 call ncmbn2(n1,k1,jktp1,iarray) end do return endif 1 continue kount=kount+1 ! PRINT*,' print statement',(j(i),i=1,3),' ',kount if(kount.eq.jktp1)iarray=j return end ! Subroutine to count multinomial coefficients function nmulti(nc,kkk) Dimension k(nc,2),kkk(nc) common/nmult/ j(50),kount,nn ! nc number of columns kount=0 Do i=1,nc k(i,1)=i k(i,2)=kkk(i) end do n=0 Do i=1,nc n=n+k(i,2) end do nn=n call mlti(n,k,nc) nmulti=kount return end recursive subroutine mlti(n,k,nc) dimension k(nc,2),k1(nc,2) common/nmult/ j(50),kount,nn ! nc number of k's into which n is to be divided ! first component of k is its marker 'i' ! 2nd conponent of k is the number remaining to be distributed ! first test to see how many k's are left non-zero if only one is left ! then the remaining n's are that sort of k ! inz = index of the non-zero k ! write(*,*)n,(k(i,2),i=1,nc) itest=0 inz=0 do mt=1,nc if(k(mt,2).ne.0)then itest=itest+1 inz=mt end if end do ! start of main if if(itest.eq.1)then Do i=1,n j(i)=k(inz,1) end do kount=kount+1 ! do any calculations on the arrangements here else Do 1 i=1,nc k1=k if(k(i,2).eq.0)goto 1 j(n)=k(i,1) k1(i,2)=k(i,2)-1 n1=n-1 call mlti(n1,k1,nc) 1 continue end if return end ! Subroutine to count multinomial coefficients subroutine nmultin(nc,kkk,jkount,iarray) Dimension k(nc,2),kkk(nc),iarray(50) common/nmult/ j(50),kount,nn ! nc number of columns kount=0 jktp1=jkount+1 Do i=1,nc k(i,1)=i k(i,2)=kkk(i) end do n=0 Do i=1,nc n=n+k(i,2) end do nn=n call mltin(n,k,jktp1,nc,iarray) return end recursive subroutine mltin(n,k,jktp1,nc,iarray) dimension k(nc,2),k1(nc,2),iarray(50) common/nmult/ j(50),kount,nn ! nc number of k's into which n is to be divided ! first component of k is its marker 'i' ! 2nd conponent of k is the number remaining to be distributed ! first test to see how many k's are left non-zero if only one is left ! then the remaining n's are that sort of k ! inz = index of the non-zero k ! write(*,*)n,(k(i,2),i=1,nc) if(kount.gt.jktp1)return itest=0 inz=0 do mt=1,nc if(k(mt,2).ne.0)then itest=itest+1 inz=mt end if end do ! start of main if if(itest.eq.1)then Do i=1,n j(i)=k(inz,1) end do kount=kount+1 ! do any calculations on the arrangements here if(kount.eq.jktp1)iarray=j else Do 1 i=1,nc k1=k if(k(i,2).eq.0)goto 1 j(n)=k(i,1) k1(i,2)=k(i,2)-1 n1=n-1 call mltin(n1,k1,jktp1,nc,iarray) 1 continue end if return end subroutine for Pearson's correlation ! Subroutine to calculate Pearson's correlation coefficient Subroutine pearson(x,n,r,pval) dimension x(n,2) character*80 rc,rc2 common/m/rc,cisizp sumxy=0.;sumx=0.;sumx2=0.;sumy=0.;sumy2=0. Do i=1,n sumxy=sumxy+x(i,1)*x(i,2) sumx=sumx+x(i,1) sumx2=sumx2+x(i,1)*x(i,1) sumy=sumy+x(i,2) sumy2=sumy2+x(i,2)*x(i,2) end do r=real(n)*sumxy-sumx*sumy r=r/sqrt((real(n)*sumx2-sumx**2)*(real(n)*sumy2-sumy**2)) call pwrite(' Pearson''s correlation coefficient (r) = ',r,12,rc2) r2=r*r call pwrite(' Coefficient of determination (r^2) = ',r2,12,rc2) if(r2.ne.1.)then ! Use t transformation to test H0 that r=0 ref p165Basic&ClinicalStatsDawson.. t=r*sqrt(real(n)-2.)/sqrt(1-r**2) df=n-2 pval=betai(0.5*df,0.5,df/(df+t*t)) call pwrite(' p-value (2-tail) for hypothesis "r=0" = ',pval,12,rc2) ! call pwrite(' p-value (2-tail) for hypothesis "r=0" = ',pval,0 ,rc) ! Use z transformation for conf intervals ref p166Basic&ClinicalStatsDawson.. if(n.gt.3)then cizp=1.-(1.-cisizp/100.)/2. ciz=xinvgn(cizp) ! write(12,*)' ciz = ',ciz zr=0.5*log((1.+r)/(1.-r)) zrp=zr+ciz*sqrt(1./(real(n)-3.)) zrm=zr-ciz*sqrt(1./(real(n)-3.)) rp=(exp(2.*zrp)-1.)/(exp(2.*zrp)+1.) rm=(exp(2.*zrm)-1.)/(exp(2.*zrm)+1.) write(12,*)cisizp,'% conf int for r (assuming bivariate normality)= ',rm,rp endif endif return end ?library program ? what does it do? is it called anywhere? FUNCTION PROBKS(ALAM) PARAMETER (EPS1=0.001, EPS2=1.E-8) A2=-2.*ALAM**2 FAC=2. PROBKS=0. TERMBF=0. DO 11 J=1,100 TERM=FAC*EXP(A2*J**2) PROBKS=PROBKS+TERM IF(ABS(TERM).LT.EPS1*TERMBF.OR.ABS(TERM).LT.EPS2*PROBKS)RETURN FAC=-FAC TERMBF=ABS(TERM) 11 CONTINUE PROBKS=1. RETURN END paired sample t test !c Prog to do paired sample t test subroutine pst(x,n,pval) Dimension x(n,2),d(n) !c x array of data ! character (20):: cp=' p-val(2-tail) = ' Character (80) rc common/m/rc,cisizp open(14,file='diffs.dat') write(14,*)n,' = number of values' write(12,*)' *** PAIRED SAMPLES t-TEST ***' call imprint() write(12,*)' Number of data pairs = ',n write(12,*)' Differences are saved in file diffs.dat' Do 1 i=1,n read(20,*)x(i,1),x(i,2) d(i)=x(i,2)-x(i,1) write(14,*)d(i) 1 Continue df=n-1 call mstdss(d,n,dif,std) stdm=std/sqrt(float(n)) t=dif/stdm call invts(cisizp/100.,df,cit) difm=dif-cit*stdm difp=dif+cit*stdm pval=betai(0.5*df,0.5,df/(df+t*t)) ! write(12,*)' Number of comparisons = ',n ! write(12,*)' Average difference = ',dif ! write(12,*)' Standard deviation of differences = ',std 101 format(f6.3,a,2f9.4) ! write(12,*)' t value = ',t call pwrite(' Average difference = ',dif,12,rc) call pwrite(' Standard deviation of differences = ',std,12,rc) write(12,101)cisizp,'% confidence interval for difference = ',difm,difp call pwrite(' p-value (2-tail) = ',pval,12,rc) call pwrite(' Paired samples t test: p-value = ',pval,0,rc) call mwrite() return end Output program subroutine pwrite(c,p,io,rc) character(len=*):: c character*120 fmtspec character*80 rc ! character*120 rc intent (in) c if(p.eq.0.)then if(io.eq.12)write(12,fmt='(a,a)')c,' < 0.000001' if(io.eq.0 )write(rc,fmt='(a,a)')c,' < 0.000001' return elseif((p.gt.-1.).and.(p.lt.1.))then i=log10(abs(p)) j=-i+8 k=-i+5 else i=log10(abs(p)) if(i.le.4)then k=5-i j=8 else k=1 j=i+5 endif endif write(fmtspec,*)'(a,f',j,'.',k,')' ! write(12,*)' fmtspec=',fmtspec ! write(12,*)' c = ',c if(io.eq.12)write(12,fmt=fmtspec)c,p if(io.eq.0)write(rc,fmt=fmtspec)c,p ! c=' ' return end Library program for ranking SUBROUTINE RANK(N,INDX,IRANK) DIMENSION INDX(N),IRANK(N) DO 11 J=1,N IRANK(INDX(J))=J 11 CONTINUE RETURN END Linear regression !c Prog to do simple linear regression Subroutine regress(xx,n,alpha,beta,tval) character*80 rc,rc2 common/m/rc,cisizp Dimension xx(n,2),x(n),y(n),yr(n) write(12,*)' *** Regression ***' Do i=1,n read(20,*)xx(i,1),xx(i,2) x(i)=xx(i,1);y(i)=xx(i,2) end do sumx=0. sumy=0. sumxy=0. sumx2=0. Do 1 i=1,n sumx=sumx+x(i) sumy=sumy+y(i) sumxy=sumxy+x(i)*y(i) sumx2=sumx2+x(i)**2 1 Continue beta=(float(n)*sumxy-sumx*sumy)/(float(n)*sumx2-sumx**2) alpha=sumy/float(n)-beta*sumx/float(n) !c Work out hypothesis test for beta=0 and ci for beta symyr=0. Do 2 i=1,n yr(i)=alpha+beta*x(i) symyr=symyr+(yr(i)-y(i))**2 2 Continue s=sqrt(symyr/float(n-2)) stdb=s/sqrt(sumx2-sumx**2/float(n)) stda=stdb*sqrt(sumx2/real(n)) covab=-(stdb**2*sumx/real(n)) ! tval=beta*sqrt(sumx2-sumx**2/float(n))/s tval=beta/stdb df=n-2 call invts(cisizp/100.,df,cit) betm=beta-cit*stdb betp=beta+cit*stdb alfm=alpha-cit*stda alfp=alpha+cit*stda pval=betai(0.5*df,0.5,df/(df+tval*tval)) ! write(12,*)' alpha, beta, t =',alpha,beta,tval write(12,*)' The fitted equation is y = alpha + beta * x' call pwrite(' The estimated value of alpha is: ',alpha,12,rc2) call pwrite(' The estimated value of beta is : ',beta,12,rc2) call pwrite(' The standard error of the estimate of alpha is: ',stda,12,rc2) call pwrite(' The standard error of the estimate of beta is : ',stdb,12,rc2) call pwrite(' The covariance of alpha and beta is : ',covab,12,rc2) write(12,*)cisizp,'% confidence intervals for alpha',alfm,alfp write(12,*)cisizp,'% confidence intervals for beta ',betm,betp write(12,*)' If the value of y is unaffected by the value of x, beta = 0' call pwrite(' The t value for the hypothesis "beta = 0" is : ',tval,12,rc2) call pwrite(' The p-value for the hypothesis "beta = 0" is : ',pval,12,rc2) call pwrite(' Regression: p-value for no linear association : ',pval,0 ,rc ) call mwrite() call pearson(xx,n,r,pval) return end Program created by leahy's fortran windows winteracter module to manage windows output ! Winteracter module created : 27/Jun/2003 14:47:46 ! MODULE RESOURCE IMPLICIT NONE INTEGER, PARAMETER :: IDR_MENU1 = 30001 INTEGER, PARAMETER :: ID_OPEN = 40002 INTEGER, PARAMETER :: ID_EXIT = 40003 INTEGER, PARAMETER :: ID_DESCR = 40005 INTEGER, PARAMETER :: ID_ITEM6 = 40006 INTEGER, PARAMETER :: ID_signt = 40007 INTEGER, PARAMETER :: ID_wsrt = 40008 INTEGER, PARAMETER :: ID_pst = 40009 INTEGER, PARAMETER :: ID_Fisht = 40010 INTEGER, PARAMETER :: ID_mwt = 40011 INTEGER, PARAMETER :: ID_ist = 40012 INTEGER, PARAMETER :: ID_chisq = 40013 INTEGER, PARAMETER :: ID_kw = 40014 INTEGER, PARAMETER :: ID_anova = 40015 INTEGER, PARAMETER :: ID_spears = 40016 INTEGER, PARAMETER :: ID_regres = 40017 INTEGER, PARAMETER :: ID_whattesthlp = 40018 INTEGER, PARAMETER :: ID_ITEM19 = 40019 INTEGER, PARAMETER :: ID_pvaluehlp = 40020 INTEGER, PARAMETER :: ID_confidhlp = 40021 INTEGER, PARAMETER :: IDD_Signt = 101 INTEGER, PARAMETER :: IDF_favi = 1001 INTEGER, PARAMETER :: IDF_fav = 1002 INTEGER, PARAMETER :: IDF_unfav = 1003 INTEGER, PARAMETER :: IDF_unfavi = 1004 INTEGER, PARAMETER :: ID_tables = 40022 INTEGER, PARAMETER :: ID_normal = 40023 INTEGER, PARAMETER :: ID_formathlp = 40024 INTEGER, PARAMETER :: ID_moreinfo = 40026 INTEGER, PARAMETER :: IDD_wsr = 102 INTEGER, PARAMETER :: IDF_wsrex = 1005 INTEGER, PARAMETER :: IDF_wsrap = 1006 INTEGER, PARAMETER :: IDF_wsrinf = 1007 INTEGER, PARAMETER :: ID_viewedit = 40028 INTEGER, PARAMETER :: IDD_Fisher = 103 INTEGER, PARAMETER :: IDF_Head = 1008 INTEGER, PARAMETER :: IDF_STRING1 = 1009 INTEGER, PARAMETER :: IDF_STRING2 = 1010 INTEGER, PARAMETER :: IDF_STRING3 = 1011 INTEGER, PARAMETER :: IDF_STRING4 = 1012 INTEGER, PARAMETER :: IDD_MW = 104 INTEGER, PARAMETER :: IDF_Noofits = 1013 INTEGER, PARAMETER :: IDF_RADIOe = 1014 INTEGER, PARAMETER :: IDF_RADIOa = 1015 INTEGER, PARAMETER :: ID_pearson = 40029 INTEGER, PARAMETER :: IDD_normal = 105 INTEGER, PARAMETER :: IDF_zlabel = 1016 INTEGER, PARAMETER :: IDF_probability = 1017 INTEGER, PARAMETER :: IDF_zstring = 1018 INTEGER, PARAMETER :: IDF_pstring = 1019 INTEGER, PARAMETER :: ID_tdist = 40030 INTEGER, PARAMETER :: IDD_tdist = 106 INTEGER, PARAMETER :: IDF_tlabel = 1020 INTEGER, PARAMETER :: IDF_dflabel = 1021 INTEGER, PARAMETER :: IDF_plabel = 1022 INTEGER, PARAMETER :: IDF_dfstring = 1023 INTEGER, PARAMETER :: IDF_tstring = 1024 INTEGER, PARAMETER :: ID_fdist = 40031 INTEGER, PARAMETER :: IDD_fdist = 107 INTEGER, PARAMETER :: IDF_flabel = 1025 INTEGER, PARAMETER :: IDF_df1label = 1026 INTEGER, PARAMETER :: IDF_df2label = 1027 INTEGER, PARAMETER :: IDF_df1string = 1028 INTEGER, PARAMETER :: IDF_df2string = 1029 INTEGER, PARAMETER :: IDF_meslabel = 1030 INTEGER, PARAMETER :: IDF_fstring = 1031 INTEGER, PARAMETER :: ID_cdist = 40032 INTEGER, PARAMETER :: IDD_cdist = 108 INTEGER, PARAMETER :: IDF_LABELdf = 1032 INTEGER, PARAMETER :: IDF_LABELc = 1033 INTEGER, PARAMETER :: IDF_cSTRING = 1034 INTEGER, PARAMETER :: IDF_LABELp = 1035 INTEGER, PARAMETER :: IDD_Describe = 109 INTEGER, PARAMETER :: IDF_RADIO1 = 1036 INTEGER, PARAMETER :: IDF_RADIO2 = 1037 INTEGER, PARAMETER :: IDF_RADIO3 = 1038 INTEGER, PARAMETER :: IDF_RADIO4 = 1039 INTEGER, PARAMETER :: IDD_spears = 110 INTEGER, PARAMETER :: IDD_formati = 111 INTEGER, PARAMETER :: IDF_idcol = 1041 INTEGER, PARAMETER :: IDF_datcol = 1040 INTEGER, PARAMETER :: IDF_info = 1042 INTEGER, PARAMETER :: IDF_info4 = 1043 INTEGER, PARAMETER :: IDF_datcoli = 1044 INTEGER, PARAMETER :: IDF_idcoli = 1045 INTEGER, PARAMETER :: ID_import = 40027 INTEGER, PARAMETER :: IDF_RADIOp = 1046 INTEGER, PARAMETER :: IDF_info3 = 1047 INTEGER, PARAMETER :: IDF_info5 = 1048 INTEGER, PARAMETER :: IDD_icform = 112 INTEGER, PARAMETER :: IDF_RADIOi = 1049 INTEGER, PARAMETER :: IDF_info2 = 1050 INTEGER, PARAMETER :: IDD_DIALOG013 = 113 INTEGER, PARAMETER :: IDD_formatp = 114 INTEGER, PARAMETER :: IDF_datcolj = 1051 INTEGER, PARAMETER :: IDF_datcoljj = 1052 INTEGER, PARAMETER :: IDF_datcolii = 1053 INTEGER, PARAMETER :: ID_technthlp = 40034 INTEGER, PARAMETER :: ID_ITEM32 = 40035 INTEGER, PARAMETER :: ID_ITEM33 = 40036 END MODULE RESOURCE Sign test subroutine subroutine signt(k,nmk,p) character*80 rc n=k+nmk write(12,*)' *** SIGN TEST ***' write(12,*)k,' favourable comparisons ',nmk,' unfavourable comparisons' If(k.gt.n/2)k=n-k xn=n xk=k !c if xn is too big it will cause overflows therefore use normal approximation if(xn.gt.100.)then write(12,*)' Numbers too big for exact calculation' write(12,103)' instead used approximation based on Normal distribu& &tion' 103 format(a58) ex=xn/2. sig=sqrt(xn/4.) xkk=xk+0.5 p=gn(xkk,ex,sig) p=p*2. call pwrite(' p-value (2 tail) = ',p,12,rc) else write(12,*)' Exact calculation of p-value' !c Use algorithm suggested on p127 of Ross for calculating binomial probs xi=1. sum=1. if(k==0)goto 2 Do 1 i=0,k-1 xi=xi*(xn-float(i))/(float(i)+1.) sum=sum+xi 1 Continue 2 Continue !c write(*,*)xi,sum p=sum*(0.5**(n-1)) !c p-value of gt 1 is possible if n is even and k=n/2 as P(X=k) is counted !c in both tails if(p.gt.1.)p=1. call pwrite(' p-value (2 tail) = ',p,12,rc) endif return end Windows calling program for sign test subroutine Signtcal() ! Integer input routine with error checking for sign test use winteracter use resource type(win_message) :: message ! character(240) :: bigstr integer :: itype, check_state = 1, i1 = 0 character(80) :: rc Common/m/rc ! Display the input dialog call WDialogLoad(IDD_Signt) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing an integer value if(message%value1 == IDF_favi) then call read_intval(i1,IDF_favi, check_state) else if(message%value1 == IDF_unfavi) then call read_intval(i1,IDF_unfavi, check_state) end if end if end do ! Read the current values of the input fields call read_intval(if,IDF_favi, check_state) call read_intval(iuf,IDF_unfavi, check_state) ! Hide the dialog instead of unloading, so that previously entered values are preserved call WdialogHide() ! Turn fieldchange reporting off call WMessageEnable(FieldChanged,0) ! open(40,file='mserror.dat') ! write(40,*)'if= ',if,'iuf= ',iuf call signt(if,iuf,pval) call pwrite(' Sign test: p-value = ',pval,0,rc) !101 format(a22,f11.9) return end ! contains subroutine read_intval(i1, field_id, check_state) integer, intent(in out) :: i1 integer, intent(in) :: field_id, check_state integer :: stati character(20) :: intval_chars character(80) :: bigstr call WDialogGetString(field_id,intval_chars) if(len_trim(intval_chars) > 0) then ! open(40,file='mserror.dat') ! write(40,*)'intvalchars =',intval_chars ! Use an internal read to decode the integer value read(intval_chars,*,iostat=stati) i1 if (stati /= 0) then i1 = 0 if(check_state == 1) then call WDialogPutString(field_id,' ') bigstr = 'The input value is not a valid integer number.' call WMessageBox( OKOnly, & NoIcon, & CommonOK, & bigstr,& 'Error') end if end if end if end subroutine read_intval Library programs for sorting SUBROUTINE sort2i(N,RA,iRB) DIMENSION RA(N),iRB(N) L=N/2+1 IR=N 10 CONTINUE IF(L.GT.1)THEN L=L-1 RRA=RA(L) iRRB=iRB(L) ELSE RRA=RA(IR) iRRB=iRB(IR) RA(IR)=RA(1) iRB(IR)=iRB(1) IR=IR-1 IF(IR.EQ.1)THEN RA(1)=RRA iRB(1)=iRRB RETURN ENDIF ENDIF I=L J=L+L 20 IF(J.LE.IR)THEN IF(J.LT.IR)THEN IF(RA(J).LT.RA(J+1))J=J+1 ENDIF IF(RRA.LT.RA(J))THEN RA(I)=RA(J) iRB(I)=iRB(J) I=J J=J+J ELSE J=IR+1 ENDIF GO TO 20 ENDIF RA(I)=RRA iRB(I)=iRRB GO TO 10 END SUBROUTINE SORT(N,RA) DIMENSION RA(N) L=N/2+1 IR=N 10 CONTINUE IF(L.GT.1)THEN L=L-1 RRA=RA(L) ELSE RRA=RA(IR) RA(IR)=RA(1) IR=IR-1 IF(IR.EQ.1)THEN RA(1)=RRA RETURN ENDIF ENDIF I=L J=L+L 20 IF(J.LE.IR)THEN IF(J.LT.IR)THEN IF(RA(J).LT.RA(J+1))J=J+1 ENDIF IF(RRA.LT.RA(J))THEN RA(I)=RA(J) I=J J=J+J ELSE J=IR+1 ENDIF GO TO 20 ENDIF RA(I)=RRA GO TO 10 END Spearman's rho ! Last change: DK 1 Jan 2000 5:39 pm ! Test prog to calculate spearman's rho subroutine spears(ntot,pval) use winteracter use resource type(win_message):: message Dimension x1(ntot),x2(ntot),rank1(ntot),rank2(ntot) Dimension id2(10000) character*80 rc,rc2 character*256 string1 common/m/rc,cisizp write(12,*)' *** SPEARMAN''S RHO TEST ***' call imprint() space1=0. space2=0. Do i=1,ntot read(20,*)x1(i),x2(i) space1=max(space1,spacing(x1(i))) space2=max(space2,spacing(x2(i))) end do space1=2.5*space1 space2=2.5*space2 ! write(12,*)' ntot = ',ntot if(ntot.lt.14) call esprdat(ntot,x1,x2,mmm,id2,d2av) ! Making the decision between exact & approximate methods if(ntot.le.7)then iset=1 elseif((ntot.ge.14).or.(mmm.eq.0))then iset=0 else call wdialogload(IDD_spears) call wdialogshow(-1,-1,0,SemiModeless) nit=ifac(ntot)/1000 write(string1,201)' The exact method requires ',nit,'*1000& & comparisons. Choose preferred method.' 201 format(a27,i6,a44) call WDialogPutString(IDF_Noofits,string1) call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select endif end do call wdialoggetradiobutton(IDF_RADIOe,iset) call WDialogHide() endif ! Choice of exact or approximate method complete ! write(12,*)' Total number of rearrangements of ties = ',mmm if(iset==1)then ! ******************************************************************** ! Exact calculation ! Formulate & Step through a method of counting all these combinations call sprfac(ntot,mmm,id2,pval) call pwrite(' Spearman''s test of correlation: p-value (2-tail)= ',pval,12,rc) call pwrite(' Spearman''s test of correlation: p-value (2-tail)= ',pval,0 ,rc) call mwrite() else ! Approximate correction for ties (Reference R. Meddis: Stats using ranks) call frank(x1,ntot,rank1,t1,space1) call frank(x2,ntot,rank2,t2,space2) sd=0 ss=0. st=t1+t2 do i=1,ntot sd=sd+(rank1(i)-rank2(i))**2 ss=ss+(rank1(i)+rank2(i))**2 end do rho1=1.-6.*sd/real(ntot**3-ntot) rho2=(ss/real(ntot**3-ntot)-real(ntot+1)/real(ntot-1))*12./st-1. if(abs(abs(rho2)-1.).lt.0.00001)then pval=2./gamma(ntot+1.) call pwrite(' Spearman''s test of correlation: p-value (2-tail)= ',pval,0 ,rc) call mwrite() write(12,*)' Spearman''s rho is exactly + or - 1' call pwrite(' Exact p-value (2-tail)= ',pval,12 ,rc2) return end if t=rho2*sqrt(real(ntot)-2.)/sqrt(1-rho2**2) df=ntot-2 ! write(*,*)'t,df',t,df pval=betai(0.5*df,0.5,df/(df+t*t)) if(abs(st-2.).gt.0.00001)then call pwrite(' Spearman''s rho with simple correction for ties = ',rho1,12,rc2) call pwrite(' Variance restoring adjustment to rho with ties = ',rho2,12,rc2) call pwrite(' Approximate p-value with correction for ties = ',pval,12,rc2) else call pwrite(' Spearman''s rho = ',rho1,12,rc2) call pwrite(' Approximate p-value = ',pval,12,rc2) endif ! call pwrite(' p-value (2-tail) for hypothesis "rho=0" = ',pval,12,rc2) call pwrite(' Spearman''s test of correlation: p-value (2-tail)= ',pval,0 ,rc) call mwrite() end if return end subroutine frank(x,ntot,rank,t,space) dimension x(ntot),rank(ntot),indx(ntot),iaa(ntot) call indexx(ntot,x,indx) ! t tie correction factor see Medis p275 t=0. Do 1 i=1,ntot iaa(i)=1 1 continue ! Generate an array iaa listing number of ties so that iaa(jj) is the ! number of ties in the jjth group of ties. ia=1 jj=1 Do 2 ii=2,ntot if(abs(x(indx(ia))-x(indx(ii))).lt.space)then ! if(x(indx(ia)).eq.x(indx(ii)))then iaa(jj)=iaa(jj)+1 else jj=jj+1 ia=ii endif 2 Continue ! jj is now the number of groups of ties (Counting unique values as a tie ! of length 1.) ! write(*,*)(iaa(i),i=1,jj) mm=0 isum=0 Do 3 ii=1,jj Do 4 kk=1,iaa(ii) mm=mm+1 rank(indx(mm))=real(isum)+real(iaa(ii)+1)/2. 4 Continue isum=isum+iaa(ii) t=t+iaa(ii)**3-iaa(ii) 3 Continue ! write(*,101)(x(i),i=1,ntot) ! write(*,101)(rank(i),i=1,ntot) 101 format(10f8.2) t=1.-t/real(ntot**3-ntot) return end factorial calculation ! subroutine to do factorials recursively subroutine sprfac(nn,mmm,id2,prob) Dimension id2(10000) common/spr/ kount,jj(19),ic,mr ic=0 kount=0 n=1 ! ipne & ipeq are counters for the number of arrangements in which the ! ssq differences in ranks are lt & equal (repectively) the ! actual possible values for ssq when tied ranks are broken ipne=0 ipeq=0 mr=ifac(nn) idenom=mmm*mr mr=mr/1000 call sprfct(nn,n,mmm,id2,ipne,ipeq) ! For 2-tail probs need to find which is smaller (.le.) or (.ge.) and double it iplt=idenom-ipne-ipeq ipne=min(ipne,iplt) ip=ipne+ipeq ip=min(2*ip,idenom) write(12,*)' Exact p-value = ',ip,'/',idenom prob=real(ip)/real(idenom) return end recursive subroutine sprfct(nn,n,mmm,id2,ipne,ipeq) dimension id2(10000) Character (80) rc,string2 common/m/rc common/spr/ kount,jj(19),ic,mr Do 1 i=1,nn Do 2 k=1,n-1 if(i.eq.jj(k))goto 3 2 Continue jj(n)=i np1=n+1 if(n.eq.nn)then kount=kount+1 ! goto 6 ! Kiloiteration counter if(kount.lt.1000)goto 6 im=1000 ic=kount/im if((ic*im).eq.kount)then call windowclear() write(rc,122)mr,' k comparisons required ',ic& ,' k comparisons completed' call windowoutstring(500,1000,rc) 122 format(i10,a27,i10,a27) string2=' To abort the program press ctrl-alt-del' call windowoutstring(1000,2000,string2) endif 6 Continue ! write(*,*)(jj(k),k=1,nn) idd2=0. do k=1,nn idd2=idd2+(jj(k)-k)**2 end do do k=1,mmm if(id2(k).gt.idd2)ipne=ipne+1 if(id2(k).eq.idd2)ipeq=ipeq+1 end do else call sprfct(nn,np1,mmm,id2,ipne,ipeq) end if 3 Continue 1 continue return end ! subroutine to do factorials recursively subroutine fct(knt,nn,iarray) Dimension iarray(10) common kount,kkount,jj(10),irray(10) kount=0 kkount=knt n=1 call fact(nn,n) Do k=1,nn iarray(k)=irray(k) end do return end recursive subroutine fact(nn,n) common kount,kkount,jj(10),iarray(10) Do 1 i=1,nn Do 2 k=1,n-1 if(i.eq.jj(k))goto 3 2 Continue jj(n)=i np1=n+1 if(n.eq.nn)then kount=kount+1 ! write(*,*)(jj(k),k=1,nn) if(kount.eq.kkount)then Do k=1,nn iarray(k)=jj(k) end do endif else call fact(nn,np1) end if 3 Continue 1 continue return end Wilcoxon signed rank test ! Prog to calculate Wilcoxon signed rank test (with ties) subroutine wsrt(x,n,pval) use winteracter use resource Dimension x(n,2),d(n),ad(n),isad(n),iai(n),iaii(n) Dimension ii(30),nck(n),kk(n),iarray(50),iww(0:2000),kisad(n) type(win_message) :: message Character (80) rc ! character*12 fmtspec,date,time,zone character*256 string1,string2 common/m/rc ! x array of data ! d difference ! ad absolute difference ! isad sign of absolute difference = 1 if >0 =0 if <0 ! iai number in each group of difference ties ! iaiinumber in each group of difference ties that are positive ! ii array of 1's & 0's corresponding to any particular arrangement of ! positive & negative differences ! nck(n) number of arrangements in nth tie group of + & - ! kk(n) counter for number of arrangements in nth tie group ! iarray particular arrangement in of + & - in a tie group ! space interval for floating point numbers to be regarded as distinct open(14,file='diffs.dat') open(15,file='sorted.dat') write(14,*)n,' = number of values' write(15,*)n,' = number of values' space=0. Do 11 i=1,n read(20,*,end=12)x(i,1),x(i,2) d(i)=x(i,2)-x(i,1) write(14,*)d(i) space=max(space,spacing(x(i,1)),spacing(x(i,2))) 11 Continue space=2.5*space 12 Continue write(12,*)' *** WILCOXON SIGNED RANK TEST ***' write(13,*)' *** WILCOXON SIGNED RANK TEST ***' call imprint() write(12,*)' Number of data pairs = ',n write(12,*)' Differences are saved in file diffs.dat, sorted differences& & are saved in file sorted.dat' write(12,*)' Additional information is available in file xsinfo.dat' Do 20 i=1,n ad(i)=abs(d(i)) isad(i)=(d(i)/ad(i)+1.00001)/2. 20 Continue call sort2i(n,ad,isad) d=ad*(2*isad-1) Do i=1,n write(15,*)d(i) end do ! write(12,125)( ad(i),i=1,n) ! write(12,123)(isad(i),i=1,n) 123 format(39i5) 125 format(39f5.1) ! Start of modification for ties ! initialise array of ties Do 124 i=1,n iai(i)=1 iaii(i)=0 124 continue ! Generate an array iai listing number of ties so that iai(jj) is the ! number of ties in the jjth group of ties. ! If iai(jj) > 1, iaii(jj) is number in this group of ties that are ties ! of positive differences ia=1 jj=1 Do 22 ik=2,n if(abs(ad(ia)-ad(ik)).lt.space)then iai(jj)=iai(jj)+1 if((ia.eq.ik-1).and.(isad(ia).eq.1))iaii(jj)=1 iaii(jj)=iaii(jj)+isad(ik) else jj=jj+1 ia=ik endif 22 Continue ! jj is now the number of groups of ties (Counting unique values as a tie ! of length 1.) ! write(12,*)(iai(i),i=1,jj) ! write(12,*)(iaii(i),i=1,jj) write(13,*)' Number in each group of tied differences, value 1 means& & no tie' write(13,*)'(If number in group >1 bracketed number is the number of& & positive differences)' write(13,*)(iai(i),'(',iaii(i),')',i=1,jj) ! Calculate number of combinations where how a tie is broken makes a difference ! This depends on a product of all n-Choose-k (nck) possible terms where n is ! number in a tie and k is the number of type 2. Initialise this repeated ! multiplication with mmm mmm=1 Do 23 i=1,jj if(iai(i).eq.1)then nck(i)=1 else nck(i)=ncomb2(iai(i),iaii(i)) endif mmm=mmm*nck(i) 23 Continue ! write(13,*)' Number combinations = ',mmm ! Decide on whether exact or approximate method is to be used if(n.le.10)then iset=1 elseif(n.gt.40)then iset=0 else call wdialogload(IDD_wsr) call wdialogshow(-1,-1,0,SemiModeless) nit=(2**n)/1000 write(string1,201)' The exact method requires ',nit,' k comparisons& &. Choose preferred method.' 201 format(a27,i6,a43) call WDialogPutString(IDF_wsrt,string1) call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select endif end do call wdialoggetradiobutton(IDF_wsrex,iset) call WDialogHide() ! write(12,*)' iset =',iset endif ! End of decision between exact and approximate method ! Formulate & Step through a method of counting all these combinations ! and for each possible combination break the ties ! First:- headings ! lcc=max(n-6,1) ! write(fmtspec,112)' (',lcc,'x,a)' 112 format(a,i2,a) ! write(12,fmt=fmtspec)'Order Wilcoxon SR W p-value' iwwav=0. mmmm1=mmm-1 Do 26 i=0,mmmm1 itmp=i Do 33 ik=1,jj kk(ik)=mod(itmp,nck(ik)) itmp=itmp/nck(ik) 33 Continue ! kk now consists of an array the length of the number of ties with each ! element of the array being a number between (1 less than) 1 and the ! relevant n-Choose-k. This number is used to select a particular combination ! of ways each particular collection of ties between group 1 and 2 can be ! broken - using the subroutine ncombn. ! write(*,*)' kk = ',(kk(ik),ik=1,jj) nnn=0 Do 27 ik=1,jj if(nck(ik).gt.1)then call ncombn2(iai(ik),iaii(ik),kk(ik),iarray) Do 28 j=1,iai(ik) m=nnn+j isad(m)=iarray(j) 28 Continue endif nnn=nnn+iai(ik) 27 Continue iwp1=0 Do 14 k=1,n kisad(k)=k*(isad(k)*2-1) iwp1=iwp1+isad(k)*k 14 Continue write(13,*)' Ranking ',(kisad(j),j=1,n),'Sum + ranks = ',iwp1 iww(i)=iwp1 iwwav=iwwav+iwp1 26 Continue wwav=real(iwwav)/real(mmm) If(mmm.eq.1)then iw0=min(wwav,(n*(n+1)/2-wwav)) write(12,*)' Wilcoxon''s W = ',iw0 else write(12,*)' There are ties which can be broken in ',mmm,' ways' write(12,*)' The Wilcoxon W+ values resulting from breaking ties in all& & possible ways are:-' write(12,*)(iww(i),i=0,mmmm1) w0=min(wwav,(real(n*(n+1)/2)-wwav)) write(12,*)' The average W is:- ',w0 endif ! Work out if W+ or W- values are appropriate If(wwav.gt.real(n*(n+1)/4))then wwav=n*(n+1)/2-wwav do i=1,mmm iww(i-1)=n*(n+1)/2-iww(i-1) end do end if ! ******************************************************************** ! Exact calculation if(iset.eq.1)then ! Cover all 2^n possibilities and obtain binary representation ip=0 ie=2**n-1 m=n-10 im=1000 103 format(a1,i10,a23,i10,a23) 122 format(i10,a27,i10,a27) ! 100Kiloiteration counter ic=0 Do 1 in=ie,0,-1 if(ie.lt.1000)goto 6 if(((in/im)*im).eq.in)then ic=ic+1 call windowclear() write(rc,122)nit,' k comparisons required ',ic& ,' k comparisons completed' call windowoutstring(500,1000,rc) string2=' To abort the program press ctrl-alt-del' call windowoutstring(1000,2000,string2) endif 6 Continue kkk=in ! Create an array ii() that on each iteration contains each of the 2**n ! possible assignments of positives and negatives for each of the n ranks Do 2 j=n,1,-1 ii(j)=kkk/2**(j-1) kkk=kkk-ii(j)*2**(j-1) 2 Continue ! write(*,*)ii iwp=0 Do 4 k=1,n iwp=iwp+ii(k)*k 4 Continue Do 29 i=0,mmmm1 ! write(12,*)iw,(iww(j),j=0,mmmm1),ip if(iwp.le.iww(i))ip=ip+1 29 Continue 1 Continue i2n=2**n nopt=i2n*mmm ! The above gives an a posteriori determined one sided test - double ip count ! for 2-sided test ip2=ip*2 pval=float(ip2)/float(nopt) ! pval can be greater than 1 as doubling a 1 sided test can count a central ! value twice as more than half the distribution may be less than or equal ! to the central value write(12,*)' Exact calculation of p-value' if(pval.lt.1)then write(12,*)' p-value (2-tail) = ',ip2,'/',nopt,' or ',pval,' or ' call pwrite(' p-value (2-tail) = ',pval,12,rc) else pval=1. write(12,*)' p-value (2-tail) = ',pval endif else xmu=float(n*(n+1))/4. sigma=sqrt(float(n*(n+1)*(2*n+1))/24.) pval= 2.*gn(wwav,xmu,sigma) write(12,*)' Approximate calculation of p-value' call pwrite(' p-value (2-tail) = ',pval,12,rc) endif call pwrite(' Wilcoxon signed rank test: p-value = ',pval,0,rc) call mwrite() close(14) close(15) return end Inverse gaussian normal function xinvgn(p) !c write(*,*)' prog to calculate inverse gaussian normal' ! pi=3.1415927 pi=3.1415927 if((p.le.0.).or.(p.ge.1.))then write(10,*)' Input error - p out of range in invgn =',p goto 2 endif sgn=1. pp=p if(p.lt.0.5)then sgn=-1. pp=1.-pp endif x0=10. icount=0 1 Continue icount=icount+1 phix=gn(x0,0.,1.) ord=exp(-0.5*x0*x0)/sqrt(2.0*pi) denom=ord-0.5*(pp-phix)*x0 x1=x0+(pp-phix)/denom test=gn(x1,0.,1.)/pp-1. if(abs(test).lt.0.000001)goto 2 if(icount.gt.100)then if(abs(test).gt.0.000003) write(10,*)& ' >100 iterations in invgn. Rel error = ',test goto 2 endif x0=x1 goto 1 2 continue xinvgn=x1*sgn return end Programs about formatting subroutine formatcal() ! Integer input routine with error checking for format import use winteracter use resource type(win_message) :: message integer :: itype, check_state = 1 character(80) :: rc Common/m/rc ! Display the input dialog call WDialogLoad(IDD_format) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing an integer value if(message%value1 == IDF_datcoli) then call read_intval(i1,IDF_datcoli, check_state) else if(message%value1 == IDF_idcoli) then call read_intval(i1,IDF_idcoli, check_state) end if end if end do ! Read the current values of the input fields call read_intval(ii,IDF_datcoli, check_state) call read_intval(jj,IDF_idcoli, check_state) ! Hide the dialog instead of unloading, so that previously entered values are preserved call WdialogHide() ! Turn fieldchange reporting off call WMessageEnable(FieldChanged,0) ! open(40,file='mserror.dat') ! write(40,*)'if= ',if,'iuf= ',iuf call cformat(ii,jj) return end subroutine formatcali() ! Integer input routine with error checking for format import use winteracter use resource type(win_message) :: message integer :: itype, check_state = 1 character(80) :: rc Common/m/rc ! Display the input dialog call WDialogLoad(IDD_formati) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing an integer value if(message%value1 == IDF_datcoli) then call read_intval(i1,IDF_datcoli, check_state) else if(message%value1 == IDF_idcoli) then call read_intval(i1,IDF_idcoli, check_state) end if end if end do ! Read the current values of the input fields call read_intval(ii,IDF_datcoli, check_state) call read_intval(jj,IDF_idcoli, check_state) ! Hide the dialog instead of unloading, so that previously entered values are preserved call WdialogHide() ! Turn fieldchange reporting off call WMessageEnable(FieldChanged,0) ! open(40,file='mserror.dat') ! write(40,*)'if= ',if,'iuf= ',iuf call cformati(ii,jj) return end subroutine formatcalp() ! Integer input routine with error checking for format import use winteracter use resource type(win_message) :: message integer :: itype, check_state = 1 character(80) :: rc Common/m/rc ! Display the input dialog call WDialogLoad(IDD_formatp) call WDialogShow(-1, -1, 0, SemiModeless) ! Make sure that FieldChanged messages are reported call WMessageEnable(FieldChanged,1) do ! perform on-the-fly error checking with the event loop call WMessage(itype, message) if(itype == PushButton) then select case(message%value1) case (IDOK) exit case (IDCANCEL) call WDialogHide() call WMessageEnable(FieldChanged,0) return end select else if(itype == FieldChanged) then ! Retreive the strings containing an integer value if(message%value1 == IDF_datcolii) then call read_intval(ii,IDF_datcolii, check_state) else if(message%value1 == IDF_datcoljj) then call read_intval(jj,IDF_datcoljj, check_state) end if end if end do ! Read the current values of the input fields call read_intval(ii,IDF_datcolii, check_state) call read_intval(jj,IDF_datcoljj, check_state) ! Hide the dialog instead of unloading, so that previously entered values are preserved call WdialogHide() ! Turn fieldchange reporting off call WMessageEnable(FieldChanged,0) open(40,file='mserror.dat') write(40,*)'ii= ',ii,'jj= ',jj call cformatp(ii,jj) return end Main windows statistical test calling program USE winteracter use resource ! real, dimension (:), allocatable::x integer, dimension (:), allocatable::lc real, dimension (:,:), allocatable::xx character*255 fname character*700 logo character (120)::string1,string2,string3 character(80) :: rc common/m/rc,cisizp TYPE(win_style):: window type(win_message)::message ! Open(11,file='settings.dat') read(11,*)cisizp ! open(10,'wninfo.dat') open(12,file='moreinfo.dat') open(13,file='xsinfo.dat') read(11,*)string1 fname='paired_no_ties.dat' call winitialise(' ') iscrw=winfoscreen(1) iscrh=winfoscreen(2) window%width=iscrw window%height=iscrh ! call WindowUnitsToPixels(9999,9999,ixpix,iypix) window%flags=sysmenuon+minbutton+maxbutton+maxwindow window%x=0 window%y=0 ! window%width=ixpix ! window%height=iypix window%menuid=IDR_MENU1 window%title='Public Domain Statistics - with page references to & & Statistics with Common Sense by D Kault, Greenwood Press 2003' call windowopen(window) call windowselect(0) logo='This program by David Kault, James Cook University, represents a tiny & & increment to centuries of intellectual effort in the cause of human& & advancement. It is therefore registered to humanity to be copied freely and & &used in the interest of humanity and the environment. Any use for private & & profit contrary to these interests renders the user liable to confiscation of& & all profits. For suggestions or source code, email David.Kault@jcu.edu.au& & Numbers are page references to the book Statistics with Common Sense by& & David Kault, Greenwood Press, 2003.' call WMessageBox(0,0,1,logo,' ') call WindowClear() iopenflag=0 iflag=0 Do call wmessage(itype,message) if(iflag.eq.1)call mwrite() select case(itype) case(MenuSelect) select case(Message%Value1) case(ID_OPEN) call wselectfile('data file|*.dat|',prompton,fname,'loaddat') if(winfodialog(exitbuttoncommon).eq.commonopen)then iopenflag=1 endif case(ID_viewedit) string2=trim(string1)//' '//fname ! write(10,*)string2 call system(string2) case(ID_import) call icform() fname='newdata.dat' iopenflag=1 case(ID_DESCR) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) call mstdcall() endif case(ID_normal) call normlcal() case(ID_tdist) call tdistcal() case(ID_fdist) call fdist() case(ID_cdist) call cdistcal() case(ID_Signt) call signtcal() call mwrite();iflag=1 case(ID_wsrt) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)n allocate (xx(n,2)) call wsrt(xx,n,pval) rewind 20;deallocate (xx);close(20);iflag=1 endif case(ID_pst) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)n allocate (xx(n,2)) call pst(xx,n,pval) rewind 20;deallocate (xx);close(20);iflag=1 endif case(ID_Fisht) call fishtcal();iflag=1 case(ID_mwt) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)m,n mx=max(m,n) allocate (xx(mx,2)) call mwtt(xx,m,n,mx,pval) rewind 20;deallocate (xx);close(20);iflag=1 endif case(ID_ist) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)m,n mx=max(m,n) allocate (xx(mx,2)) call ist(xx,m,n,mx,pval) rewind 20;deallocate (xx);close(20);iflag=1 endif case(ID_chisq) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)m,n allocate (xx(m,n)) call chisq(xx,m,n,pval) rewind 20;deallocate (xx);close(20);iflag=1 endif case(ID_kw) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)nc allocate (lc(nc)) read(20,*)(lc(i),i=1,nc) nr=maxval(lc) allocate (xx(nr,nc)) call kwr(lc,nr,nc,pval) rewind 20;deallocate (xx);deallocate (lc);close(20);iflag=1 endif case(ID_anova) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)nc allocate (lc(nc)) read(20,*)(lc(i),i=1,nc) nr=maxval(lc) allocate (xx(nr,nc)) call anova(lc,nr,nc) rewind 20;deallocate (xx);deallocate (lc);close(20);iflag=1 endif case(ID_spears) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)n call spears(n,pval) rewind 20;close(20);iflag=1 endif case(ID_regres) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)n allocate (xx(n,2)) call regress(xx,n,a,b,p) rewind 20;deallocate (xx);close(20);iflag=1 endif case(ID_pearson) if(iopenflag.eq.0)then call wmessagebox(0,0,1,'you must first open a data file','') else open(20,fname) read(20,*)n allocate (xx(n,2)) do i=1,n read(20,*)xx(i,1),xx(i,2) end do write(12,*)' *** Pearson''s correlation ***' call pearson(xx,n,r,pval) call pwrite(' p-value (2-tail) for hypothesis "r=0" = ',pval,0 ,rc) call mwrite() rewind 20;deallocate (xx);close(20);iflag=1 endif case(ID_moreinfo) close(12);close(13) string3=trim(string1)//' '//'moreinfo.dat' call system(string3) open(12,file='moreinfo.dat',position='APPEND') open(13,file='xsinfo.dat',position='APPEND') ! call mwrite() case(ID_pvaluehlp) string3=trim(string1)//' '//'pvalue.txt' call system(string3) case(ID_whattesthlp) string3=trim(string1)//' '//'whattest.txt' call system(string3) case(ID_formathlp) string3=trim(string1)//' '//'format.txt' call system(string3) case(ID_confidhlp) string3=trim(string1)//' '//'confid.txt' call system(string3) case(ID_technthlp) string3=trim(string1)//' '//'technote.txt' call system(string3) case(ID_EXIT) exit endselect Case(CloseRequest) exit end select end do call windowclose() end ***Model data for ANOVA with no ties calculation 4 = Number of columns 3 2 2 4 These numbers are the number of values in the 1st 2nd 3rd & 4th cols 1 4 1.1 8 2 5 2.1 9 3 10 11 Data in this format can be analysed with the Kruskal-Wallis test or the 1-way ANOVA test Summary statistics, that is simple data description can also be obtained by clicking on data description ***Model data for ANOVA with ties calculation 4 = Number of columns 3 2 2 4 These numbers are the number of values in the 1st 2nd 3rd & 4th cols 1 4 1 4 2 5 2 5 3 10 11 Data in this format can be analysed with the Kruskal-Wallis test or the 1-way ANOVA test Summary statistics can also be obtained by clicking on Data description ***Model data for Chi-square calculation 3 4 This tells the program there are 3 rows & 4 columns 10 9 8 21 20 18 16 14 30 27 24 7 *** CHI-SQUARE TEST OF ASSOCIATION *** Using data file C:\pdsw\chisquare.dat at 13:56:06 on 20/05/03 Obs 10.0000000 9.00000000 8.00000000 21.0000000 Exp 14.1176472 12.7058821 11.2941179 9.88235283 Obs 20.0000000 18.0000000 16.0000000 14.0000000 Exp 20.0000000 18.0000000 16.0000000 14.0000000 Obs 30.0000000 27.0000000 24.0000000 7.00000000 Exp 25.8823528 23.2941170 20.7058830 18.1176472 Degrees of Freedom = 6 Chi-square = 24.3409 Chi-square test of association: p-value = 0.00045197 ***Model data for paired samples test 5 = Number of pairs 1 3 2 5 3 7 4 8.1 5 9.2 Data in this format can be analysed with the Wilcoxon signed rank test or the Paired samples t-test or Spearman rho test or Pearson correlation or Regression Summary statistics can also be obtained by clicking on Data description 5 = Number of pairs 1 -2 2 5 3 7 4 8 5 9 Data in this format can be analysed with the Wilcoxon signed rank test or the Paired samples t-test or Spearman rho test or Pearson correlation or Regression Summary statistics can also be obtained by clicking on Data description95. This specifies the size of the confidence interval used notepad This is the directory and name of the editor Settings.dat is a file that specifies the settings above for pds - the Public Domain Statistics program 3 1 10 2 20 3 30 Data in this format can be analysed with the Wilcoxon signed rank test or the Paired samples t-test or Spearman's rho test or Pearson correlation or Regression. However, this data set has been designed to show the capability of the program in performing a Spearman's rho test where there are no ties and obtaining an exact answer. Summary statistics can also be obtained by clicking on Data description 7 = Number of pairs 1 1 1 2 2 2 7 9 7 10 9 11 11 12 Data in this format can be analysed with the Wilcoxon signed rank test or the Paired samples t-test or Spearman rho test or Pearson correlation or Regression. However, this data set has been designed to show the capability of the program in performing a Spearman's rho test where there are ties in both columns and obtaining an exact answer. Summary statistics can also be obtained by clicking on Data description 8 3 The 8 & 3 are the number of values in 1st & 2nd columns respectively 14 20.1 15 19.1 16 23.1 17 18 19 20 18 Data in this format can be analysed with the Mann-Whitney test or the Independent samples t-test Summary statistics can also be obtained by clicking on Data description8 3 The 8 & 3 are the number of values in 1st & 2nd columns respectively 14 20 15 19 16 23 17 18 19 20 18 Data in this format can be analysed with the Mann-Whitney test or the Independent samples t-test Summary statistics can also be obtained by clicking on Data descriptionTheory ***text information The term "confidence interval" is a misleading term for a range of values in which a population average might lie. This range is calculated according to whether it would be compatible to some extent with the observed data. Compatibility is assessed by use of p-values. A 95% confidence interval corresponds to compatibility as assessed using the criterion of a p-value of 0.05 or 5%; a 99% confidence interval similarly corresponds to use of a p-value of 0.01 or 1%. Confidence intervals can be calculated in a number of different circumstances. One common example is the calculation of confidence intervals for the difference made by an intervention. In making this calculation we assume that there is some underlying average value for something of interest that we can measure on individuals in the population. As the population may be large or infinite we never know the true average value exactly, but we can get an idea of this average by measuring a sample. We then perform an intervention on this sample from the population and measure the same thing of interest after the intervention. What we want to know is how much difference on average does the intervention make to the population, but all we know is how much difference it makes to the sample. It is not possible to go from knowledge about the average change in the sample to a statement about the what the average change in the population is likely to be in the light of this knowledge. This is not possible because we cannot define probability in the light of some knowledge without first having defined probability in the absence of that knowledge. Undefined ideas about what the population might be like in the absence of knowledge about the sample + precise knowledge about the average and variability in the sample cannot equal precisely defined probability about the population. Therefore we cannot say that, for instance, after measuring the differences the intervention made on the individuals in the sample, that "there is a 95% chance that the intervention moves the population average by somewhere between -2 and +9". However, we can make a weaker statement. The weaker statement would be that "if the true effect of the intervention on the population average was a change anywhere in the range from -2 to +9, then the average difference that we got in our sample would not be "suprising". The criteria we use for "not suprising" is that the average change in the sample is one of the "more common values" that would occur should the true difference to the population average turn out to be a figure anywhere between -2 and +9". The "more common values" here correspond to values that occur 95% of the time. In particular the average change in the sample might have been +3.5. We also have information about how good this sample is at giving us an accurate picture from knowing the number of individuals in the sample and knowing the variability in the sample. This knowledge allows us to say that if the true population change was -2 then taking sample of this size from a population this variable would, 95% of the time, give us a sample whose average change was in the range -7.5 to +3.5 - the amount +3.5 is then (only just) one of the more common values that we would expect if the population change is actually -2. Likewise if the true population change was +9 then taking sample of this size from a population this variable would, 95% of the time, give us a sample whose average change was in the range +3.5 to +14.5 - the amount +3.5 is again one of the more common values that we would expect even if the population amount is actually +9. Put another way, our criterion for values that are not suprising, is the criterion that the p-value is > 0.05. We abbreviate all this by saying that the 95% confidence interval is -2 to +9. We can calculate the confidence interval by knowing three things - how much the average in the sample changed, the amount of variability of the changes for the individuals in the sample and the numbers of individuals in the sample. If we genuinely have no prior idea whatsoever about where the population value is likely to be, it may be reasonable to think of confidence intervals as probability intervals. However, this interpretation is wrong whenever we have prior ideas. For example, our intervention might be something which commonsense tells us can only improve an average, not worsen it. The improvement may be minimal, but in the true English meaning of the word "confident", we can be 100% confident that the true average amount of improvement is a number greater than zero even though the 95% confidence interval is -2 to +9. In that case although both the figures -2 and +9 are equally compatible with the data, our general knowledge rules out the possibility of figures below zero. Just because negative figures are compatible with the data does not mean they are at all probable. Negative figures might be in the "95% confidence interval", but this is just a misleading expression that really means that some negative figures are compatible with the data, even though there may be no possibility of a negative figure for the true average amount of improvement. In a rational world "95% confidence intervals" would be renamed "5% compatibility intervals", but unfortunately the misleading terminology has become ossified by long standing usage. Changing confidence intervals used by pds By default, pds gives 95% confidence intervals. If other confidence intervals are required then in the file settings.dat change the number 95. at the start of the first line of the file, to any other number between 0. and 100. (excluding the two values 0. & 100. themselves). Numerical difficulties causing pds to abort are possible when values very close to either 0. or 100. are entered. You will have to exit pds and re-enter it for the new setting to take effect. Further explanation is given on pages 149-162 of my book (Statistics with Common Sense. David Kault, Greenwood Press 2003). 14.0000000 1 15.0000000 1 16.0000000 1 17.0000000 1 18.0000000 1 19.0000000 1 20.0000000 1 18.0000000 1 20.1000004 2 19.1000004 2 23.1000004 2 Using data file C:\LF9555\pdsw\unpaired_no_ties.dat at 12:41:35 on 13/05/02 Data is input into pds in two ways - A) In the case of tests where the data comprises just a few numbers (Sign test, Fisher's Exact test/2*2 Chi-squared test), data is entered directly from the keyboard. B) For the other tests data is typed into a text file using an editor such as "Notepad". The various tests expect data to have particular formats. Model data files are included with with pds. The requirements for these data files are described in detail below, but inspection of the included model files may make the requirements clear. New data can be entered in three ways - B1) It can be typed in by opening one of the model files from within pds (click "open" in the pds file menu) edit the file (click view/edit in the file menu - this invokes the Notepad editor) then save it using "save as" (click Save As in the Notepad file menu) under a new name. The new name should have the extension ".dat". Then exit Notepad and click open in the pds file menu and select the file containing your new data. (By default the text editor used by pds is Notepad. This default can be changed by editing the line containing the word "notepad" in the settings.dat file and any other editor/word processor that can save data in ASCII format can be used instead.) B2) Alternatively, your data can be entered using Notepad (or any other editor/word processor that saves the data as ASCII text) outside the pds program. The data should have the appropriate format, be given a name with the extension ".dat" and it is convenient to keep the data in the same subdirectory/folder as pds. B3) Data can also be imported from other statistics packages, provided it has first been exported from the other package into an ASCII file named "import.dat" located in the same subdirectory/folder as pds. Pds is simpler than most packages and assumes only one or two measurements have been made on each individual. Most other packages have the facility for recording more than two measurements on each individual in an appropriate number of columns. The import facility allows pds to pick out the 1 or 2 columns that are required for a particular analysis. Unlike other statistics packages, pds expects measurement on two or more groups to be formatted in an intuitive way with adjacent columns giving measurements in each of the groups with appropriate headings (see model data files unpaired_no_ties.dat, unpaired_with_ties.dat, anova_no_ties.dat, anova_with_ties.dat). By contrast, most statistics packages string the measurements within all of the groups into one long column and then require an adjacent column of equal length identifying the group number to which each item of data belongs. The import facility under the file menu on pds handles all the required format changes and stores the reformatted data in the file newdata.dat. Data export to other statistics packages Data entered in the format that pds expects for 2 or more independent groups (eg. the format for Mann-Whitney test, independent samples t test, Kruskal-Wallis test or ANOVA) is automatically exported into a format more compatible with the format used by other statistics packages when the appropriate tests are run by pds. (The discussion under the heading B3 above indicates the type of format changes required). The data in the format suitable for import into the other packages is stored in the file "data.txt". Data formats required for each test Sign test Input from keyboard Wilcoxon Signed Rank test The number of data pairs must be given on the first line, say this number is 19 (this should be written without a decimal point). The following 19 lines must each contain two numbers (decimal points may be used here as necessary) any writing after the single number on the first line or the two numbers on each of the subsequent 19 lines is ignored. Further lines are ignored. Ignored writing can be used for comments. (The preceeding 3 sentences also apply to subsequent data format specifications.) Example: Data files - paired_no_ties.dat paired_with_ties.dat Paired t test Data format identical to that for Wilcoxon Signed Rank test Fisher's Exact test Input from keyboard Mann-Whitney test The number of measurements on each of the two groups must be given as two numbers on the first line, say these numbers are 7 and 19 (The word "and" should be omitted in the data file.) The following 7 lines should contain two numbers, the first number on each of these lines applies to the first group, the second to the second group. The subsequent 12 (=19-7) lines should contain just one number, each represents a measurement on the second group. Example: Data files - unpaired_no_ties.dat unpaired_with_ties.dat Independent samples t test Data format identical to that for Mann-Whitney test Chi-squared test Two numbers must be given on the first line. The first number is the number of rows, the second number is the number of columns, say these numbers are 3 5. The following 3 lines should each contain 5 numbers. Example: Data file - chisquared.dat Kruskal-Wallis test One number must be given on the first line - this is the number of groups, say this was 3. The second line must contain 3 numbers giving the number of measurements in each group. Say this line was 5 2 4. The following 2 lines should each contain 3 figures - giving measurements in each group The next 2 lines should each contain 2 figures - the first gives the value in the first group and the second gives the value in the third group. The next line should contain 1 figure giving the fifth value for the first group. Example: Data files - anova_no_ties.dat anova_with_ties.dat ANOVA (one way) Data format identical to that for Kruskal-Wallis test Spearman's rho test Data format identical to that for Wilcoxon Signed Rank test Regression Data format identical to that for Wilcoxon Signed Rank test Pearson's correlation Data format identical to that for Wilcoxon Signed Rank test To install pds, put all the files in one directory or folder (for example you could make a directory named pds either using the DOS md command or by running windows explorer clicking on file, new & folder). The pds files consist of this readme.txt file, sample data files with a .dat extension, a settings file called settings.dat, help files with a .txt extension and the executable pds.exe file. You can now run pds by opening the directory/folder and clicking on pds. A "shortcut" can be created in the usual way (By selecting pds.exe in windows explorer, clicking on file, create shortcut and dragging the icon onto the desktop). However, a number of difficulties can arise. Pds needs to know where to find your favourite editor. If it can't find it, you won't be able to view the data files or read moreinfo about the results of a test. By default pds assumes that your favourite editor is notepad and it is in the current path that the computer searches to find commands. If this is not the case you will need to change the settings.dat file. In place of notepad type in the command line of your favourite editor/word processor, with the full path if it is not in the current path. For example, if you used the editor called bb and it was located in the directory edit on c: drive, you would need to replace the word notepad with c:\edit\bb. If you use the write word processor substitute the word write for notepad. There are some problems with the windows 95 notepad editor - It doesn't show or respond to file names longer than 8 characters - names are abbreviated. It doesn't automatically wrap text so that a long line is written out over several lines - you have to select the word wrap option from the edit menu for this to happen. It may also be useful to change the default font used by Notepad - some fonts use proportional spacing giving for example less space to the letter i than the letter a, but this mucks up the alignment of some of the text in help and output - try using the courier or the fixedsys font instead. You can use any other editor to view the data files, the help files and moreinfo (more information). For example, in place of notepad you could write c:\windows\command\edit to invoke another editor that comes with the system (though this also does not handle wrapping text, or you could invoke your word processor instead (for example, write c:\windows\write if you use the "write" word processor) though this slows things down a bit and you must save data in text format). Technical Notes 29/4/2003 A. Notes on the Computations B. Known faults in the program C. Errata in the book Statistics with Common Sense A. Notes on the Computations Most of the motivations and methods used in the calculations performed in this program are fully explained in standard statistical texts. There are two exceptions: 1) In the case of non-parametric tests on data containing ties (other than the sign test), the overall p-value is determined by breaking each tie in all possible ways, calculating a p-value for every possible (equally likely) combination of tie breaks, then averaging the results. As an example, consider the calculation of Spearman's rho on the following data: (written in rows rather than columns and with the heading giving number of data pairs omitted for typographical convenience) 4 4 5 8 6 7 7 9 The ranks of the elements of the first row could be either 1 2 3 4 or 2 1 3 4 The ranks of the elements of the second row could be either 1 2 3 4 or 1 3 2 4 In terms of ranks of the data pairs, the following combinations are all equally likely: 1 2 3 4 1 2 3 4 2 1 3 4 2 1 3 4 1 2 3 4 1 3 2 4 1 2 3 4 1 3 2 4 The Spearman's rho p-value for each of these 4 combinations are 1/24, 4/24, 4/24, 9/24 respectively. The overall Spearman's rho p-value is then given as the average of these 4 values giving a 1 tail p-value of 18/96. 2 tail p-values are obtained by doubling the 1 tail value. When the evidence for the effect of an intervention is minimal, this can give a value greater than 1 (for the same reason that the probability of something being less than or equal to an average value + the probability it being greater than or equal to the same value will often be greater than 1). In this case the 2 tail probability is taken to be 1. 2) In the case of the independent samples t-test, an additional unorthodox p-value is calculated and given along with other information in the moreinfo file. The motivation underlying its calculation is as follows. It is almost impossible to conceive of a real situation in which an intervention would effect the standard deviation of a population but leave the mean absolutely unchanged. The converse also seems true. In other words, it seems reasonable to believe that if there is any difference between the two populations, the means are different. The orthodox approach to the independent samples t test requires 2 tests, one concerning the equality of the means and one concerning the equality of the variances. My additional unorthodox approach is to regard these as both equally valid tests to see if the populations are different, in which case we should reject the hypothesis that the means are the same. We therefore have 2 p-values to check whether we should believe the populations are the same. The unorthodox test takes the point of view that under the null hypothesis both these p-values are equally likely to be any number between 0 and 1. The unorthodox p-value calculation then answers the question "How often would the minimum of two choices of a number chosen at random between 0 and 1 be less than or equal to the minimum observed here?". For the most part the algorithms used in pds follow well known methods described in standard textbooks such as Mathematical Statistics and Data Analysis by J Rice. Algorithms on non-parametric tests are based on methods described in Statistics Using Ranks by R Meddis. Some basic algorithms for sorting, the error function and the incomplete beta and gamma functions come from Numerical Recipes by W Press et al. The algorithm for the inverse of the normal & other statistical functions comes from an unremembered source and are iterative methods that seem to be variants of Newton's method. Mostly they converge rapidly, but they may have trouble with unusual p-values causing the program to stall. B. Known faults in the Program 1) Regression & Correlation calculations fail when correlations are very close to 1 and the two related measures are of very different orders of magnitude This program has undergone only limited testing. It is anticipated that it may contain bugs which will cause it to stall/abort under some circumstances. To notify the author of any such problems, email the details and the data set that caused the program to stall to David.Kault@jcu.edu.au C. Errata in Book referred to by the program - Statistics with Common Sense Page 18 Line 25 - Last word of sentence should be "mean" not "mode". Page 55 Line 3 from bottom - probability 0.999999 means 999,999 chances in 1,000,000, not 1 chance in 999,999. All the listed statistical tests answer one of two related questions:- I "Is there a difference?" or II "Is there an association?" Question I "Is there a difference?". This question is asked in various contexts according to the source of the data and the different types of data. A) Source of data: two related measures (eg. measures before and after an intervention on the same individual, measures on one of a pair who had an intervention and the other of the pair who didn't) Type of Data: 1. Dichotomous (eg. better/worse) Test: Sign test (in some circumstances this test is called McNemar's test) 2. Numerical but not normal Test: Wilcoxon signed rank test 3. Numerical and normal Test: Paired sample t test B) Source of data: a single measure on two unrelated samples (eg. measuring the same quantity on men and women) Type of Data: 1. Dichotomous (eg. disease/no disease) Test: Fisher's exact test 2. Numerical but not normal Test: Mann-Whitney 3. Numerical and normal Test: Independent samples sample t test C) Source of data: a single measure on more than two unrelated samples (eg. measures on say 5 groups of individuals with each group receiving a different treatment) Type of Data: 1. Dichotomous or nominal (categorical) Test: Chi-squared test of association 2. Numerical but not normal Test: Kruskal-Wallis (non-parametric ANOVA) 3. Numerical and normal Test: ANOVA Question II The question "Is there a difference?" becomes "Is there an association?" when we are looking at data whose source is numerical measures of two different quantities on each individual (eg. height and income). Instead of asking "Does height make a difference to income?" we ask "Are height and income associated?" The tests here assume the association is linear. Type of Data: 1. Both measures are numerical but not normal Test: Spearman's rho 2. At least one of the measures is normally distributed as the dependent variable scattered about a regression line Test: to see if the slope of the regression line (beta) is zero. 3. Both measures are normally distributed (Actually more strict assumption is made - bivariate normality) Test: (Pearson's) correlation coefficient See also pages 234-235 Statistics with Common Sense, David Kault, Greenwood Press, 2003 5 = number of values -3.00000000 3.00000000 4.00000000 4.00000000 4.00000000 mes later = 1 mes later = 1 mes later = 1 mes later = 1 mes later = 1 mes later = 1 mes later = 1 5 = number of values -3.00000000 3.00000000 4.00000000 4.00000000 4.00000000 Using data file C:\pdsw\chisquare.dat at 13:56:06 on 20/05/03