/* *************************************************************** simPPM - a SAS macro for a 3rd order percentile-based power method Copyright (C) 2014 Jennifer Koran and Todd C. Headrick This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *************************************************************** */ %macro simPPM(v, percentfile, corrfile, cortype, n, rnd, outfile); /* assign name to the input datafile */ filename per "&percentfile."; /* read in input datafile */ data per1; infile per; input var1-var&v ; run; /* set value for cortype when there is no correlation */ %if &v = 1 %then %let cortype = 0 ; /* set up test for valid correlation type */ %let validtype = 0; %if (&cortype = 1 OR &cortype = 2) %then %let validtype = 1 ; /* instructions for multivariate PPM only */ %IF &v > 1 %THEN %DO; /* assign name to the input datafile */ filename incor "&corrfile."; /* read in input datafile */ data incor1; infile incor; input var1-var&v ; run; %END; /* end instrutions for multivariate PPM only */ /* **************************** */ proc IML; /* begin IML */ /* move data into matrix */ Use per1; Read all into Percent; Close per1; n= &n ; v= &v ; rnd= &rnd ; /* find dimensions of the input percentiles */ NP = NCOL(Percent); /* *************************************************** */ /* instructions for multivariate PPM only */ if &v > 1 then do; /* move data into matrix */ Use incor1; Read all into Cor; Close incor1; /* check valid correlation type */ if &validtype ^= 1 then; do; ERROR = "Invalid correlation type entered."; print ERROR; INSTR = "Use 1 for Pearson or 2 for Spearman."; print INSTR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; /* end do statements */ /* find dimensions of the input correlations */ NC = NCOL(Cor); NR = NROW(Cor); /* check that Cor is square */ if NC ^= NR then do; ERROR = "number of columns of correlation matrix does not equal number of rows of correlation matrix."; print ERROR; INSTR = "Check both number of variables and correlation input file for consistency and accuracy."; print INSTR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; /* end do statements */ /* finite-precision test of whether a matrix is symmetric */ start SymCheck(A); B = (A + A`)/2; scale = max(abs(A)); delta = scale * constant("SQRTMACEPS"); return( all( abs(B-A) < delta ) ); finish; /* check that Cor is symmetric */ IsSym = SymCheck(Cor); if IsSym ^= 1 then do; ERROR = "Correlation matrix is not symmetric."; print ERROR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; /* end do statements */ /* check that Cor is positive definite */ IsPD = DET(Cor); if IsPD = 0 then do; ERROR = "Correlation matrix is not positive definite."; print ERROR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; /* end do statements */ /* check that number of columns of Percent matches the dimensions of Cor */ if NP ^= NR then do; ERROR = "Number of variables does not match the input files (percentiles file and/or correlations file)."; print ERROR; INSTR = "Check number of variables as well as both input files for consistency."; print INSTR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; /* end do statements */ end; /* end instrutions for multivariate PPM only */ /* ********************************** */ /* separate the percentiles */ P10 = Percent[1,]; P25 = Percent[2,]; P50 = Percent[3,]; P75 = Percent[4,]; P90 = Percent[5,]; /* check that the percentiles are monotonic increasing */ chk = J(4,NP,0); chk[1,] = P25 - P10; chk[2,] = P50 - P25; chk[3,] = P75 - P50; chk[4,] = P90 - P75; if any(chk < 0) then do; ERROR = "Percentiles entered are not monotonic increasing."; print ERROR; INSTR = "Check order of percentiles P10 < P25 < P50 < P75 < P90."; print INSTR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; /* end do statements */ /* obtain the location, scale, and shape parameters for each variable */ g_1 = P50; g_2 = P90 - P10; g_3 = (P50 - P10)/(P90 - P50); g_4 = (P75 - P25)/g_2; /* obtain z scores for standard normal distribution */ Z90 = quantile("Normal", 0.90); Z75 = quantile("Normal", 0.75); /* obtain the polynomial coefficients for each variable */ c_1 = g_1; c_2 = ( g_2#( (g_4#(Z90##3)) - (Z75##3) ) )/( (2#(Z90##3)#Z75) - (2#Z90#(Z75##3)) ); c_3 = ( g_2#( 1 - g_3 ) )/( 2#(1 + g_3)#(Z90##2) ); c_4 = -( g_2#( (g_4#(Z90)) - (Z75) ) )/( (2#(Z90##3)#Z75) - (2#Z90#(Z75##3)) ); C = c_1//c_2//c_3//c_4; print "Percentile Power Method coefficients"; print C; title; /* check boundary conditions */ D = c_3##2 - 3#c_2#c_4; if any(D >= 0) then do; ERROR = "Solution is outside bounds for a valid pdf"; print ERROR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; /* end do statements */ /* *************************************************** */ /* instructions for multivariate PPM only */ if &v > 1 then do; /* if Spearman correlations have been entered */ if &cortype = 2 then do; /* define Intermediate Pearson correlation function */ start IntermedS(q, r, S, n); pi = constant("pi"); /* set the constant pi */ q=(((6/pi)#((((n-2)/(n-1))#arsin(r/2))+((1/(n-1))#arsin(r/2))))- S); finish; /* define bisection function */ start bisectionS(a, b, S, n); dx = 1e-6; dy = 1e-4; do i = 1 to 1000; /** max iterations **/ c = (a+b)/2; call IntermedS(vala,a, S, n); call IntermedS(valc,c, S, n); if abs(valc) < dy | (b-a)/2 < dx then return(c); if vala#valc > 0 then a = c; else b = c; end; return (.); /** no convergence **/ finish; /* compute intermediate Pearson correlations using numeric solver */ P = I(NC); do i = 1 to (NC-1); do j = (i+1) to NC; S = Cor[j,i]; P[j,i] = bisectionS(-1, 1, S, n); end; /* end do loop for j */ end; /* end do loop for i */ end; /* end do statements for Spearman correlations */ /* if Pearson correlations have been entered */ if &cortype = 1 then do; /* define Intermediate Pearson correlation function */ start IntermedP(q, r, S, c1j, c2j, c3j, c4j, c1k, c2k, c3k, c4k, mj ,mk, vj ,vk); exv = c1j#mk + c2j#c2k#r + 3#c2k#c4j#r + 3#c2j#c4k#r + 9#c4j#c4k#r + 6#c4j#c4k#r#r#r + c3j#(mk + 2#c3k#r#r); q=(((exv-(mj#mk))/(sqrt(vj#vk)))- S); finish; /* define bisection function */ start bisectionP(a, b, S, c1j, c2j, c3j, c4j, c1k, c2k, c3k, c4k, mj ,mk, vj ,vk); dx = 1e-6; dy = 1e-4; do i = 1 to 1000; /** max iterations **/ c = (a+b)/2; call IntermedP(vala,a, S, c1j, c2j, c3j, c4j, c1k, c2k, c3k, c4k, mj ,mk, vj ,vk); call IntermedP(valc,c, S, c1j, c2j, c3j, c4j, c1k, c2k, c3k, c4k, mj ,mk, vj ,vk); if abs(valc) < dy | (b-a)/2 < dx then return(c); if vala#valc > 0 then a = c; else b = c; end; return (.); /** no convergence **/ finish; /* compute intermediate Pearson correlations using numeric solver */ P = I(NC); do j = 1 to (NC-1); c1j=C[1,j]; c2j=C[2,j]; c3j=C[3,j]; c4j=C[4,j]; do k = (j+1) to NC; c1k=C[1,k]; c2k=C[2,k]; c3k=C[3,k]; c4k=C[4,k]; S = Cor[k,j]; mj = c1j + c3j; mk = c1k + c3k; vj = c2j#c2j + 2#c3j#c3j + 6#c2j#c4j + 15#c4j#c4j; vk = c2k#c2k + 2#c3k#c3k + 6#c2k#c4k + 15#c4k#c4k; P[k,j] = bisectionP(-5, 5, S, c1j, c2j, c3j, c4j, c1k, c2k, c3k, c4k, mj ,mk, vj ,vk); end; /* end do loop for k */ end; /* end do loop for j */ /* check that intermediate correlations are valid */ if any(P < -1 | P > 1) then do; ERROR = "At least one intermediate correlation exceeds the valid range [-1,+1]."; print ERROR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; end; /* end do statements for Pearson correlations */ /* reflect the intermediate Pearson correlation matrix across the diagonal */ d = vech(P); PI = sqrvech(d); /* check that intermediate correlation matrix is positive definite */ IsPD = DET(PI); if IsPD <= 0 then do; ERROR = "Intermediate Pearson correlation matrix is not positive definite."; print ERROR; print "Program terminated due to error."; ABORT; /* go to the end of the program */ end; /* end do statements */ print "Intermediate Pearson correlations"; print PI; title; end; /* end instrutions for multivariate PPM only */ /* ********************************** */ /* instructions for univariate PPM only */ if &v = 1 then do; PI = 1; end; /* end instrutions for univariate PPM only */ /* generate multivariate normal variables */ Mean = J(&v ,1,0); call randseed(rnd); /** set seed for the RandNormal module **/ Z = RandNormal(n, Mean, PI); /* use the polynomial coefficients to transform the normal variables */ NN = J(n, &v ,.); do i = 1 to NP; X0 = J(n,1,1); X1 = Z[,i]; X2 = X1#X1; X3 = X2#X1; X = X0||X1||X2||X3; coef = C[,i]; pZ = X*coef; NN[,i] = pZ; end; /* end do loop for i */ /* export the data file with the nonnormal variates */ CREATE PPM FROM NN; APPEND FROM NN; quit; /* quit proc IML */ /* **************************** */ /* output work.PPM */ PROC EXPORT DATA= work.PPM OUTFILE= "&outfile " DBMS=DLM REPLACE; DELIMITER='20'x; PUTNAMES=NO; RUN; %mend simPPM;