***************************************************************************************** MACRO FELM FIXED EFFECTS LINEAR MODELS ***************************************************************************************** DESCRIPTION Macro FELM computes a) one or multiway (experimental) fixed effect regression for - OLS models - 2SLS IV models Multiway regression is based on the Method of Alternating Projections to sweep out multiple group effects from the normal equations before estimating the remaining coefficients with OLS. For the moment the multi-way fixed effects is doing a loop on proc standard and proc compare. It is not very efficient and remains very slow ! Beware ! (PROC IML version demanded !) Check the number of iterations and the threshold to avoid blocking the computer for too long. b) Multi-way Robust clustered standard errors (up to 3 clusters) for - Simple OLS - One way or multiway FE OLS - IV regression We follow here Cameron et al. (2011) and use the following formula for estimating V[B] the variance-covariance matrix of the parameters B for two-way clustering : V[B] =V_g[B] + V_h[B] - V_g*h[B] and for three-way clustering : V[B] =V_g[B] + V_h[B]+V_i[B] - V_g*h[B] - V_h*i[B] - V_g*i[B] + V_g*h*i[B] (with g, h, and i three clustering variables) Cameron, A.C., J.B. Gelbach and D.L. Miller (2011) Robust inference with multiway clustering, Journal of Business & Economic Statistics 29 (2011), no. 2, 238--249. ***************************************************************************************** SYNTAX felm(data=,dependent=,var=,fe=,class=,format=,cluster=,endogenous=,instruments=,weight=, multi=,threshold=0.000000001,maxiter=1000,nestedness=YES,deldata=YES); ***************************************************************************************** ARGUMENTS data: name of the dataset. Strongly recommended to use a dataset already in the work library. dependent: name of the dependent variables var: list of independent variables (which should include also the ones in the class statement) fe: list of fixed effects to demean. For multiway fixed effects, as the procedure is very slow, rather used dummies if the number of dummies is estimatable with surveyreg (<500). class: class variables as in a SAS class instruction. (Only for simple multiway clustering OLS) format: format as in a SAS format instruction. (Only for simple multiway clustering OLS) cluster: list of variables for multi-way clustering (max=3) endogenous: list of endogenous variables in an IV model instruments: list of instruments in an IV model (no need to add the other control variables) weight: SAS weight variable multi: 1 if multiple observations in intersection of clusters, 0 otherwise. If empty (default), the macro will determine its value. threshold: threshold of convergence for sweaping out the fixed effects demeaning. Default=0.000000001 Criteria of convergence at iteration k, Max_j(Max_i(Dkji-Dk-1ji))-Min_j(Min_i(Dkji-Dk-1ji)) forthcoming. CREDITS : Part of the code comes from %ClusteredTSLS Macro written by Tanguy Brachet which clusters standard errors of an IV regression : https://www.researchgate.net/publication/303288577_Clustered_IV Another part (two way clustering) of the code comes %REG2DSE Macro from Mark (Shuai) Ma (2015) which offers two-way clustering of an OLS simple regression: https://sites.google.com/site/markshuaima/home/two-way-clustered-standard-errors-and-sas-code Note on two way clustering : "To obtain unbiased estimates in finite sample, the clustered standard errors are adjusted by (N-1)/(N-P)×G/(G-1), where N is the sample size, P is the number of independent variables, and G is the number of clusters." (Ma 2015) The code is inspired by the Simon Gaure's felm function in the excellent lfe package for R and by Sergio Correa's incredible reghdfe function for Stata . Code comes with no warranty of accurateness. ***************************************************************************************** Olivier Godechot. V1.1 14 Apr 2019 *****************************************************************************************; OPTION NOCENTER NODATE NONUMBER PS=MAX LS=MAX; %MACRO felm(data=,dependent=,var=,fe=,class=, format=, cluster=,endogenous=,instruments=,weight=,multi=,threshold=0.000000001,maxiter=1000,nestedness=YES,deldata=YES); %if &fe=%str() and &instruments=%str() %then %do; %CLUST3(data=&data,dependent=&dependent, var=&var, class=&class,format=&format, cluster=&cluster,weight=&weight, multi=&multi, output=&data._EST_CL); %goto myend; %end; %if &class NE %str() or &format NE %str() %then %do; %put %str(Class variables (and their formats) not yet available with fixed effects or instrumental variables ! Create dummies !); %goto myend; %end; %let methodnes1=%str(); %let methodnes2=%str(); %if &fe NE %str() and &cluster NE %str() %then %do; %let methodnes1=Correction for nestedness of the 3 first fixed effects in the 3 first clusters; %let methodnes2=No correction for nestedness; %end; %let Within=%str(); %if &fe NE %str() %then %do; %let demean=_demean; %let Within=Within; %end; %else %let demean=_clean; %let fe1=%scan(&fe,1); %let fe2=%scan(&fe,2); %let fe3=%scan(&fe,3); %let fe4=%scan(&fe,4); %let fe_other=%str(); %if &fe4 NE %str() %then %let fe_other=%substr(&fe,%index(&fe,&fe4),%length(&fe-%index(&fe,&fe4))); %let fe1_det=%str(); %let fe2_det=%str(); %let fe3_det=%str(); %let cluster1=%scan(&cluster,1); %let cluster2=%scan(&cluster,2); %let cluster3=%scan(&cluster,3); %let cluster1_det=%str(); %let cluster2_det=%str(); %let cluster3_det=%str(); %let N_FE1=0; %let N_FE2=0; %let N_FE3=0; %let DF_FE_all=0; %let N_FE1_CL1=0; %let N_FE1_CL2=0; %let N_FE1_CL3=0; %let N_FE2_CL1=0; %let N_FE2_CL2=0; %let N_FE2_CL3=0; %let N_FE3_CL1=0; %let N_FE3_CL2=0; %let N_FE3_CL3=0; %let R2=%str(); %let warning=%str(); %let print=YES; %if &instruments=%str() %then %let model=OLS; %else %let model=IV; %if &fe=%str() %then %let model_fe=%str(); %else %let model_fe=FIXED EFFECTS; %if &cluster=%str() %then %let model_cl=%str(); %else %let model_cl=CLUSTER; *Subprogram for using list of variables type V1-V10 and AA--ZZ; %let hypen1=%str( -); %do %while (%index(&instruments,&hypen1)>0); %let length=%index(&instruments,&hypen1); %let instruments=%qsubstr(&instruments,1,%eval(&length-1))%str(-)%qsubstr(&instruments,%eval(&length+2)); %end; %do %while (%index(&var,&hypen1)>0); %let length=%index(&var,&hypen1); %let var=%qsubstr(&var,1,%eval(&length-1))%str(-)%qsubstr(&var,%eval(&length+2)); %end; %let hypen2=%str(- ); %do %while (%index(&instruments,&hypen2)>0); %let length=%index(&instruments,&hypen2); %let instruments=%qsubstr(&instruments,1,%eval(&length-1))%str(-)%qsubstr(&instruments,&length+2); %end; %do %while (%index(&var,&hypen2)>0); %let length=%index(&var,&hypen2); %let var=%qsubstr(&var,1,%eval(&length-1))%str(-)%qsubstr(&var,&length+2); %end; %if %index(&var,--)>0 or %index(&instruments,--)>0 %then %do; proc contents data=&data noprint; run; %end; %do %while (%index(&instruments,--)>0); %let i=1; %do %while(%index(%scan(&instruments,&i,%str( )),--)=0); %let i=%eval(&i+1); %end; %let length2=%length(%scan(&instruments,&i,%str( ))); %let Vd=%scan(&instruments,&i); %let length=%index(&instruments,&vd); %let Vf=%scan(&instruments,&i+1); %local z; %local zz; %let z=%sysfunc(open(&data)); %let posd=%sysfunc(varnum(&z,&vd)); %let posf=%sysfunc(varnum(&z,&vf)); %let j=%eval(&posd+1); %let white=%str( ); %let allvar=&vd&white; %do %while (&j<&posf); %let vj=%sysfunc(varname(&z,&j)); %let allvar=&allvar&vj&white; %let j=%eval(&j+1); %end; %let allvar=&allvar&vf; %let zz=%sysfunc(close(&z)); %let instruments=%qsubstr(&instruments,1,%eval(&length-1))%str(&allvar)%qsubstr(&instruments,%eval(&length+&length2)); %end; %do %while (%index(&instruments,-)>0); %let i=1; %do %while(%index(%scan(&instruments,&i,%str( )),-)=0); %let i=%eval(&i+1); %end; %let length2=%length(%scan(&instruments,&i,%str( ))); %let Vd=%scan(&instruments,&i,%str( -)); %let length=%index(&instruments,&vd); %let Vf=%scan(&instruments,&i+1,%str( -)); %let k=%length(&Vd); %do %while(%verify(%substr(&vd,&k),0123456789)=0); %let posd=%substr(&vd,&k); %let k=%eval(&k-1); %end; %let k=%length(&Vf); %do %while(%verify(%substr(&vf,&k),0123456789)=0); %let posf=%substr(&vf,&k); %let k=%eval(&k-1); %end; %let j=%eval(&posd+1); %let white=%str( ); %let allvar=&vd&white; %do %while (&j<&posf); %let vj=%substr(&vd,1,&k); %let allvar=&allvar&vj&j&white; %let j=%eval(&j+1); %end; %let allvar=&allvar&vf; %let instruments=%qsubstr(&instruments,1,%eval(&length-1))%str(&allvar)%qsubstr(&instruments,%eval(&length+&length2)); %end; %do %while (%index(&var,--)>0); %let i=1; %do %while(%index(%scan(&var,&i,%str( )),--)=0); %let i=%eval(&i+1); %end; %let length2=%length(%scan(&var,&i,%str( ))); %let Vd=%scan(&var,&i); %let length=%index(&var,&vd); %let Vf=%scan(&var,&i+1); %local z; %local zz; %let z=%sysfunc(open(&data)); %let posd=%sysfunc(varnum(&z,&vd)); %let posf=%sysfunc(varnum(&z,&vf)); %let j=%eval(&posd+1); %let white=%str( ); %let allvar=&vd&white; %do %while (&j<&posf); %let vj=%sysfunc(varname(&z,&j)); %let allvar=&allvar&vj&white; %let j=%eval(&j+1); %end; %let allvar=&allvar&vf; %let zz=%sysfunc(close(&z)); %let var=%qsubstr(&var,1,%eval(&length-1))%str(&allvar)%qsubstr(&var,%eval(&length+&length2)); %end; %do %while (%index(&var,-)>0); %let i=1; %do %while(%index(%scan(&var,&i,%str( )),-)=0); %let i=%eval(&i+1); %end; %let length2=%length(%scan(&var,&i,%str( ))); %let Vd=%scan(&var,&i,%str( -)); %let length=%index(&var,&vd); %let Vf=%scan(&var,&i+1,%str( -)); %let k=%length(&Vd); %do %while(%verify(%substr(&vd,&k),0123456789)=0); %let posd=%substr(&vd,&k); %let k=%eval(&k-1); %end; %let k=%length(&Vf); %do %while(%verify(%substr(&vf,&k),0123456789)=0); %let posf=%substr(&vf,&k); %let k=%eval(&k-1); %end; %let j=%eval(&posd+1); %let white=%str( ); %let allvar=&vd&white; %do %while (&j<&posf); %let vj=%substr(&vd,1,&k); %let allvar=&allvar&vj&j&white; %let j=%eval(&j+1); %end; %let allvar=&allvar&vf; %let var=%qsubstr(&var,1,%eval(&length-1))%str(&allvar)%qsubstr(&var,%eval(&length+&length2)); %end; *END of Subprogram for using list of variables type V1-V10 and AA--ZZ; %let var_c = %sysfunc(tranwrd(&var,*,_x_)); %let var_d = %sysfunc(tranwrd(&var,*,%str( ))); %let instruments_c = %sysfunc(tranwrd(&instruments,*,_x_)); %let instruments_d = %sysfunc(tranwrd(&instruments,*,%str( ))); %let endogenous_c = %sysfunc(tranwrd(&endogenous,*,_x_)); %let endogenous_d = %sysfunc(tranwrd(&endogenous,*,%str( ))); %if &fe NE %str() %then %do; DATA &data._tmp1; %let FEinfo=fixed effects (by &fe) and; %let NOINT=NOINT; %end; %else %do; DATA &data.&demean; %let FEinfo=%str(); %let NOINT=%str(); %end; SET &data (keep=&fe &cluster &dependent &var_d &instruments_d &weight &endogenous_d); IF &dependent NE .; %Let i=1; %do %while(%scan(&var_c,&i,%str( )) NE); %let v_a=%scan(&var_c,&i,%str( )); %put %str(&v_a); %IF %index(&v_a,_x_)>0 %then %do; %let v_b=%sysfunc(tranwrd(&v_a,_x_,*)); &v_a=&v_b; %put %str(&v_a); %put %str(&v_b); IF MISSING(&v_a)=0; %end; %else %do; IF MISSING(%scan(&var,&i))=0; %end; %if &weight NE %str() %then %do; IF MISSING(&weight)=0; %end; %Let i=%eval(&i+1); %end; %Let i=1; %do %while(%scan(&instruments_c,&i,%str( )) NE); %let v_a=%scan(&instruments_c,&i,%str( )); %put %str(&v_a); %IF %index(&v_a,_x_)>0 %then %do; %let v_b=%sysfunc(tranwrd(&v_a,_x_,*)); &v_a=&v_b; %put %str(&v_a); %put %str(&v_b); IF MISSING(&v_a)=0; %end; %else %do; IF MISSING(%scan(&instruments,&i))=0; %end; %Let i=%eval(&i+1); %end; %Let i=1; %do %while(%scan(&endogenous_c,&i,%str( )) NE); %let v_a=%scan(&endogenous_c,&i,%str( )); %put %str(&v_a); %IF %index(&v_a,_x_)>0 %then %do; %let v_b=%sysfunc(tranwrd(&v_a,_x_,*)); &v_a=&v_b; %put %str(&v_a); %put %str(&v_b); IF MISSING(&v_a)=0; %end; %else %do; IF MISSING(%scan(&endogenous,&i))=0; %end; %Let i=%eval(&i+1); %end; %if &fe NE %str() %then %do; %Let i=1; %do %while(%scan(&fe,&i) NE); IF MISSING(%scan(&fe,&i))=0; %Let i=%eval(&i+1); %end; %end; %if &cluster NE %str() %then %do; %Let i=1; %do %while(%scan(&cluster,&i) NE); IF MISSING(%scan(&cluster,&i))=0; %Let i=%eval(&i+1); %end; %end; MYKEY=_N_; RUN; %let var=&var_c; %let instruments=&instruments_c; %let endogenous=&endogenous_c; TITLE "Descriptives variables (rough)"; PROC MEANS ; VAR &dependent &var &endogenous &instruments &weight; OUTPUT OUT=&data._DES; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; *MULTIWAY DEMEANING WITH METHOD OF ALTERNATE PROJECTIONS; %if &fe NE %str() %then %do; %Let i=1; %do %while(%scan(&fe,&i) NE); PROC SORT DATA=&data._tmp&i; BY %scan(&fe,&i); RUN; %Let j=%eval(&i+1); PROC STANDARD DATA=&data._tmp&i OUT=&data._tmp&j MEAN=0; VAR &dependent &var &endogenous &instruments ; BY %scan(&fe,&i); RUN; %Let i=%eval(&i+1); %end; %if &i>2 %then %do; options nonotes; %let gap=10000; %let round=0; %let gap=%eval(&gap); %do %while( &gap NE 0 AND &round<&maxiter); data &data._tmp1; set &data._tmp&j; run; %Let i=1; %do %while(%scan(&fe,&i) NE); PROC SORT DATA=&data._tmp&i; BY %scan(&fe,&i); RUN; %Let j=%eval(&i+1); PROC STANDARD DATA=&data._tmp&i OUT=&data._tmp&j MEAN=0; VAR &dependent &var &endogenous &instruments ; BY %scan(&fe,&i); RUN; %Let i=%eval(&i+1); %end; PROC SORT DATA=&data._tmp&j; BY %scan(&fe,1); RUN; PROC COMPARE BASE=&data._tmp&j COMPARE=&data._tmp1 OUT=&data._tmp_comp NOPRINT; VAR &dependent &var &endogenous &instruments ; RUN; PROC MEANS DATA=&data._tmp_comp NOPRINT; VAR &dependent &var &endogenous &instruments ; OUTPUT OUT=_DOCTMP_comp2; RUN; DATA _NULL_; SET _DOCTMP_comp2 (KEEP=&dependent &var &instruments _STAT_ WHERE=(_STAT_ IN ("MAX"))) end=last; COMPMAX=MAX(OF &dependent &var &endogenous &instruments ) ; IF last THEN DO; CALL SYMPUT ('COMPMAX',COMPMAX); END; RUN; DATA _NULL_; SET _DOCTMP_comp2 (KEEP=&dependent &var &instruments _STAT_ WHERE=(_STAT_ IN ("MIN"))) end=last; COMPMIN=MIN(OF &dependent &var &endogenous &instruments ); IF last THEN DO; CALL SYMPUT ('COMPMIN',COMPMIN); END; RUN; DATA _NULL_; SET _DOCTMP_comp2 (OBS=1); GAP=ROUND(%sysevalf(&COMPMAX)-%sysevalf(&COMPMIN),&threshold) ; CALL SYMPUT ('GAP',GAP); RUN; %let round=%eval(&round+1); %let gap=%sysevalf(&gap); %let gap2=%sysevalf(&COMPMAX-&COMPMIN); %put %str(Round &round. Gap: &gap2); %end; options notes; %if &round=&maxiter %then %let warning=!Alternating projections demeaning did not converge in &round iterations. Beware!; %put %str(&warning); %end; PROC DATASETS LIBRARY=work NOLIST NODETAILS; DELETE &data.&demean; QUIT; PROC DATASETS LIBRARY=work NOLIST; CHANGE &data._tmp&j=&data.&demean; QUIT; TITLE "Descriptives variables (demeaned by &FE)"; PROC MEANS data=&data.&demean; VAR &dependent &var &endogenous &instruments ; OUTPUT OUT=&data._DES_WITHIN; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; *NESTEDNESS OF FE IN CLUSTERS; %let i=1; %do %while (%scan(&fe,&i) NE %str()); PROC FREQ DATA=&data.&demean NOPRINT; TABLES %scan(&fe,&i) / OUT=&data._tmp_n_fe&i; RUN; DATA _NULL_; SET &data._tmp_n_fe&i; COUNT=_N_; CALL SYMPUT ("N_FE&i",COUNT); RUN; %if %scan(&cluster,1) NE %str() %then %do; PROC FREQ DATA=&data.&demean NOPRINT; TABLES %scan(&fe,&i)*%scan(&cluster,1) / OUT=&data._tmp_n_fe&i._cl1 LIST; RUN; DATA _NULL_; SET &data._tmp_n_fe&i._cl1; COUNT=_N_; CALL SYMPUT ("n_fe&i._cl1",COUNT); RUN; %if %scan(&cluster,2) NE %str() %then %do; PROC FREQ DATA=&data.&demean NOPRINT; TABLES %scan(&fe,&i)*%scan(&cluster,2) / OUT=&data._tmp_n_fe&i._cl2 LIST; RUN; DATA _NULL_; SET &data._tmp_n_fe&i._cl2; COUNT=_N_; CALL SYMPUT ("n_fe&i._cl2",COUNT); RUN; %if %scan(&cluster,3) NE %str() %then %do; PROC FREQ DATA=&data.&demean NOPRINT; TABLES %scan(&fe,&i)*%scan(&cluster,3) / OUT=&data._tmp_n_fe&i._cl3 LIST; RUN; DATA _NULL_; SET &data._tmp_n_fe&i._cl3; COUNT=_N_; CALL SYMPUT ("n_fe&i._cl3",COUNT); RUN; %end; %end; %end; %let N_FE_i=&&N_FE&i; %let DF_FE_all=%eval(&DF_FE_all-1+&N_FE_i); %let i=%eval(&i+1); %end; %let DF_FE_all=%eval(&DF_FE_all+1); %end; %else %do; PROC SORT DATA=&data.&demean; BY %scan(&cluster,1); RUN; %end; %if &instruments =%str() %then %do; %let print=NO; %CLUST3(data=&data.&demean,dependent=&dependent, var=&var, cluster=&cluster,weight=&weight, multi=&multi, output=&data._EST_CL,NOINT=&NOINT,print=&print); DATA &data._EST_CL; SET &data._EST_CL; N_FE1=&N_FE1; N_FE2=&N_FE2; N_FE3=&N_FE3; DF_FE_all=&DF_FE_all; N_FE1_CL1=&N_FE1_CL1; N_FE1_CL2=&N_FE1_CL2; N_FE1_CL3=&N_FE1_CL3; N_FE2_CL1=&N_FE2_CL1; N_FE2_CL2=&N_FE2_CL2; N_FE2_CL3=&N_FE2_CL3; N_FE3_CL1=&N_FE3_CL1; N_FE3_CL2=&N_FE3_CL2; N_FE3_CL3=&N_FE3_CL3; *Method 1 : Robust Cluster standard error corrected for fixed effect nestedness; StdErr_cl1=StdErr_or_cl1*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL1=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL1=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL1=N_FE3)*Max(N_FE3-1,0)))**0.5); %if %scan(&cluster,2) NE %str() %then %do; StdErr_cl2=StdErr_or_cl2*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL2=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL2=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL2=N_FE3)*Max(N_FE3-1,0)))**0.5); %end; %if %scan(&cluster,3) NE %str() %then %do; StdErr_cl3=StdErr_or_cl3*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL3=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL3=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL3=N_FE3)*Max(N_FE3-1,0)))**0.5); StdErr_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl1_cl3=StdErr_or_cl1_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl2_cl3=StdErr_or_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl1_cl2_cl3=StdErr_or_cl1_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr=(StdErr_cl1**2+StdErr_cl2**2+StdErr_cl3**2-StdErr_cl1_cl2**2-StdErr_cl2_cl3**2-StdErr_cl1_cl3**2+StdErr_cl1_cl2_cl3**2)**0.5; DENDF=MIN(DENDF_CL1,DENDF_CL2,DENDF_CL3); %end; %else %if %scan(&cluster,2) NE %str() %then %do; StdErr_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr=(StdErr_cl1**2+StdErr_cl2**2-StdErr_cl1_cl2**2)**0.5; DENDF=MIN(DENDF_CL1,DENDF_CL2); %end; %else %do; StdErr=StdErr_cl1; DENDF=DENDF_CL1; %end; T=ESTIMATE/StdErr; PValue=(1-probt(abs(T),DENDF))*2; IF PValue NE . THEN DO; IF PValue <0.001 THEN Stars="***"; ELSE IF PValue <0.01 THEN Stars="** "; ELSE IF PValue <0.05 THEN Stars="* "; ELSE IF PValue <0.10 THEN Stars=". "; END; *Method 2 : Robust Cluster standard error uncorrected for fixed effect nestedness; StdErr2_cl1=StdErr_or_cl1*(((N-K)/(N-K-DF_FE_all))**0.5); %if %scan(&cluster,2) NE %str() %then %do; StdErr2_cl2=StdErr_or_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); %end; %if %scan(&cluster,3) NE %str() %then %do; StdErr2_cl3=StdErr_or_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl3=StdErr_or_cl1_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl2_cl3=StdErr_or_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl2_cl3=StdErr_or_cl1_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2=(StdErr2_cl1**2+StdErr2_cl2**2+StdErr2_cl3**2-StdErr2_cl1_cl2**2-StdErr2_cl2_cl3**2-StdErr2_cl1_cl3**2+StdErr2_cl1_cl2_cl3**2)**0.5; %end; %else %if %scan(&cluster,2) NE %str() %then %do; StdErr2_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2=(StdErr2_cl1**2+StdErr2_cl2**2-StdErr2_cl1_cl2**2)**0.5; %end; %else %do; StdErr2=StdErr2_cl1; %end; T2=ESTIMATE/StdErr2; DENDF2=N-K-DF_FE_all; PValue2=(1-probt(abs(T2),DENDF2))*2; IF PValue2 NE . THEN DO; IF PValue2 <0.001 THEN Stars2="***"; ELSE IF PValue2 <0.01 THEN Stars2="** "; ELSE IF PValue2 <0.05 THEN Stars2="* "; ELSE IF PValue2 <0.10 THEN Stars2=". "; END; CALL SYMPUT ('DENDF2',DENDF2); CALL SYMPUT ('DENDF',DENDF); CALL SYMPUT ('N',N); CALL SYMPUT ('G_CL1',DENDF_CL1+1); CALL SYMPUT ('G_CL2',DENDF_CL2+1); CALL SYMPUT ('G_CL3',DENDF_CL3+1); RUN; %if &N_FE1>0 %then %let fe1_det=(N1=%scan(&N_FE1,1)); %if &N_FE2>0 %then %let fe2_det=(N2=%scan(&N_FE2,1)); %if &N_FE3>0 %then %let fe3_det=(N3=%scan(&N_FE3,1)); %if &G_CL1>0 %then %let cluster1_det=(G1=%scan(&G_CL1,1)); %if &G_CL2>0 %then %let cluster2_det=(G2=%scan(&G_CL2,1)); %if &G_CL3>0 %then %let cluster3_det=(G3=%scan(&G_CL3,1)); DATA _NULL_; SET &data.&demean._tmp_fit_cl1 (OBS=1); CALL SYMPUT ('R2',cvalue1); RUN; %if &nestedness NE NO %then %do; TITLE "FINAL &model &model_fe ESTIMATES WITH ROBUST &model_cl STANDARD ERRORS"; TITLE2 "Dependent variable: &dependent"; TITLE3 "Fixed effects: &fe1 &fe1_det &fe2 &fe2_det &fe3 &fe3_det &fe_other &warning"; TITLE4 "Clusters: &cluster1 &cluster1_det &cluster2 &cluster2_det &cluster3 &cluster3_det"; TITLE5 "DF for Student T test: Min(G1,G2,G3)-1= %scan(&DENDF,1)"; TITLE6 "Number of observations: %scan(&N,1)"; TITLE7 "&Within R2: %scan(&R2,1,%str( ))"; TITLE8 "Estimation à la Stata-Reghdfe. &methodnes1"; PROC PRINT DATA=&data._EST_CL; VAR Parameter--ESTIMATE StdErr Stars PValue ; RUN; %end; %if &nestedness NE YES %then %do; TITLE "FINAL &model &model_fe ESTIMATES WITH ROBUST &model_cl STANDARD ERRORS"; TITLE2 "Dependent variable: &dependent"; TITLE3 "Fixed effects: &fe1 &fe1_det &fe2 &fe2_det &fe3 &fe3_det &fe_other &warning"; TITLE4 "Clusters: &cluster1 &cluster1_det &cluster2 &cluster2_det &cluster3 &cluster3_det"; TITLE5 "DF for Student T test. (N-K): %scan(&DENDF2,1)"; TITLE6 "Number of observations: %scan(&N,1)"; TITLE7 "&Within R2: %scan(&R2,1,%str( ))"; TITLE8 "Estimation à la R-felm. &methodnes2"; PROC PRINT DATA=&data._EST_CL; VAR Parameter--ESTIMATE StdErr2 Stars2 PValue2 ; RUN; %end; %end; %else %if &instruments NE %str() %then %do; %let j=1; %do %while (%scan(&endogenous,&j) NE %str()); %let print=NO; %CLUST3(data=&data.&demean,dependent=%scan(&endogenous,&j), var=&instruments &var, cluster=&cluster, weight=&weight, multi=&multi, output=&data._EST_CL_STEP1_&j,NOINT=&NOINT,print=&print); DATA &data._EST_CL_STEP1_&j; SET &data._EST_CL_STEP1_&j; N_FE1=&N_FE1; N_FE2=&N_FE2; N_FE3=&N_FE3; DF_FE_all=&DF_FE_all; N_FE1_CL1=&N_FE1_CL1; N_FE1_CL2=&N_FE1_CL2; N_FE1_CL3=&N_FE1_CL3; N_FE2_CL1=&N_FE2_CL1; N_FE2_CL2=&N_FE2_CL2; N_FE2_CL3=&N_FE2_CL3; N_FE3_CL1=&N_FE3_CL1; N_FE3_CL2=&N_FE3_CL2; N_FE3_CL3=&N_FE3_CL3; *Method 1 : Robust Cluster standard error corrected for fixed effect nestedness; StdErr_cl1=StdErr_or_cl1*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL1=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL1=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL1=N_FE3)*Max(N_FE3-1,0)))**0.5); %if %scan(&cluster,2) NE %str() %then %do; StdErr_cl2=StdErr_or_cl2*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL2=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL2=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL2=N_FE3)*Max(N_FE3-1,0)))**0.5); %end; %if %scan(&cluster,3) NE %str() %then %do; StdErr_cl3=StdErr_or_cl3*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL3=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL3=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL3=N_FE3)*Max(N_FE3-1,0)))**0.5); StdErr_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl1_cl3=StdErr_or_cl1_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl2_cl3=StdErr_or_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl1_cl2_cl3=StdErr_or_cl1_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr=(StdErr_cl1**2+StdErr_cl2**2+StdErr_cl3**2-StdErr_cl1_cl2**2-StdErr_cl2_cl3**2-StdErr_cl1_cl3**2+StdErr_cl1_cl2_cl3**2)**0.5; DENDF=MIN(DENDF_CL1,DENDF_CL2,DENDF_CL3); %end; %else %if %scan(&cluster,2) NE %str() %then %do; StdErr_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr=(StdErr_cl1**2+StdErr_cl2**2-StdErr_cl1_cl2**2)**0.5; DENDF=MIN(DENDF_CL1,DENDF_CL2); %end; %else %do; StdErr=StdErr_cl1; DENDF=DENDF_CL1; %end; T=ESTIMATE/StdErr; PValue=(1-probt(abs(T),DENDF))*2; IF PValue NE . THEN DO; IF PValue <0.001 THEN Stars="***"; ELSE IF PValue <0.01 THEN Stars="** "; ELSE IF PValue <0.05 THEN Stars="* "; ELSE IF PValue <0.10 THEN Stars=". "; END; *Method 2 : Robust Cluster standard error uncorrected for fixed effect nestedness; StdErr2_cl1=StdErr_or_cl1*(((N-K)/(N-K-DF_FE_all))**0.5); %if %scan(&cluster,2) NE %str() %then %do; StdErr2_cl2=StdErr_or_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); %end; %if %scan(&cluster,3) NE %str() %then %do; StdErr2_cl3=StdErr_or_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl3=StdErr_or_cl1_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl2_cl3=StdErr_or_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl2_cl3=StdErr_or_cl1_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2=(StdErr2_cl1**2+StdErr2_cl2**2+StdErr2_cl3**2-StdErr2_cl1_cl2**2-StdErr2_cl2_cl3**2-StdErr2_cl1_cl3**2+StdErr2_cl1_cl2_cl3**2)**0.5; %end; %else %if %scan(&cluster,2) NE %str() %then %do; StdErr2_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2=(StdErr2_cl1**2+StdErr2_cl2**2-StdErr2_cl1_cl2**2)**0.5; %end; %else %do; StdErr2=StdErr2_cl1; %end; T2=ESTIMATE/StdErr2; DENDF2=N-K-DF_FE_all; PValue2=(1-probt(abs(T2),DENDF2))*2; IF PValue2 NE . THEN DO; IF PValue2 <0.001 THEN Stars2="***"; ELSE IF PValue2 <0.01 THEN Stars2="** "; ELSE IF PValue2 <0.05 THEN Stars2="* "; ELSE IF PValue2 <0.10 THEN Stars2=". "; END; CALL SYMPUT ('DENDF2',DENDF2); CALL SYMPUT ('DENDF',DENDF); CALL SYMPUT ('N',N); CALL SYMPUT ('G_CL1',DENDF_CL1+1); CALL SYMPUT ('G_CL2',DENDF_CL2+1); CALL SYMPUT ('G_CL3',DENDF_CL3+1); RUN; %if &N_FE1>0 %then %let fe1_det=(N1=%scan(&N_FE1,1)); %if &N_FE2>0 %then %let fe2_det=(N2=%scan(&N_FE2,1)); %if &N_FE3>0 %then %let fe3_det=(N3=%scan(&N_FE3,1)); %if &G_CL1>0 %then %let cluster1_det=(G1=%scan(&G_CL1,1)); %if &G_CL2>0 %then %let cluster2_det=(G2=%scan(&G_CL2,1)); %if &G_CL3>0 %then %let cluster3_det=(G3=%scan(&G_CL3,1)); DATA _NULL_; SET &data.&demean._tmp_fit_cl1 (OBS=1); CALL SYMPUT ('R2',cvalue1); RUN; %if &nestedness NE NO %then %do; TITLE "FINAL &model FIRST STEP &model_fe ESTIMATES WITH WITH ROBUST &model_cl STANDARD ERRORS"; TITLE2 "Dependent variable: %scan(&endogenous,&j)"; TITLE3 "Fixed effects: &fe1 &fe1_det &fe2 &fe2_det &fe3 &fe3_det &fe_other &warning"; TITLE4 "Clusters: &cluster1 &cluster1_det &cluster2 &cluster2_det &cluster3 &cluster3_det"; TITLE5 "DF for Student T test. Min(G1,G2,G3)-1= %scan(&DENDF,1)"; TITLE6 "Number of observations: %scan(&N,1)"; TITLE7 "&Within R2: %scan(&R2,1,%str( ))"; TITLE8 "Estimation à la Stata-Reghdfe. &methodnes1"; PROC PRINT DATA=&data._EST_CL_STEP1_&j; VAR Parameter--ESTIMATE StdErr Stars PValue ; RUN; TITLE; %end; %if &nestedness NE YES %then %do; TITLE "FINAL &model FIRST STEP &model_fe ESTIMATES WITH WITH ROBUST &model_cl STANDARD ERRORS"; TITLE2 "Dependent variable: %scan(&endogenous,&j)"; TITLE3 "Fixed effects: &fe1 &fe1_det &fe2 &fe2_det &fe3 &fe3_det &fe_other &warning"; TITLE4 "Clusters: &cluster1 &cluster1_det &cluster2 &cluster2_det &cluster3 &cluster3_det"; TITLE5 "DF for Student T test. (N-K): %scan(&DENDF2,1)"; TITLE6 "Number of observations: %scan(&N,1)"; TITLE7 "&Within R2: %scan(&R2,1,%str( ))"; TITLE8 "Estimation à la R-felm. &methodnes2"; PROC PRINT DATA=&data._EST_CL_STEP1_&j; VAR Parameter--ESTIMATE StdErr2 Stars2 PValue2 ; RUN; %end; *1.2 Reestimate the first stage using PROG REG to save the predicted values (since SURVEYREG can`t do it); PROC REG DATA=&data.&demean NOPRINT; MODEL %scan(&endogenous,&j)= &instruments &var ; OUTPUT OUT=&data._tmp_S1_&j (keep=MYKEY pred) PREDICTED = pred; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; %let j=%eval(&j+1); %end; * 2.1 Run SYSLIN to get the structural estimates and save the TSLS residuals ; PROC SYSLIN data=&data.&demean 2SLS OUT=&data._tmp_S2_r ; TITLE "Intermediary step 2: Second Stage Results &FEinfo unclustered"; %if &FE NE %str() %then %do; TITLE2 "PS: Unclustered SEs need to be multiplied by ((N-K)/(N-K-(G-1)))**0.5"; TITLE3 " to take into account the demeaning of the fixed effect &FE. (G-1: DF of &FE)"; %end; ENDOGENOUS &endogenous ; INSTRUMENTS &instruments &var; MODEL &dependent = &endogenous &var / &NOINT ; OUTPUT RESIDUAL = IV_resid ; ODS GRAPHICS OFF; ods output ParameterEstimates=&data._TMP_IV_EST FitStatistics=&data._IV_FIT; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; * 3.1 Merge the TSLS resids with the first stage preds; DATA &data._tmp_S1; MERGE &data._tmp_S1_1 (rename=(pred=%scan(&endogenous,1)) drop=%scan(&endogenous,&j)) %let j=2; %do %while (%scan(&endogenous,&j) NE %str()); &data._tmp_S1_&j (rename=(pred=%scan(&endogenous,&j)) drop=MYKEY ) %let j=%eval(&j+1); %end; ;RUN; PROC SQL; CREATE TABLE &data._tmp_iv_r AS SELECT * FROM &data._tmp_S2_r(drop=&endogenous) AS AA INNER JOIN &data._tmp_S1 AS BB ON AA.MYKEY=BB.MYKEY; QUIT; %let print=NO; %CLUST3(data=&data._tmp_iv_r,dependent=IV_resid, var=&endogenous &var, cluster=&cluster, weight=&weight, multi=&multi, output=&data._EST_CL_STEP2,NOINT=&NOINT,print=&print); DATA &data._EST_CL_STEP2; MERGE &data._TMP_IV_EST (keep=VARIABLE ESTIMATE) &data._EST_CL_STEP2 (drop=ESTIMATE); N_FE1=&N_FE1; N_FE2=&N_FE2; N_FE3=&N_FE3; DF_FE_all=&DF_FE_all; N_FE1_CL1=&N_FE1_CL1; N_FE1_CL2=&N_FE1_CL2; N_FE1_CL3=&N_FE1_CL3; N_FE2_CL1=&N_FE2_CL1; N_FE2_CL2=&N_FE2_CL2; N_FE2_CL3=&N_FE2_CL3; N_FE3_CL1=&N_FE3_CL1; N_FE3_CL2=&N_FE3_CL2; N_FE3_CL3=&N_FE3_CL3; *Method 1 : Robust Cluster standard error corrected for fixed effect nestedness; StdErr_cl1=StdErr_or_cl1*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL1=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL1=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL1=N_FE3)*Max(N_FE3-1,0)))**0.5); %if %scan(&cluster,2) NE %str() %then %do; StdErr_cl2=StdErr_or_cl2*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL2=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL2=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL2=N_FE3)*Max(N_FE3-1,0)))**0.5); %end; %if %scan(&cluster,3) NE %str() %then %do; StdErr_cl3=StdErr_or_cl3*(((N-K)/(N-K-DF_FE_all+(N_FE1_CL3=N_FE1)*Max(N_FE1-1,0)+(N_FE2_CL3=N_FE2)*Max(N_FE2-1,0)+(N_FE3_CL3=N_FE3)*Max(N_FE3-1,0)))**0.5); StdErr_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl1_cl3=StdErr_or_cl1_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl2_cl3=StdErr_or_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr_cl1_cl2_cl3=StdErr_or_cl1_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr=(StdErr_cl1**2+StdErr_cl2**2+StdErr_cl3**2-StdErr_cl1_cl2**2-StdErr_cl2_cl3**2-StdErr_cl1_cl3**2+StdErr_cl1_cl2_cl3**2)**0.5; DENDF=MIN(DENDF_CL1,DENDF_CL2,DENDF_CL3); %end; %else %if %scan(&cluster,2) NE %str() %then %do; StdErr_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr=(StdErr_cl1**2+StdErr_cl2**2-StdErr_cl1_cl2**2)**0.5; DENDF=MIN(DENDF_CL1,DENDF_CL2); %end; %else %do; StdErr=StdErr_cl1; DENDF=DENDF_CL1; %end; T=ESTIMATE/StdErr; PValue=(1-probt(abs(T),DENDF))*2; IF PValue NE . THEN DO; IF PValue <0.001 THEN Stars="***"; ELSE IF PValue <0.01 THEN Stars="** "; ELSE IF PValue <0.05 THEN Stars="* "; ELSE IF PValue <0.10 THEN Stars=". "; END; *Method 2 : Robust Cluster standard error uncorrected for fixed effect nestedness; StdErr2_cl1=StdErr_or_cl1*(((N-K)/(N-K-DF_FE_all))**0.5); %if %scan(&cluster,2) NE %str() %then %do; StdErr2_cl2=StdErr_or_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); %end; %if %scan(&cluster,3) NE %str() %then %do; StdErr2_cl3=StdErr_or_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl3=StdErr_or_cl1_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl2_cl3=StdErr_or_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2_cl1_cl2_cl3=StdErr_or_cl1_cl2_cl3*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2=(StdErr2_cl1**2+StdErr2_cl2**2+StdErr2_cl3**2-StdErr2_cl1_cl2**2-StdErr2_cl2_cl3**2-StdErr2_cl1_cl3**2+StdErr2_cl1_cl2_cl3**2)**0.5; %end; %else %if %scan(&cluster,2) NE %str() %then %do; StdErr2_cl1_cl2=StdErr_or_cl1_cl2*(((N-K)/(N-K-DF_FE_all))**0.5); StdErr2=(StdErr2_cl1**2+StdErr2_cl2**2-StdErr2_cl1_cl2**2)**0.5; %end; %else %do; StdErr2=StdErr2_cl1; %end; T2=ESTIMATE/StdErr2; DENDF2=N-K-DF_FE_all; PValue2=(1-probt(abs(T2),DENDF2))*2; IF PValue2 NE . THEN DO; IF PValue2 <0.001 THEN Stars2="***"; ELSE IF PValue2 <0.01 THEN Stars2="** "; ELSE IF PValue2 <0.05 THEN Stars2="* "; ELSE IF PValue2 <0.10 THEN Stars2=". "; END; CALL SYMPUT ('DENDF2',DENDF2); CALL SYMPUT ('DENDF',DENDF); CALL SYMPUT ('N',N); CALL SYMPUT ('G_CL1',DENDF_CL1+1); CALL SYMPUT ('G_CL2',DENDF_CL2+1); CALL SYMPUT ('G_CL3',DENDF_CL3+1); RUN; %if &N_FE1>0 %then %let fe1_det=(N1=%scan(&N_FE1,1)); %if &N_FE2>0 %then %let fe2_det=(N2=%scan(&N_FE2,1)); %if &N_FE3>0 %then %let fe3_det=(N3=%scan(&N_FE3,1)); %if &G_CL1>0 %then %let cluster1_det=(G1=%scan(&G_CL1,1)); %if &G_CL2>0 %then %let cluster2_det=(G2=%scan(&G_CL2,1)); %if &G_CL3>0 %then %let cluster3_det=(G3=%scan(&G_CL3,1)); DATA _NULL_; SET &data._IV_FIT (OBS=1); CALL SYMPUT ('R2',cvalue1); RUN; %if &nestedness NE NO %then %do; TITLE "FINAL &model &model_fe ESTIMATES WITH ROBUST &model_cl STANDARD ERRORS"; TITLE2 "Dependent variable: &dependent"; TITLE3 "Endogenous variables: &endogenous"; TITLE4 "Instruments: &instruments"; TITLE5 "Fixed effects: &fe1 &fe1_det &fe2 &fe2_det &fe3 &fe3_det &fe_other &warning"; TITLE6 "Clusters: &cluster1 &cluster1_det &cluster2 &cluster2_det &cluster3 &cluster3_det"; TITLE7 "DF for Student T test. Min(G1,G2,G3)-1=%scan(&DENDF,1)"; TITLE8 "Number of observations: %scan(&N,1)"; TITLE9 "&Within R2: %scan(&R2,1,%str( ))"; TITLE10 "Estimation à la Stata-Reghdfe. &methodnes1"; PROC PRINT DATA=&data._EST_CL_STEP2; VAR Parameter ESTIMATE StdErr Stars PValue ; RUN; %end; %if &nestedness NE YES %then %do; TITLE "FINAL &model &model_fe ESTIMATES WITH ROBUST &model_cl STANDARD ERRORS"; TITLE2 "Dependent variable: &dependent"; TITLE3 "Endogenous variables: &endogenous"; TITLE4 "Instruments: &instruments"; TITLE5 "Fixed effects: &fe1 &fe1_det &fe2 &fe2_det &fe3 &fe3_det &fe_other &warning"; TITLE6 "Clusters: &cluster1 &cluster1_det &cluster2 &cluster2_det &cluster3 &cluster3_det"; TITLE7 "DF for Student T test. (N-K): %scan(&DENDF2,1)"; TITLE8 "Number of observations: %scan(&N,1)"; TITLE9 "&Within R2: %scan(&R2,1,%str( ))"; TITLE10 "Estimation à la R-felm. &methodnes2"; PROC PRINT DATA=&data._EST_CL_STEP2; VAR Parameter ESTIMATE StdErr2 Stars2 PValue2 ; RUN; %end; %end; *Deletings datasets; PROC DATASETS LIB=work NOLIST NODETAILS; DELETE _DOCTMP:; DELETE &data._TMP:; DELETE &data.&demean._TMP:; %if %upcase(&deldata)=YES %then %do; DELETE &data.&demean; %end; QUIT; TITLE; %myend :; %MEND; %MACRO CLUST3(data, dependent, var, class, format, cluster, weight, output, multi,NOINT,print); %let cluster1=%scan(&cluster,1); %let cluster2=%scan(&cluster,2); %let cluster3=%scan(&cluster,3); TITLE; TITLE "Intermediary step 1.a: Cluster with &cluster1"; PROC SURVEYREG data=&data ; CLUSTER &cluster1; %if &class NE %str() %then %do; CLASS &class; %end; %if &format NE %str() %then %do; FORMAT &format; %end; MODEL &dependent = &var/ covb &NOINT solution; ODS output parameterestimates=&data._tmp_est_cl1 FitStatistics=&data._tmp_fit_cl1 DataSummary=&data._tmp_n_cl1 covb=&data._tmp_cov_cl1; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; QUIT; DATA _NULL_; SET &data._tmp_est_cl1; COUNT=_N_; CALL SYMPUT ('K',COUNT); RUN; DATA _NULL_; SET &data._tmp_n_cl1; IF _N_=1 then CALL SYMPUT ('N',cValue1); RUN; %if &cluster2 NE %str() %then %do; TITLE "Intermediary step 1.b: Cluster with &cluster2"; PROC SURVEYREG data=&data ; CLUSTER &cluster2; %if &class NE %str() %then %do; CLASS &class; %end; %if &format NE %str() %then %do; FORMAT &format; %end; MODEL &dependent = &var/covb &NOINT solution; ODS output parameterestimates=&data._tmp_est_cl2 covb=&data._tmp_cov_cl2; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; QUIT; %if &cluster3 NE %str() %then %do; TITLE "Intermediary step 1.c: Cluster with &cluster3"; PROC SURVEYREG data=&data ; CLUSTER &cluster3; %if &class NE %str() %then %do; CLASS &class; %end; %if &format NE %str() %then %do; FORMAT &format; %end; MODEL &dependent = &var/covb &NOINT solution; ODS output parameterestimates=&data._tmp_est_cl3 covb=&data._tmp_cov_cl3; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; QUIT; %end; %if &multi = %str() %then %do; PROC FREQ DATA=&data; %if &cluster3 NE %str() %then %do; TABLES &cluster1*&cluster2*&cluster3/NOPRINT LIST OUT=&data._tmp_cell; %end; %else %do; TABLES &cluster1*&cluster2/NOPRINT LIST OUT=&data._tmp_cell; %end; RUN; PROC MEANS DATA=&data._tmp_cell NOPRINT; VAR count ; OUTPUT OUT=&data._tmp_cell2; RUN; DATA &data._tmp_cell2; SET &data._tmp_cell2; WHERE _STAT_="MAX"; CALL SYMPUT ('maxcell',count); RUN; %if %sysevalf(&maxcell)>1 %then %let multi=1; %else %let multi=0; %put %str(maxcell : &maxcell); %put %str(multi: &multi); %end; PROC SURVEYREG data=&data ; %if &multi=1 or &cluster3 NE %str() %then %do; TITLE "Intermediary step 2.a: Cluster with &cluster1*&cluster2"; cluster &cluster2 &cluster1; %end; %else %do; TITLE "Intermediary step 2.a: White Heteroscedasticity consistant standard errors (Cell=1)"; %end; %if &class NE %str() %then %do; CLASS &class; %end; %if &format NE %str() %then %do; FORMAT &format; %end; model &dependent = &var/ &NOINT covb solution; ODS output parameterestimates=&data._tmp_est_cl1_cl2 covb=&data._tmp_cov_cl1_cl2; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; QUIT; %if &cluster3 NE %str() %then %do; TITLE "Intermediary step 2.b: Cluster with &cluster2*&cluster3 "; PROC SURVEYREG data=&data ; %if &class NE %str() %then %do; CLASS &class; %end; %if &format NE %str() %then %do; FORMAT &format; %end; cluster &cluster2 &cluster3; model &dependent = &var/ &NOINT covb solution; ODS output parameterestimates=&data._tmp_est_cl2_cl3 covb=&data._tmp_cov_cl2_cl3; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; QUIT; TITLE "Intermediary step 2.c: Cluster with &cluster1*&cluster3 "; PROC SURVEYREG data=&data ; %if &class NE %str() %then %do; CLASS &class; %end; %if &format NE %str() %then %do; FORMAT &format; %end; cluster &cluster1 &cluster3; model &dependent = &var/ &NOINT covb solution; ODS output parameterestimates=&data._tmp_est_cl1_cl3 covb=&data._tmp_cov_cl1_cl3; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; QUIT; PROC SURVEYREG data=&data ; %if &multi=1 %then %do; TITLE "Intermediary step 2.d: Cluster with &cluster1*&cluster2*&cluster3 "; cluster &cluster1 &cluster2 &cluster3; %end; %else %do; TITLE "Intermediary step 2.d: White Heteroscedasticity consistant standard errors (Cell=1) "; %end; %if &class NE %str() %then %do; CLASS &class; %end; %if &format NE %str() %then %do; FORMAT &format; %end; model &dependent = &var/ &NOINT covb solution; ODS output parameterestimates=&data._tmp_est_cl1_cl2_cl3 covb=&data._tmp_cov_cl1_cl2_cl3; %if &weight ne %str() %then %do; WEIGHT &weight; %end; RUN; QUIT; %end; %end; %if &output=%str() %then %let output=&data._tmp_cl_out; DATA &output; MERGE &data._tmp_est_cl1 (DROP=tvalue probt RENAME=(stderr=StdErr_or_cl1 DENDF=DENDF_CL1)) %if &cluster2 NE %str() %then %do; &data._tmp_est_cl2 (DROP=PARAMETER ESTIMATE tvalue probt RENAME=(stderr=StdErr_or_cl2 DENDF=DENDF_CL2)) %if &cluster3 NE %str() %then %do; &data._tmp_est_cl3 (DROP=PARAMETER ESTIMATE tvalue probt RENAME=(stderr=StdErr_or_cl3 DENDF=DENDF_CL3)) %end; &data._tmp_est_cl1_cl2 (DROP= PARAMETER ESTIMATE tvalue probt RENAME=(stderr=StdErr_or_cl1_cl2 DENDF=DENDF_Cl1_CL2)) %if &cluster3 NE %str() %then %do; &data._tmp_est_cl2_cl3 (DROP= PARAMETER ESTIMATE tvalue probt RENAME=(stderr=StdErr_or_cl2_cl3 DENDF=DENDF_Cl2_CL3)) &data._tmp_est_cl1_cl3 (DROP= PARAMETER ESTIMATE tvalue probt RENAME=(stderr=StdErr_or_cl1_cl3 DENDF=DENDF_Cl1_CL3)) &data._tmp_est_cl1_cl2_cl3 (DROP= PARAMETER ESTIMATE tvalue probt RENAME=(stderr=StdErr_or_cl1_cl2_cl3 DENDF=DENDF_Cl1_CL2_cl3)) %end; %end; ; K=&k; N=&N; %if &print NE NO %then %do; %if &cluster3 NE %str() %then %do; StdErr=(StdErr_or_cl1**2+StdErr_or_cl2**2+StdErr_or_cl3**2-StdErr_or_cl1_cl2**2-StdErr_or_cl2_cl3**2-StdErr_or_cl1_cl3**2+StdErr_or_cl1_cl2_cl3**2)**0.5; DENDF=MIN(DENDF_CL1,DENDF_CL2,DENDF_CL3); %end; %else %if &cluster2 NE %str() %then %do; StdErr=(StdErr_or_cl1**2+StdErr_or_cl2**2-StdErr_or_cl1_cl2**2)**0.5; DENDF=MIN(DENDF_CL1,DENDF_CL2); %end; %else %do; StdErr=StdErr_or_cl1; DENDF=DENDF_CL1; %end; T=ESTIMATE/StdErr; PValue =(1-probt(abs(T),DENDF))*2; IF PValue NE . THEN DO; IF PValue<0.001 THEN Stars="***"; ELSE IF PValue<0.01 THEN Stars="** "; ELSE IF PValue<0.05 THEN Stars="* "; ELSE IF PValue<0.10 THEN Stars=". "; END; %end; CALL SYMPUT ('DENDF',DENDF); CALL SYMPUT ('G_CL1',DENDF_CL1+1); CALL SYMPUT ('G_CL2',DENDF_CL2+1); CALL SYMPUT ('G_CL3',DENDF_CL3+1); RUN; DATA _NULL_; SET &data._tmp_fit_cl1 (OBS=1); CALL SYMPUT ('R2',cvalue1); RUN; %let cluster1_det=%str(); %let cluster2_det=%str(); %let cluster3_det=%str(); %if &G_CL1>0 %then %let cluster1_det=(G1=%scan(&G_CL1,1)); %if &G_CL2>0 %then %let cluster2_det=(G2=%scan(&G_CL2,1)); %if &G_CL3>0 %then %let cluster3_det=(G3=%scan(&G_CL3,1)); %if &print NE NO %then %do; TITLE "FINAL OLS ESTIMATES WITH ROBUST CLUSTER STANDARD ERRORS"; TITLE2 "Dependent variable: &dependent"; TITLE3 "Clusters: &cluster1 &cluster1_det &cluster2 &cluster2_det &cluster3 &cluster3_det"; TITLE4 "DF for Student T test: Min(G1,G2,G3)-1= %scan(&DENDF,1) "; TITLE5 "Number of observations: %scan(&N,1)"; TITLE6 "R2: %scan(&R2,1,%str( ))"; PROC PRINT DATA=&output; VAR Parameter--Estimate StdErr Stars PValue; RUN; PROC DATASETS LIB=work /*NOLIST NODETAILS*/; DELETE _DOCTMP:; DELETE &data._tmp:; QUIT; %end; TITLE; %MEND;