%**************************************************;
%* Olivier Godechot, V1.5, 19.04.2008             *;
%**************************************************;
%* Macro calculating the Clustering coefficient.  *;
%**************************************************;

/*
   %cluster : per-vertex clustering coefficient of a network stored as an
   arc list (one row per arc).

   Parameters
     dataset= SAS dataset holding the arcs.
     orig=    variable identifying the origin vertex of each arc.
     dest=    variable identifying the destination vertex of each arc.
     value=   variable holding the arc value; when left blank every arc
              gets the value 1.
     type=    directed (default) or undirected. With undirected the
              adjacency matrix is added to its transpose before the
              computation. Compared case-insensitively.
     form=    Yes (default) displays the input window before running;
              any other value skips it. Compared case-insensitively.

   Output
     Dataset _<first word of dataset>_CCi with the vertex id (named after
     orig=), nblink_1, potlink, deg and CC = min(nblink_1/potlink, 1)
     (CC is left missing when potlink is not positive).
     PROC MEANS statistics on CC and a short printed note.
     The two intermediate datasets (_..._CC0 and _..._CC1) are deleted.
*/
%Macro cluster(dataset=,orig=,dest=,value=,type=directed,form=Yes);

   /* Input window. Its fields are the macro parameters themselves, so  */
   /* whatever the user types replaces the values passed in the call.   */
   %window Welcome
      #1  @1 "This macro calculates the clustering coefficient for a network"
      #3  @1 "Data must be organized the following way : "
      #5  @1 "Origin_id Destiny_id Value"
      #6  @1 "a1572 a1759 2"
      #7  @1 "a1088 a2716 1"
      #8  @1 "a1863 a2716 1"
      #10 @1 "Reading : An arc of value 2 is going from vertex a1572 to vertex a1759"
      #12 @1 "Name of your SAS dataset ? (dataset=)"
          @75 dataset 30 attr=rev_video auto=yes
      #14 @1 "Variable name that identifies the origin of an arc ? (orig=)"
          @75 orig 30 attr=rev_video auto=yes
      #16 @1 "Variable name that identifies the destiny of an arc ? (dest=)"
          @75 dest 30 attr=rev_video auto=yes
      #18 @1 "Variable name for the value of an arc ? (value=)"
          @75 value 30 attr=rev_video auto=yes
      #20 @1 "Undirected or directed arcs for the clustering coefficients ? (type=)"
          @75 type 30 attr=rev_video auto=yes
      #24 @1 "Once completed, press Enter "
      #26 @1 %nrstr("[Complete syntax: %cluster(dataset=,orig=,dest=,value=,type=,form=); ]")
      #27 @1 "* Required. Id variables must not contain the character # "
      #30 @50 "Credits : Olivier GODECHOT. http://olivier.godechot.free.fr/ "
   ;

   /* Case-insensitive so that form=yes / form=YES also open the window. */
   %if %upcase(&form)=YES %then %display Welcome;

   /* Work dataset names, derived from the first word of dataset=.       */
   %let dat0=_%scan(&dataset,1)_CC0;   /* vertex ids                     */
   %let dat1=_%scan(&dataset,1)_CC1;   /* link counts per vertex         */
   %let dat2=_%scan(&dataset,1)_CCi;   /* final result                   */
   /* dest_ is only referenced by the disabled pairwise export below.    */
   %let dest_=%scan(&dest,1);

   proc iml;
      use &dataset;
      read all var{&orig} into orig;
      read all var{&dest} into dest;
      %if &value= %then %do;
         /* No value variable supplied: every arc counts for 1. */
         value=J(nrow(orig),1,1);
      %end;
      %else %do;
         read all var{&value} into value;
      %end;

      /* adj function from J. Moody.                                      */
      /* Builds the adjacency matrix of the arc list (snd, rcv, value).   */
      /* Nodes are the sorted unique values of snd and rcv, with '.'      */
      /* (character) or . (numeric) removed. The returned matrix has one  */
      /* extra leading column: the node ids when they are numeric,        */
      /* otherwise the index 1..g (the id/index mapping is printed).      */
      start adj(snd,rcv,value);
         nomset=unique(snd,rcv);
         if type(nomset)='C' then do;
            nomset=setdif(nomset,'.');
         end;
         else do;
            nomset=setdif(nomset,.);
         end;
         adjmat=j(ncol(nomset),ncol(nomset),0);
         do i=1 to nrow(snd);
            sendloc=loc(nomset=snd[i]);
            /* Skip rows whose sender is not in the node set. */
            if type(sendloc)='N' then do;
               rcvset=unique(rcv[i,]);
               if type(rcvset)='C' then do;
                  rcvset=setdif(rcvset,'.');
               end;
               else do;
                  rcvset=setdif(rcvset,.);
               end;
               /* rcvset is empty (type U) when the receiver is missing. */
               if type(rcvset)^='U' then do;
                  do j=1 to ncol(rcvset);
                     jloc=loc(nomset=rcvset[j]);
                     adjmat[sendloc,jloc]=adjmat[sendloc,jloc]+value[i];
                  end;
               end;
            end;
         end;
         nomset=nomset`;
         if type(nomset)='N' then do;
            adjmat=nomset||adjmat;
         end;
         else do;
            print 'Character values can not be appended to Adjacency Matrix.';
            print 'The nodes are thus labeled from 1 to g, in the following order:';
            idx=1:nrow(adjmat);
            idx=idx`;
            print nomset idx;
            adjmat=idx||adjmat;
         end;
         return(adjmat);
      finish;

      /* reach function from J. Moody.                                    */
      /* Not called anywhere in this macro. Starting from r=inmat, it     */
      /* multiplies by inmat repeatedly and, at step t, adds t to the     */
      /* off-diagonal cells that are nonzero in the current power and     */
      /* exceed the current r, until a step changes nothing.              */
      start reach (inmat);
         r=inmat;
         rt=r;
         t=2;
         do until (sdif=0);
            rt=rt*inmat;      /* multiply rt by inmat = next power of inmat */
            rt0=rt-diag(rt);  /* same matrix with the diagonal set to 0     */
            rt01=rt0>0;       /* make nonzero elements = 1                  */
            mark=rt01>r;
            tmark=t#mark;     /* replacement matrix holding the value t     */
            k=tmark+r;
            sk=sum(k);
            sr=sum(r);
            sdif=sum(sk-sr);  /* when this is zero, no new paths can be made */
            t=t+1;
            r=tmark+r;
            free drt rt0 rt01 mark k sk sr;
         end;
         return(r);
      finish;

      /* matid: returns a two-column matrix listing every ordered pair    */
      /* (id[i], id[j]), i varying slowest. Only referenced by the        */
      /* disabled pairwise export further down.                           */
      start matid(id);
         n=nrow(id);
         do i=1 to nrow(id);
            do j=1 to nrow(id);
               couple=id[i] || id[j];
               matid=matid || T(couple);
            end;
         end;
         return(T(matid));
      finish;

      /* Adjacency matrix without the leading id/index column. */
      adjmat=adj(orig,dest,value);
      adjmat=adjmat[,2:ncol(adjmat)];

      /* Size of the matrix. */
      n=nrow(adjmat);

      /* Symmetrize if undirected. Case-insensitive because the input    */
      /* window itself spells the option "Undirected".                   */
      %if %upcase(&type)=UNDIRECTED %then %do;
         adjmat=adjmat+T(adjmat);
      %end;

      matb=adjmat#(J(n,n,1)-I(n));       /* drop self-loops                 */
      dic=matb>J(n,n,0);                 /* 1 where an arc i -> j exists    */
      symdic=(matb+T(matb))>J(n,n,0);    /* 1 where i and j are linked in   */
                                         /* either direction (neighbours)   */

      /* CCij[i,j] = number of neighbours k of i that send an arc to j,  */
      /* kept only when j is itself a neighbour of i.                    */
      CCij=(symdic*dic)#symdic;

      /* Vertex ids, filtered exactly like the node set inside adj so    */
      /* that row r of id is the vertex behind row r of the matrices.    */
      /* Without the filter a missing origin or destination adds a row   */
      /* to id and the positional merge below pairs coefficients with    */
      /* the wrong vertices.                                             */
      id=unique(orig,dest);
      if type(id)='C' then do;
         id=setdif(id,'.');
      end;
      else do;
         id=setdif(id,.);
      end;
      id=T(id);

      /* Number of neighbours of each vertex. */
      deg=symdic*J(n,1,1);

      /* Potential links among the neighbours, then the degree. The      */
      /* same expression is used for directed and undirected networks.   */
      potlink=deg#(deg-1) || deg;

      /* Columns: observed links among neighbours, potential links, deg. */
      CC= CCij*J(n,1,1) || potlink;

      /* Disabled pairwise export, kept for reference:
         CCijbis=matid(id) || T(shape(CCij,1));
         print CCijbis;
         create &dat1 from CCijbis[colname={&orig &dest_ CCij}];
         append from CCijbis;
      */

      /* Creation of the SAS datasets. */
      create &dat0 from id[colname={&orig }];
      append from id;
      create &dat1 from CC[colname={nblink_1 potlink deg}];
      append from CC;
   quit;

   /* Positional (one-to-one) merge: row r of the ids with row r of the  */
   /* counts. The coefficient is capped at 1 and left missing when the   */
   /* vertex has no potential link.                                      */
   data &dat2;
      merge &dat0 &dat1;
      if potlink>0 then CC=min(nblink_1/potlink,1);
   run;

   proc means data=&dat2 N NMISS MEAN STD MIN Q1 MEDIAN Q3 MAX ;
      var CC;
      Title "Statistics on clustering coefficients";
   run;

   /* Echo where the result is stored and the full call, type= included, */
   /* so the run can be reproduced from the output.                      */
   DATA _NULL_;
      file print;
      PUT "Clustering coefficients are stored in the &dat2 dataset.";
      PUT "The macro program was ";
      put %nrstr("%cluster");
      PUT "(dataset=&dataset,orig=&orig,dest=&dest,value=&value,type=&type,form=&form);";
   run;

   /* Remove the intermediate datasets. */
   proc datasets;
      delete &dat1 &dat0;
   quit;

%MEND;