%**************************************************; %* Olivier Godechot, V1.1 le 12.04.2008 *; %**************************************************; %* Macro calculating geodesic distances *; %**************************************************; %Macro geodesic(dataset=,orig=,dest=,type=undirected,value=,form=Yes); %window Welcome #1 @1 "This macro calculates the undirected or directed geodesic distances for a network" #3 @1 "Data must be organized the following way : " #5 @1 "Origin_id Destiny_id " #6 @1 "a1572 a1759" #7 @1 "a1088 a2716" #8 @1 "a1863 a2716" #10 @1 "Reading : An arc is going from vertex a1572 to vertex a1759" #12 @1 "What is the name of your SAS dataset ? * (dataset=) " @75 dataset 30 attr=rev_video auto=yes #14 @1 "Variable name that identifies the origin vertex of an arc ? * (orig=)" @75 orig 30 attr=rev_video auto=yes #16 @1 "Variable name that identifies the destination vertex of an arc ? * (dest=)" @75 dest 30 attr=rev_video auto=yes #18 @1 "Variable name for the value of an arc ? (value=)" @75 value 30 attr=rev_video auto=yes #20 @1 "Do you prefer an undirected or directed geodesic distance ? (type=)" @75 type 30 attr=rev_video auto=yes #22 @1 "Once completed, press Enter " #24 @1 %nrstr("[Complete syntax: %geodesic(dataset=,orig=,dest=,value=,type=,form=); ]") #25 @1 "(*) Required. Id variables must not contain the character # " #30 @50 "Credits : Olivier GODECHOT http://olivier.godechot.free.fr/ On the basis of IML programs from James Moody" ; %if &form=Yes %then %display Welcome; %let dat0=_%scan(&dataset,1)_d0; %let dat1=_%scan(&dataset,1)_d1; %let dat2=_%scan(&dataset,1)_dist; %let dest_=%scan(&dest,1); proc iml; use &dataset; read all var{&orig} into orig; read all var{&dest} into dest; %if &value= %then %do; value=J(nrow(orig),1,1); %end; %else %do; read all var{&value} into value; %end; %* adj fonction from J. Moody; start adj(snd,rcv,value); nomset=unique(snd,rcv); if type(nomset)='C' then do; nomset=setdif(nomset,'.'); end; else do; nomset=setdif(nomset,.); end; adjmat=j(ncol(nomset),ncol(nomset),0); do i=1 to nrow(snd); sendloc=loc(nomset=snd[i]); if type(sendloc)='N' then do; rcvset=unique(rcv[i,]); if type(rcvset)='C' then do; rcvset=setdif(rcvset,'.'); end; else do; rcvset=setdif(rcvset,.); end; if type(rcvset)^='U' then do; do j=1 to ncol(rcvset); jloc=loc(nomset=rcvset[j]); adjmat[sendloc,jloc]=adjmat[sendloc,jloc]+value[i]; end; end; end; end; nomset=nomset`; if type(nomset)='N' then do; adjmat=nomset||adjmat; end; else do; idx=1:nrow(adjmat); idx=idx`; adjmat=idx||adjmat; end; return(adjmat); finish; %* reach fonction from J. Moody; start reach (inmat); r=inmat; rt=r; t=2; do until (sdif=0); rt=rt*inmat; /* multiply tmat by inmat=taking it to the next power */ rt0=rt-diag(rt); /* give matrix with diag=0 */ rt01=rt0>0; /* make nonzero elements=1 */ mark=rt01>r; tmark=t#mark; /* makes a replacement matrix of vavlue t */ k=tmark+r; sk=sum(k); sr=sum(r); sdif=sum(sk-sr); /* when this is zero, no new paths can be made */ t=t+1; r=tmark+r; free drt rt0 rt01 mark k sk sr; end; return(r); finish; %* My fonction; start matid(id); n=nrow(id); do i=1 to nrow(id); do j=1 to nrow(id); couple=id[i] || id[j]; matid=matid || T(couple); end; end; return(T(matid)); finish; adjmat=adj(orig,dest,value); adjmat=adjmat[,2:ncol(adjmat)]; %if &type=undirected %then %do; adjmat=adjmat+T(adjmat); %end; geod=reach(adjmat); id=T(unique(orig,dest)); geodat= T(shape(geod,1)); matid=matid(id); create &dat0 from matid[colname={&orig &dest_}]; append from matid; create &dat1 from geodat[colname={dist}]; append from geodat; quit; data &dat2; merge &dat0 &dat1; if dist =0 and &orig ne &dest_ then dist=.; run; proc freq data=&dat2; tables dist / missing; where &orig ne &dest_; Title "Distribution of geodesic distances"; run; proc means data=&dat2 N NMISS MEAN STD MIN Q1 MEDIAN Q3 MAX ; var dist; where &orig ne &dest_; Title "Statistics on geodesic distances"; run; DATA _NULL_; file print; PUT "Distances are stored in the &dat2 dataset."; PUT "The macro program was "; put %nrstr("%geodesic"); PUT "(dataset=&dataset,orig=&orig,dest=&dest,value=&value,type=&type,form=&form);"; run; proc datasets; delete &dat1 &dat0; quit; %MEND;