olivier godechot

Quelques fonctions utiles pour R

Je construis parfois des petites fonctions qui peuvent être utiles.

## Lags and forwards

#Simplelag is the equivalent of the lag function in SAS. Forward is the opposite of simple lag.

simplelag <-function(x,mylag=1,outside=NA)
{
  myend<-length(x)- mylag
  y<-c(replicate(mylag,outside),x[1:myend])
}
forward<-function(x,myforward=1,outside=NA)
{
  mystart<-myforward+1
  y<-c(x[mystart:length(x)],replicate(myforward,outside))
}

#Example

#Value of d$x one row above
d$lag_x<-simplelag(d$x)
#Value of d$x three rows above, 0 in the two first rows
d$lag3_x<-simplelag(d$x,mylag=3, outside=0)
#Value of d$x two rows below
d$fwd2_x<-forward(d$x,myforward=2)

## Retain and forward

Retain is the equivalent of the retain instruction in SAS. Obtain is the opposite of Retain.

retain<-function(x,event,outside=NA)
{
  indices <- c(1,which(event==TRUE), length(x)+1)
  values <- c(outside,x[event==TRUE])
  y<- rep(values, diff(indices))
}

obtain<-function(x,event,outside=NA)
{
  indices <- c(0,which(event==TRUE), length(x))
  values <- c(x[event==TRUE],outside)
  y<- rep(values, diff(indices))
}

#Example

#Retain in the following rows the last x where event =1
d$last_x<-retain(d$x,d$event==1)

#Obtain in the first rows the first x where event =2
d$first_x<-obtain(d$x,d$event==2)

## Winsoring

winsor<-function(x,tmax=0.99,tmin=0.01)
{
  y<-pmin(x,quantile(x,probs=tmax,na.rm=TRUE))
  y<-pmax(y,quantile(y,probs=tmin,na.rm=TRUE))
}

## egen : Statistics by group

# Egen produces statistics of a variable by group, such as mean, median, standard deviation, etc. It is the equivalent of Stata's egen function.

egen<-function (x,y,myfun=mean)
{
  data_in<-data.frame(x,y)
  data_out<-within(data_in,{z = ave(x,y,FUN=myfun,na.rm=TRUE)} )
  return(data_out$z)
}

#Example

#Mean of x by category
d$mean_x<-egen(d$x,d$category)

#Standard deviation of x by category
d$sd_x<-egen(d$x,d$category,myfun=sd)

#Median of x by category
d$median_x<-egen(d$x,d$category,myfun=median)

## Triming

trim <- function (x) gsub("^\\s+|\\s+$", "", x)

## Reversing a string

strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse="") 

## Clustering

# clx and mclx enable to robust-cluster standard errors in a regressions. Those functions are not mine. clx clusters according to one variable, mclx according to two variables.

clx <-
  function(fm, dfcw, cluster){
    library(sandwich)
    library(lmtest)
    M <- length(unique(cluster))
    N <- length(cluster)
    dfc <- (M/(M-1))*((N-1)/(N-fm$rank))
    u <- apply(estfun(fm),2,
               function(x) tapply(x, cluster, sum))
    vcovCL <- dfc*sandwich(fm, meat=crossprod(u)/N)*dfcw
    coeftest(fm, vcovCL) }

mclx <-
  function(fm, dfcw, cluster1, cluster2){
    library(sandwich)
    library(lmtest)
    cluster12 = paste(cluster1,cluster2, sep="")
    M1 <- length(unique(cluster1))
    M2 <- length(unique(cluster2))
    M12 <- length(unique(cluster12))
    N <- length(cluster1)
    K <- fm$rank
    dfc1 <- (M1/(M1-1))*((N-1)/(N-K))
    dfc2 <- (M2/(M2-1))*((N-1)/(N-K))
    dfc12 <- (M12/(M12-1))*((N-1)/(N-K))
    u1 <- apply(estfun(fm), 2,
                function(x) tapply(x, cluster1, sum))
    u2 <- apply(estfun(fm), 2,
                function(x) tapply(x, cluster2, sum))
    u12 <- apply(estfun(fm), 2,
                 function(x) tapply(x, cluster12, sum))
    vc1 <- dfc1*sandwich(fm, meat=crossprod(u1)/N )
    vc2 <- dfc2*sandwich(fm, meat=crossprod(u2)/N )
    vc12 <- dfc12*sandwich(fm, meat=crossprod(u12)/N)
    vcovMCL <- (vc1 + vc2 - vc12)*dfcw
    coeftest(fm, vcovMCL)}


 
 


Français | English

Actualités   

OgO: plus ici|more here

[Publications] Andersson (Jenny), Godechot (Olivier) [eds.], , Destabilizing Orders – Understanding the Consequences of Neoliberalism: plus ici|more here

Tweets (rarely/rarement): @OlivierGodechot

[Webmestre]

[Fil rss]

[V. 0.93]

HOP

Système d'aide à la publication sur Internet


0019.33

clics / mois.