# Simulation: citation data are generated under a "proximity" model and then
# fitted with a different (multiplicative, G&M-style) model, to see how the
# mismatch distorts the recovered media-bias estimates.
#
# We generate data with a model
# where people prefer to cite sources whose ideology is close to theirs.
#
# seed of 7 yields overestimates, 3 yields underestimates...
# NOTE(review): that observation was made by the original author; re-check it
# after any change to the fitting code below.
set.seed(7)

# 10 legislators with biases that span the range -1 to 1:
Nleg <- 10
Lx <- seq(-1, 1, length.out = Nleg)

# and 10 think tanks, with "ideology" measures similarly disposed:
Ntt <- 10
Tb <- seq(-1, 1, length.out = Ntt)

# and random "valences":
Ta <- runif(Ntt)

# Citation probabilities under the generating ("proximity") model.
# In the expression for citation probabilities
# we replace G&M's Tb[j]*Lx[i]
# with (1-abs(Tb[j]-Lx[i])/2)
#
# positions: ideological position of each citer (one row per citer)
# valence, ideology: per-think-tank parameters (one column per think tank)
# Returns a length(positions) x length(valence) matrix whose rows sum to 1.
proximity_probs <- function(positions, valence, ideology) {
  probs <- matrix(nrow = length(positions), ncol = length(valence))
  for (i in seq_along(positions)) {
    weights <- exp(valence + (1 - abs(ideology - positions[i]) / 2))
    probs[i, ] <- weights / sum(weights)
  }
  probs
}

# Citation probabilities under the fitted (G&M-style) model:
# row-wise softmax of valence[j] + ideology[j] * positions[i].
gm_probs <- function(positions, valence, ideology) {
  weights <- exp(sweep(outer(positions, ideology), 2, valence, "+"))
  weights / rowSums(weights)
}

# True legislator-by-thinktank citation probabilities:
M3LCP <- proximity_probs(Lx, Ta, Tb)

# noise parameter
Gsd <- 3
Cnoise <- matrix(rnorm(Nleg * Ntt, sd = Gsd), nrow = Nleg, ncol = Ntt)
M3LCC <- round(100 * M3LCP + Cnoise)
M3LCC[M3LCC < 0] <- 0 # counts can't be negative!

# Now work back to estimated legislator-by-thinktank citation probabilities
# (each row of counts divided by its row total):
M3LCPH <- M3LCC / rowSums(M3LCC)

# Now estimate Ta and Tb, using data generated by new model,
# but assuming original G&M model in parameter optimization:
# Map legislators' biases into 0:1 interval:
LxH <- (Lx + 1) / 2

# random starting parameter values for nlm():
X <- c(runif(Ntt, min = -1, max = 1), runif(Ntt, min = -1, max = 1))

# Function we'll ask nlm() to optimize: sum of squared differences between
# the observed legislator citation shares (M3LCPH) and the G&M-model shares.
# X holds the valences in its first half and the ideologies in its second.
f7 <- function(X) {
  n_tt <- ncol(M3LCPH)
  TaH <- X[seq_len(n_tt)]
  # The original wrote X[Ntt+1:(2*Ntt)], which parses as Ntt + (1:(2*Ntt));
  # this selects exactly elements n_tt+1 .. 2*n_tt.
  TbH <- X[n_tt + seq_len(n_tt)]
  sum((M3LCPH - gm_probs(LxH, TaH, TbH))^2)
}

nlm.f7 <- nlm(f7, X, print.level = 0)
TaE7 <- nlm.f7$estimate[seq_len(Ntt)]
TbE7 <- nlm.f7$estimate[Ntt + seq_len(Ntt)]

# OK, now get Cm estimates:
# First generate media citation probs, also assuming new (1-abs(diff)) model:
Nmed <- 15
Cm <- seq(-1, 1, length.out = Nmed)
M3MCP <- proximity_probs(Cm, Ta, Tb)

Cnoise <- matrix(rnorm(Nmed * Ntt, sd = Gsd), nrow = Nmed, ncol = Ntt)
M3MCC <- round(100 * M3MCP + Cnoise)
M3MCC[M3MCC < 0] <- 0

# work back from counts to estimated media-by-thinktank citation probabilities:
M3MCPH <- M3MCC / rowSums(M3MCC)

# Now use skewed TaE7 and TbE7 estimates to estimate CmH from "observed" citations:
Y <- runif(Nmed, min = -1, max = 1) # random starting point

# Objective for the media step: squared error between the observed media
# citation shares and the G&M-model shares at candidate positions Y.
# Scores against M3MCPH (shares recovered from the noisy counts); the original
# compared against the noise-free M3MCP, leaving M3MCPH unused.
f8 <- function(Y) {
  sum((M3MCPH - gm_probs(Y, TaE7, TbE7))^2)
}

nlm.f8 <- nlm(f8, Y, print.level = 0)
CmE8 <- nlm.f8$estimate

# Rescale true and estimated media biases into 0:1 for comparison:
CmS <- (Cm - min(Cm)) / (max(Cm) - min(Cm))
CmE8S <- (CmE8 - min(CmE8)) / (max(CmE8) - min(CmE8))

# Plot only the interior outlets. The original index `2:Nmed-1` parses as
# (2:Nmed)-1, i.e. 1:(Nmed-1); 2:(Nmed-1) is what the expression reads as.
inner <- 2:(Nmed - 1)

# NOTE(review): the device opened here is not closed in this part of the
# script; make sure dev.off() is called once all plotting is done.
png(filename = "GMover%d.png", width = 700, height = 700)
plot(CmS[inner], CmE8S[inner],
  type = "n",
  xlab = "True 'media biases' (scaled into 0:1)",
  ylab = "Estimated 'media biases' (scaled into 0:1)",
  main = "Mismatch between generating model and estimated model"
)
text(0, .85,
  "Generating model uses 1-abs(b[j]-c[m]),\n\nEstimated model assumes b[j]*c[m]",
  pos = 4, offset = 0, cex = .9
)
points(CmS[inner], CmE8S[inner], pch = "o", col = "red")
abline(0, 1)