# We generate data with a model
#   where people prefer to cite sources whose ideology is close to theirs.
#
# seed of 7 yields overestimates, 3 yields underestimates...
set.seed(7)

# Legislators: 10 of them, with biases evenly spaced over [-1, 1].
Nleg <- 10
Lx <- seq(-1, 1, length.out = Nleg)

# Think tanks: 10 of them, with "ideology" positions spaced the same way,
# plus a random "valence" for each (uniform on [0, 1]).
Ntt <- 10
Tb <- seq(-1, 1, length.out = Ntt)
Ta <- runif(Ntt)

# Citation probabilities follow a multinomial-logit form in which
# G&M's interaction term Tb[j]*Lx[i] is replaced by the closeness
# measure (1 - abs(Tb[j] - Lx[i])/2).
# Each row i holds legislator i's probabilities over all think tanks.
M3LCP <- matrix(NA_real_, nrow = Nleg, ncol = Ntt)
for (i in seq_len(Nleg)) {
  w <- exp(Ta + (1 - abs(Tb - Lx[i]) / 2))
  M3LCP[i, ] <- w / sum(w)
}

# Turn probabilities into noisy citation counts: scale by 100, add
# Gaussian noise with standard deviation Gsd, round to whole numbers.
Gsd <- 3
Cnoise <- matrix(rnorm(Nleg * Ntt, sd = Gsd), nrow = Nleg, ncol = Ntt)
M3LCC <- round(100 * M3LCP + Cnoise)
# Counts can't be negative, so floor them at zero.
M3LCC <- replace(M3LCC, M3LCC < 0, 0)

# Convert the noisy counts back into estimated legislator-by-think-tank
# citation probabilities: each row is divided by its own total.
M3LCPH <- M3LCC / rowSums(M3LCC)

# Next we estimate Ta and Tb from data generated by the new model,
#     while the optimisation assumes the original G&M model.
# Legislators' biases are first mapped from [-1, 1] into the 0:1 interval.
LxH <- (Lx + 1) / 2

# Random starting values for nlm(): 2*Ntt draws, uniform on [-1, 1]
# (first Ntt for the valences, last Ntt for the slopes).
X <- runif(2 * Ntt, min = -1, max = 1)

# Objective function we'll ask nlm() to minimise.
#
# Sum of squared differences between the "observed" legislator citation
# shares (M3LCPH) and the shares predicted by the original G&M-style
# multinomial-logit model,
#     p[i,j] = exp(TaH[j] + TbH[j]*LxH[i]) / sum_k exp(TaH[k] + TbH[k]*LxH[i]).
#
# X:       numeric vector of length 2*Ntt. The first half holds the trial
#          valences (TaH), the second half the trial slopes (TbH).
# Returns: the scalar sum of squared errors over all legislators and
#          think tanks.
# Reads LxH and M3LCPH from the enclosing environment.
f7 <- function(X) {
  # Take the number of think tanks from the parameter vector itself rather
  # than hard-coding it, and fail loudly on a shape mismatch instead of
  # silently recycling.
  Ntt <- length(X) %/% 2
  stopifnot(
    length(X) == 2 * Ntt,
    ncol(M3LCPH) == Ntt,
    nrow(M3LCPH) == length(LxH)
  )
  TaH <- X[seq_len(Ntt)]
  # Note the offset form: `Ntt+1:(2*Ntt)` would parse as Ntt + (1:(2*Ntt))
  # and index past the end of X.
  TbH <- X[Ntt + seq_len(Ntt)]

  e <- 0
  for (i in seq_along(LxH)) {
    # Unnormalised logit weights for legislator i over all think tanks.
    w <- exp(TaH + TbH * LxH[i])
    e <- e + sum((M3LCPH[i, ] - w / sum(w))^2)
  }
  e
}

# Minimise f7 from the random start X; nlm() returns the minimiser in
# $estimate.
nlm.f7 <- nlm(f7, X, print.level = 0)

# Split the fitted parameter vector into its two halves, using Ntt instead
# of hard-coded positions so the split stays in step with the number of
# think tanks: first Ntt entries are valences, next Ntt are slopes.
TaE7 <- nlm.f7$estimate[seq_len(Ntt)]
TbE7 <- nlm.f7$estimate[Ntt + seq_len(Ntt)]

# Media outlets: simulate their citations so that Cm can be estimated.

# 15 outlets with true biases evenly spaced over [-1, 1]; citation
# probabilities use the same (1 - abs(diff)/2) closeness model as above.
Nmed <- 15
Cm <- seq(-1, 1, length.out = Nmed)
M3MCP <- matrix(NA_real_, nrow = Nmed, ncol = Ntt)
for (i in seq_len(Nmed)) {
  w <- exp(Ta + (1 - abs(Tb - Cm[i]) / 2))
  M3MCP[i, ] <- w / sum(w)
}

# Noisy counts: scale by 100, add Gaussian noise (sd = Gsd), round,
# and floor at zero.
Cnoise <- matrix(rnorm(Nmed * Ntt, sd = Gsd), nrow = Nmed, ncol = Ntt)
M3MCC <- round(100 * M3MCP + Cnoise)
M3MCC <- replace(M3MCC, M3MCC < 0, 0)

# Convert counts back into estimated media-by-think-tank citation
# probabilities by dividing each row by its total.
M3MCPH <- M3MCC / rowSums(M3MCC)

# The skewed TaE7 and TbE7 estimates are used next to estimate CmH from the
# "observed" citations; Y is the random starting point for that search.
Y <- runif(Nmed, min = -1, max = 1)
# Objective function for nlm(): sum of squared differences between the
# "observed" media citation shares (M3MCPH, derived from the noisy counts)
# and the shares predicted by the G&M-style multinomial-logit model using
# the previously fitted think-tank parameters,
#     p[i,j] = exp(TaE7[j] + TbE7[j]*CmH[i]) / sum_k exp(TaE7[k] + TbE7[k]*CmH[i]).
#
# Y:       numeric vector of trial media biases, one per outlet.
# Returns: the scalar sum of squared errors over all outlets and think tanks.
# Reads TaE7, TbE7 and M3MCPH from the enclosing environment.
#
# The fit targets M3MCPH, not the noise-free generating probabilities
# M3MCP: the estimator should only see what could be observed, which is
# also how f7 treats the legislator data.
f8 <- function(Y) {
  CmH <- Y
  # Fail loudly on a shape mismatch instead of silently recycling.
  stopifnot(
    nrow(M3MCPH) == length(CmH),
    ncol(M3MCPH) == length(TaE7),
    length(TaE7) == length(TbE7)
  )

  e <- 0
  for (i in seq_along(CmH)) {
    # Unnormalised logit weights for outlet i over all think tanks.
    w <- exp(TaE7 + TbE7 * CmH[i])
    e <- e + sum((M3MCPH[i, ] - w / sum(w))^2)
  }
  e
}
# Minimise f8 from the random start Y and keep the fitted media biases.
nlm.f8 <- nlm(f8, Y, print.level = 0)
CmE8 <- nlm.f8$estimate

# Min-max rescale both the true and the estimated biases into 0:1 so they
# can be compared on a common scale.
CmS <- (Cm - min(Cm)) / diff(range(Cm))
CmE8S <- (CmE8 - min(CmE8)) / diff(range(CmE8))

# Plot estimated against true media biases (both scaled into 0:1), with the
# 45-degree line for reference.
# The "%d" in the file name is replaced by png() with the page number.
# NOTE(review): the device is not closed in this section; confirm that
# dev.off() is called once all plots have been drawn.
png(filename = "GMover%d.png", width = 700, height = 700)

# Drop the first and last outlet. The original `2:Nmed-1` parses as
# (2:Nmed)-1, i.e. 1:(Nmed-1), which only dropped the last one.
interior <- seq_len(Nmed)[-c(1, Nmed)]

# Both axes are min-max scaled, so fix them to 0:1; this also keeps the
# annotation anchored at x = 0 inside the plotting region.
plot(CmS[interior], CmE8S[interior],
     type = "n",
     xlim = c(0, 1),
     ylim = c(0, 1),
     xlab = "True 'media biases' (scaled into 0:1)",
     ylab = "Estimated 'media biases' (scaled into 0:1)",
     main = "Mismatch between generating model and estimated model")

text(0, .85, "Generating model uses 1-abs(b[j]-c[m]),\n\nEstimated model assumes b[j]*c[m]", pos = 4, offset = 0, cex = .9)
points(CmS[interior], CmE8S[interior], pch = "o", col = "red")
abline(0, 1)