# This example is from Hoff, 2009, Chapters 3 and 4. The data are not quite the same. The figures are # similar to those found in Hoff. # We read in the data. The first 111 rows are the number of children # of women without a college degree. The last 44 rows are number of children # of women with a college degree. data = scan("NChildren.txt") n = length(data) y1 = data[1:111] y2 = data[112:n] par(mar=c(3,3,1,1),mgp=c(1.75,.75,0)) par(mfrow=c(1,2)) set.seed(1) n1<-length(y1) ; n2<-length(y2) s1<-sum(y1) s2<-sum(y2) par(mfrow=c(1,2),mar=c(3,3,1,1),mgp=c(1.75,.75,0)) plot(table(y1), type="h",xlab=expression(italic(y)),ylab=expression(italic(n[1](y))),col=gray(.5) ,lwd=3) mtext("Less than bachelor's",side=3) plot(table(y2), type="h",xlab=expression(italic(y)),ylab=expression(italic(n[2](y))),col=gray(0),lwd=3) mtext("Bachelor's or higher",side=3,lwd=3) ###### # We now plot the prior for the two lambdas and the two posterior distributions. All three are gamma. par(mar=c(3,3,1,1),mgp=c(1.75,.75,0)) par(mfrow=c(1,2)) a<-2 b<-1 xlambda<-seq(0,5,length=1000) plot(xlambda,dgamma(xlambda,a+s1,b+n1),type="l",col=gray(.5),xlab=expression(lambda), ylab=expression(paste(italic("p("),lambda,"|",y[1],"...",y[n],")",sep=""))) lines(xlambda,dgamma(xlambda,a+s2,b+n2),col=gray(0),lwd=2) lines(xlambda,dgamma(xlambda,a,b),type="l",lty=2,lwd=2) abline(h=0,col="black") # Here we plot the posterior predictive distributions for the number of children of women # without (gray) and with (black) college degrees. The posterior predictives are both # negative binomials. y<-(0:12) plot(y-.1, dnbinom(y, size=(a+s1), mu=(a+s1)/(b+n1)) , col=gray(.5) ,type="h", ylab=expression(paste(italic("p("),y[n+1],"|",y[1],"...",y[n],")",sep="")), xlab=expression(italic(y[n+1])),ylim=c(0,.35),lwd=3) points(y+.1, dnbinom(y, size=(a+s2), mu=(a+s2)/(b+n2)) , col=gray(0) ,type="h",lwd=3) legend(4,.3,legend=c("Less than bachelor's","Bachelor's or higher"),bty="n", lwd=c(3,3),col=c(gray(.5),gray(0))) # We now do the same but without analytical work. We compute the posterior predictive # distributions using simulation. a<-2 b<-1 n1<-length(y1) ; n2<-length(y2) s1<-sum(y1) s2<-sum(y2) lambda1 = rgamma(1000, a+s1, b+n1) lambda2 = rgamma(1000, a+s2, b+n2) par(mar=c(3,3,1,1),mgp=c(1.75,.75,0)) par(mfrow=c(1,2)) hist(lambda1, main="") hist(lambda2, main="") summary(lambda1) quantile(lambda1, c(0.025, 0.975)) summary(lambda2) quantile(lambda2, c(0.025, 0.975)) y1.mc = rpois(1000, lambda1) y2.mc = rpois(1000, lambda2) par(mar=c(3,3,1,1),mgp=c(1.75,.75,0)) par(mfrow=c(1,1)) diff.mc = y1.mc-y2.mc ds = -8:8 plot(ds,(table(c(diff.mc,ds))-1)/length(diff), type="h",lwd=3, xlab=expression(italic(D==tilde(Y)[1]-tilde(Y)[2])), ylab=expression(paste(italic("p(D"),"|",bold(y[1]),",",bold(y[2]),")",sep=""))) # Does the model fit? We compute t.mc, which is the posterior odds of having two children # versus having one child, for women without a college degree. We also compute the observed # odds (t.obs) and compare the two. If the predicted odds are similar to the observed odds, # then this indicates that the model fits, in at least that one aspect. t.mc = NULL for(s in 1:1000) { lambda1 = rgamma(1,a+sum(y1), b+length(y1)) y1.mc=rpois(length(y1),lambda1) t.mc = c(t.mc, sum(y1.mc==2)/sum(y1.mc==1)) } t.obs<-sum(y1==2)/sum(y1==1) mean(t.mc>=t.obs) par(mar=c(3,3,1,1),mgp=c(1.75,.75,0)) par(mfrow=c(1,2)) ecdf = (table(c(y1,0:9))-1 )/sum(table(y1)) ecdf.mc<- dnbinom(0:9,size=a+sum(y1),mu=(a+sum(y1))/(b+length(y1))) plot(0:9 +.1, ecdf.mc,type="h",lwd=5,xlab="number of children", ylab=expression(paste("Pr(",italic(Y[i]==y[i]),")",sep="")),col="gray", ylim=c(0,.35)) points(0:9-.1, ecdf,lwd=5,col="black",type="h") legend(1.8,.35, legend=c("empirical distribution","predictive distribution"), lwd=c(2,2),col= c("black","gray"),bty="n",cex=.8) hist(t.mc,prob=T,main="",ylab="",xlab=expression(t(tilde(Y))) ) segments(t.obs,0,t.obs,1.5,col="black",lwd=3)