R using ggplot2 to plot mixEM data

Question

I have a vector of length 370 that I would like to fit to a mixture of Gaussians. I followed the example in "Any suggestions for how I can plot mixEM type data using ggplot2" to plot the data, but as you can see from the linked image ("Plot of a mixture of three Gaussians"), my results are different from those in the example.

Here is a snippet of the code that I used:

library(ggplot2)
library(mixtools)

# Plot a fitted Gaussian mixture on top of a density histogram of the data.
#
# EM: a fit as returned by mixtools::normalmixEM(). The fields read here are
#     x (the data), posterior (only its column names, used as component ids),
#     and mu, sigma, lambda (one value per component).
# Returns the ggplot object; printing it draws the plot.
gg.mixEM <- function(EM) {
  # Fail loudly if ggplot2 is missing; require() would only return FALSE.
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    stop("gg.mixEM() needs the ggplot2 package", call. = FALSE)
  }
  x <- seq(min(EM$x), max(EM$x), length.out = 1000)
  # Keep the components in fit order; a plain character column would be
  # sorted alphabetically ("comp.10" before "comp.2") and the legend labels
  # below would then be attached to the wrong components.
  comp_ids <- colnames(EM$posterior)
  pars <- data.frame(
    comp   = factor(comp_ids, levels = comp_ids),
    mu     = EM$mu,
    sigma  = EM$sigma,
    lambda = EM$lambda
  )
  # Every grid point is repeated once per component so the rows of `pars`
  # recycle in step with it (one row per grid point / component pair).
  em.df   <- data.frame(x = rep(x, each = nrow(pars)), pars)
  em.df$y <- with(em.df, lambda * dnorm(x, mean = mu, sd = sigma))
  # NOTE(review): geom_polygon() joins the last vertex of each component back
  # to its first. The grid stops at the data range, so a component whose
  # density is not ~0 at min(x) / max(x) is closed by a slanted edge rather
  # than along the baseline.
  ggplot2::ggplot(data.frame(x = EM$x), ggplot2::aes(x, y = ..density..)) +
    ggplot2::geom_histogram(fill = NA, color = "black", bins = 41) +
    ggplot2::geom_polygon(data = em.df, ggplot2::aes(x, y, fill = comp),
                          color = "grey50", alpha = 0.5) +
    # One label per component (pars), not one per plotted point (em.df).
    ggplot2::scale_fill_discrete("Component\nMeans",
                                 labels = format(pars$mu, digits = 3)) +
    ggplot2::theme_bw()
}

# Print gradesCS as R code; the c(...) literal that follows is that output.
dput(gradesCS)
c(6.5, 22.375, 20.5, 24.25, 33.25, 24, 26.75, 30.75, 35.5, 23.5, 
26.875, 24, 35.5, 29.875, 29.75, 31.25, 32.875, 33.75, 34, 29, 
33, 24, 12, 26.375, 6.75, 31.25, 21.625, 32.875, 29.25, 27.125, 
28.25, 26.25, 24.875, 35.5, 26.5, 37.5, 35.375, 27.5, 33, 27.5, 
39.5, 34.25, 28.125, 28, 32.625, 37.625, 34.5, 29.5, 38.5, 37.5, 
28.75, 38, 16, 35.75, 30, 33.5, 36, 31.125, 29.75, 32.5, 35, 
24.375, 23.375, 28, 32.125, 36, 31.5, 33.5, 1.5, 30.5, 37, 29.5, 
29.5, 31.125, 32.5, 20.5, 28.75, 30.25, 32.5, 28, 36, 37.5, 28.5, 
35.5, 30.25, 36.375, 36, 23.25, 31.5, 25.125, 33.5, 34, 19.5, 
31.75, 39.5, 33.25, 24.875, 26.75, 23.375, 34, 16.5, 37, 33.375, 
31.25, 31.75, 35.5, 32, 27.5, 23.375, 20.625, 35.5, 31.5, 25.375, 
24.5, 27.25, 25.25, 35.75, 24, 28.25, 33.125, 31.5, 39.5, 39.25, 
24.75, 37, 25.5, 34.75, 34, 20.25, 37.625, 30.5, 32.375, 15, 
32.75, 33.5, 32.75, 31.5, 29.25, 30, 37.25, 34.5, 23, 32.5, 38.25, 
35.625, 33, 35, 31.125, 37, 28.125, 29.25, 31.75, 34.75, 34.625, 
36.625, 15.25, 35.5, 37, 33.5, 30.875, 35, 31.625, 22.75, 31, 
31.125, 25.125, 35.5, 2, 36.125, 25.25, 32.5, 28, 38.5, 35.5, 
38.5, 30.5, 34, 28.125, 38, 29.25, 29.75, 33.25, 25.125, 35, 
34.5, 32, 35, 26.875, 20.5, 35.5, 23.25, 26.25, 36, 35.5, 38, 
39.25, 22, 38.5, 31, 35.5, 33.5, 31.5, 26, 30.375, 35.75, 29.75, 
34, 37.625, 38, 35.5, 34.25, 24.375, 30, 33.75, 39.5, 36.5, 36.5, 
32, 36.5, 29.75, 29.75, 25, 32, 29.25, 32.125, 31.25, 38, 33.5, 
33.5, 38.5, 37.25, 31.125, 33.5, 31, 28, 29.75, 36, 36, 37, 22, 
29, 36.5, 32.25, 30.75, 38.5, 24.125, 28.75, 38.25, 32.5, 34.75, 
29, 30.375, 33.5, 31.25, 30, 33, 33.5, 27.5, 26.5, 30.25, 34.75, 
33.5, 39, 33.25, 38.5, 27, 39.5, 34.25, 33, 35.125, 38, 31.25, 
32.75, 22.75, 31.125, 34.5, 33, 37.125, 31, 18.75, 30.25, 31.75, 
34, 30.75, 29, 34.5, 36, 36.5, 31.5, 26, 27.5, 27.5, 36.5, 19.75, 
33, 35.125, 16, 19.75, 31.5, 38.5, 34.25, 36.5, 27, 22, 21.75, 
36, 31.5, 33, 29.75, 32.5, 26.25, 33.5, 35.75, 33, 39, 35, 34.25, 
28.5, 25.5, 30.5, 28, 21.25, 39.125, 22.75, 28.375, 29.125, 30, 
34.125, 31.25, 32, 26.25, 36, 24.5, 30.25, 32.75, 29.625, 16, 
34, 16.75, 25.25, 33, 38, 28, 24.75, 29.75, 24.5, 19.25, 32.75, 
27.5, 24.75, 17.375, 25.25, 30.125, 38, 28, 35, 11.75, 27.75, 
38, 28.625, 31.25, 31.25, 32, 17.25, 18.25, 32.625, 25.5, 27.5, 
35.25, 35.5)

# Shuffle the grades into a random order, fit a three-component normal
# mixture to the shuffled values, and plot the fit.
b <- gradesCS
c <- b[sample(length(b), length(b))]
c3 <- normalmixEM(
  c,
  lambda = NULL, mu = NULL, sigma = NULL,
  k = 3, maxit = 1000, epsilon = 1e-2
)
gg.mixEM(c3)

It would be easier to help you if you posted a reproducible example with sample input data so we can run and test the code ourselves. Explicitly list any non-base R packages you are using. — MrFlick
– MrFlick, Commented Aug 21, 2017 at 18:35

komodovaran_ · Accepted Answer · 2017-08-23 07:42:38Z

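The problem is that geom_polygon() closes each shape by joining its last point straight back to its first. If the x grid stops where a component's density has not yet dropped to (nearly) 0, that closing edge cuts diagonally across the curve instead of running along the baseline.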

In the line of gg.mixEM that builds the x grid, add extra spacing on each side of x. I'm going with 5 here, but you just need enough for every component's density to fall to roughly 0 at both ends.

# Extend the curve grid 5 units past the data on each side.
x <- with(EM, seq(min(x) - 5, max(x) + 5, length.out = 1000))

At the end of the plot, we cut off the excess space with

# expand is a single logical flag; FALSE takes the limits exactly from xlim.
coord_cartesian(xlim = c(0, 42),
                expand = FALSE)

This renders the graph with your spacing, and then "zooms in" on the selected x interval.

# Fit a two-component normal mixture to `test` (not defined in this snippet).
fit_test <- normalmixEM(test, k = 2)

# Plot a fitted Gaussian mixture on top of a density histogram of the data.
#
# EM:   a fit as returned by mixtools::normalmixEM(). The fields read here
#       are x (the data), posterior (only its column names, used as component
#       ids), and mu, sigma, lambda (one value per component).
# pad:  how far the curve grid extends below min(x) and above max(x), so the
#       component curves have room to fall towards 0 (default 5, as before).
# xlim: x range shown, passed to coord_cartesian(). The default c(0, 42) is
#       the range that was hard-coded for the grades example; pass NULL to
#       show the range of the data instead.
# Returns the ggplot object; printing it draws the plot.
gg.mixEM <- function(EM, pad = 5, xlim = c(0, 42)) {
    # Fail loudly if ggplot2 is missing; require() would only return FALSE.
    if (!requireNamespace("ggplot2", quietly = TRUE)) {
        stop("gg.mixEM() needs the ggplot2 package", call. = FALSE)
    }
    stopifnot(is.numeric(pad), length(pad) == 1, pad >= 0)
    if (is.null(xlim)) {
        xlim <- range(EM$x)
    }
    x <- seq(min(EM$x) - pad, max(EM$x) + pad, length.out = 1000)
    # Keep the components in fit order; a plain character column would be
    # sorted alphabetically ("comp.10" before "comp.2") and the legend labels
    # below would then be attached to the wrong components.
    comp_ids <- colnames(EM$posterior)
    pars <- data.frame(
        comp   = factor(comp_ids, levels = comp_ids),
        mu     = EM$mu,
        sigma  = EM$sigma,
        lambda = EM$lambda
    )
    # Every grid point is repeated once per component so the rows of `pars`
    # recycle in step with it (one row per grid point / component pair).
    curve.df   <- data.frame(x = rep(x, each = nrow(pars)), pars)
    curve.df$y <- with(curve.df, lambda * dnorm(x, mean = mu, sd = sigma))
    # geom_polygon() joins the last vertex of each component back to its
    # first. Anchoring both ends on y = 0 makes that closing edge run along
    # the baseline even when `pad` is too small for a wide component.
    em.df <- rbind(
        data.frame(x = min(x), pars, y = 0),
        curve.df,
        data.frame(x = max(x), pars, y = 0)
    )
    ggplot2::ggplot(data.frame(x = EM$x), ggplot2::aes(x, y = ..density..)) +
        ggplot2::geom_histogram(fill = NA, color = "black", bins = 41) +
        ggplot2::geom_polygon(data = em.df, ggplot2::aes(x, y, fill = comp),
                              color = "grey50", alpha = 0.5) +
        # One label per component (pars), not one per plotted point.
        ggplot2::scale_fill_discrete("Component\nMeans",
                                     labels = format(pars$mu, digits = 3)) +
        ggplot2::theme_bw() +
        # Zoom to xlim without dropping the padded points; expand is a
        # single logical flag, and FALSE keeps the limits exactly at xlim.
        ggplot2::coord_cartesian(xlim = xlim, expand = FALSE)
}

# Draw the fitted mixture; at top level the returned plot is auto-printed.
gg.mixEM(fit_test)

And we get

Collectives™ on Stack Overflow

R using ggplot2 to plot mixEM data

1 Answer 1

Comments

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

Comments

Your Answer

Sign up or log in

Post as a guest

Linked

Related