4

I am studying a data set with multiple observations of a parameter over time. The data looks like this:

test<-data.frame(t = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.33, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 0.67, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.33, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 1.67, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10), int = c(76.44609375, 94.6619686800895, 112.148907103825, 75.1003097802036, 74.1037037037037, 76.7526662128432, 74.0734830988873, 87.9052100068855, 81.0525931336742, 92.1907873244038, 84.0708929788684, 88.8232221775814, 98.1323678006063, 115.175322139789, 91.2653104925053, 76.3661620658949, 152.637799717913, 107.054702135631, 83.4693197755961, 91.658991910392, 81.3991787335206, 106.153762268266, 100.919789842382, 67.2119436084271, 137.558914728682, 89.1182608695652, 156.10352233677, 108.180911207183, 87.9794680354643, 77.7501400560224, 80.7675382653061, 95.6662793399954, 92.5649630541872, 88.3301402668491, 84.3891875746714, 76.4318673395818, 111.413893510815, 82.4753828420879, 119.099190283401, 192.539417212559, 208.49203187251, 106.919937512205, 105.370936371214, 180.028767711464, 130.29369773608, 170.193357597816, 172.703180212014, 178.061569518042, 182.097607918614, 227.066976984743, 153.856101031661, 432.991580916745, 299.143735224586, 144.118156808803, 396.36644895153, 334.538796516231, 350.186359610275, 200.781101530882, 279.866079790223, 122.542700519331, 235.199555308505, 204.924140655867, 229.181848967152, 225.542753383955, 468.308974987739, 269.306058221873, 229.969282013323, 255.553846153846, 621.021220159151, 255.017211703959, 396.658265826583, 273.300663227708, 232.449965010497, 303.343894502483, 276.952483801296, 327.419805194805, 
241.136864249474, 457.961489497136, 498.901714285714, 280.9558101473, 322.089588377724, 386.754533152909, 364.356809338521, 340.416035518412, 428.482916666667, 668.447197400487, 387.671341748481, 471.049545829893, 255.8802020688, 361.979536152797, 192.224629418472, 284.088954468803, 170.763997760358, 237.869065100343, 365.08237271854, 294.266488413547, 718.279750479846, 211.599427030671, 294.045375597047, 207.099267015707, 194.209973045822, 251.306358381503, 190.786794766966, 400.396083385976, 183.133240482823, 130.442107867392, 167.231452991453, 345.110896351776, 299.304645622394, 192.078204692282, 121.273544841369, 153.996295438759, 97.6034616378197, 362.80049522462, 130.498551774077, 106.031656035908, 117.682936668011, 90.1247837370242, 140.855475040258, 169.050049067713, 244.290241606527, 120.603356419819, 173.413333333333, 125.896389002872, 206.543873212215, 186.668320340184, 85.0988108720272, 106.57849117175, 102.867232728676, 216.232957110609, 86.6538461538462, 149.459777852575, 212.498573059361, 93.3816390633923, 105.567730417318, 120.095470383275, 137.205696941396, 141.156985871272, 90.578857338351, 84.8457760314342, 127.092660685395, 136.859870967742, 188.406440382942, 86.0879705400982))
class(test)

I managed to plot the density for each time point using:

# Kernel density of `int`, one curve per time point, coloured by `t`.
ggplot(
  data = test,
  mapping = aes(x = int, group = as.factor(t), colour = t)
) +
  geom_density()

However, I would like to produce the same graph with a log-normal fit of each density instead of the kernel density estimate.

I know how to plot the log-normal fit for a single time point by using fitdistr and passing the estimated parameters to stat_function, with this code:

library(MASS)

# Fit a log-normal distribution to the observations at t == 0 and draw the
# fitted density over the range of those observations.
t0 <- subset(test, t == 0, select = "int")
fit <- fitdistr(t0$int, "lognormal")

# dlnorm()'s parameters are `meanlog` and `sdlog`; spell them out instead of
# relying on partial matching of `mean` / `sd`. `[[` drops the element names
# carried by fit$estimate.
ggplot(data = t0, aes(x = int)) +
  stat_function(
    fun = dlnorm,
    args = list(
      meanlog = fit$estimate[["meanlog"]],
      sdlog = fit$estimate[["sdlog"]]
    )
  )

But how can I do this for all values of t, with the colour of each line given by the value of t? Is it possible to provide a function in the args list?

2 Answers 2

2

I thought of another naive solution: Predicting the values of every dlnorm().

## Packages used below (fitdistr() lives in MASS)
library(MASS)
library(ggplot2)

## Split up the data according to t (one data.frame per time point,
## ordered by the sorted values of t)
tt     <- split(test, test$t)

## Fit a lognormal to every dataset
fits   <- lapply(tt, function(x) fitdistr(x$int, "lognormal"))

## Common grid of x values on which every fitted density is evaluated
grid   <- seq_len(floor(max(test$int)))

## Predict values; dlnorm()'s parameters are `meanlog` / `sdlog`, so name them
## in full instead of relying on partial matching of `mean` / `sd`
fitted <- lapply(fits, function(x) {
  dlnorm(grid,
         meanlog = x$estimate[["meanlog"]],
         sdlog = x$estimate[["sdlog"]])
})

## Time value belonging to each fit, taken from the split groups themselves so
## the labels stay aligned with `fitted` even if `test` is not sorted by t
t.values <- unname(vapply(tt, function(x) x$t[1], numeric(1)))

## Wrap everything into a data.frame ggplot can handle
plot.data <- data.frame(y = unlist(fitted, use.names = FALSE),
                        int = rep(grid, times = length(fitted)),
                        t = rep(t.values, each = length(grid)))

## Plot
ggplot(test, aes(int, group = as.factor(t), colour = t)) +
  #geom_density() +
  geom_line(data = plot.data, aes(y = y), lwd = 1)
Sign up to request clarification or add additional context in comments.

Comments

2

What about a naive solution, adding iteratively stat_function()?

    # One fitted log-normal curve per time point, added layer by layer.
    t_values <- unique(test$t)

    # "Set1" only has 9 colours; asking brewer.pal() for more returns 9 and the
    # remaining curves would get an NA colour. Interpolating the palette yields
    # one colour per time point for any number of time points.
    cols <- grDevices::colorRampPalette(
        RColorBrewer::brewer.pal(9, "Set1")
    )(length(t_values))

    # Use the full data set for the base plot so the x-axis spans every time
    # point, not only the range observed at t == 0.
    g <- ggplot(data = test, aes(x = int))

    for (k in seq_along(t_values)) {
        fit <- fitdistr(test$int[test$t == t_values[k]], "lognormal")
        # dlnorm() takes `meanlog` / `sdlog`; avoid partial argument matching.
        g <- g + stat_function(fun = dlnorm,
                               args = list(meanlog = fit$estimate[["meanlog"]],
                                           sdlog = fit$estimate[["sdlog"]]),
                               colour = cols[k])
    }
    g

3 Comments

It works. I have a problem getting the brewer.pal function to work. Could you specify in which package I can find it?
It is in RColorBrewer. But any way of choosing a color in a dynamic list would do. For example just set col=n.
I see, so it means it uses discrete colours, right? (Set1 is limited to 9 colours...) Is it possible to use continuous colours?

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.