8

I would like to plot the following dataset

structure(list(X = structure(c(3L, 12L, 11L, 7L, 13L, 2L, 1L, 
10L, 5L, 4L, 8L, 14L, 9L, 6L), .Label = c("BUM", "DDR", "ETB", 
"EXP", "HED", "HEDOS", "KON", "LEIT", "MAIN", "MAT", "PER", "PMA", 
"TRA", "TRADITION"), class = "factor"), Geschaeft = c(0.0468431771894094, 
0.0916666666666667, 0.0654761904761905, 0.0905432595573441, 0.0761904761904762, 
0.0672097759674134, 0.0869565217391304, 0.0650887573964497, 0.0762250453720508, 
0.0518234165067179, 0.0561330561330561, 0.060077519379845, 0.0865384615384615, 
0.0628683693516699), Gaststaette = c(0.0855397148676171, 0.0604166666666667, 
0.0555555555555556, 0.0764587525150905, 0.0895238095238095, 0.0712830957230143, 
0.075098814229249, 0.0631163708086785, 0.0780399274047187, 0.0383877159309021, 
0.0561330561330561, 0.0581395348837209, 0.0596153846153846, 0.0648330058939096
), Bank = c(0.065173116089613, 0.0854166666666667, 0.0972222222222222, 
0.0824949698189135, 0.060952380952381, 0.0529531568228106, 0.0731225296442688, 
0.0828402366863905, 0.0725952813067151, 0.0806142034548944, 0.0686070686070686, 
0.0503875968992248, 0.0807692307692308, 0.0550098231827112), 
    Hausarzt = c(0.0712830957230143, 0.0833333333333333, 0.0912698412698413, 
    0.0704225352112676, 0.0628571428571429, 0.0672097759674134, 
    0.106719367588933, 0.0710059171597633, 0.108892921960073, 
    0.0940499040307102, 0.0852390852390852, 0.0794573643410853, 
    0.0826923076923077, 0.110019646365422), Einr..F..Aeltere = c(0.10183299389002, 
    0.104166666666667, 0.107142857142857, 0.100603621730382, 
    0.12, 0.116089613034623, 0.112648221343874, 0.112426035502959, 
    0.121597096188748, 0.0998080614203455, 0.118503118503119, 
    0.131782945736434, 0.121153846153846, 0.104125736738703), 
    Park = c(0.0855397148676171, 0.0666666666666667, 0.0912698412698413, 
    0.0804828973843058, 0.0704761904761905, 0.0672097759674134, 
    0.0731225296442688, 0.0670611439842209, 0.0834845735027223, 
    0.0806142034548944, 0.0686070686070686, 0.0658914728682171, 
    0.0884615384615385, 0.0609037328094303), Sportstaette = c(0.0855397148676171, 
    0.0791666666666667, 0.0952380952380952, 0.0824949698189135, 
    0.0933333333333333, 0.114052953156823, 0.0810276679841897, 
    0.0788954635108481, 0.0780399274047187, 0.0825335892514395, 
    0.0831600831600832, 0.0852713178294574, 0.0884615384615385, 
    0.1237721021611), OEPNV = c(0.0529531568228106, 0.05625, 
    0.0456349206349206, 0.0583501006036217, 0.0666666666666667, 
    0.0366598778004073, 0.0434782608695652, 0.0571992110453649, 
    0.0344827586206897, 0.0633397312859885, 0.0478170478170478, 
    0.062015503875969, 0.0519230769230769, 0.0235756385068762
    ), Mangel.an.Gruenflaechen = c(0.0692464358452139, 0.0645833333333333, 
    0.0694444444444444, 0.0422535211267606, 0.0666666666666667, 
    0.0692464358452139, 0.0711462450592885, 0.0749506903353057, 
    0.0598911070780399, 0.0959692898272553, 0.0623700623700624, 
    0.0717054263565891, 0.0653846153846154, 0.0746561886051081
    ), Kriminalitaet = c(0.0672097759674134, 0.0541666666666667, 
    0.0476190476190476, 0.0422535211267606, 0.0628571428571429, 
    0.0509164969450102, 0.0454545454545455, 0.0532544378698225, 
    0.058076225045372, 0.072936660268714, 0.0602910602910603, 
    0.063953488372093, 0.0461538461538462, 0.0648330058939096
    ), Auslaender = c(0.0244399185336049, 0.04375, 0.0416666666666667, 
    0.0663983903420523, 0.0228571428571429, 0.0509164969450102, 
    0.0237154150197628, 0.0236686390532544, 0.0217785843920145, 
    0.0441458733205374, 0.024948024948025, 0.0232558139534884, 
    0.0230769230769231, 0.0451866404715128), Umweltbelastung = c(0.0468431771894094, 
    0.0479166666666667, 0.0476190476190476, 0.0402414486921529, 
    0.0438095238095238, 0.0468431771894094, 0.0454545454545455, 
    0.0512820512820513, 0.0417422867513612, 0.0518234165067179, 
    0.0478170478170478, 0.0445736434108527, 0.0442307692307692, 
    0.0451866404715128), Einr..f..Kinder = c(0.0753564154786151, 
    0.075, 0.0555555555555556, 0.0724346076458753, 0.0533333333333333, 
    0.0794297352342159, 0.075098814229249, 0.0788954635108481, 
    0.0598911070780399, 0.0460652591170825, 0.0977130977130977, 
    0.0930232558139535, 0.0634615384615385, 0.0451866404715128
    ), Einr..f..Jugendliche = c(0.122199592668024, 0.0875, 0.0892857142857143, 
    0.0945674044265594, 0.11047619047619, 0.109979633401222, 
    0.0869565217391304, 0.120315581854043, 0.105263157894737, 
    0.0978886756238004, 0.122661122661123, 0.11046511627907, 
    0.0980769230769231, 0.119842829076621)), .Names = c("X", 
"Geschaeft", "Gaststaette", "Bank", "Hausarzt", "Einr..F..Aeltere", 
"Park", "Sportstaette", "OEPNV", "Mangel.an.Gruenflaechen", "Kriminalitaet", 
"Auslaender", "Umweltbelastung", "Einr..f..Kinder", "Einr..f..Jugendliche"
), row.names = c(NA, -14L), class = "data.frame")

So that it looks like this picture (or, better, with each line in a separate plot) that I created with Excel.

Image created with Excel

But I can't figure out how...

Thanks a lot for your help. Dominik

UPDATE: Here is just a map of what the groups (BUM,DDR,ETB etc.) mean. Milieu map

1
  • 1
    +1 for supplying dput() sample data Commented Aug 8, 2011 at 16:56

3 Answers 3

10

This is an extension to @Andrie's solution. It combines the faceting idea with that of overplotting (stolen liberally from the learnr blog), which I find results in a cool visualization. Here is the code and the resulting output. Comments are welcome.

# Reshape to long format: one row per (milieu X, variable, value).
mdf <- melt(df, id.vars = "X")
# Order the x-axis categories by their mean value, and keep a copy of the
# milieu label in Y so the background layer can still group by milieu after
# X has been dropped.
mdf <- transform(mdf, variable = reorder(variable, value, mean), Y = X)

ggplot(mdf, aes(x = variable, y = value)) +
  # Background layer: its data has no X column (the facetting variable), so
  # ggplot2 repeats these grey lines in every panel.
  geom_line(
    data = transform(mdf, X = NULL), aes(group = Y), colour = "grey80"
  ) +
  # Foreground layer: the panel's own milieu, drawn on top of the grey lines.
  geom_line(aes(group = X)) +
  facet_wrap(~X) +
  # theme()/element_text() replace opts()/theme_text(), which no longer exist
  # in current ggplot2 releases.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

enter image description here

EDIT: If you have groupings of milieus, then a better way to present might be the following

library(RColorBrewer) # provides brewer.pal()

# One colour per row of the mapping table below, in the same order:
# 4 oranges, 4 greens, 3 blues, 3 purple-reds (14 colours for 14 milieus).
mycols <- c(
  brewer.pal(4, "Oranges"), brewer.pal(4, "Greens"),
  brewer.pal(3, "Blues"), brewer.pal(3, "PuRd")
)

# Mapping from each milieu code (V1) to its milieu group (V2).
# strip.white = TRUE is required: with sep = "," read.table() otherwise keeps
# the indentation/padding inside the character fields ("  ETB", " LEIT"), and
# the merge on X below would match nothing.
# text = is used instead of textConnection() so the connection is closed
# automatically.
mdf2 <- read.table(
  text = "
  V1,  V2
  ETB, LEIT
  PMA, LEIT
  PER, LEIT
  LEIT, LEIT
  KON, TRADITION
  TRA, TRADITION
  DDR, TRADITION
  TRADITION, TRADITION
  BUM, MAIN
  MAT, MAIN
  MAIN, MAIN
  EXP, HEDOS
  HED, HEDOS
  HEDOS, HEDOS",
  sep = ",", header = TRUE, stringsAsFactors = FALSE, strip.white = TRUE
)

# Attach the colours as plain strings so the identity scale can use them.
mdf2 <- data.frame(mdf2, mycols = mycols, stringsAsFactors = FALSE)
mdf3 <- merge(mdf, mdf2, by.x = "X", by.y = "V1")

# Three-letter codes are the individual milieus; the longer codes (LEIT,
# TRADITION, MAIN, HEDOS) are the group-level rows and are drawn thicker.
is_single <- nchar(as.character(mdf3$X)) == 3

p1 <- ggplot(
  mdf3,
  aes(x = variable, y = value, group = X, colour = mycols)
) +
  # The layer-level `subset = .()` argument no longer exists in ggplot2, so
  # each layer receives its own pre-filtered data instead.
  geom_line(data = mdf3[is_single, ]) +
  # NOTE: ggplot2 >= 3.4 prefers `linewidth` over `size` for lines.
  geom_line(data = mdf3[!is_single, ], size = 1.5) +
  facet_wrap(~V2) +
  # Identity scales draw no legend by default; request one explicitly.
  scale_color_identity(
    name = "Milieus", breaks = mdf2$mycols, labels = mdf2$V1,
    guide = "legend"
  ) +
  theme_bw() +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

enter image description here

Sign up to request clarification or add additional context in comments.

9 Comments

This example is awesome! That is exactly what I was looking for. The only questions are now how to order the plots (I don't want them in alphabetical order) and how to plot e.g. TRADITION in ETB. And is it possible to define specific colors for the lines that are black right now?
Three questions (1) how would you want to order your plots, (2) what do you mean by plotting TRADITION IN ETB and (3) what would the purpose of colors be, since each line is already identified by the title in the box
Ok, that with the colours is really easy... define the colours with mycols <- c('#3168FF', '#00CCFF', ...) and modify the line to geom_line(aes(group = X, colour = mycols)). What I don't know yet is how to define that HED = '#00CCFF' even if that group is not at the second position...
easiest way would be to create a data frame mapping X with colors you want to use and then merging with the original frame and then using scale_colour_manual. my question however still remains, why would you need colors when each box is already identified by X?
(1) The order is based on a theoretical concept. Each plot represents one milieu. Therefore the order should correspond to their "Lebenswelt" (living environment?), which is ETB, PMA, PER (as leading milieus - LEIT) KON, TRA, DDR (as traditional milieus TRADITION) BUM, MAT (as mainstream milieus MAIN) EXP, HED (as hedonistic milieus HEDOS) (2) This corresponds as well to the plotting of TRADITION in ETB. While ETB is part of TRADITION it might be nice to see how ETB, PMA and PER differ from TRADITION. So I would like to plot the line of TRADITION also in the plot of ETB
|
4

The trick is to reshape your data into tall format before you pass it to ggplot. This is easy when using the melt function in package reshape2:

Assuming your data is a variable called df:

library(reshape2)
library(ggplot2)

# Reshape to long format: every non-id column becomes (variable, value) rows.
mdf <- melt(df, id.vars = "X")
str(mdf)
# One coloured line per milieu X across the melted variables.
ggplot(mdf, aes(x = variable, y = value, colour = X, group = X)) +
  geom_line() +
  # theme()/element_text() replace opts()/theme_text(), which no longer exist
  # in current ggplot2 releases.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

enter image description here


Edit As @Chase suggests, you can use facetting to make the plot more readable:

# One panel per melted variable, with the milieus on the x axis.
ggplot(mdf, aes(x = X, y = value)) +
  geom_point() +
  facet_wrap(~variable) +
  # theme()/element_text() replace opts()/theme_text(), which no longer exist
  # in current ggplot2 releases.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

enter image description here

3 Comments

Also consider facet_wrap(~X) to plot them in small multiples. I'd probably drop the colour aesthetic if you go that route as it becomes redundant information at that point.
@Chase Good suggestion. I have modified my answer to give an example of this.
I'd also recommend reordering the x variable.
1

First, melt the data to put it in a long format.

# Long format: X is kept as the id column; the remaining columns are stacked
# into the `variable` / `value` pair used by the plots below.
melted_data <- melt(the_data, id.vars = "X")

Now draw the plot with a numeric x axis, and fix up the labels.

# Map the factor to its integer codes so the x axis is continuous, then
# relabel the ticks with the factor levels.
p <- ggplot(melted_data, aes(as.numeric(variable), value, colour = X)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq_len(nlevels(melted_data$variable)),
    labels = levels(melted_data$variable)
  ) +
  # theme()/element_text() replace opts()/theme_text(), which no longer exist
  # in current ggplot2 releases.
  theme(axis.text.x = element_text(angle = 90))
p

Having answered this, I'm not sure what the plot tells you – it's just a jumble of lines to me. You might be better off greying out most of the lines, and highlighting one or two interesting ones.

Add a column that picks out, e.g., EXP.

# Logical flag column: TRUE for the rows of the milieu to highlight ("EXP").
melted_data[["is_EXP"]] <- melted_data[["X"]] == "EXP"

Ignore my previous answer; Andrie's is better. Use manual colour and size scales to highlight your new column.

# Draw every milieu as a thin grey line and the flagged one thick and black.
p <- ggplot(
  melted_data,
  aes(variable, value, colour = is_EXP, size = is_EXP, group = X)
) +
  geom_line() +
  # Values are named so each one is tied to its is_EXP level explicitly
  # rather than by position.
  scale_colour_manual(values = c("FALSE" = "grey80", "TRUE" = "black")) +
  scale_size_manual(values = c("FALSE" = 0.5, "TRUE" = 1.5)) +
  # theme()/element_text() replace opts()/theme_text(), which no longer exist
  # in current ggplot2 releases.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
p

enhanced plot

1 Comment

nice idea of greying out details. this can be done more generally by using an overplotting trick in ggplot. see my solution if you are interested

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.