5

Is there a better way to plot data that contains duplicate points than the one shown below? The duplication is not clearly visible in this plot.

enter image description here

 library(ggplot2)
 # Base plot: longitude on x, latitude on y. Columns are referenced by bare
 # name so that ggplot2 looks them up in `output` (the `data` argument)
 # instead of reaching around it with `output$...`.
 p <- ggplot(output, aes(Longitudes, Latitudes))
 # Text layer drawn at jittered coordinates so that duplicates do not sit
 # exactly on top of each other.
 # NOTE(review): geom_text() also needs a `label` aesthetic; none is mapped
 # here -- confirm which column should be shown.
 p + geom_text(aes(x = jitter(Longitudes), y = jitter(Latitudes)),
               check_overlap = FALSE, size = 5)
 # Point layer using ggplot2's built-in jitter position adjustment.
 p + geom_point(position = "jitter")

The purpose of showing the duplication at a particular point is to show how many occurrences it has.

0

2 Answers 2

3

To visualize duplicate points you can:

  • Add jitter (e.g., use geom_jitter)
  • Lower alpha (e.g., alpha = 0.1)
  • Decrease size of points (e.g., size = 1)
  • Change shape of points (e.g., shape = 21)

Code:

# Plotting library
library(ggplot2)
# Example data: 1000 zeros in column A and 1000 ones in column B, melted to
# long format (columns `variable` and `value`)
df <- reshape2::melt(
    data.frame(A = rep(0, times = 1e3), B = rep(1, times = 1e3))
)
# Jittered, semi-transparent shape-21 circles keep overlapping points visible
ggplot(data = df, mapping = aes(x = variable, y = value)) +
    geom_jitter(shape = 21, size = 2, alpha = 0.5) +
    theme_classic()

Plot:

enter image description here

Sign up to request clarification or add additional context in comments.

Comments

3

This is actually one of my biggest gripes about ggplot.

So much so that I wrote my own solution (other than jitter / alpha).

Solution

Essentially it is a new "position" called position_bunch, which distributes the points at each unique (X,Y) according to a pattern. It can be used like:

# Usage sketch: `ggplot(...)` stands for your own data and aesthetic mapping.
# No trailing comma after the last argument: position_bunch() has exactly
# three parameters, so an extra empty argument is rejected as unused.
g = ggplot(...) +
  geom_point(
    position = position_bunch(
      shape = 'hex',
      width = .7,
      sort  = TRUE
    )
  )

yielding stuff like:

example output

Implementation

 # Position adjustment that spreads points sharing the same (x, y) location
 # over a regular pattern centred on that location.
 #
 # shape: pattern name, one of 'hex', 'square' or 'spiral'.
 # width: the pattern offsets are multiplied by width / 2 / layers, where
 #        `layers` is the layer count needed by the largest group.
 # sort:  if truthy, pattern slots are handed out ordered by distance from
 #        the centre (pass -1 for farthest-first); if falsy, the pattern's
 #        native order is used.
 #
 # Returns a ggproto Position object for use as `position = ...`.
 position_bunch <- function(shape = 'hex', width = 0.5, sort = 1) {
   # Only the matching branch is evaluated; an unknown shape fails here with a
   # clear message instead of a later "object not found".
   pattern <- switch(shape,
     hex    = list(n.layer = n.layer.hex,    delta = delta.hex),
     square = list(n.layer = n.layer.square, delta = delta.square),
     spiral = list(n.layer = n.layer.spiral, delta = delta.spiral),
     stop("unknown shape '", shape, "': expected 'hex', 'square' or 'spiral'",
          call. = FALSE)
   )
   n.layer.fun <- pattern$n.layer
   delta.fun   <- pattern$delta
   if (sort) {
     sort.fun <- sorting.fun
   } else {
     # Both call sites below pass (delta, dir); base identity() accepts a
     # single argument only, so use a two-argument pass-through instead.
     sort.fun <- function(delta, dir) delta
   }
   cols <- c('x', 'y')
   ggproto('PositionBunch', Position,
     required_aes = cols,
     compute_layer = function(self, data, params, layout) {
       # Nothing to spread out.
       if (nrow(data) == 0) {
         return(data)
       }
       # Snapshot the coordinates: groups must be identified from the
       # original positions, not from rows already shifted in the loop below.
       x0 <- data$x
       y0 <- data$y
       # %in% never yields NA, so rows with missing coordinates form their
       # own group instead of poisoning the counts.
       select <- function(x, y) (x0 %in% x) & (y0 %in% y)
       u <- unique(data[, cols])
       n <- mapply(function(x, y) sum(select(x, y)), u$x, u$y)
       # One pattern, sized for the largest group, is shared by all groups.
       l.max <- n.layer.fun(max(n))
       delta <- sort.fun(delta.fun(l.max), 1)
       for (i in seq_len(nrow(u))) {
         rows <- select(u$x[i], u$y[i])
         # Take as many slots as the group has points and scale them.
         delta.i <- sort.fun(delta[seq_len(sum(rows)), cols], sort) *
           (width / 2 / l.max)
         data[rows, cols] <- data[rows, cols] + delta.i
       }
       data
     }
   )
 }
# Reorder the rows of `delta` by squared distance from the origin.
# dir == -1 puts the farthest rows first; any other value puts the nearest
# rows first.
sorting.fun <- function(delta, dir) {
  farthest.first <- (dir == -1)
  sq.dist <- apply(delta, 1, function(row) {
    sum(row^2)
  })
  ranking <- order(sq.dist, decreasing = farthest.first)
  delta[ranking, ]
}
# -----------------------------------------------------------------------------
# hex
# Number of hexagonal rings needed to place `n` points.
# A centre point plus l rings holds 1 + 3*l*(l+1) points, so the answer is
# the smallest integer l with 1 + 3*l*(l+1) >= n, i.e. the ceiling of the
# positive root of 3*l^2 + 3*l - (n-1) = 0. Counts that exactly fill a
# pattern (7, 19, 37, ...) therefore no longer request an extra empty ring.
# The result is never below 1 because position_bunch() divides by it.
n.layer.hex <- function(n) {
  rings <- ceiling((-3 + sqrt(9 + 12 * (n - 1))) / 6)
  pmax(1, rings)
}
# Offsets of a hexagonal pattern: the centre followed by rings 1..layers.
# Returns a data.frame with columns x and y; row 1 is the centre (0, 0).
delta.hex <- function(layers) {
  half <- 0.5
  rise <- sqrt(3) / 2
  # Unit steps along the six sides of a ring, in walking order.
  x.dirs <- c(+half, -half, -1, -half, +half, +1)
  y.dirs <- c(+rise, +rise, 0, -rise, -rise, 0)
  # Steps for one ring: each side is `layer` steps long, and the first step
  # carries an extra shift that moves from the previous ring onto this one.
  ring.steps <- function(dirs, layer, shift) {
    steps <- rep(dirs, each = layer)
    steps[1] <- steps[1] + shift
    steps
  }
  x.steps <- lapply(1:layers, function(k) ring.steps(x.dirs, k, +half))
  y.steps <- lapply(1:layers, function(k) ring.steps(y.dirs, k, -rise))
  # Accumulate the steps (starting from the centre) into positions.
  data.frame(
    x = cumsum(c(0, unlist(x.steps))),
    y = cumsum(c(0, unlist(y.steps)))
  )
}
# -----------------------------------------------------------------------------
# square
# Number of square rings needed to place `n` points.
# delta.square() emits 8*l points for ring l, so a centre point plus l rings
# is a full (2*l + 1)^2 grid; the answer is the smallest integer l with
# (2*l + 1)^2 >= n. The result is never below 1 because position_bunch()
# divides by it.
n.layer.square <- function(n) {
  rings <- ceiling((sqrt(n) - 1) / 2)
  pmax(1, rings)
}
# Offsets of a square pattern: the centre followed by rings 1..layers.
# Returns a data.frame with columns x and y; row 1 is the centre (0, 0).
delta.square <- function(layers) {
  # Unit steps along the four sides of a ring, in walking order.
  x.dirs <- c(0, -1, 0, +1)
  y.dirs <- c(+1, 0, -1, 0)
  # Steps for one ring: each side is 2 * layer steps long, and the first step
  # carries an extra shift that moves from the previous ring onto this one.
  ring.steps <- function(dirs, layer, shift) {
    steps <- rep(dirs, each = 2 * layer)
    steps[1] <- steps[1] + shift
    steps
  }
  x.steps <- lapply(1:layers, function(k) ring.steps(x.dirs, k, +1))
  y.steps <- lapply(1:layers, function(k) ring.steps(y.dirs, k, -1))
  # Accumulate the steps (starting from the centre) into positions.
  data.frame(
    x = cumsum(c(0, unlist(x.steps))),
    y = cumsum(c(0, unlist(y.steps)))
  )
}
# -----------------------------------------------------------------------------
# spiral
# Spiral constant: pi * (1 + sqrt(5)); used as the number of points
# per spiral "layer".
f.spiral <- pi * (1 + sqrt(5))
# Number of spiral layers needed to place `n` points.
n.layer.spiral <- function(n) {
  layers.needed <- n / f.spiral
  ceiling(layers.needed)
}
# Offsets of a spiral pattern sized for `layers` layers.
# Term i has radius layers/2 * sqrt(i/layers) and angle pi*(1+sqrt(5))*i; the
# returned x and y columns are the running sums of those terms.
delta.spiral <- function(layers) {
  idx    <- 0:ceiling(layers * f.spiral)
  radius <- layers / 2 * sqrt(idx / layers)
  angle  <- pi * (1 + sqrt(5)) * idx
  data.frame(
    x = cumsum(radius * cos(angle)),
    y = cumsum(radius * sin(angle))
  )
}

Test Code

# Renders a 3 x 3 grid of example plots (3 sample sizes x 3 bunch shapes)
# and saves it to test.png.
library('ggplot2')
library('gridExtra')
library('viridis')
source('ggpositions.r')
set.seed(1234)

g.list = list()
for (N in c(10,100,500)){
  data = data.frame(
    # levels are fixed so the three labels still line up when a category
    # happens not to be drawn (otherwise factor() errors on a label mismatch)
    x = factor(floor(runif(N,1,3+1)),levels=1:3,labels=c('A','B','C')),
    y = factor(floor(runif(N,1,3+1))),
    # colour value, largest first
    z = sort(runif(N,1,N),decreasing=TRUE)
  )
  for (shape in c('hex','square','spiral')){
    g = ggplot(data,aes(x=x,y=y,color=z)) +
      geom_point(position=position_bunch(
        shape = shape,
        width = .7
      ),size=sqrt(2)/log10(N)) +  # smaller points for larger samples
      scale_color_viridis() +
      xlab(NULL) + ylab(NULL) +
      theme(legend.position='none')
    g.list[[length(g.list)+1]] = g
  }
}
G = do.call(arrangeGrob,g.list)
ggsave('test.png',G)

Notes

  • It's a work in progress -- feedback welcome!
  • I've only tested it with geom_point using aes(x= ,y= ) so far
  • Point sizes are hard to scale reliably, so you may have to tinker manually
  • After cleaning & testing, I plan to upload to the ggplot2 extensions library

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.