1

I have a hash H (see bottom) and need to perform a deep invert operation on it, such that a new hash H2 is returned where each key K is a value inside the original hash. The keys in H2 map to an array of arrays of all the sequences of keys that, when applied onto the original hash H would give you the key K which is a value in the original hash.

Perhaps I should use a different data structure for the output, such as a hash of hashes?

I'd like this to work with hashes of arbitrary nesting levels.

I don't know where to start designing an optimal algorithm

Original Hash

What the input might look like

{
  u: {
    u: { u: :phe, c: :phe, a: :leu, g: :leu },
    c: { u: :ser, c: :ser, a: :ser, g: :ser },
    a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
    g: { u: :cys, c: :cys, a: :STOP, g: :trp }
  },
  c: {
    u: { u: :leu, c: :leu, a: :leu, g: :leu },
    c: { u: :pro, c: :pro, a: :pro, g: :pro },
    a: { u: :his, c: :his, a: :gln, g: :gln },
    g: { u: :arg, c: :arg, a: :arg, g: :arg }
  },
  {...}
}

Simplified Output

What output would look like

{
  phe: [[:u,:u,:u],[:u,:u,:c]],
  leu: [[:u,:u,:a],[:u,:u,:g]],
  ser: [[:u,:c,:u],[:u,:c,:c],[:u,:u,:a],[:u,:u,:g]],
  tyr: [[:u,:a,:u],[:u,:a,:c]],
  "...": [[...]]
}

Why? I'm writing my own bioinformatics library and want to be able to return the possible nucleotide sequences for a given protein, denoted by the three character :symbols

9
  • 1
    I don't understand your description. Where do :val and :ala come from and why does the output hash contain strings? Commented Jul 31, 2015 at 16:43
  • 1
    What have you attempted? Commented Jul 31, 2015 at 16:46
  • are you just looking for [:val,:ala].zip(h.map {|k,v| [k.to_s].product(v.keys.map(&:to_s))}).to_h ? That should do it for your example but I have no idea what the application for this would be that's why I won't make this an actual answer. Also when someone says "deep" I don't generally think one level which seems to be what you want. Commented Jul 31, 2015 at 16:49
  • 2
    Thanks for editing, it makes sense now. Commented Jul 31, 2015 at 17:05
  • 1
    My output differs from yours for the key :leu. Could you please check that? It's an interesting question, but in future please: 1) pare the examples down to the minimum (e.g., it would have been sufficient for the inner-most hashes to have just two elements); 2) no ... and 3) when you give input data include a name (e.g., H = { ... }, so that those giving solutions need not define it. (Yes, you did say it was H, but better to just write H =.... Commented Jul 31, 2015 at 18:18

1 Answer 1

3

Code

def recurse(h, arr=[])
  h.each_with_object({}) { |(k,v),g| g.update((Hash===v) ?
    recurse(v, arr + [k]) : { v=>[arr+[k]] }) { |_,o,n| o+n } }
end

The recursion uses the form of Hash#update (aka merge!) that employs the block { |_,o,n| o+n } } to determine the values of keys that are present in both hashes being merged.

Example 1

h =
{
  u: {
    u: { u: :phe, c: :phe, a: :leu, g: :leu },
    c: { u: :ser, c: :ser, a: :ser, g: :ser },
    a: { u: :tyr, c: :tyr, a: :STOP, g: :STOP },
    g: { u: :cys, c: :cys, a: :STOP, g: :trp }
  },
  c: {
    u: { u: :leu, c: :leu, a: :leu, g: :leu },
    c: { u: :pro, c: :pro, a: :pro, g: :pro },
    a: { u: :his, c: :his, a: :gln, g: :gln },
    g: { u: :arg, c: :arg, a: :arg, g: :arg }
  },
}

recurse h
  #=> {:phe=>[[:u, :u, :u], [:u, :u, :c]],
  #    :leu=>[[:u, :u, :a], [:u, :u, :g], [:c, :u, :u],
  #      [:c, :u, :c], [:c, :u, :a], [:c, :u, :g]],
  #    :ser=>[[:u, :c, :u], [:u, :c, :c], [:u, :c, :a], [:u, :c, :g]], 
  #    :tyr=>[[:u, :a, :u], [:u, :a, :c]],
  #    :STOP=>[[:u, :a, :a], [:u, :a, :g], [:u, :g, :a]],
  #    :cys=>[[:u, :g, :u], [:u, :g, :c]],
  #    :trp=>[[:u, :g, :g]],
  #    :pro=>[[:c, :c, :u], [:c, :c, :c], [:c, :c, :a], [:c, :c, :g]], 
  #    :his=>[[:c, :a, :u], [:c, :a, :c]],
  #    :gln=>[[:c, :a, :a], [:c, :a, :g]],
  #    :arg=>[[:c, :g, :u], [:c, :g, :c], [:c, :g, :a], [:c, :g, :g]]}

Example 2

h =
{
  u: {
    u: { u: :phe, a: :leu },
    c: { u: :ser, c: :phe },
    a: { u: :tyr, c: { a: { u: :leu, c: :ser }, u: :tyr } }
  },
  c: {
    u: { u: :leu, c: :pro },
    a: { u: :arg }
  },
}

recurse(h)
  #=> {:phe=>[[:u, :u, :u], [:u, :c, :c]],
  #    :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
  #    :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
  #    :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
  #    :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}

Explanation

Here is the code modified to display the calculations that are being performed:

def recurse(h, arr=[], level = 0)
  indent = ' '*(2*level)
  puts "#{indent}level = #{level}"
  puts "#{indent}h= #{h}"
  puts "#{indent}arr= #{arr}"
  g = h.each_with_object({}) do |(k,v),g|
    puts "#{indent}  level = #{level}"
    puts "#{indent}  k=#{k}"
    puts "#{indent}  v=#{v}"
    puts "#{indent}  g=#{g}"
    case v
    when Hash
      puts "#{indent}  v is Hash"
      g.update(recurse(v, arr + [k], level+1)) { |_,o,n| o+n }
    else
      puts "#{indent}  v is not a Hash"
      g.update({ v=>[arr+[k]] }) { |_,o,n| o+n }
    end
  end
  puts "#{indent}return #{g}"
  g
end

The output for recurse h follows, for Example 2 (for diehards only).

level = 0
h= {:u=>{:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe}, :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}, :c=>{:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}}
arr= []
  level = 0
  k=u
  v={:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
     :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
  g={}
  v is Hash
  level = 1
  h= {:u=>{:u=>:phe, :a=>:leu}, :c=>{:u=>:ser, :c=>:phe},
      :a=>{:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}}
  arr= [:u]
    level = 1
    k=u
    v={:u=>:phe, :a=>:leu}
    g={}
    v is Hash
    level = 2
    h= {:u=>:phe, :a=>:leu}
    arr= [:u, :u]
      level = 2
      k=u
      v=phe
      g={}
      v is not a Hash
      level = 2
      k=a
      v=leu
      g={:phe=>[[:u, :u, :u]]}
      v is not a Hash
    return {:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
    level = 1
    k=c
    v={:u=>:ser, :c=>:phe}
    g={:phe=>[[:u, :u, :u]], :leu=>[[:u, :u, :a]]}
    v is Hash
    level = 2
    h= {:u=>:ser, :c=>:phe}
    arr= [:u, :c]
      level = 2
      k=u
      v=ser
      g={}
      v is not a Hash
      level = 2
      k=c
      v=phe
      g={:ser=>[[:u, :c, :u]]}
      v is not a Hash
    return {:ser=>[[:u, :c, :u]], :phe=>[[:u, :c, :c]]}
    level = 1
    k=a
    v={:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
    g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a]], :ser=>[[:u, :c, :u]]}
    v is Hash
    level = 2
    h= {:u=>:tyr, :c=>{:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}}
    arr= [:u, :a]
      level = 2
      k=u
      v=tyr
      g={}
      v is not a Hash
      level = 2
      k=c
      v={:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
      g={:tyr=>[[:u, :a, :u]]}
      v is Hash
      level = 3
      h= {:a=>{:u=>:leu, :c=>:ser}, :u=>:tyr}
      arr= [:u, :a, :c]
        level = 3
        k=a
        v={:u=>:leu, :c=>:ser}
        g={}
        v is Hash
        level = 4
        h= {:u=>:leu, :c=>:ser}
        arr= [:u, :a, :c, :a]
          level = 4
          k=u
          v=leu
          g={}
          v is not a Hash
          level = 4
          k=c
          v=ser
          g={:leu=>[[:u, :a, :c, :a, :u]]}
          v is not a Hash
        return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
        level = 3
        k=u
        v=tyr
        g={:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]]}
        v is not a Hash
      return {:leu=>[[:u, :a, :c, :a, :u]], :ser=>[[:u, :a, :c, :a, :c]],
              :tyr=>[[:u, :a, :c, :u]]}
    return {:tyr=>[[:u, :a, :u], [:u, :a, :c, :u]], :leu=>[[:u, :a, :c, :a, :u]],
            :ser=>[[:u, :a, :c, :a, :c]]}
  return {:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
          :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
  level = 0
  k=c
  v={:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
  g={:phe=>[[:u, :u, :u], [:u, :c, :c]], :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u]],
     :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]], :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]]}
  v is Hash
  level = 1
  h= {:u=>{:u=>:leu, :c=>:pro}, :a=>{:u=>:arg}}
  arr= [:c]
    level = 1
    k=u
    v={:u=>:leu, :c=>:pro}
    g={}
    v is Hash
    level = 2
    h= {:u=>:leu, :c=>:pro}
    arr= [:c, :u]
      level = 2
      k=u
      v=leu
      g={}
      v is not a Hash
      level = 2
      k=c
      v=pro
      g={:leu=>[[:c, :u, :u]]}
      v is not a Hash
    return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
    level = 1
    k=a
    v={:u=>:arg}
    g={:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]]}
    v is Hash
    level = 2
    h= {:u=>:arg}
    arr= [:c, :a]
      level = 2
      k=u
      v=arg
      g={}
      v is not a Hash
    return {:arg=>[[:c, :a, :u]]}
  return {:leu=>[[:c, :u, :u]], :pro=>[[:c, :u, :c]], :arg=>[[:c, :a, :u]]}
return {:phe=>[[:u, :u, :u], [:u, :c, :c]],
        :leu=>[[:u, :u, :a], [:u, :a, :c, :a, :u], [:c, :u, :u]],
        :ser=>[[:u, :c, :u], [:u, :a, :c, :a, :c]],
        :tyr=>[[:u, :a, :u], [:u, :a, :c, :u]],
        :pro=>[[:c, :u, :c]],
        :arg=>[[:c, :a, :u]]}
  #=> <the last value returned above> 
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.