Find Key/Value Pairs Deep Inside a Hash Containing an Arbitrary Number of Nested Hashes and Arrays

Find key/value pairs deep inside a hash containing an arbitrary number of nested hashes and arrays

Here's a simple recursive solution:

# Depth-first search for +key+ in an arbitrarily nested structure of hashes
# and arrays, returning the value stored under the first occurrence found.
#
# @param obj [Object] structure to search; anything responding to +key?+ is
#   queried like a hash, anything else responding to +each+ is walked.
# @param key [Object] the key to look for.
# @return [Object, nil] the value under the first matching key (which may
#   itself be +false+ or +nil+), or +nil+ when no container has the key.
def nested_hash_value(obj, key)
  # A unique throw tag lets the first match unwind the whole recursion, so a
  # falsy value (false/nil) ends the search exactly like a truthy one instead
  # of being mistaken for "keep looking".
  catch do |found|
    visit = lambda do |node|
      if node.respond_to?(:key?) && node.key?(key)
        throw found, node[key]
      elsif node.respond_to?(:each)
        # Hashes yield [k, v] pairs and arrays yield elements; a.last is that
        # pair or element, which is then searched in turn.
        node.each { |*a| visit.call(a.last) }
      end
    end

    visit.call(obj)
    nil # nothing matched anywhere in the structure
  end
end

h = { foo:[1,2,[3,4],{a:{bar:42}}] }
p nested_hash_value(h,:bar)
#=> 42

In an array of hashes, count values while creating other key/value pairs at the same time in Ruby

You could use Enumerable#group_by.

# Bucket the hashes in aoh by their :interface value, then summarize each
# bucket as one hash holding its online and offline host counts.
aoh.group_by { |host| host[:interface] }.map do |interface, hosts|
  # Split once: entries whose :status is "online" versus everything else.
  online, offline = hosts.partition { |host| host[:status] == "online" }
  { interface: interface, online_hosts: online.size, offline_hosts: offline.size }
end
  #=> [{:interface=>"1A", :online_hosts=>2, :offline_hosts=>1},
  #    {:interface=>"2A", :online_hosts=>1, :offline_hosts=>2}]

Note:

aoh.group_by { |h| h[:interface] }
  #=> {"1A"=>[{:interface=>"1A", :host=>"host_1", :status=>"online"},
  #           {:interface=>"1A", :host=>"host_2", :status=>"online"},
  #           {:interface=>"1A", :host=>"host_3", :status=>"offline"}],
  #    "2A"=>[{:interface=>"2A", :host=>"host_4", :status=>"offline"},
  #           {:interface=>"2A", :host=>"host_5", :status=>"offline"},
  #           {:interface=>"2A", :host=>"host_6", :status=>"online"}]}

Another way is to use the form of Hash#update (a.k.a. merge!) that employs a block to determine the value of keys that are present in both hashes being merged. Here that block is do |_,o,n| ... end. See the doc for the definitions of the three block variables, _, o and n. (_ holds the common key. I've used an underscore to represent that variable to signal to the reader that it is not used in the block calculation, a common convention.)

# Fold the hashes in aoh into one tally per interface (keyed by interface) and
# return just the tallies. The block given to update runs only when the
# interface key is already present; it sums the old (o) and new (n) counts.
aoh.each_with_object({}) do |host, tallies|
  online = host[:status] == "online" ? 1 : 0
  tally = { interface: host[:interface], online_hosts: online, offline_hosts: 1 - online }

  tallies.update(host[:interface] => tally) do |_, o, n|
    {
      interface: o[:interface],
      online_hosts: o[:online_hosts] + n[:online_hosts],
      offline_hosts: o[:offline_hosts] + n[:offline_hosts]
    }
  end
end.values
  #=> [{:interface=>"1A", :online_hosts=>2, :offline_hosts=>1},
  #    {:interface=>"2A", :online_hosts=>1, :offline_hosts=>2}]

Note that the receiver of Hash#values is:

{"1A"=>{:interface=>"1A", :online_hosts=>2, :offline_hosts=>1},
 "2A"=>{:interface=>"2A", :online_hosts=>1, :offline_hosts=>2}}

search for key in a nested hash in Rails

I don't know if this is the best solution, but I would do:

h = {seal: 5, test: 3, answer: { nested: "damn", something: { email: "yay!" } } }

# Recursively look up +search+ in +h+ and in any hashes nested in its values.
# The current level is checked before descending; arrays are not traversed.
#
# @param h [Hash] the hash to search.
# @param search [Object] the key to look for.
# @return [Object, false] the value under the first matching key, or +false+
#   when the key is absent everywhere. Because +false+ is the "not found"
#   marker, a stored +false+ inside a nested hash cannot be told apart from
#   a miss.
def search_hash(h, search)
  # key? (rather than a truthiness test on the value) so that a key whose
  # value is nil or false still counts as found at this level.
  return h[search] if h.key?(search)

  h.each_value do |value|
    next unless value.is_a?(Hash)

    answer = search_hash(value, search)
    # Only false signals a miss in the nested hash; nil is a real value.
    return answer unless answer.equal?(false)
  end

  false
end

puts search_hash(h, :email)

This will return the value if the key exists, or false otherwise.

Reduce an array of nested hashes to a flattened array of key/value pairs

▶ flattener = ->(k, v) do
▷   case v
▷   when Enumerable then v.flat_map(&flattener)  
▷   when NilClass then []  
▷   else [k, v]  
▷   end  
▷ end  
#⇒ #<Proc:0x000000032169e0@(pry):26 (lambda)>
▶ input.flat_map(&flattener).each_slice(2).to_a
#⇒ [
#    [:created_at, "07/28/2017"],
#    [:valid_record, "true"],
#    [:gender, "m"],
#    [:race, "w"],
#    [:description, "possess"],
#    [:a, "a"],
#    [:b, "b"],
#    [:c, "c"]
#  ]

Is there a function in Ruby to traverse dynamic multi level hashes where all that is known is the required key?

The dig-deep gem seems to be what you are looking for:

require 'dig-deep'

scenario_1.dig_deep(:my_key)
=> "here"
scenario_2.dig_deep(:my_key)
=> "here"

Find multiple objects in nested hash

def nested_hash_values(obj,key)
  r = []  
  if obj.is_a?(Hash)        
    r.push(obj[key]) if obj.key?(key) 
    obj.each_value { |e| r += nested_hash_values(e,key) }
  end
  if obj.is_a?(Array)
    obj.each { |e| r += nested_hash_values(e,key) }
  end
  r
end

a = {"foo"=>["bar", "x", {"bar"=>["hello", {"foo"=>"world"}, "world!"], "foo"=>"BAR!"}, "enough?"], "bar"=>"foo"}
nested_hash_values(a, "foo")
=> [["bar", "x", {"bar"=>["hello", {"foo"=>"world"}, "world!"], "foo"=>"BAR!"}, "enough?"], "BAR!", "world"]

This should return an array containing every value found for the given key.

You could add an additional if e.is_a?(Array) || e.is_a?(Hash) check inside the each blocks. This would avoid unnecessary method calls and speed the function up a little, at the cost of some extra code.

How would I remove a nested value from a hash that occurs multiple times

Code

# Build a new hash mirroring +h+ in which every array stored under the key
# "within" has its FID identifiers (strings matching /\AFID\d+\z/) removed.
# Nested hashes are processed recursively; +h+ itself is not mutated.
#
# @param h [Hash] the (possibly nested) hash to clean.
# @return [Hash] a new hash; nested hashes and filtered "within" arrays are
#   new objects, all other values are carried over as-is.
def defidder(h)
  h.each_with_object({}) do |(k, v), result|
    result[k] =
      case v
      when Hash
        defidder(v)
      when Array
        # Only "within" lists are filtered. Any other array must pass through
        # untouched; a bare modifier-if here would turn it into nil.
        if k == "within"
          # Elements that cannot be pattern-matched (e.g. integers) are kept.
          v.reject { |s| s.respond_to?(:match?) && s.match?(/\AFID\d+\z/) }
        else
          v
        end
      else
        v
      end
  end
end

Example

I've added another layer of hash nesting to the example given in the question:

hash = {
  "lock_version"=>4, 
  "exhibition_quality"=>false,
  "within"=>["FID6", "S2"],
  "repository"=>{
    "ref"=>"/repositories/2",
    "repository"=>{"ref"=>"/repositories/2"},
    "within"=>["FID6", "S2"],
    "1more"=>{ a: 1, "within"=>["FID999", "S7"] }
  }
}

defidder hash
  #=> {
  #     "lock_version"=>4,
  #     "exhibition_quality"=>false, "within"=>["S2"], 
  #     "repository"=>{
  #       "ref"=>"/repositories/2",
  #       "repository"=>{"ref"=>"/repositories/2"},
  #       "within"=>["S2"],
  #       "1more"=>{:a=>1, "within"=>["S7"]}
  #     }
  #   }

We may verify hash was not mutated.

hash
  #=> {
  #     "lock_version"=>4, 
  #     "exhibition_quality"=>false,
  #     "within"=>["FID6", "S2"],
  #     "repository"=>{
  #       "ref"=>"/repositories/2",
  #       "repository"=>{"ref"=>"/repositories/2"},
  #       "within"=>["FID6", "S2"],
  #       "1more"=>{ a: 1, "within"=>["FID999", "S7"] }
  #     }
  #   }

add a key value pair to hash in first array depending on the substring from second array

But I get the error

This is because you do topic.first.keys instead of topic.keys.first; the difference is that topic.first behaves like topic.to_a.first, and Hash#to_a returns an array of key/value pairs, e.g. [[k1, v1], [k2, v2], ...].

The below solution goes the other way around; it iterates over export_configs, and then tries to find the corresponding topic. In your example, each export_config hash comprises one key/value pair, but I don't see a reason why there can't be more, so this code does another loop inside.

It also makes use of Enumerable#find to get an object out of an array that matches a certain predicate; this is useful for your problem in particular, despite it not being the fastest.

# For every config hash, build a new hash with the same keys; where some topic
# contains the key as a substring, a { 'url' => topic } entry is appended to
# that key's values.
export_configs.map do |config|
  # defensive: a config may hold more than one key
  config.each_with_object({}) do |(key, values), enriched|
    # linear scan of topics for every key, not very fast
    topic = topics.find { |url| url.include?(key) }
    # values + [...] builds a new array, so the array held by config is untouched
    enriched[key] = topic ? values + [{ 'url' => topic }] : values
  end
end

Build paths of edge keys in nested hashes in ruby

# List the dotted key paths leading to every non-hash value in a nested hash.
#
# @param h [Hash] hash whose values may themselves be hashes.
# @return [Array<String>] one "a.b.c" style path per non-hash value, in
#   iteration order; an empty nested hash contributes no path.
def recurse(h)
  h.flat_map do |key, value|
    # Non-hash value: the path ends at this key.
    next key.to_s unless value.is_a?(Hash)

    # Nested hash: prefix each of its paths with this key.
    recurse(value).map { |suffix| "#{key}.#{suffix}" }
  end
end

h = {a: {m: {b: 2, c:1}, d: {e: {f: nil}, g: 3}}}
recurse(h)
  #=> ["a.m.b", "a.m.c", "a.d.e.f", "a.d.g"]

h = {a: {m: {b: 2, c:1}, d: 5 }, e: {f: {g: nil}, h: 3}}
recurse(h)
  #=> ["a.m.b", "a.m.c", "a.d", "e.f.g", "e.h"]

Efficient way of matching 2 hashes having maximum similarity

sqlfiddle I have put the table here with the same data as above. I have created a view from many different tables.

Do it in SQL, this is what it's good at.

Use a self-join to get the number of overlaps for each pair.

-- For every pair of distinct employees, count the options they have in common.
select
  a.emp_id as emp_id1,
  b.emp_id as emp_id2,
  count(a.option_id) as overlap
from data a
inner join data b
  on a.option_id = b.option_id
  -- Ensure we count each pair only once (this also excludes self-pairs)
  and a.emp_id < b.emp_id
group by a.emp_id, b.emp_id

Then use that as a CTE to select the pairs with the most overlaps.

-- Compute the pairwise overlap counts once, then keep only the pairs whose
-- overlap equals the maximum (ties are all returned).
with overlaps as (
  select
    a.emp_id as emp_id1,
    b.emp_id as emp_id2,
    count(a.option_id) as overlap
  from data a
  inner join data b
    on a.option_id = b.option_id
    and a.emp_id < b.emp_id
  group by a.emp_id, b.emp_id
)
select *
from overlaps
where overlap = (select max(overlap) from overlaps)

So long as the table is indexed, that should perform much better than pulling all the data out into Ruby.

-- Composite index led by option_id (the column the self-join equates on),
-- followed by emp_id (the column it compares and groups by).
create index idx_option_emp_ids on data(option_id, emp_id);

Even without indexes, it should perform much better than pulling it all out into Ruby.

Find Key/Value Pairs Deep Inside a Hash Containing an Arbitrary Number of Nested Hashes and Arrays