Find by Key Deep in a Nested Array

Find by key deep in a nested array

Recursion is your friend. I updated the function to account for property arrays:

function getObject(theObject) {
var result = null;
if(theObject instanceof Array) {
for(var i = 0; i < theObject.length; i++) {
result = getObject(theObject[i]);
if (result) {
break;
}
}
}
else
{
for(var prop in theObject) {
console.log(prop + ': ' + theObject[prop]);
if(prop == 'id') {
if(theObject[prop] == 1) {
return theObject;
}
}
if(theObject[prop] instanceof Object || theObject[prop] instanceof Array) {
result = getObject(theObject[prop]);
if (result) {
break;
}
}
}
}
return result;
}

updated jsFiddle: http://jsfiddle.net/FM3qu/7/

Find deeply nested object by id

from your data here is a solution that can help you achieve that easily

function findById(array, id) {
for (const item of array) {
if (item.id === id) return item;
if (item.children?.length) {
const innerResult = findById(item.children, id);
if (innerResult) return innerResult;
}
}
}

const foundItem = findById(x, "2.2");
console.log(foundItem);
/*
The result obtained here is: {id: '2.2', name: 'name2.2', children: Array(2)}
*/

Find all children's key by specific key in a deep nested array object

You can recursively loop over all the children and get the keys that either match one of the target keys or if any of their ancestors have matched one the target keys.

const data = [
{
title: "0-0",
key: "0-0",
children: [
{
title: "0-0-0",
key: "0-0-0",
children: [
{ title: "0-0-0-0", key: "0-0-0-0" },
{ title: "0-0-0-1", key: "0-0-0-1" },
{ title: "0-0-0-2", key: "0-0-0-2" },
],
},
{
title: "0-0-1",
key: "0-0-1",
children: [
{ title: "0-0-1-0", key: "0-0-1-0" },
{ title: "0-0-1-1", key: "0-0-1-1" },
{ title: "0-0-1-2", key: "0-0-1-2" },
],
},
{
title: "0-0-2",
key: "0-0-2",
},
],
},
{
title: "0-1",
key: "0-1",
children: [
{ title: "0-1-0-0", key: "0-1-0-0" },
{ title: "0-1-0-1", key: "0-1-0-1" },
{ title: "0-1-0-2", key: "0-1-0-2" },
],
},
{
title: "0-2",
key: "0-2",
},
];

function getKeys(data, targetKeys) {
const targetKeysSet = new Set(targetKeys);
const outputKeys = [];
function getKeysHelper(data, hasParentMatched = false) {
data?.forEach((d) => {
if (targetKeysSet.has(d.key) || hasParentMatched) {
outputKeys.push(d.key);
getKeysHelper(d.children, true);
} else {
getKeysHelper(d.children);
}
});
}
getKeysHelper(data);
return outputKeys;
}

getKeys(data, ["0-0-0"]);

Find object from an array of nested objects by key in JavaScript

You can wrap your parent obj in an array using findObject, which then invokes an auxiliary function findObjectAux that is responsible for performing the iteration over your array to find your match (note that for the parent object, you'll only have one item in your array, but subsequent calls could contain more). For each object in the array, you can check if its id matches the one you passed through into your function. If it does, you can return it to the calling function, if it doesn't match, then you can recurse down its child array. If looking through the objects child array happens to return a value, then you've found a match that you can return, otherwise, your for loop can continue looking at any remaining objects in your array:

const obj = { "id": "A", "name": "Item A", "child": [{ "id": "B", "name": "Item B", "child": [{ "id": "C", "name": "Item C", "child": [] }] }, { "id": "D", "name": "Item D", "child": [] }] };

const findObject = (obj, id) => findObjectAux([obj], id);
const findObjectAux = (arr, id) => {
for (const obj of arr) {
if (obj.id === id) return obj;
const nestedObj = findObjectAux(obj.child, id);
if (nestedObj) return nestedObj;
}
}

const result = findObject(obj, "C");
console.log(result);

Search a deeply nested array to update an object

Here is a quick example of what I think you are attempting.

findNestedColorObject() accepts an array of and a color string to search for and returns the first object whose color property matches.

updateMoreColors() accepts an object as returned from the above and first assigns moreColors if it doesn't exist, and then pushes to array.

function findNestedColorObject(array, color) {
let colorObject;

for (const obj of array) {
if (obj.color === color) {
colorObject = obj;
} else if (obj.moreColors !== undefined) {
colorObject = findNestedColorObject(obj.moreColors, color);
}

if (colorObject !== undefined) {
break;
}
}

return colorObject;
}

function updateMoreColors(colorObject, colors) {
colorObject.moreColors ??= [];

for (const color of [].concat(colors)) {
colorObject.moreColors.push({ color });
}
}

const myData = [{ "color": "green", "moreColors": [{ "color": "beige" }, { "color": "black", "moreColors": [{ "color": "grey" }, { "color": "white", "moreColors": ["ochre"] }] }] }];

const greyObject = findNestedColorObject(myData, 'grey');
console.log('found:')
console.log(greyObject);

updateMoreColors(greyObject, 'purple');
console.log('updated:');
console.log(greyObject);

const beigeObject = findNestedColorObject(myData, 'beige');
console.log('found:')
console.log(beigeObject);

updateMoreColors(beigeObject, ['salmon', 'crimson']);
console.log('updated:');
console.log(beigeObject);

// edit per the commments
const data = [{ color: "beige", moreColors: [{ color: "blue" }] }, { color: "black", moreColors: [{ color: "white" }] }];
const blue = findNestedColorObject(data, 'blue');
console.log('fixed overwrite error:')
console.log(blue);

Find all values by specific key in a deep nested object

You could make a recursive function like this:

idArray = []

function func(obj) {
idArray.push(obj.id)
if (!obj.children) {
return
}

obj.children.forEach(child => func(child))
}

Snippet for your sample:

const myObj = {

id: 1,

children: [{

id: 2,

children: [{

id: 3

}]

},

{

id: 4,

children: [{

id: 5,

children: [{

id: 6,

children: [{

id: 7,

}]

}]

}]

},

]

}

idArray = []

function func(obj) {

idArray.push(obj.id)

if (!obj.children) {

return

}

obj.children.forEach(child => func(child))

}

func(myObj)

console.log(idArray)

Search nested array of objects

I have provided two methods below:

  1. Search for first match in the tree, return if found and containing array.
  2. Search for all matches, returns containing array, object it was found in, and depth it was found at

Method 1:
CodeSandbox

Here is a recursive method that doesnt use reduce. Simple type checking to search for a provided key value, and will search through any child arrays, with names that do not have to be "children". It provides a "found" result as well as an array in which it was found. You could extend this to return the object in which it was found too, quite easily.

const recursivelyFindKeyValue= (key, keyValue, list) => {
console.log("Searching list: ", list);

for (let i = 0; i < list.length; i++) {
const item = list[i];

for (const key of Object.keys(item)) {
//check if its array of more options, search it
if (Array.isArray(item[key])) {
console.log("child array found, searching", item);
const res = recursivelyFindKeyValue(key, keyValue, item[key]);
if (res.found === true) return res;
}
//Test the keyValue
else if (item[key] === keyValue) {
//found, return the list
console.log("found ", keyValue);
return { found: true, containingArray: list };
}
}
}

return { found: false, containingArray: [] };
};

usage

const res = recursivelyFindKeyValue("link", "top2-child-2", menuList);
const res = recursivelyFindKeyValue("link", "nested-child-of-3", menuList);

Method 2:
CodeSandbox

This method will search the entire tree, return all results with their parent array, object which they key:value was found, as well as the depth at which it was found.

const recursivelyFindKeyValue = (key, keyValue, list, depth = 0) => {
console.log("Searching list: ", list);
let results = [];

for (let i = 0; i < list.length; i++) {
const item = list[i];

for (const key of Object.keys(item)) {
//check if its not an array
if (Array.isArray(item[key])) {
console.log("child array found, searching", item);
let res = recursivelyFindKeyValue(key, keyValue, item[key], depth + 1);
results = results.concat(res);
}
//we have found it
else if (item[key] === keyValue) {
//found, return the list
console.log("found ", keyValue);
results.push({
found: true,
containingArray: list,
depth: depth,
object: item
});
}
}
}

return results;
};

I created another link "top2-child-2" in the codesandbox example deeper down so you can see the result of multiple depths.

Find deep difference between nested array of object with lodash or underscore or vanilla js

Understanding the problem

The example objects in the question use an invalid notation and also assign a global variable anotherArr as a side effect, which was probably unintended. (Update: the example objects in the question were changed after I posted this answer. The objects below still resemble the original example objects.) In this answer, I will assume the following notation instead:

const prevObj = {
arr: [{
name: 'Ankur',
}, {
email: 'ankur4736@gmail.com',
}, {
key: 456,
}, [{
name: 'Shubham',
anotherNestedArr: [{
name: 'Gundeep Paji'
}],
}]],
};

const newObj = {
arr: [{
name: 'Shubham',
}, {
email: 'shubham@gmail.com',
}, {
key: 466,
}, [{
name: 'Gundeep Paji',
anotherNestedArr: [{
name: 'Ankur'
}],
}]],
};

If I'm understanding the question correctly, you not only want to perform a deep comparison between the objects, but also create a new object that contains a description of the differences. You state that your current code works for objects but not for arrays. From your code, I'm gathering that you want to the diff to look like this:

const diff = {
arr: [{
name: {
oldValue: 'Ankur',
newValue: 'Shubham',
},
}, {
email: {
oldValue: 'ankur4736@gmail.com',
newValue: 'shubham@gmail.com',
},
}, {
key: {
oldValue: 456,
newValue: 466,
},
}, [{
name: {
oldValue: 'Shubham',
newValue: 'Gundeep Paji',
},
anotherNestedArr: [{
name: {
oldValue: 'Gundeep Paji',
newValue: 'Ankur',
},
}],
}]],
};

Sparse vs. dense arrays

Before we commence, I should point out that there are two different ways one may create a new array containing only the values from the old array that have changed in the new array. For simplicity, I will use two arrays of numbers as an example:

const oldArr = [1, 2, 3, 4, 5, 6, 7];
const newArr = [1, 1, 3, 4, 7, 6, 7];
// differences: ^ ^

For the sake of illustration, assume for a moment that we only store the old values in the diff, instead of {oldValue, newValue} objects. Our diff array will start out empty either way, but if we choose the dense way of representing the diff, we simply append every element of oldArr that has changed in newArr to the end:

const denseDiff = [2, 5];

The advantage of this representation is that denseDiff.length will tell us a straightforward truth, i.e., two elements have changed. The disadvantage is that we have no way to tell what was the original index of the changed elements.

The sparse way is more complicated:

const sparseDiff = [, 2, , , 5];

sparseDiff.length, which is 5, no longer tells us the number of changed elements. Instead, it only conveys the highest index of a changed element. In addition, if we take an index between 0 and sparseDiff.length that didn't change, for example sparseDiff[3], it will always tell us undefined, of which the meaning is not clear.

There is a lot of room for confusion here, which generally isn't a good thing in programming. However, the sparse way has the major advantage that the indices of changed elements are always equal between oldArr and sparseDiff. That is, we maintain the following invariant for every index i:

oldArr[i] == newArr[i] ? sparseDiff[i] == undefined : sparseDiff[i] == oldArr[i]

Fortunately, there is also a way for us to find directly which indices correspond to the changed elements. _.keys (or Object.keys) will tell us the indices of just the elements that were set explicitly (because they changed):

_.keys(sparseDiff)  // ['1', '4']

_.keys(sparseDiff).length will even tell us the number of elements that changed. We can get all the information we could obtain with the dense representation, just in a more roundabout and confusing way.

Concluding, sparse arrays are arguably the correct approach here, because they allow us to obtain all the information we might need. I pointed out the disadvantages of sparse arrays anyway, to hightlight that there is still a tradeoff.

Your code

Before jumping to solutions, let's take this opportunity to learn as much as possible. There are a couple of things in your code that stood out to me.

Double outer function

This is the first thing I noticed:

function difference(oldValues, newValues) {
function changes(oldValues, newValues) {
//...
}

return changes(oldValues, newValues);
}

The difference function is simply forwarding its arguments in the same order to changes and then echoing its return value. That means that difference isn't really doing anything. You could remove the outermost difference function, call changes instead of difference and still get the same result:

function changes(oldValues, newValues) {
//...
}

_.transform

The second thing I noticed is that you're using _.transform. If we look at how this function was conceived, it turns out to be a variant of _.reduce that can stop iteration early by returning false from the iteratee. Since the iteratee must be able to return false but we must also be able to obtain a result at the end, this means that the iteratee must modify the accumulator in place, instead of returning it as we would do in _.reduce. There are two major problems with this.

Firstly, modifying in place doesn't work with immutable values such as strings. For this reason, _transform only works on objects and arrays. Consider the following function to reverse a string, which can work with _.reduce but not with _.transform:

function reverse(string) {
return _.reduce(string, (result, char) => char + result);
}

You could work around this by wrapping the result in an object and then unwrapping it afterwards, but this means you're working around the limitations of _.transform while you should be using _.reduce in the first place.

Secondly and more importantly, partially transforming an object makes no sense. The keys of an object are unordered. Suppose that during iteration, you find the stop condition on a particular key. By pure coincidence, some of the other keys have already been visited while others haven't yet. There is no defensible reason why the keys that have already been visited should be part of your transformed result and the other keys shouldn't, since this division is completely arbitrary.

_.transform is useless for immutable values and meaningless for unordered collections. How about ordered collections, arrays? At first sight, there are some reasonable scenarios where you might want to partially transform an array. For example, to compute the least number above a threshold that can be obtained by summing numbers in an array from the start:

function thresholdPartialSum(numbers, threshold) {
return _.transform(numbers, function(result, number) {
result.wrapped += number;
return result.wrapped <= threshold;
}, { wrapped: 0 }).wrapped;
}

Note the wrapped appearing in four places. We are working around _.transform's limitation of having to modify the accumulator in place here, too.

Admittedly, _.transform has a performance advantage over _.reduce in this case. However, we need neither. Partially iterating arrays is already handled perfectly well by good old _.find and similar functions. Moreover, since we have to modify the accumulator in place anyway, we might as well close over it. _.find will handle scenarios like these just as well as _.transform, with less complexity:

function thresholdPartialSum(numbers, threshold) {
let partialSum = 0;
find(numbers, function(number) {
partialSum += number;
return partialSum > threshold;
});
return partialSum;
}

In conclusion, you should never use _.transform. Use _.reduce or _.find instead.

Duplicate recursion

This is the third thing I noticed:

if (!_.isEqual(value, oldValues[key])) {
// recursion here
}

While this will not lead to incorrect results, _.isEqual is itself recursive. It bases its return value on whether any difference is found. This means that when a difference is found, you end up recursively iterating both values again, just to find back the difference that _.isEqual already identified but didn't tell you the position of.

At first glance, this may seem not so problematic. You might recurse twice instead of once. Not the most efficient possible, but at most twice as slow, right? Unfortunately, the duplication doesn't stop here. Once you recursively call your own changes function, _.isEqual will be invoked again for each subelement of the element you recursed into, and it will be recursive again.

To quantify this, let's start by visualizing a nested object as a tree.

const tree = {
mainBranch1: {
branch1_1: {
twig1_1_1: {
leaf1_1_1_1: 'value 1.1.1.1',
leaf1_1_1_2: 'value 1.1.1.2',
},
twig1_1_2: {
leaf1_1_2_1: 'value 1.1.2.1',
leaf1_1_2_2: 'value 1.1.2.2',
},
},
branch1_2: {
twig1_2_1: {
leaf1_2_1_1: 'value 1.2.1.1',
leaf1_2_1_2: 'value 1.2.1.2',
},
twig1_2_2: {
leaf1_2_2_1: 'value 1.2.2.1',
leaf1_2_2_2: 'value 1.2.2.2',
},
},
},
mainBranch2: {
branch2_1: {
twig2_1_1: {
leaf2_1_1_1: 'value 2.1.1.1',
leaf2_1_1_2: 'value 2.1.1.2',
},
twig2_1_2: {
leaf2_1_2_1: 'value 2.1.2.1',
leaf2_1_2_2: 'value 2.1.2.2',
},
},
branch2_2: {
twig2_2_1: {
leaf2_2_1_1: 'value 2.2.1.1',
leaf2_2_1_2: 'value 2.2.1.2',
},
twig2_2_2: {
leaf2_2_2_1: 'value 2.2.2.1',
leaf2_2_2_2: 'value 2.2.2.2',
},
},
},
};

We have a tree with four levels of branching (not including tree itself). There are sixteen leafs.

Recursion will stop at the leaf keys, so let's count how many times _.isEqual will visit a leaf key. Assume that we will be comparing tree to another object with a similar structure and about half of the leaf values has changed.

First, we call changes on tree and the other object. We invoke _.isEqual once for each mainBranch. _.isEqual recursively calls itself until it reaches the leaf keys. It needs to visit about half of the leafs in order to find a difference, at which point it knows it must return false. At this point, we have visited each leaf half a time on average.

Every time _.isEqual returns false, we recursively call changes on the corresponding main branch. Each call to changes then invokes _.isEqual once for each branch within. _.isEqual recursively calls itself again until it reaches the leaf keys. Now we have visited each leaf once on average.

_.isEqual again returns false every time, so we recurse into changes again. The whole pattern repeats and we're at one and a half visits per leaf.

We finally repeat the whole pattern one more time with the twigs and we end up at two invocations of _.isEqual per leaf key on average. That means 16 * 2 = 32 invocations of _.isEqual on a leaf key in total. The total number of invocations per leaf is equal to the depth of the tree times the number of leafs, divided by two. In complexity analysis, we call this log-linear or quasilinear time.

This is the worst case. With fewer than half of the leafs differing, changes will recurse less often, while with more than half of the leafs differing, _.isEqual will stop its own recursion sooner. Nevertheless, without the duplicate recursion, we would need to visit each leaf only once, regardless of the depth of the tree. After all, when we determine whether a key has changed, we might as well directly record the difference. With large, deeply nested structures this can be a big difference, especially if it happens in a hot loop.

Removing the duplication requires that we change the algorithm from a pre-order traversal to a post-order traversal. That is, instead of first determining whether there are any differences with a larger branch and then finding those differences within each of its smaller branches, we have to go straight to collecting all the differences within the smallest branches. If we find any differences within a smaller branch, we then know that we should also record the containing larger branch.

Solution

Enough theory. The following code addresses the issues I discussed above and works for objects as well as arrays. It works regardless of whether you're using Underscore or Lodash.

function changes(oldCollection, newCollection) {
return _.reduce(oldCollection, function(result, oldValue, key) {
const newValue = newCollection[key];
if (_.isObject(oldValue) && _.isObject(newValue)) {
const diff = changes(oldValue, newValue);
if (!_.isEmpty(diff)) result[key] = diff;
} else if (oldValue !== newValue) {
result[key] = { oldValue, newValue };
}
return result;
}, _.isArray(oldCollection) ? [] : {});
}

Final remarks

As in the original question, the diff object returned by changes contains a nested {oldValue, newValue} object for each changed leaf key. This could lead to ambiguities if either the old or the new object contains keys with the name oldValue or newValue. This is not as crazy as it may seem; consider comparing diffs at two different points in time.

To avoid such situations, you could opt to store only the old value (or only the new value) in the diff. Since you'll know the exact location of the value that changed, it will be trivial to look up the corresponding key in the other object. You need to change only one line to make the algorithm behave this way, but this is left as an exercise to the reader.

Also as in the original question, the above solution does not take into account that the new collection might have new keys that didn't exist in the old collection. If you want to detect new keys and you can accept that deleted keys will be missed, simply iterate over newCollection instead of over oldCollection. If you want to pick up missing keys on both sides, determine the common keys and the uncommon keys by the symmetric difference first. Treat the common keys as above while recording the uncommon keys directly as differences. This is, again, left as an exercise to the reader.

Update: detecting both deleted and added elements

In the "final remarks" above, I commented that my solution above, like in the original question, did not detect newly added elements in objects and arrays. Ankur Sharma commented that this was actually one of the problems he was seeking a solution for in his question. To address this, below is a new version of my solution.

The code below assumes Underscore. For Lodash 4, use _.mapValues instead of _.mapObject.

// Helper for the call to _.mapObject/_.mapValues below.
const wrapAddedKey = newValue => ({ oldValue: undefined, newValue });

function changes(oldCollection, newCollection) {
const diff = _.reduce(oldCollection, function(result, oldValue, key) {
const newValue = newCollection[key];
if (_.isObject(oldValue) && _.isObject(newValue)) {
const diff = changes(oldValue, newValue);
if (!_.isEmpty(diff)) result[key] = diff;
} else if (oldValue !== newValue) {
result[key] = { oldValue, newValue };
}
return result;
}, _.isArray(oldCollection) ? [] : {});
const addedKeys = _.difference(_.keys(newCollection), _.keys(oldCollection));
const additions = _.pick(newCollection, addedKeys);
return _.extend(diff, _.mapObject(additions, wrapAddedKey));
}

Update 2

From another update to the question, it became clear that the asker wanted to represent missing values as null instead of as undefined. The following code addresses this.

// Helper for the call to _.mapObject/_.mapValues below.
const wrapAddedKey = newValue => ({ oldValue: null, newValue });

function changes(oldCollection, newCollection) {
const diff = _.reduce(oldCollection, function(result, oldValue, key) {
const newValue = key in newCollection ? newCollection[key] : null;
if (_.isObject(oldValue) && _.isObject(newValue)) {
const diff = changes(oldValue, newValue);
if (!_.isEmpty(diff)) result[key] = diff;
} else if (oldValue !== newValue) {
result[key] = { oldValue, newValue };
}
return result;
}, _.isArray(oldCollection) ? [] : {});
const addedKeys = _.difference(_.keys(newCollection), _.keys(oldCollection));
const additions = _.pick(newCollection, addedKeys);
return _.extend(diff, _.mapObject(additions, wrapAddedKey));
}


Related Topics



Leave a reply



Submit