How to Sort Yaml Files

read and sort line block of yaml file in python

I had some time to work on it, and I came up with the following solution:

#!/usr/bin/env python3

import yaml
#from functools import reduce

# Load the YAML file and print the entries of its 'networks' mapping as a
# list of (key, value) tuples, ordered numerically by the second field of
# key.rsplit('/', 2) and then by the key itself.
with open("sprd_variator.yml", 'r') as input_file:
    try:
        # safe_load instead of yaml.load: load() without an explicit Loader
        # is rejected by PyYAML >= 6 and can construct arbitrary objects.
        data = yaml.safe_load(input_file)
        section = 'networks'
        # An empty document loads as None, so check the type before `in`.
        if isinstance(data, dict) and section in data:
            def sort_key(item):
                # presumably keys look like "10.0.0.0/24", making this the
                # part after the slash -- confirm against the real file
                name = item[0]
                return int(name.rsplit('/', 2)[1]), name

            one_list = sorted(data[section].items(), key=sort_key)
            print(one_list)
        else:
            print('error')
    except yaml.YAMLError as err:
        print(err)

Thanks a lot.

How to sort through a yaml file and provide values for each tree

I am not 100% sure, but I don't think a function call within an f-string is evaluated, so try and do:

    print("KEY: {} - VALUE: {}".format(key, value[0].items()))

instead.

Additionally, the value for the key config is a sequence (i.e. loaded as a Python list) that has only one element, so you cannot index into that sequence beyond 0.

You'd better try and do something like:

# Print every element of every top-level value instead of only element 0.
for key, values in dataMap.items():
    # Each element is expected to be a mapping, since .items() is called on it.
    for idx, value in enumerate(values):
        print("KEY: {} - VALUE[{}]: {}".format(key, idx, value.items()))
    # NOTE(review): the original indentation was lost; the separator is
    # assumed to be printed once per key -- confirm.
    print("====")

How to recursively sort YAML (with anchors) using CommentedMap?

The approach I take with solving this kind of thing is first to add the
expected and necessary imports, define the input and expected output as
multiline strings, and add a useful diff method to the YAML instance.

String input is easier to work with than files while testing as everything is in
one file (need to remove some trailing spaces?) and you cannot overwrite your
input and start the next run with something different than the first.

import sys
import difflib
import ruamel.yaml
from ruamel.yaml.comments import merge_attrib

# Unsorted test input. The nesting (2-space indent) is significant: it is
# what makes the anchors, aliases and merge keys (<<) belong to the right
# mapping.
yaml_in = """\
_world:
  anchor_struct: &anchor_struct
    foo:
      val: "foo"
    bar:
      val: "foo"
  string: "string"
  newmsg: &newmsg
    msg: "msg"
    foo: "foo"
    new: "new"
  anchr_val: &anchor_val famous_val
  bool: True
elem2:
  myStruct:
    <<: *anchor_struct
  anchor_val: *anchor_val
  <<: *anchor_struct
  zzz: zorglub
  www: web
  anchor_struct:
    <<: *anchor_struct
    other_elem: "other_elem"
elem1:
  <<: *anchor_struct
  zzz: zorglub
  newmsg:
    <<: *newmsg
    msg: "msg2"
  myStruct:
    <<: *anchor_struct
  anchor_struct:
    second_elem: "second_elem"
    <<: *anchor_struct
    other_elem: "other_elem"
  www: web
  anchor_val: *anchor_val
"""

# Expected result: the same document with the keys of every mapping sorted
# and each merge key (<<) first in its mapping. The nesting (2-space indent)
# is significant.
yaml_out = """\
_world:
  anchor_struct: &anchor_struct
    bar:
      val: "foo"
    foo:
      val: "foo"
  anchr_val: &anchor_val famous_val
  bool: True
  newmsg: &newmsg
    foo: "foo"
    msg: "msg"
    new: "new"
  string: "string"
elem1:
  <<: *anchor_struct
  anchor_struct:
    <<: *anchor_struct
    other_elem: "other_elem"
    second_elem: "second_elem"
  anchor_val: *anchor_val
  myStruct:
    <<: *anchor_struct
  newmsg:
    <<: *newmsg
    msg: "msg2"
  www: web
  zzz: zorglub
elem2:
  <<: *anchor_struct
  anchor_struct:
    <<: *anchor_struct
    other_elem: "other_elem"
  anchor_val: *anchor_val
  myStruct:
    <<: *anchor_struct
  www: web
  zzz: zorglub
"""

def diff_yaml(self, data, s, fnin="in", fnout="out"):
    """Write a unified diff between the text `s` and `data` to stdout.

    Intended to be attached to a YAML instance class so that `self.dump`
    is available.

    Args:
        data: Either a string, compared line by line after stripping
            trailing whitespace, or any other object, which is first
            dumped with `self.dump`.
        s: The reference text; trailing whitespace on each line is ignored.
        fnin: Label used for `s` in the diff header.
        fnout: Label used for `data` in the diff header.

    Returns:
        True when the diff is empty, False otherwise.
    """
    # trailing space at end of line disregarded
    expected = [line.rstrip() + '\n' for line in s.splitlines()]
    if isinstance(data, str):
        actual = [line.rstrip() + '\n' for line in data.splitlines()]
    else:
        # dump data if necessary and compare with s
        buf = ruamel.yaml.compat.StringIO()
        self.dump(data, buf)
        actual = buf.getvalue().splitlines(True)
    result = True
    for line in difflib.unified_diff(expected, actual, fnin, fnout):
        sys.stdout.write(line)
        result = False  # any diff line at all means the inputs differ
    return result

# Attach diff_yaml to the YAML class so it can be called as yaml.diff(...).
ruamel.yaml.YAML.diff = diff_yaml

yaml = ruamel.yaml.YAML()
# yaml.indent(mapping=4, sequence=4, offset=2)
# Use "False"/"True" as the boolean spellings instead of the default ones.
yaml.boolean_representation = ["False", "True"]
# presumably keeps the double quotes around scalars such as "foo" on dump -- confirm
yaml.preserve_quotes = True

Then make sure your expected output is valid, and can be round-tripped:

# Round-trip the expected output: load it, dump it, and require that the
# dumped text matches the original text.
dout = yaml.load(yaml_out)
buf = ruamel.yaml.compat.StringIO()
yaml.dump(dout, buf)
# yaml.diff returns True only when the diff is empty.
assert yaml.diff(dout, yaml_out)

which should give neither output nor an assertion error (there is trailing
whitespace in your expected output, as well as the non-default True boolean). If the expected output cannot be round-tripped, ruamel.yaml might not be able to dump your expected output.

If you are stuck, you can now inspect dout to determine what your parsed input should look like.

So now try the recursive_sort

def recursive_sort_mappings(s):
    """Sort the keys of every mapping reachable from `s`, in place.

    Lists are traversed element by element; anything that is neither a list
    nor a dict is left untouched. Mappings must provide an
    `insert(pos, key, value)` method, which a plain dict does not.

    NOTE: this first attempt is not merge-aware. It iterates over all keys,
    including those supplied through merge keys (<<).
    """
    if isinstance(s, list):
        for elem in s:
            recursive_sort_mappings(elem)
        return
    if not isinstance(s, dict):
        return
    # Walk the keys in descending order and re-insert each at position 0,
    # so the mapping ends up in ascending order.
    for key in sorted(s, reverse=True):
        value = s.pop(key)
        recursive_sort_mappings(value)
        s.insert(0, key, value)

# Sort the parsed input in place and print how the dump differs from the
# expected output (the return value is not checked here).
din = yaml.load(yaml_in)
recursive_sort_mappings(din)
yaml.diff(din, yaml_out)

Which gives quite a bit of output, as the recursive_sort_mappings doesn't know
about merges and runs over all the keys, tries to keep merge keys in their original position, and additionally when popping a key (before reinserting it in the
first position), does some magic in case the popped value exists in a merged mapping:

--- in
+++ out
@@ -1,8 +1,8 @@
_world:
anchor_struct: &anchor_struct
- bar:
+ bar: &id001
val: "foo"
- foo:
+ foo: &id002
val: "foo"
anchr_val: &anchor_val famous_val
bool: True
@@ -14,24 +14,38 @@
elem1:
<<: *anchor_struct
anchor_struct:
+ bar: *id001
<<: *anchor_struct
+ foo: *id002
other_elem: "other_elem"
second_elem: "second_elem"
anchor_val: *anchor_val
+ bar: *id001
+ foo: *id002
myStruct:
<<: *anchor_struct
+ bar: *id001
+ foo: *id002
newmsg:
<<: *newmsg
+ foo: "foo"
msg: "msg2"
+ new: "new"
www: web
zzz: zorglub
elem2:
- <<: *anchor_struct
anchor_struct:
<<: *anchor_struct
+ bar: *id001
+ foo: *id002
other_elem: "other_elem"
anchor_val: *anchor_val
+ <<: *anchor_struct
+ bar: *id001
+ foo: *id002
myStruct:
<<: *anchor_struct
+ bar: *id001
+ foo: *id002
www: web
zzz: zorglub

To solve this you need to do multiple things. First you need to abandon the .insert(), which emulates (for the Python3 built-in OrderedDict) the method defined in the C ordereddict package ruamel.ordereddict. This emulation recreates the OrderedDict and
that leads to duplication. The Python3 C implementation has a less powerful (than .insert()), but in this case useful,
method move_to_end (which could be used in an update to the .insert() emulation in ruamel.yaml).

Second, you need to walk only over the "real" keys, not those keys provided by merges, so you cannot use `for key in s`.

Third you need the merge key to move to the top of mapping if it is somewhere else.

(The level argument was added for debugging purposes)

def recursive_sort_mappings(s, level=0):
    """Sort the own keys of every mapping reachable from `s`, in place.

    Lists are traversed element by element; anything that is neither a list
    nor a dict is left untouched. Mappings must provide `_ok` (the set of
    own keys) and `move_to_end`.

    Args:
        s: The loaded YAML data (or a nested part of it).
        level: Current nesting depth; only passed along for debugging.
    """
    if isinstance(s, list):
        for elem in s:
            recursive_sort_mappings(elem, level=level+1)
        return
    if not isinstance(s, dict):
        return
    merge = getattr(s, merge_attrib, [None])[0]
    if merge is not None and merge[0] != 0:  # << not in first position, move it
        setattr(s, merge_attrib, [(0, merge[1])])

    # Moving each key to the end in ascending order leaves the mapping sorted.
    for key in sorted(s._ok):  # _ok -> set of Own Keys, i.e. not merged in keys
        value = s[key]
        # print('v1', level, key, super(ruamel.yaml.comments.CommentedMap, s).keys())
        recursive_sort_mappings(value, level=level+1)
        # print('v2', level, key, super(ruamel.yaml.comments.CommentedMap, s).keys())
        s.move_to_end(key)

# Sort the parsed input in place and require that its dump now matches the
# expected output exactly.
din = yaml.load(yaml_in)
recursive_sort_mappings(din)
assert yaml.diff(din, yaml_out)

And then the diff no longer gives output.

How can I sort a dimensional array output by key value from jekyll data yaml files?

You're not applying the sort on a valid array of hashes.

If you do a {{ site.data.cat | inspect }} you get something like {"t1"=>{"name"=>"test1"}, "t2"=>{"name"=>"allo"}, "t3"=>{"name"=>"jekyll"}} (I've simplified data files for brevity but it works the same with more complicated data files like yours).

You currently are applying sort filter on {"name"=>"test1"} object which cannot sort itself.

What you need to do is to get all your data hashes in a single array. Then you can sort it.

{% assign datas = "" | split: "" %}
{% for cat in site.data.cat %}
{% assign datas = datas | push: cat[1] %}
{% endfor %}

DEBUG : {{ datas | inspect }}

You now have an array that can be sorted.

{% assign datas = datas | sort: "name" %}
DEBUG : {{ datas | inspect }}

You can now print your datas sorted by name.

Full code :

{% comment %} Collect the hash of every data file into one array, sort it by "name", then render one card per entry. {% endcomment %}
{% assign datas = "" | split: "" %}
{% for cat in site.data.cat %}
  {% assign datas = datas | push: cat[1] %}
{% endfor %}

{% assign datas = datas | sort: "name" %}

<div class="row">
  {% for cat in datas %}
    <div class="col-6 col-12-narrower">
      <section>
        <header>
          <a class="image featured" href="{{ cat.permalink }}" title="{{ cat.name }}">
            <h3>{{ cat.name }}</h3>
          </a>
        </header>
        <p>{{ cat.web }}</p>
      </section>
    </div>
  {% endfor %}
</div>

Note that inspect filter is used for debugging only.



Related Topics



Leave a reply



Submit