Using os.walk() to Recursively Traverse Directories in Python

Using os.walk() to recursively traverse directories in Python

This will give you the desired result

#!/usr/bin/python

import os

# Walk the tree rooted at the current directory and print it as an outline:
# each directory at its depth, followed by the files it contains.
for root, dirs, files in os.walk("."):
    # The number of path components in root determines the indent level.
    path = root.split(os.sep)
    print((len(path) - 1) * '---', os.path.basename(root))
    for name in files:
        # Files are indented one level deeper than their directory.
        print(len(path) * '---', name)

Recursive problems in Python using os.walk()

os.walk itself works recursively. Don't call it recursively:

def CheckSubFolder(folder):
    """Print every directory and file found anywhere below ``folder``.

    os.walk already descends into sub-directories on its own, so this
    function must not call itself recursively.

    Args:
        folder: Path of the directory to scan.
    """
    for root, directories, files in os.walk(folder):
        # Names yielded by os.walk are relative to root; join them to get
        # a usable path.
        for d in directories:
            print("folder : " + os.path.join(root, d))
        for f in files:
            print("file : " + os.path.join(root, f))

# Code Entry
# sys is needed for the command-line arguments; this snippet only imported os.
import sys

# The directory to scan is the first command-line argument.
path = sys.argv[1]
CheckSubFolder(path)

traverse directory structure in python recursively without os.walk

Try using a recursive function,

def lastline(fil):
    """Print every line of the file ``fil`` that starts with "Final Value:".

    Args:
        fil: Path of the text file to scan.
    """
    with open(fil) as f:
        # Iterate the file lazily rather than loading it all with readlines().
        for li in f:
            if li.startswith("Final Value:"):
                # li keeps its own line ending, so print adds a second newline.
                print(li)

## If it still doesn't work, try putting 'dirs = []' here
def lookforfiles(basepath):
    """Recursively scan ``basepath`` without using os.walk.

    For every regular file directly inside ``basepath`` this calls
    ``lastline`` on it and prints its name; every sub-directory is then
    scanned the same way.

    Args:
        basepath: Path of the directory to scan.
    """
    dirs = []

    # A single pass over the listing is enough; no counter loop or duplicate
    # check is needed because each entry is seen exactly once.
    for n in os.listdir(basepath):
        f = os.path.join(basepath, n)

        if os.path.isfile(f):
            lastline(f)
            print("\n\nfile %s" % n)
        elif os.path.isdir(f):
            print("Adding dir")
            dirs.append(f)

    # Descend only after the current directory has been fully handled.
    for x in dirs:
        print("dir %s" % x)
        lookforfiles(x)

Sorry if this doesn't fit your example precisely, but I had a hard time understanding what you were trying to do.

Python recursive folder read

Make sure you understand the three return values of os.walk:

for root, subdirs, files in os.walk(rootdir):

has the following meaning:

  • root: Current path which is "walked through"
  • subdirs: Files in root of type directory
  • files: Files in root (not in subdirs) of type other than directory

And please use os.path.join instead of concatenating with a slash! Your problem is filePath = rootdir + '/' + file: you must join against the folder currently being walked (root), not the topmost folder, so it must be filePath = os.path.join(root, file). BTW, "file" is a builtin in Python 2, so you don't normally use it as a variable name.

Another problem is your loops, which should look like this, for example:

import os
import sys

# The directory to walk is the first command-line argument.
walk_dir = sys.argv[1]

print('walk_dir = ' + walk_dir)

# If your current working directory may change during script execution, it's recommended to
# immediately convert program arguments to an absolute path. Then the variable root below will
# be an absolute path as well. Example:
# walk_dir = os.path.abspath(walk_dir)
print('walk_dir (absolute) = ' + os.path.abspath(walk_dir))

for root, subdirs, files in os.walk(walk_dir):
    print('--\nroot = ' + root)
    # Paths must be built from root (the directory currently being walked),
    # not from walk_dir (the topmost directory).
    list_file_path = os.path.join(root, 'my-directory-list.txt')
    print('list_file_path = ' + list_file_path)

    with open(list_file_path, 'wb') as list_file:
        for subdir in subdirs:
            print('\t- subdirectory ' + subdir)

        for filename in files:
            file_path = os.path.join(root, filename)

            # A list file left over from an earlier run shows up in files;
            # it is the file open for writing right now, so skip it.
            if file_path == list_file_path:
                continue

            print('\t- file %s (full path: %s)' % (filename, file_path))

            # Binary mode on both sides copies the contents byte-for-byte.
            with open(file_path, 'rb') as f:
                f_content = f.read()
                list_file.write(('The file %s contains:\n' % filename).encode('utf-8'))
                list_file.write(f_content)
                list_file.write(b'\n')

If you didn't know, the with statement for files is a shorthand:

# The with statement closes the file when the block exits, even if the
# body raises.
with open('filename', 'rb') as f:
    dosomething()

# is effectively the same as

f = open('filename', 'rb')
try:
    dosomething()
finally:
    # Runs whether or not dosomething() raised.
    f.close()

python os.walk to certain level

You could do it like this:

depth = 2

# [1] abspath() also normalizes the path like normpath(), removing any
#     trailing os.sep; the slice below relies on there being no trailing
#     os.sep.
# [2] abspath() also resolves "/../", "////" and ".", which os.walk would
#     otherwise return literally.
# [3] expanduser() expands ~
# [4] expandvars() expands $HOME
# WARN: Don't use [3] expanduser and [4] expandvars if stuff contains an
# arbitrary string outside your control.
#stuff = os.path.expanduser(os.path.expandvars(stuff)) # if trusted source
stuff = os.path.abspath(stuff)

for root, dirs, files in os.walk(stuff):
    # Drop the stuff prefix from root so the separator count is the depth
    # of root below stuff.
    if root[len(stuff):].count(os.sep) < depth:
        for f in files:
            print(os.path.join(root, f))

The key is: if root[len(stuff):].count(os.sep) < depth

It removes stuff from root, so the result is relative to stuff. Then just count the number of path separators.

The depth acts like the -maxdepth option of the Linux find command, i.e. -maxdepth 0 means do nothing, -maxdepth 1 scans only files in the first level, and -maxdepth 2 also scans files in the immediate sub-directories.

Of course, it still scans the full file structure, but unless it's very deep that'll work.

Another solution would be to only use os.listdir recursively (with directory check) with a maximum recursion level, but that's a little trickier if you don't need it. Since it's not that hard, here's one implementation:

def scanrec(root, max_depth=2):
    """Return the paths of all non-directory entries at most ``max_depth`` levels below ``root``.

    Entries directly inside ``root`` are at level 0, entries inside its
    sub-directories at level 1, and so on. Directories deeper than the limit
    are never listed.

    Args:
        root: Path of the directory to scan.
        max_depth: Deepest level whose entries are still collected
            (defaults to 2).

    Returns:
        A list of paths, each built by joining onto ``root``.
    """
    rval = []

    def do_scan(start_dir, output, depth=0):
        # Collect non-directory entries of start_dir and recurse into its
        # sub-directories while the depth limit allows it.
        for f in os.listdir(start_dir):
            ff = os.path.join(start_dir, f)
            if os.path.isdir(ff):
                if depth < max_depth:
                    do_scan(ff, output, depth + 1)
            else:
                output.append(ff)

    do_scan(root, rval, 0)
    return rval

print(scanrec(stuff)) # prints the list of files not below 2 deep

Note: combining os.listdir with os.path.isdir or os.path.isfile costs an extra stat call per entry, so it is not optimal. Since Python 3.5, os.scandir can avoid that extra call.

Traverse directories recursively and return a nested list with the subdirectories and files in Python

You can use os.listdir in recursion:

import os
def to_tree(s=None):
    """Return the contents of directory ``s`` as a nested list of dicts.

    Each entry is ``{'name': <entry name>}``; directories additionally carry
    a ``'children'`` key holding the same structure for their own contents.
    Entries are listed in sorted name order so the result is deterministic.

    Args:
        s: Directory to describe. Defaults to the current working directory
            at call time.

    Returns:
        A list with one dict per entry of ``s``.
    """
    if s is None:
        # Resolve the default per call; a default of os.getcwd() in the
        # signature would be evaluated only once, at definition time.
        s = os.getcwd()

    tree = []
    for i in sorted(os.listdir(s)):
        full = os.path.join(s, i)
        # Recurse only into real directories; anything else (regular files,
        # broken symlinks, sockets, ...) is a leaf.
        if os.path.isdir(full):
            tree.append({'name': i, 'children': to_tree(full)})
        else:
            tree.append({'name': i})
    return tree

When running the function above on a similar file structure as your example, the result is:

import json
print(json.dumps(to_tree(), indent=4))

Output:

[
    {
        "name": "file1.txt"
    },
    {
        "name": "file2.txt"
    },
    {
        "name": "sub1",
        "children": [
            {
                "name": "subfile1.txt"
            },
            {
                "name": "subsub",
                "children": [
                    {
                        "name": "subsubfile1.txt"
                    }
                ]
            }
        ]
    },
    {
        "name": "sub2",
        "children": []
    }
]

os.walk without digging into directories below

Use the walklevel function.

import os

def walklevel(some_dir, level=1):
    """Walk ``some_dir`` like os.walk, but descend at most ``level`` levels.

    ``some_dir`` itself is level 0, so ``level=0`` yields only ``some_dir``
    and ``level=1`` additionally yields its immediate sub-directories.

    Args:
        some_dir: Directory to start from; trailing separators are ignored.
        level: How many levels below ``some_dir`` to descend into.

    Yields:
        The same ``(root, dirs, files)`` tuples as os.walk.

    Raises:
        AssertionError: If ``some_dir`` is not a directory (raised when
            iteration starts).
    """
    # Strip trailing separators, but keep a path made up only of separators
    # (the filesystem root), which stripping would turn into "".
    some_dir = some_dir.rstrip(os.path.sep) or some_dir
    # Raised explicitly so the check survives "python -O", which strips
    # assert statements.
    if not os.path.isdir(some_dir):
        raise AssertionError("not a directory: %r" % (some_dir,))

    for root, dirs, files in os.walk(some_dir):
        yield root, dirs, files
        # Depth of root below some_dir: 0 for some_dir itself, 1 for its
        # children, and so on.
        below = root[len(some_dir):].strip(os.path.sep)
        depth = below.count(os.path.sep) + 1 if below else 0
        if depth >= level:
            # Emptying dirs in place stops os.walk from descending further.
            del dirs[:]

It works just like os.walk, but you can pass it a level parameter that indicates how deep the recursion will go.



Related Topics



Leave a reply



Submit