Excluding Directories in Os.Walk

Excluding directories in os.walk

Modifying dirs in-place will prune the (subsequent) files and directories visited by os.walk:

# `top` is the directory to walk and `exclude` holds the directory names to
# skip, for example:
# exclude = set(['New folder', 'Windows', 'Desktop'])
for root, dirs, files in os.walk(top, topdown=True):
    # Slice assignment edits the very list os.walk recurses over, so the
    # excluded directories (and everything beneath them) are never visited.
    dirs[:] = [d for d in dirs if d not in exclude]

From help(os.walk):

When topdown is true, the caller can modify the dirnames list in-place
(e.g., via del or slice assignment), and walk will only recurse into
the subdirectories whose names remain in dirnames; this can be used to
prune the search...

How to scan only some directories or exclude directories while using os.walk()

After reading @tripleee's comment, I wrote this piece of code, which gets the most recently modified files.

import os

os.chdir('Folder')
projloc = os.getcwd()  # getting the folder to scan

# Directories whose newest file has already been printed (most recent first).
list_of_dirs_to_exclude = []


def get_recent_files(limit=5):
    """Print the most recently modified file of up to `limit` directories.

    Each pass walks `projloc`, finds the file with the greatest modification
    time among the directories not yet reported, prints its name and adds its
    directory to `list_of_dirs_to_exclude`. The loop ends once `limit`
    directories have been reported or no further file can be found.

    `limit` defaults to 5; you can keep whatever number you want (6, 7, 4...).
    """
    while len(list_of_dirs_to_exclude) < limit:
        newest = None  # (mtime, directory, file name) of the best candidate

        for root, dirs, files in os.walk(projloc):
            # Skip directories already reported (note the `not in` logic,
            # unlike @tripleee's answer). Only the exact directory is
            # skipped; its subdirectories are still examined.
            if root in list_of_dirs_to_exclude:
                continue
            for fname in files:
                mtime = os.stat(os.path.join(root, fname)).st_mtime
                if newest is None or mtime > newest[0]:
                    newest = (mtime, root, fname)

        if newest is None:
            # Nothing left to report; stop instead of failing on an
            # undefined "newest" directory.
            break

        list_of_dirs_to_exclude.insert(0, newest[1])
        print(newest[2])


get_recent_files()

Here is the updated code if you want everything in a single function:

def get_recent_files(top=None, limit=5):
    """Print and return the newest file of up to `limit` directories.

    Each pass walks `top`, picks the file with the greatest modification
    time among the directories not yet reported, prints its name and then
    excludes that file's directory from later passes.

    Args:
        top: Folder to scan. Defaults to the module-level `projloc`, which
            is predefined using the same method as in the code above.
        limit: Maximum number of directories (and therefore files) to report.

    Returns:
        The printed file names, most recently modified first. The list is
        shorter than `limit` when fewer directories contain files.
    """
    if top is None:
        top = projloc

    reported_dirs = []
    recent_files = []

    while len(reported_dirs) < limit:
        newest = None  # (mtime, directory, file name) of the best candidate

        for root, dirs, files in os.walk(top):
            # Only the exact directory is skipped; subdirectories of a
            # reported directory are still examined.
            if root in reported_dirs:
                continue
            for fname in files:
                mtime = os.stat(os.path.join(root, fname)).st_mtime
                if newest is None or mtime > newest[0]:
                    newest = (mtime, root, fname)

        if newest is None:
            # No unreported directory has any file left.
            break

        reported_dirs.append(newest[1])
        recent_files.append(newest[2])
        print(newest[2])

    return recent_files

Exclude specific folders and subfolders in os.walk

The following solution seems to work: any directory named in the exclude set is ignored, and only files whose extension is in the extensions set are included.

import os

exclude = {'folder3'}  # directory names to skip entirely
extensions = {'.txt', '.dat'}  # file extensions to keep

for root, dirs, files in os.walk('c:/temp/folder', topdown=True):
    # In-place edit: os.walk will not descend into the removed directories.
    dirs[:] = [d for d in dirs if d not in exclude]
    # splitext keeps the leading dot, so entries in `extensions` need it too.
    files = [name for name in files if os.path.splitext(name)[1] in extensions]
    for fname in files:
        print(fname)

This code uses the option topdown=True to modify the list of dir names in place as specified in the docs:

When topdown is True, the caller can modify the dirnames list in-place
(perhaps using del or slice assignment), and walk() will only recurse
into the subdirectories whose names remain in dirnames; this can be
used to prune the search

Exclude root directories using os.walk

You can use startswith with a tuple (not a list):

if not root.startswith( ('/Users/teste/', '/other/folder') ):


import os

exclude = ['/Users/teste/']

# str.startswith accepts a tuple of prefixes, not a list.
exclude = tuple(exclude)

for root, dirs, files in os.walk("\\", topdown=False):
    # This is a plain string-prefix test, so the entries in `exclude` must
    # be written the same way os.walk reports `root`. NOTE: because of the
    # trailing slash, a root equal to '/Users/teste' itself does not match;
    # only paths beneath it do.
    if not root.startswith(exclude):
        for name in files:
            print(name)

BTW:

If you need to use a function that does not accept a list or tuple, you can use any() with a generator expression to check every element of the list.

For example for startswith()

if not any(root.startswith(x) for x in exclude):

or for a regex (which can be useful for creating more complex entries in exclude):

if not any(re.findall(x, root) for x in exclude):

How to exclude directory in os.walk()?

This is how I usually exclude directories when iterating over os.walk:

for root, dirs, files in os.walk(drv):
    # Slice assignment filters the list that os.walk itself iterates over,
    # so directories named in `extf` are never entered.
    dirs[:] = [d for d in dirs if d not in extf]

The point here is to use a slice assignment (dirs[:] = ...) in order to change dirs in place; simply reassigning dirs to the newly created list would not modify the list that os.walk uses.

If you want a slight speedup, I suggest turning extf into a set:

# Directory names to skip; a set makes each membership test O(1).
extf = {'$RECYCLE.BIN', 'System Volume Information'}

Skip directories in list with os.walk?

You can edit dirnames in place by deleting the directories that meet your exclusion criteria. When topdown=True (the default for os.walk), this prevents those directories from being traversed.

Make sure it is an in place edit, for example:

# The slice on the left-hand side replaces the contents of the existing list
# rather than binding the name `dirnames` to a new list.
dirnames[:] = [dirname for dirname in dirnames if dirname not in excludes]

Filtering os.walk() dirs and files

This solution uses fnmatch.translate to convert glob patterns to regular expressions (it assumes that includes is used only for files):

import fnmatch
import os
import os.path
import re

includes = ['*.doc', '*.odt']  # for files only
excludes = ['/home/paulo-freitas/Documents']  # for dirs and files

# transform glob patterns to regular expressions
includes = r'|'.join([fnmatch.translate(x) for x in includes])
# r'$.' can never match, so an empty exclude list excludes nothing
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'

for root, dirs, files in os.walk('/home/paulo-freitas'):

    # exclude dirs: match against the full path, but keep bare names in
    # `dirs`, because os.walk joins each remaining entry onto `root` itself
    dirs[:] = [d for d in dirs if not re.match(excludes, os.path.join(root, d))]

    # exclude/include files
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if not re.match(excludes, f)]
    files = [f for f in files if re.match(includes, f)]

    for fname in files:
        print(fname)

os.walk without digging into directories below

Use the walklevel function.

import os

def walklevel(some_dir, level=1):
    """Walk `some_dir` like os.walk, descending at most `level` levels.

    Yields the same (root, dirs, files) triples as os.walk. With level=0
    only `some_dir` itself is yielded; with level=1 (the default) its
    immediate subdirectories are yielded as well, and so on.

    Args:
        some_dir: Existing directory to start from. Trailing path
            separators are ignored.
        level: How many levels below `some_dir` to descend.

    Raises:
        AssertionError: If `some_dir` is not an existing directory.
    """
    sep = os.path.sep
    # Trailing separators would skew the depth count below.
    trimmed = some_dir.rstrip(sep)
    if some_dir and not trimmed:
        # some_dir consisted only of separators, i.e. the filesystem root;
        # stripping must not turn it into an empty (invalid) path.
        trimmed = sep
    some_dir = trimmed
    assert os.path.isdir(some_dir), some_dir

    # Depth is measured by counting separators relative to the start
    # directory; rstrip keeps a bare root ("/") at depth 0.
    base_depth = some_dir.rstrip(sep).count(sep)
    for root, dirs, files in os.walk(some_dir):
        yield root, dirs, files
        depth = root.rstrip(sep).count(sep) - base_depth
        if depth >= level:
            # Emptying dirs in place stops os.walk from descending further.
            del dirs[:]

It works just like os.walk, but you can pass it a level parameter that indicates how deep the recursion will go.

Python os.walk Include only specific folders

I am not sure if I understand what you need, but I think you overcomplicate a few things. If the code below doesn't help you, let me know and we will think about other approaches.

I run this to create an example like yours.

# setup example project structure

import os
import sys

PLATFORM = 'windows' if sys.platform.startswith('win') else 'linux'

# The home directory comes from "~" on non-Windows platforms and from the
# USERPROFILE environment variable on Windows.
DESKTOP_DIR = os.path.join(
    os.path.expanduser('~') if PLATFORM == 'linux' else os.environ['USERPROFILE'],
    'Desktop',
)

example_dirs = ['20180829', '20180828', '20180827', '20180826']

# Create <Desktop>/dir_from/dir_1/dir_2/dir_3/<date> for every example date;
# exist_ok=True makes re-running the setup harmless.
for _dir in example_dirs:
    path = os.path.join(DESKTOP_DIR, 'dir_from', 'dir_1', 'dir_2', 'dir_3', _dir)
    os.makedirs(path, exist_ok=True)

And here's what you need.

# do what you want to do

dir_from = os.path.join(DESKTOP_DIR, 'dir_from')
dir_to = os.path.join(DESKTOP_DIR, 'dir_to')
target = '20180828'  # name of the only directory to reproduce under dir_to

for root, dirs, files in os.walk(dir_from, topdown=True):
    if target in dirs:
        # Recreate the matching directory at the same relative position
        # under dir_to. relpath is used instead of str.replace so that only
        # the leading dir_from part of the path is swapped.
        relative = os.path.relpath(os.path.join(root, target), dir_from)
        os.makedirs(os.path.join(dir_to, relative), exist_ok=True)


Related Topics



Leave a reply



Submit