Excluding directories in os.walk
Modifying dirs in-place will prune the (subsequent) files and directories visited by os.walk:
# exclude = {'New folder', 'Windows', 'Desktop'}
for root, dirs, files in os.walk(top, topdown=True):
    # Slice assignment edits the list in place; with topdown=True, os.walk
    # only recurses into the names that remain in it.
    dirs[:] = [name for name in dirs if name not in exclude]
From help(os.walk):
When topdown is true, the caller can modify the dirnames list in-place
(e.g., via del or slice assignment), and walk will only recurse into
the subdirectories whose names remain in dirnames; this can be used to
prune the search...
How to scan only some directories or exclude directories while using os.walk()
After reading @tripleee's comment, I wrote this piece of code, which gets the most recently modified files.
import os
# Switch into the folder to scan ('Folder' is resolved against the current
# working directory).
os.chdir('Folder')
projloc = os.getcwd()  # absolute path of the folder to scan
# Directories that have already been reported; get_recent_files() skips them.
list_of_dirs_to_exclude = []
def get_recent_files(top=None, excluded=None, limit=5):
    """Print the newest file of each of the most recently modified directories.

    The tree under ``top`` is scanned repeatedly. Each pass finds the most
    recently modified file in any directory that is not yet in ``excluded``,
    prints its name, and adds its directory to the front of ``excluded`` so
    the next pass reports a file from a different directory. Only the exact
    directory is excluded; its subdirectories are still scanned.

    Args:
        top: Directory to scan. Defaults to the module-level ``projloc``.
        excluded: List of directory paths to skip; it is extended in place.
            Defaults to the module-level ``list_of_dirs_to_exclude``.
        limit: Stop once ``excluded`` holds this many directories.

    Returns:
        The printed file names, most recently modified first.
    """
    if top is None:
        top = projloc
    if excluded is None:
        excluded = list_of_dirs_to_exclude

    reported = []
    while len(excluded) < limit:
        # (mtime, directory, file name) of the best candidate in this pass.
        newest = None
        for root, _dirs, files in os.walk(top):
            if root in excluded:
                continue
            for fname in files:
                mtime = os.stat(os.path.join(root, fname)).st_mtime
                if newest is None or mtime > newest[0]:
                    newest = (mtime, root, fname)
        if newest is None:
            # No file left outside the excluded directories: stop instead of
            # failing on an unbound name or scanning forever.
            break
        _mtime, max_dir, max_file = newest
        excluded.insert(0, max_dir)
        print(max_file)
        reported.append(max_file)
    return reported
get_recent_files()
Here is updated code if you want the code all in the same def
def get_recent_files(top=None, limit=5):
    """Print the newest file of each of the most recently modified directories.

    The tree under ``top`` is walked once. For every directory that contains
    files, its most recently modified file is recorded; the directories are
    then ranked by that file's modification time and the newest file of the
    top ``limit`` directories is printed, one per line.

    Args:
        top: Directory to scan. Defaults to the module-level ``projloc``.
        limit: Maximum number of directories (and therefore files) to report.

    Returns:
        The printed file names, most recently modified first. The list is
        shorter than ``limit`` when fewer directories contain files.
    """
    if top is None:
        top = projloc

    newest_per_dir = []  # one (mtime, file name) entry per directory with files
    for root, _dirs, files in os.walk(top):
        best = None
        for fname in files:
            mtime = os.stat(os.path.join(root, fname)).st_mtime
            if best is None or mtime > best[0]:
                best = (mtime, fname)
        if best is not None:
            newest_per_dir.append(best)

    # The sort is stable, so directories with equal mtimes keep walk order.
    newest_per_dir.sort(key=lambda entry: entry[0], reverse=True)
    recent = [fname for _mtime, fname in newest_per_dir[:limit]]
    for fname in recent:
        print(fname)
    return recent
Exclude specific folders and subfolders in os.walk
The following solution seems to work: any directory specified in the exclude set will be ignored, and only files with an extension in the extensions set will be included.
import os
exclude = {'folder3'}
extensions = {'.txt', '.dat'}
for root, dirs, files in os.walk('c:/temp/folder', topdown=True):
    # Prune in place so os.walk does not recurse into the excluded folders.
    dirs[:] = [name for name in dirs if name not in exclude]
    # Keep only the files whose extension is in the allowed set.
    files = [name for name in files if os.path.splitext(name)[1] in extensions]
    for fname in files:
        print(fname)
This code uses the option topdown=True to modify the list of directory names in place, as specified in the docs:
When topdown is True, the caller can modify the dirnames list in-place
(perhaps using del or slice assignment), and walk() will only recurse
into the subdirectories whose names remain in dirnames; this can be
used to prune the search
Exclude root directories using os.walk
You can use startswith with a tuple (not a list):
if not root.startswith( ('/Users/teste/', '/other/folder') ):
import os
# str.startswith accepts a tuple of prefixes, so build the tuple directly.
exclude = ('/Users/teste/',)
for root, dirs, files in os.walk("\\", topdown=False):
    if root.startswith(exclude):
        continue  # skip files that live under an excluded prefix
    for name in files:
        print(name)
BTW:
If you want to use a function that can't accept a list or tuple, you can use any() with a generator expression to check every element of the list.
For example, for startswith():
if not any(root.startswith(x) for x in exclude):
or for a regex (which can be useful for creating more complex elements in exclude):
if not any(re.findall(x, root) for x in exclude):
How to exclude directory in os.walk()?
This is how I usually exclude directories when iterating over os.walk:
for root, dirs, files in os.walk(drv):
    # Slice-assign so the list itself is edited rather than the name rebound.
    dirs[:] = [name for name in dirs if name not in extf]
The point here is to use a slice assignment (dirs[:] = ...) in order to change dirs in-place, rather than rebinding the name dirs to a newly created list, which os.walk would not see.
If you want a slight speedup, I suggest turning extf into a set:
extf = set(('$RECYCLE.BIN','System Volume Information'))
Skip directories in list with os.walk?
You can edit dirnames in place by deleting directories that meet your requirements. When topdown=True (the default) for os.walk, that prevents those directories from being enumerated.
Make sure it is an in place edit, for example:
dirnames[:] = [dirname for dirname in dirnames if dirname not in excludes]
Filtering os.walk() dirs and files
This solution uses fnmatch.translate to convert glob patterns to regular expressions (it assumes that includes is used only for files):
import fnmatch
import os
import os.path
import re
includes = ['*.doc', '*.odt']  # for files only
excludes = ['/home/paulo-freitas/Documents']  # for dirs and files

# Transform the glob patterns into one alternation regex per list.
includes = r'|'.join([fnmatch.translate(x) for x in includes])
# With no exclude patterns, fall back to a regex that can never match.
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'

for root, dirs, files in os.walk('/home/paulo-freitas'):
    # Exclude dirs: test the full path but keep the bare names in dirs,
    # which is what os.walk expects to find there.
    dirs[:] = [d for d in dirs if not re.match(excludes, os.path.join(root, d))]

    # Exclude/include files, matched against their full path.
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if not re.match(excludes, f)]
    files = [f for f in files if re.match(includes, f)]

    for fname in files:
        print(fname)
os.walk without digging into directories below
Use the walklevel function.
import os
def walklevel(some_dir, level=1):
    """Walk ``some_dir`` like ``os.walk``, descending at most ``level`` levels.

    Args:
        some_dir: Existing directory to start from; trailing path separators
            are ignored. The filesystem root is accepted.
        level: How many levels below ``some_dir`` to descend. ``0`` yields
            only ``some_dir`` itself; ``1`` also yields its direct
            subdirectories, and so on.

    Yields:
        The same ``(root, dirs, files)`` tuples as ``os.walk``. Note that
        ``dirs`` is emptied after the tuple has been yielded once the depth
        limit is reached.

    Raises:
        AssertionError: If ``some_dir`` is not a directory.
    """
    assert os.path.isdir(some_dir)
    # Stripping the separator turns the filesystem root into '', so fall back
    # to the separator itself in that case.
    top = some_dir.rstrip(os.path.sep) or os.path.sep
    base_depth = top.rstrip(os.path.sep).count(os.path.sep)
    for root, dirs, files in os.walk(top):
        yield root, dirs, files
        depth = root.rstrip(os.path.sep).count(os.path.sep) - base_depth
        if depth >= level:
            # Emptying dirs in place stops os.walk from recursing further.
            del dirs[:]
It works just like os.walk, but you can pass it a level parameter that indicates how deep the recursion will go.
Python os.walk Include only specific folders
I am not sure if I understand what you need, but I think you overcomplicate a few things. If the code below doesn't help you, let me know and we will think about other approaches.
I run this to create an example like yours.
# setup example project structure
import os
import sys
PLATFORM = 'windows' if sys.platform.startswith('win') else 'linux'

# The home directory comes from '~' on non-Windows platforms and from the
# USERPROFILE environment variable on Windows.
DESKTOP_DIR = os.path.join(
    os.path.expanduser('~') if PLATFORM == 'linux' else os.environ['USERPROFILE'],
    'Desktop',
)

example_dirs = ['20180829', '20180828', '20180827', '20180826']
for _dir in example_dirs:
    # Build Desktop/dir_from/dir_1/dir_2/dir_3/<date> for every example date.
    path = os.path.join(
        os.path.join(DESKTOP_DIR, 'dir_from', 'dir_1', 'dir_2', 'dir_3'),
        _dir,
    )
    os.makedirs(path, exist_ok=True)
And here's what you need.
# do what you want to do
dir_from = os.path.join(DESKTOP_DIR, 'dir_from')
dir_to = os.path.join(DESKTOP_DIR, 'dir_to')
target = '20180828'
for root, dirs, files in os.walk(dir_from, topdown=True):
    if target not in dirs:
        continue
    # Recreate the matching directory's path under dir_to.
    path = os.path.join(root, target).replace(dir_from, dir_to)
    os.makedirs(path, exist_ok=True)
Related Topics
How to Properly Assert That an Exception Gets Raised in Pytest
Escape String Python for MySQL
How to Compare String and Integer in Python
How to Color Python Logging Output
How to Calculate Mean Values Grouped on Another Column in Pandas
Reimport a Module While Interactive
How to Convert a Time.Struct_Time Object into a Datetime Object
Adding a Background Image to a Plot
How to Save an Image Locally Using Python Whose Url Address I Already Know
Attributeerror: 'Tensor' Object Has No Attribute 'Numpy'
Unicodedecodeerror When Redirecting to File
Making a Countdown Timer with Python and Tkinter
Getting Console.Log Output from Chrome with Selenium Python API Bindings