How to Get the Source Code of a Python Function

Finding the source code for built-in Python functions?

Since Python is open source you can read the source code.

To find out which file a particular module or function is implemented in, you can usually print the module's __file__ attribute (for a function, look up its module first, e.g. via func.__module__). Alternatively, you may use the inspect module; see the section "Retrieving source code" in the documentation of inspect.

For built-in classes and methods this is not so straightforward, since inspect.getfile and inspect.getsource will raise a TypeError stating that the object is built-in. However, many of the built-in types can be found in the Objects sub-directory of the CPython source tree. For example, see Objects/enumobject.c for the implementation of the enumerate class or Objects/listobject.c for the implementation of the list type.

How to get the source code of Python's print function?

The print function is implemented in C, which is why you cannot retrieve its source code with the inspect module. The code is here: https://github.com/python/cpython/blob/2.7/Python/bltinmodule.c#L1580

Getting a Python function's source code without the definition lines

You can do something like this:

import inspect
from itertools import dropwhile


def _strip_indentation(line, width):
    """Remove at most ``width`` leading spaces/tabs from ``line``.

    Never removes non-whitespace characters or the line terminator, so blank
    lines and lines indented less than ``width`` survive intact.
    """
    leading = len(line) - len(line.lstrip(' \t'))
    return line[min(leading, width):]


def _single_line_body(def_line):
    """Return the statements written after the signature on a one-line ``def``.

    The signature ends at the first ``:`` that is outside any bracket pair;
    colons inside default values, annotations, strings or the body itself are
    therefore not mistaken for it.
    """
    import io
    import tokenize

    depth = 0
    try:
        for tok in tokenize.generate_tokens(io.StringIO(def_line + '\n').readline):
            if tok.type != tokenize.OP:
                continue
            if tok.string in ('(', '[', '{'):
                depth += 1
            elif tok.string in (')', ']', '}'):
                depth -= 1
            elif tok.string == ':' and depth == 0:
                return def_line[tok.end[1]:].strip()
    except (tokenize.TokenError, SyntaxError):
        # The line is not a complete statement (e.g. the first line of a
        # multi-line signature); fall through to the simple heuristic.
        pass
    return def_line.rsplit(':')[-1].strip()


def get_function_body(func):
    """Return the source of ``func``'s body without decorators or the ``def`` line.

    For a function whose body starts on the line after the signature, the body
    lines are returned joined together, each dedented by the indentation of the
    first non-blank body line (line terminators are kept).  For a function
    written on a single line, the text after the signature's colon is returned
    with surrounding whitespace stripped.

    This version works line by line: each decorator and the signature are
    expected to occupy exactly one source line.

    :param func: a Python-level function whose source ``inspect`` can locate.
    :returns: the body source as a string.
    """
    source_lines = inspect.getsourcelines(func)[0]
    # Decorators on methods and nested functions are indented, so ignore the
    # leading whitespace when deciding whether a line is a decorator.
    source_lines = dropwhile(lambda x: x.lstrip().startswith('@'), source_lines)
    def_line = next(source_lines).strip()
    if def_line.startswith('def ') and def_line.endswith(':'):
        # Handle functions that are not one-liners
        body_lines = list(source_lines)
        # Measure the indentation on the first line that has content; a blank
        # first line would otherwise give a bogus width.
        first_line = next((line for line in body_lines if line.strip()), '')
        indentation = len(first_line) - len(first_line.lstrip(' \t'))
        return ''.join(_strip_indentation(line, indentation) for line in body_lines)
    # Handle single line functions
    return _single_line_body(def_line)

Demo:

# Identity decorator: hands back the function it receives.
def some_decorator(x):
    return x


# Decorated function with its body on the following line.
@some_decorator
def foo():
    print("bar")


# Returns the nested function ``inner``; the demo inspects that returned
# function.  The bodies below are never executed, only read as source text.
def func():
    def inner(a, b='a:b'):
        print (100)
        a = c + d
        print ('woof!')
        def inner_inner():
            print (200)
            print ('spam!')
    return inner

# Function whose body sits on the same line as its signature.
def func_one_liner(): print (200); print (a, b, c)

print (get_function_body(foo))
print (get_function_body(func()))
print (get_function_body(func_one_liner))

# Re-bind the one-liner through the decorator and inspect it again.
func_one_liner = some_decorator(func_one_liner)
print (get_function_body(func_one_liner))

Output:

print("bar")

print (100)
a = c + d
print ('woof!')
def inner_inner():
print (200)
print ('spam!')

print (200); print (a, b, c)
print (200); print (a, b, c)

Update:

To handle async functions and functions whose argument signature spans multiple lines, get_function_body should be updated to:

import inspect
import re
from itertools import dropwhile


def _signature_end(source_lines):
    """Locate the end of the ``def`` signature in ``source_lines``.

    Returns ``(row, col)`` where ``row`` is the 0-based index of the line that
    holds the signature's closing colon and ``col`` is the column just past
    that colon.  The colon is the first ``:`` found after the ``def`` keyword
    that is outside every bracket pair, so decorators, multi-line argument
    lists, annotations and string defaults containing ``:`` are skipped.

    :raises ValueError: if no ``def`` signature can be found.
    """
    import io
    import tokenize

    depth = 0
    seen_def = False
    readline = io.StringIO(''.join(source_lines)).readline
    try:
        for tok in tokenize.generate_tokens(readline):
            if tok.type == tokenize.NAME and tok.string == 'def':
                seen_def = True
            elif tok.type == tokenize.OP:
                if tok.string in ('(', '[', '{'):
                    depth += 1
                elif tok.string in (')', ']', '}'):
                    depth -= 1
                elif tok.string == ':' and depth == 0 and seen_def:
                    return tok.end[0] - 1, tok.end[1]
    except (tokenize.TokenError, SyntaxError):
        pass
    raise ValueError('no function definition found in source')


def _strip_indent(line, width):
    """Remove at most ``width`` leading spaces/tabs from ``line``."""
    leading = len(line) - len(line.lstrip(' \t'))
    return line[min(leading, width):]


def get_function_body(func):
    """Print a ``"<name>'s body:"`` header and return ``func``'s body source.

    Handles decorated functions, ``async def``, signatures spread over several
    lines, return annotations, and bodies written on the signature line.

    A blank line and the header are printed to stdout before returning.  The
    returned text has no trailing newline; body lines are dedented by the
    indentation of the first body line, so nested blocks keep their relative
    indentation.

    :param func: a Python-level function whose source ``inspect`` can locate.
    :returns: the body source as a string.
    :raises ValueError: if the source contains no ``def`` signature.
    """
    print()
    print("{func.__name__}'s body:".format(func=func))
    source_lines = inspect.getsourcelines(func)[0]
    row, col = _signature_end(source_lines)

    # Code that follows the colon on the same line belongs to the body; a
    # trailing comment on the signature line does not.
    head = source_lines[row][col:].strip()
    if head.startswith('#'):
        head = ''

    tail = [line.rstrip('\r\n') for line in source_lines[row + 1:]]
    if not head:
        # Skip blank lines between the signature and the first statement.
        tail = list(dropwhile(lambda line: not line.strip(), tail))

    # Dedent by the first non-blank body line, not by whichever line happens
    # to come second (which may be nested deeper).
    first = next((line for line in tail if line.strip()), '')
    indentation = len(first) - len(first.lstrip(' \t'))

    body = [head] if head else []
    body.extend(_strip_indent(line, indentation) for line in tail)
    return '\n'.join(body)

Demo:

# Identity decorator: hands back the function it receives.
def some_decorator(x):
    return x


# Decorated function with its body on the following line.
@some_decorator
def foo():
    print("bar")


# Returns the nested function ``inner``; the demo inspects that returned
# function.  The bodies below are never executed, only read as source text.
def func():
    def inner(a, b='a:b'):
        print (100)
        a = c + d
        print ('woof!')
        def inner_inner():
            print (200)
            print ('spam!')
    return inner


# One-line functions, plain and async.
def func_one_liner(): print (200); print (a, b, c)
async def async_func_one_liner(): print (200); print (a, b, c)


# Signatures spread over several lines, with the body either on the closing
# line of the signature or on the lines after it.
def multi_line_1(
    a=10,
    b=100): print (100); print (200)


def multi_line_2(
    a=10,
    b=100
): print (100); print (200)


def multi_line_3(
    a=10,
    b=100
):
    print (100 + '\n')
    print (200)

async def multi_line_4(
    a=10,
    b=100
):
    print (100 + '\n')
    print (200)

async def multi_line_5(
    a=10,
    b=100
): print (100); print (200)

# Parameter and return annotations in the signature.
def func_annotate(
    a: 'x', b: 5 + 6, c: list
) -> max(2, 9): print (100); print (200)


print (get_function_body(foo))
print (get_function_body(func()))
print (get_function_body(func_one_liner))
print (get_function_body(async_func_one_liner))

# Re-bind the one-liner through the decorator and inspect it again.
func_one_liner = some_decorator(func_one_liner)
print (get_function_body(func_one_liner))


# ``foo`` is redefined here with two stacked decorators.
@some_decorator
@some_decorator
def foo():
    print("bar")

print (get_function_body(foo))
print (get_function_body(multi_line_1))
print (get_function_body(multi_line_2))
print (get_function_body(multi_line_3))
print (get_function_body(multi_line_4))
print (get_function_body(multi_line_5))
print (get_function_body(func_annotate))

Output:

foo's body:
print("bar")

inner's body:
print (100)
a = c + d
print ('woof!')
def inner_inner():
print (200)
print ('spam!')

func_one_liner's body:
print (200); print (a, b, c)

async_func_one_liner's body:
print (200); print (a, b, c)

func_one_liner's body:
print (200); print (a, b, c)

foo's body:
print("bar")

multi_line_1's body:
print (100); print (200)

multi_line_2's body:
print (100); print (200)

multi_line_3's body:
print (100 + '\n')
print (200)

multi_line_4's body:
print (100 + '\n')
print (200)

multi_line_5's body:
print (100); print (200)

func_annotate's body:
print (100); print (200)

Is it possible to get the source code of a (possibly decorated) Python function body, including inline comments?

I wrote a simple regex that does the trick. I tried this script with classes and without. It seemed to work fine either way. It just opens whatever file you designate in the Main call, at the bottom, rewrites the entire document with all function/method bodies doc-stringed, and then saves it under whatever name you designated as the second argument in the Main call.

It's not beautiful, and it could probably have more efficient regex statements. It works though. The regex finds everything from a decorator (if there is one) to the end of a function/method, grouping tabs and the function/method body. It then uses those groups in finditer to construct a docstring and place it before the entire chunk it found.

import re

# Matches optional decorator lines, an optional indent ("tabs"), a
# ``def``/``async def`` line, and then every following non-empty line ("body").
# ``def`` must be followed by whitespace so that lines such as
# ``default = 1`` are not mistaken for a function definition.
FUNC_BODY = re.compile(
    r'^((([ \t]+)?@.+\n)+)?(?P<tabs>[\t ]+)?(?:async[ \t]+)?def[ \t]+([^\n]+)\n'
    r'(?P<body>(^([\t ]+)?([^\n]+)\n)+)',
    re.M,
)
# Lines that contain nothing but spaces/tabs.
BLANK_LINES = re.compile(r'^[ \t]+$', re.M)


class Main(object):
    """Copy a Python file, inserting each function body as a ''' block before it.

    All the work happens in the constructor: ``file_in`` is read, every chunk
    matched by ``FUNC_BODY`` is prefixed with a triple-quoted copy of its body
    (indented like the ``def`` line), and the result is written to ``file_out``.

    Because ``FUNC_BODY`` captures consecutive non-empty lines, a captured body
    ends at the first empty line after the ``def``.
    """

    def __init__(self, file_in: str, file_out: str) -> None:
        """Read ``file_in``, inject the body blocks, and write ``file_out``.

        :param file_in: path of the source file to read.
        :param file_out: path of the file to write (overwritten if present).
        """
        # open requested file
        with open(file_in, 'r', encoding='utf-8') as f:
            in_txt = f.read()

        # remove all lines that just have space characters on them
        # this stops FUNC_BODY from finding the entire file in one shot
        in_txt = BLANK_LINES.sub('', in_txt)

        pieces = []  # output fragments, joined once at the end
        last = 0  # to keep track of where we are in the file

        # process all matches
        for m in FUNC_BODY.finditer(in_txt):
            s, e = m.span()
            # make sure we catch anything that was between our last match and this one
            pieces.append(in_txt[last:s])
            last = e
            tabs = m.group('tabs') or ''
            # construct the docstring and inject it before the found function/method
            pieces.append(f"{tabs}'''\n{m.group('body')}{tabs}'''\n{m.group()}")

        # keep whatever follows the final match; without this the tail of the
        # file would be silently dropped
        pieces.append(in_txt[last:])

        # save as requested file name
        with open(file_out, 'w', encoding='utf-8') as f:
            f.write(''.join(pieces))


# When run as a script, process test.py and write the result to test_docd.py.
if __name__ == '__main__':
    Main('test.py', 'test_docd.py')

EDIT:

Apparently, I "missed the entire point" so I wrote it again a different way. Now you can get the body while the code is running and decorators don't matter, at all. I left my other answer here because it is also a solution, just not a "real time" one.

import re, inspect

# Matches an optional indent ("tabs"), a ``def``/``async def`` line whose
# function name is captured as "name", and every following non-empty line
# ("body").
FUNC_BODY = re.compile(
    r'^(?P<tabs>[\t ]+)?(?:async[\t ]+)?def (?P<name>[a-zA-Z0-9_]+)([^\n]+)\n'
    r'(?P<body>(^([\t ]+)?([^\n]+)\n)+)',
    re.M,
)


class Source(object):
    """Look up a function's body text by name in the file that defines an object."""

    @staticmethod
    def investigate(focus: object, strfocus: str) -> 'str | None':
        """Return the body of the function named ``strfocus`` wrapped in ''' quotes.

        The file to search is the one ``inspect.getsourcefile(focus)`` reports;
        the function is then found by name with ``FUNC_BODY``, so the result
        does not depend on what ``focus`` itself is bound to (for example a
        decorator's wrapper defined in the same file).

        :param focus: object used only to locate the source file.
        :param strfocus: name of the function whose body is wanted.
        :returns: the first matching body, enclosed in triple quotes indented
            like the ``def`` line, or ``None`` if no function of that name is
            matched in the file.
        """
        with open(inspect.getsourcefile(focus), 'r', encoding='utf-8') as f:
            text = f.read()
        for m in FUNC_BODY.finditer(text):
            if m.group('name') == strfocus:
                tabs = m.group('tabs') or ''
                return f"{tabs}'''\n{m.group('body')}{tabs}'''"
        # No definition with that name was matched.
        return None


# Decorator that replaces the function with a wrapper which prints a message
# and then calls the original.
def decorator(func):
    def inner():
        print("I'm decorated")
        func()
    return inner

# After decoration the name ``test`` is bound to the wrapper ``inner``.
@decorator
def test():
    a = 5
    b = 6
    return a+b

# The body is looked up by the name 'test' in the file that defines the object.
print(Source.investigate(test, 'test'))


Related Topics



Leave a reply



Submit