Pandas Finding Local Max and Min

Pandas finding local max and min

Assuming that the column of interest is labelled data, one solution would be

df['min'] = df.data[(df.data.shift(1) > df.data) & (df.data.shift(-1) > df.data)]
df['max'] = df.data[(df.data.shift(1) < df.data) & (df.data.shift(-1) < df.data)]

For example:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Generate a noisy AR(1) sample
np.random.seed(0)
rs = np.random.randn(200)
xs = [0]
for r in rs:
xs.append(xs[-1]*0.9 + r)
df = pd.DataFrame(xs, columns=['data'])

# Find local peaks
df['min'] = df.data[(df.data.shift(1) > df.data) & (df.data.shift(-1) > df.data)]
df['max'] = df.data[(df.data.shift(1) < df.data) & (df.data.shift(-1) < df.data)]

# Plot results
plt.scatter(df.index, df['min'], c='r')
plt.scatter(df.index, df['max'], c='g')
df.data.plot()

Sample Image

Pandas finding local max and min, not plateau values

IIUC, you want to apply local max and min over the unique consecutive values, so do the following:

import pandas as pd

df = pd.DataFrame({'data': [1, 1, 2, 2, 1, 0, 0, -2, 0]})

# remove consecutive duplicates
res = df[df['data'] != df['data'].shift()]

# find min and max
res['min'] = res.data[(res.data.shift(1) > res.data) & (res.data.shift(-1) > res.data)]
res['max'] = res.data[(res.data.shift(1) < res.data) & (res.data.shift(-1) < res.data)]

# put back in original df
output = pd.concat((df, res[['min', 'max']]), axis=1)
print(output)

Output

   data  min  max
0 1 NaN NaN
1 1 NaN NaN
2 2 NaN 2.0
3 2 NaN NaN
4 1 NaN NaN
5 0 NaN NaN
6 0 NaN NaN
7 -2 -2.0 NaN
8 0 NaN NaN

Finding local minimum between two peaks

You could do something like this with your data:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import argrelextrema

np.random.seed(1234)
rs = np.random.randn(500)
xs = [0]
for r in rs:
xs.append(xs[-1] * 0.999 + r)
df = pd.DataFrame(xs, columns=['point'])

which gives this data

point
0 0.000000
1 0.471435
2 -0.720012
3 0.713415
4 0.400050
.. ...
496 3.176240
497 3.007734
498 3.123841
499 1.045736
500 0.041935

[501 rows x 1 columns]

You can choose how often you want to mark a local ma or min by playing with a parameter:

n = 10

df['min'] = df.iloc[argrelextrema(df.point.values, np.less_equal,
order=n)[0]]['point']
df['max'] = df.iloc[argrelextrema(df.point.values, np.greater_equal,
order=n)[0]]['point']

plt.scatter(df.index, df['min'], c='r')
plt.scatter(df.index, df['max'], c='r')
plt.plot(df.index, df['point'])
plt.show()

Which gives:

Sample Image

Another choice for n might be (and it all depends on what you want):

n = 40

df['min'] = df.iloc[argrelextrema(df.point.values, np.less_equal,
order=n)[0]]['point']
df['max'] = df.iloc[argrelextrema(df.point.values, np.greater_equal,
order=n)[0]]['point']

plt.scatter(df.index, df['min'], c='r')
plt.scatter(df.index, df['max'], c='g')
plt.plot(df.index, df['point'])
plt.show()

Sample Image

To get a marking for which points actually where max and min, you can make a new df:

new_df = pd.DataFrame(np.where(df.T == df.T.max(), 1, 0),index=df.columns).T

which gives the information about which row in df is a maximum or a minimum. Otherwise, the original df contains that information in the created min and max columns, those instance that aren't nan

EDIT: Finding peaks above threshold

If you are intrested of peaks above a certain value, then you should use find_peaks in the following way:

from scipy.signal import find_peaks 
peaks, _ = find_peaks(df['point'], height = 15)
plt.plot(df['point'])
plt.plot(peaks, df['point'][peaks], "x")
plt.show()

which will produce:

peaks,_

(array([304, 309, 314, 317, 324, 329, 333, 337, 343, 349, 352, 363, 366,
369, 372, 374, 377, 379, 381, 383, 385, 387, 391, 394, 397, 400,
403, 410, 413, 418, 424, 427, 430, 433, 436, 439, 442, 444, 448],
dtype=int64),
{'peak_heights': array([15.68868141, 15.97184882, 15.04790966, 15.6146908 , 16.49191501,
18.0852033 , 18.11467247, 19.48469432, 21.32391722, 19.90407526,
19.93683051, 24.40980129, 28.00319793, 26.1080406 , 24.44322213,
23.16993982, 22.27505873, 21.47500832, 22.3236231 , 24.02484906,
23.83727054, 24.32609486, 21.25365717, 21.10295203, 20.03162979,
20.64021444, 19.78510855, 21.62624829, 22.34904425, 21.60431638,
18.41968769, 18.24153961, 18.00747871, 18.02793964, 16.72552016,
17.58573207, 16.90982675, 16.9905686 , 16.30563852])})

and graphically
Sample Image

Finding local maxima using find_peaks

You should change the line peak_pos = numbers[peaks[0]] to peak_pos = peaks[0] because peaks[0] gives you the index of the peaks which are the actual x coordinates you want to pass to ax.scatter.

To get the peak at 422, we can set the threshold to None (so that you aren't constraining yourself by vertical distance to the neighbors), and make the distance larger, such as 10.

Then you can add the heights as text annotations:

import pandas as pd
import numpy as np
from scipy.signal import find_peaks
import matplotlib.pyplot as plt

Data = [95,95,95,95,95,95,95,95,94,94,94,94,94,94,94,94,229,444,457,387,280,188,236,181,183,183,185,186,189,190,190,190,179,165,151,151,161,214,213,213,214,213,212,195,179,160,158,155,114,98,164,346,229,39,134,149,194,1,153,171,187,185,104,102,100,90,90,92,92,92,93,93,93,93,93,93,94,94,94,94,94,11,1,11,11,70,182,104,58,60,134,115,99,97,99,98,98,97,97,97,97,97,97,97,97,97,96,96,96,96,96,96,96,96,96,96,96,96,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,94,94,94,94,94,94,94,94,94,94,94,94,94,94,94,93,93,152,206,221,286,326,341,360,377,391,392,393,393,393,394,406,418,420,422,422,408,389,345,329,276,224,166,113,-6,91,91,91,442,324,387,389,387,443,393,393,393,393,391,381,379,377,303,174,131,0,115,112,112,111,111,109,107,106,104,104,103,102,101,101,101,101,100,100,1,1,12,13,65,138,87]
df = pd.DataFrame({'Data':Data})

# convert to 1D array
number_column = df.loc[:,'Data']
numbers = number_column.values

#finding peaks for 1D array
# peaks = find_peaks(numbers, height = 300, threshold = 1, distance = 5)

peaks = find_peaks(numbers, height = 300, threshold = None, distance=10)
height = peaks[1]['peak_heights'] #list of heights of peaks
peak_pos = peaks[0]
print(peaks)

# plot the peaks
fig = plt.figure()
ax = fig.subplots()
ax.plot(numbers)
ax.scatter(peak_pos, height,color = 'r', s = 25, label = 'Maxima')
ax.legend

## add numbers as text annotations
for i, text in enumerate(height):
if text.is_integer():
ax.annotate(int(text), (peak_pos[i], height[i]), size=10)
else:
ax.annotate(text, (peak_pos[i], height[i]), size=10)
plt.show()

Sample Image



Related Topics



Leave a reply



Submit