Python: Justifying Numpy Array

Python: Justifying NumPy array

Here's a vectorized approach inspired by this other post and generalized to cover non-zeros for all four directions -

def justify(a, invalid_val=0, axis=1, side='left'):
    """
    Justify the valid elements of a 2D array towards one side.

    Valid elements (those that are not ``invalid_val``) keep their relative
    order along ``axis`` and are packed against ``side``; the freed slots are
    filled with ``invalid_val``.

    Parameters
    ----------
    a : ndarray
        Input 2D array to be justified. It is not modified.
    invalid_val : scalar
        Value marking an empty slot. Any NaN object (``np.nan``,
        ``float('nan')``, ...) is recognised and matched with ``np.isnan``.
    axis : int
        Axis along which justification is to be made
    side : str
        Direction of justification. It could be 'left', 'right', 'up', 'down'
        It should be 'left' or 'right' for axis=1 and 'up' or 'down' for axis=0.
        Any value other than 'up'/'left' justifies towards the end of the axis.

    Returns
    -------
    out : ndarray
        New array shaped like ``a``. Its dtype is the promotion of ``a.dtype``
        and the dtype of ``invalid_val``, so float input is never truncated
        by an integer ``invalid_val``.
    """

    # NaN never compares equal to itself and an identity test only catches
    # the np.nan singleton, so detect NaN by value instead.
    try:
        invalid_is_nan = bool(np.isnan(invalid_val))
    except (TypeError, ValueError):
        invalid_is_nan = False

    if invalid_is_nan:
        mask = ~np.isnan(a)
    else:
        mask = a != invalid_val

    # Sorting booleans pushes the True values to the end of the axis
    # (right/down); flipping gives the left/up justified layout.
    justified_mask = np.sort(mask, axis=axis)
    if side in ('up', 'left'):
        justified_mask = np.flip(justified_mask, axis=axis)

    # The output must be able to hold both the fill value and the data.
    out_dtype = np.promote_types(a.dtype, np.asarray(invalid_val).dtype)
    out = np.full(a.shape, invalid_val, dtype=out_dtype)
    if axis == 1:
        out[justified_mask] = a[mask]
    else:
        # Work on transposed views so elements are visited column by column.
        out.T[justified_mask.T] = a.T[mask.T]
    return out

Sample runs -

In [473]: a # input array
Out[473]: 
array([[1, 0, 2, 0],
       [3, 0, 4, 0],
       [5, 0, 6, 0],
       [6, 7, 0, 8]])

In [474]: justify(a, axis=0, side='up')
Out[474]: 
array([[1, 7, 2, 8],
       [3, 0, 4, 0],
       [5, 0, 6, 0],
       [6, 0, 0, 0]])

In [475]: justify(a, axis=0, side='down')
Out[475]: 
array([[1, 0, 0, 0],
       [3, 0, 2, 0],
       [5, 0, 4, 0],
       [6, 7, 6, 8]])

In [476]: justify(a, axis=1, side='left')
Out[476]: 
array([[1, 2, 0, 0],
       [3, 4, 0, 0],
       [5, 6, 0, 0],
       [6, 7, 8, 0]])

In [477]: justify(a, axis=1, side='right')
Out[477]: 
array([[0, 0, 1, 2],
       [0, 0, 3, 4],
       [0, 0, 5, 6],
       [0, 6, 7, 8]])

Generic case (ndarray)

For a ndarray, we could modify it to -

def justify_nd(a, invalid_val, axis, side):
    """
    Justify ndarray for the valid elements (that are not invalid_val).

    Valid elements keep their relative order along ``axis`` and are packed
    towards the requested end; the freed slots are filled with
    ``invalid_val``.

    Parameters
    ----------
    a : ndarray
        Input array to be justified. It is not modified.
    invalid_val : scalar
        invalid value. Any NaN object (``np.nan``, ``float('nan')``, ...) is
        recognised and matched with ``np.isnan``.
    axis : int
        Axis along which justification is to be made
    side : str
        Direction of justification. Must be 'front' or 'end'.
        So, with 'front', valid elements are pushed to the front and
        with 'end' valid elements are pushed to the end along specified axis.
        Any value other than 'front' behaves like 'end'.

    Returns
    -------
    out : ndarray
        New array shaped like ``a``. Its dtype is the promotion of ``a.dtype``
        and the dtype of ``invalid_val``, so float input is never truncated
        by an integer ``invalid_val``.
    """

    # View with the working axis moved last; for the last axis this is a no-op.
    pushax = lambda arr: np.moveaxis(arr, axis, -1)

    # NaN never compares equal to itself and an identity test only catches
    # the np.nan singleton, so detect NaN by value instead.
    try:
        invalid_is_nan = bool(np.isnan(invalid_val))
    except (TypeError, ValueError):
        invalid_is_nan = False

    if invalid_is_nan:
        mask = ~np.isnan(a)
    else:
        mask = a != invalid_val

    # Sorting booleans pushes the True values to the end of the axis.
    justified_mask = np.sort(mask, axis=axis)
    if side == 'front':
        justified_mask = np.flip(justified_mask, axis=axis)

    # The output must be able to hold both the fill value and the data.
    out_dtype = np.promote_types(a.dtype, np.asarray(invalid_val).dtype)
    out = np.full(a.shape, invalid_val, dtype=out_dtype)

    # Boolean assignment through the moved-axis view writes into `out` and
    # visits elements lane by lane along `axis`, preserving their order.
    pushax(out)[pushax(justified_mask)] = pushax(a)[pushax(mask)]
    return out

Sample runs -

Input array :

In [87]: a
Out[87]: 
array([[[54, 57,  0, 77],
        [77,  0,  0, 31],
        [46,  0,  0, 98],
        [98, 22, 68, 75]],

       [[49,  0,  0, 98],
        [ 0, 47,  0, 87],
        [82, 19,  0, 90],
        [79, 89, 57, 74]],

       [[ 0,  0,  0,  0],
        [29,  0,  0, 49],
        [42, 75,  0, 67],
        [42, 41, 84, 33]],

       [[ 0,  0,  0, 38],
        [44, 10,  0,  0],
        [63,  0,  0,  0],
        [89, 14,  0,  0]]])

To 'front', along axis =0 :

In [88]: justify_nd(a, invalid_val=0, axis=0, side='front')
Out[88]: 
array([[[54, 57,  0, 77],
        [77, 47,  0, 31],
        [46, 19,  0, 98],
        [98, 22, 68, 75]],

       [[49,  0,  0, 98],
        [29, 10,  0, 87],
        [82, 75,  0, 90],
        [79, 89, 57, 74]],

       [[ 0,  0,  0, 38],
        [44,  0,  0, 49],
        [42,  0,  0, 67],
        [42, 41, 84, 33]],

       [[ 0,  0,  0,  0],
        [ 0,  0,  0,  0],
        [63,  0,  0,  0],
        [89, 14,  0,  0]]])

Along axis=1 :

In [89]: justify_nd(a, invalid_val=0, axis=1, side='front')
Out[89]: 
array([[[54, 57, 68, 77],
        [77, 22,  0, 31],
        [46,  0,  0, 98],
        [98,  0,  0, 75]],

       [[49, 47, 57, 98],
        [82, 19,  0, 87],
        [79, 89,  0, 90],
        [ 0,  0,  0, 74]],

       [[29, 75, 84, 49],
        [42, 41,  0, 67],
        [42,  0,  0, 33],
        [ 0,  0,  0,  0]],

       [[44, 10,  0, 38],
        [63, 14,  0,  0],
        [89,  0,  0,  0],
        [ 0,  0,  0,  0]]])

Along axis=2 :

In [90]: justify_nd(a, invalid_val=0, axis=2, side='front')
Out[90]: 
array([[[54, 57, 77,  0],
        [77, 31,  0,  0],
        [46, 98,  0,  0],
        [98, 22, 68, 75]],

       [[49, 98,  0,  0],
        [47, 87,  0,  0],
        [82, 19, 90,  0],
        [79, 89, 57, 74]],

       [[ 0,  0,  0,  0],
        [29, 49,  0,  0],
        [42, 75, 67,  0],
        [42, 41, 84, 33]],

       [[38,  0,  0,  0],
        [44, 10,  0,  0],
        [63,  0,  0,  0],
        [89, 14,  0,  0]]])

To the 'end' :

In [94]: justify_nd(a, invalid_val=0, axis=2, side='end')
Out[94]: 
array([[[ 0, 54, 57, 77],
        [ 0,  0, 77, 31],
        [ 0,  0, 46, 98],
        [98, 22, 68, 75]],

       [[ 0,  0, 49, 98],
        [ 0,  0, 47, 87],
        [ 0, 82, 19, 90],
        [79, 89, 57, 74]],

       [[ 0,  0,  0,  0],
        [ 0,  0, 29, 49],
        [ 0, 42, 75, 67],
        [42, 41, 84, 33]],

       [[ 0,  0,  0, 38],
        [ 0,  0, 44, 10],
        [ 0,  0,  0, 63],
        [ 0,  0, 89, 14]]])

Python: up and down justify the index of a bool numpy array

Simply sort the mask along each column: this pushes the True values down and brings the False ones up, which gives the down-justified version. For the up-justified version, flip the sorted result.

Sample run to showcase the implementation -

In [216]: mask
Out[216]: 
array([[False,  True,  True,  True,  True,  True],
       [False, False,  True,  True, False,  True],
       [False,  True, False,  True, False, False],
       [ True,  True,  True,  True, False,  True]], dtype=bool)

In [217]: np.sort(mask,0)  # Down justified
Out[217]: 
array([[False, False, False,  True, False, False],
       [False,  True,  True,  True, False,  True],
       [False,  True,  True,  True, False,  True],
       [ True,  True,  True,  True,  True,  True]], dtype=bool)

In [218]: np.sort(mask,0)[::-1]   # Up justified
Out[218]: 
array([[ True,  True,  True,  True,  True,  True],
       [False,  True,  True,  True, False,  True],
       [False,  True,  True,  True, False,  True],
       [False, False, False,  True, False, False]], dtype=bool)

Python: moving all elements greater than 0 to left and right in numpy array

One vectorized approach making use of masks -

def justify_rows(a, side='left'):
    """
    Pack the positive elements of each row of ``a`` to one side.

    Elements greater than zero keep their relative order within the row;
    every other slot of the returned array (same shape and dtype as ``a``)
    is zero. ``side='left'`` packs to the left, anything else to the right.
    """
    positive = a > 0
    # Sorting booleans row-wise moves the True values to the right end.
    sorted_mask = np.sort(positive, 1)
    placement = sorted_mask[:, ::-1] if side == 'left' else sorted_mask
    result = np.zeros_like(a)
    result[placement] = a[positive]
    return result

Basically the steps are :

Make a mask of greater than zeros.
Get a left- or right-justified mask marking where the greater-than-zero elements are to be placed in a zeros-initialized array. To get such a justified mask, we simply sort the mask from step 1 along each row, which pushes the True values in each row to the right. Thus, for the left-justified case we additionally need to flip each row.
Finally, use the justified mask to assign into output array and the mask from step-1 to select from input array.

Sample runs -

In [105]: a
Out[105]: 
array([[ 0.,  5.,  0.,  2.],
       [ 0.,  0.,  3.,  2.],
       [ 0.,  0.,  0.,  0.],
       [ 2.,  0.,  0.,  1.]])

In [106]: justify_rows(a, side='left')
Out[106]: 
array([[ 5.,  2.,  0.,  0.],
       [ 3.,  2.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 2.,  1.,  0.,  0.]])

In [107]: justify_rows(a, side='right')
Out[107]: 
array([[ 0.,  0.,  5.,  2.],
       [ 0.,  0.,  3.,  2.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  2.,  1.]])

Suppress Scientific Notation in Numpy When Creating Array From Nested List

This is what you need:

np.set_printoptions(suppress=True)

Here is the documentation.

Fast way to format and save a numpy array of x, y, z coordinates to a text file

I finally ended up writing a C extension, since it did not seem like there was any way to squeeze more performance out of python/numpy implementation.

First I was using sprintf for formatting and got these results -

In [7]: a = np.random.normal(0, 1, (1234567, 3))

In [8]: %timeit ObjWrite.write(a, 'a.txt')
1.21 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

Compared to ~2.5 seconds this is a bit of an improvement, but not nearly enough to justify writing the extension

Since almost all of the time was being spent on formatting the string, I wrote a sprintf replacement just for formatting doubles (accurate to the 15th-17th decimal place for values between -10^7 and 10^7, which is acceptable for my use case)

In [9]: %timeit ObjWrite.writeFast(a, 'a-fast.txt')
302 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

~300ms - Decent!

Here is the module -

ObjWrite.c

#include <Python.h>
#include <math.h>
#include <stdio.h>
#include <numpy/arrayobject.h>

#define CHUNK_SIZE 32768

/*
Write vertices to given file, use snprintf for formatting
    python-interface: ObjWrite.write(arr: ndarray, filepath: string)

The array elements are visited in C (row-major) order and every consecutive
triple is written as one "v x y z" line.
Returns 0 on success and -1 when the array is empty, its size is not a
multiple of 3, the file cannot be opened or a write error occurred.
Invalid arguments and iterator failures (e.g. a non-double array) are reported
as Python exceptions.
*/
static PyObject* methodWriteIter(PyObject *self, PyObject *args) {
    // Parse arguments; on failure the exception is already set, so NULL must be
    // returned (returning a value while an exception is set is an error)
    PyArrayObject *arr;
    char *filepath = NULL;
    if (!PyArg_ParseTuple(args, "O!s", &PyArray_Type, &arr, &filepath)) return NULL;

    npy_intp size = PyArray_SIZE(arr);
    // Handle zero-sized arrays specially, if size is not a multiple of 3, exit
    if (size == 0 || size % 3 != 0) return PyLong_FromLong(-1);

    // get iterator; NPY_CORDER keeps x, y, z of one vertex together whatever the
    // memory layout of the array is (memory order would scramble non C-ordered input)
    NpyIter* iter;
    NpyIter_IterNextFunc *iternext;
    PyArray_Descr *dtype;
    dtype = PyArray_DescrFromType(NPY_DOUBLE);
    iter = NpyIter_New(arr, NPY_ITER_READONLY, NPY_CORDER, NPY_NO_CASTING, dtype);
    Py_XDECREF(dtype);  // the iterator holds its own reference to the descriptor
    if (iter == NULL) return NULL;  // exception set by NpyIter_New

    // get iternext function for fast access
    iternext = NpyIter_GetIterNext(iter, NULL);
    if (iternext == NULL) {
        NpyIter_Deallocate(iter);
        return NULL;  // exception set by NpyIter_GetIterNext (errmsg argument is NULL)
    }

    // get data pointer, this will get updated by the iterator
    double **dataptr;
    dataptr = (double **) NpyIter_GetDataPtrArray(iter);

    // open file, exit if null
    FILE *fp = fopen(filepath, "w");
    if (fp == NULL) {
        NpyIter_Deallocate(iter);
        return PyLong_FromLong(-1);
    }

    // init file buffer, writing in chunks does not seem to offer any significant benefit
    // but it should still be useful when disk utilization is high.
    // The slack after CHUNK_SIZE must hold one complete line: "%.17f" of the largest
    // finite double needs about 330 characters, so a line can reach about 990 bytes.
    char fileBuffer[CHUNK_SIZE + 1024];
    int bufferCount = 0;
    int written;

    double x, y, z;
    do {
        // get 3 doubles from array
        x = **dataptr;
        iternext(iter);
        y = **dataptr;
        iternext(iter);
        z = **dataptr;
        // format into the remaining space of the buffer; snprintf never writes past it
        written = snprintf(&fileBuffer[bufferCount], sizeof(fileBuffer) - (size_t) bufferCount,
                           "v %.17f %.17f %.17f\n", x, y, z);
        if (written < 0 || (size_t) written >= sizeof(fileBuffer) - (size_t) bufferCount) {
            // formatting failed or the line did not fit
            NpyIter_Deallocate(iter);
            fclose(fp);
            return PyLong_FromLong(-1);
        }
        bufferCount += written;
        // if the chunk is big enough, write it.
        if (bufferCount >= CHUNK_SIZE) {
            fwrite(fileBuffer, 1, bufferCount, fp);
            bufferCount = 0;
        }
    } while (iternext(iter));
    // write remainder
    if (bufferCount > 0) fwrite(fileBuffer, 1, bufferCount, fp);

    // clean-up; report any write error seen on the stream (including at close)
    NpyIter_Deallocate(iter);
    int failed = ferror(fp);
    if (fclose(fp) != 0) failed = 1;
    return PyLong_FromLong(failed ? -1 : 0);
}

/*
Turns out that maximum proportion of time is taken by sprintf call in the above implementation
So, the next part is basically implementing a faster way to format doubles
*/

static const char DIGITS[] = "0123456789";  // digit-char lookup table: DIGITS[d] is the character for decimal digit d (0-9)

/* return 10 raised to `digits` (1 when digits <= 0);
the int result can overflow, but callers only need it for digits <= 9 */
int powOf10(int digits) {
    int result = 1;
    int i;
    for (i = 0; i < digits; i++) {
        result *= 10;
    }
    return result;
}

/* number of decimal digits of a non-negative int (1 to 10), found with a small
decision tree of comparisons instead of repeated division */
int countDigitsPosInt(int n) {
    if (n < 100000) {  // 1 to 5 digits
        if (n < 100) return (n < 10) ? 1 : 2;
        if (n < 1000) return 3;
        return (n < 10000) ? 4 : 5;
    }
    // 6 to 10 digits
    if (n < 10000000) return (n < 1000000) ? 6 : 7;
    if (n < 100000000) return 8;
    return (n < 1000000000) ? 9 : 10;
}

/* write the positive integer `n` into s[0 .. digits-1] as exactly `digits` characters,
zero-padded on the left; when `n` has more than `digits` digits only the lowest
`digits` ones are kept, so watch out. No terminating NUL is written.
Returns `digits`. */
int posIntToStringDigs(char *s, int n, int digits) {
    int remaining = n;
    int pos;
    // fill from the last character to the first
    for (pos = digits - 1; pos >= 0; pos--) {
        if (remaining > 0) {
            s[pos] = DIGITS[remaining % 10];  // get char from lookup table
            remaining /= 10;
        } else {
            s[pos] = '0';  // number exhausted, pad with zeros
        }
    }
    return digits;
}

/* write a non-negative integer without padding; returns the number of characters written */
int posIntToString(char *s, int n) {
    if (n != 0) {
        // format with exactly as many digits as the integer has
        return posIntToStringDigs(s, n, countDigitsPosInt(n));
    }
    // zero is a single '0', no need to count digits
    s[0] = '0';
    return 1;
}


static const int MAX_D = 8;  // max number of digits we'll break things into (each chunk is cast to int, so it must fit in one)
static const int _10D = 100000000;  // 10 ^ MAX_D, keep in sync with MAX_D

/* add one unit in the last place to the decimal string s[0 .. len-1]: the carry moves
left over 9s (skipping the decimal point); when every digit was 9 the string is
shifted right by one character and a leading '1' is inserted.
Returns the new length. */
static int carryLastDigit(char *s, int len) {
    int i;
    for (i = len - 1; i >= 0; i--) {
        if (s[i] == '.') continue;
        if (s[i] != '9') {
            s[i]++;
            return len;
        }
        s[i] = '0';
    }
    for (i = len; i > 0; i--) s[i] = s[i - 1];
    s[0] = '1';
    return len + 1;
}

/*
format positive doubles

Writes `f` with `precision` digits after the decimal point into `s` (no terminating
NUL) and returns the number of characters written. NaN and infinity are written as
"nan" and "inf", like sprintf does.

accurate to 15-17th digit for numbers that are not huge (< 10^7), fairly accurate for huge numbers
I personally do not need this to be crazy accurate, for the range of numbers I am expecting, this will do just fine
*/
int posDoubleToString(char *s, double f, int precision) {

    // length of the generated string
    int len = 0;

    // non-finite values cannot be split into digits: the scaling loop below would
    // never terminate for infinity and casting NaN to int is undefined
    if (isnan(f)) {
        s[0] = 'n'; s[1] = 'a'; s[2] = 'n';
        return 3;
    }
    if (isinf(f)) {
        s[0] = 'i'; s[1] = 'n'; s[2] = 'f';
        return 3;
    }

    // to make big numbers int friendly, divide by 10 ^ MAX_D until the whole part would fit in an int
    int steps = 0;
    while (f > _10D) {
        f /= _10D;
        steps++;
    }
    int intPart = (int) f;
    double decPart = f - intPart;
    // add the first whole part to the string, we have no idea how many digits would be there
    len += posIntToString(&s[len], intPart);

    // if the number was bigger than 10 ^ MAX_D, we need to return it to its former glory, i.e. add rest to integer string
    while (steps > 0) {
        decPart = decPart * _10D;
        intPart = (int) decPart;
        decPart = decPart - intPart;
        len += posIntToStringDigs(&s[len], intPart, MAX_D);  // appending 0's important here
        steps--;
    }

    // add the decimal point
    s[len++] = '.';

    // after the decimal, piggy back int-to-string function to `precision` number of digits
    while (precision > 0) {
        if (precision > MAX_D) {
            decPart = decPart * _10D;
            intPart = (int) decPart;
            decPart = decPart - intPart;
            len += posIntToStringDigs(&s[len], intPart, MAX_D);
            precision -= MAX_D;
        } else {
            decPart = decPart * powOf10(precision);
            intPart = (int) decPart;
            decPart = decPart - intPart;
            len += posIntToStringDigs(&s[len], intPart, precision);
            precision = 0;
            // round off: bump the last digit on the string itself so that a carry out of
            // a chunk of 9s reaches the digits on its left instead of being truncated
            if (decPart > 0.5) len = carryLastDigit(s, len);
        }
    }

    // truncate following zeros, loop on string in reverse
    /* commented to mimic sprintf
    int index = len - 1;
    while (index > 0) {
        if (s[index] != '0') break;  // if last char is not 0 our work is done, nothing more to do
        if (s[index - 1] == '.') break;  // if char is 0 but its the last 0 before decimal point, stop
        len--;
        index--;
    }*/

    return len;
}

/* format a double of either sign with `pre` digits after the decimal point;
returns the number of characters written to `s` */
int doubleToString(char *s, double f, int pre) {
    int signLen = 0;
    if (f < 0) {
        // emit the sign and format the magnitude
        s[signLen++] = '-';
        f = -f;
    }
    return signLen + posDoubleToString(s + signLen, f, pre);
}


/*
Write vertices to given file, use our doubleToString for formatting
    python-interface: ObjWrite.writeFast(arr: ndarray, filepath: string)

The array elements are visited in C (row-major) order and every consecutive
triple is written as one "v x y z" line.
Returns 0 on success and -1 when the array is empty, its size is not a
multiple of 3, the file cannot be opened or a write error occurred.
Invalid arguments and iterator failures (e.g. a non-double array) are reported
as Python exceptions.
*/
static PyObject* methodWriteIterFast(PyObject *self, PyObject *args) {
    // Parse arguments; on failure the exception is already set, so NULL must be
    // returned (returning a value while an exception is set is an error)
    PyArrayObject *arr;
    char *filepath = NULL;
    if (!PyArg_ParseTuple(args, "O!s", &PyArray_Type, &arr, &filepath)) return NULL;

    npy_intp size = PyArray_SIZE(arr);
    // Handle zero-sized arrays specially, if size is not a multiple of 3, exit
    if (size == 0 || size % 3 != 0) return PyLong_FromLong(-1);

    // get iterator; NPY_CORDER keeps x, y, z of one vertex together whatever the
    // memory layout of the array is (memory order would scramble non C-ordered input)
    NpyIter* iter;
    NpyIter_IterNextFunc *iternext;
    PyArray_Descr *dtype;
    dtype = PyArray_DescrFromType(NPY_DOUBLE);
    iter = NpyIter_New(arr, NPY_ITER_READONLY, NPY_CORDER, NPY_NO_CASTING, dtype);
    Py_XDECREF(dtype);  // the iterator holds its own reference to the descriptor
    if (iter == NULL) return NULL;  // exception set by NpyIter_New

    // get iternext function for fast access
    iternext = NpyIter_GetIterNext(iter, NULL);
    if (iternext == NULL) {
        NpyIter_Deallocate(iter);
        return NULL;  // exception set by NpyIter_GetIterNext (errmsg argument is NULL)
    }

    // get data pointer, this will get updated by the iterator
    double **dataptr;
    dataptr = (double **) NpyIter_GetDataPtrArray(iter);

    // open file, exit if null
    FILE *fp = fopen(filepath, "w");
    if (fp == NULL) {
        NpyIter_Deallocate(iter);
        return PyLong_FromLong(-1);
    }

    // init file buffer, writing in chunks does not seem to offer any significant benefit
    // but it should still be useful when disk utilization is high.
    // The slack after CHUNK_SIZE must hold one complete line: with 17 decimals the
    // largest finite double takes about 330 characters, so a line can reach about 990 bytes.
    char fileBuffer[CHUNK_SIZE + 1024];
    int bufferCount = 0;

    double x, y, z;
    do {
        // get 3 doubles from array
        x = **dataptr;
        iternext(iter);
        y = **dataptr;
        iternext(iter);
        z = **dataptr;

        // use doubleToString to format and write to buffer
        fileBuffer[bufferCount++] = 'v';
        fileBuffer[bufferCount++] = ' ';
        bufferCount += doubleToString(&fileBuffer[bufferCount], x, 17);
        fileBuffer[bufferCount++] = ' ';
        bufferCount += doubleToString(&fileBuffer[bufferCount], y, 17);
        fileBuffer[bufferCount++] = ' ';
        bufferCount += doubleToString(&fileBuffer[bufferCount], z, 17);
        fileBuffer[bufferCount++] = '\n';

        // if the chunk is big enough, write it.
        if (bufferCount >= CHUNK_SIZE) {
            fwrite(fileBuffer, 1, bufferCount, fp);
            bufferCount = 0;
        }
    } while (iternext(iter));
    // write remainder
    if (bufferCount > 0) fwrite(fileBuffer, 1, bufferCount, fp);

    // clean-up; report any write error seen on the stream (including at close)
    NpyIter_Deallocate(iter);
    int failed = ferror(fp);
    if (fclose(fp) != 0) failed = 1;
    return PyLong_FromLong(failed ? -1 : 0);
}


/* Set up the methods table: one entry per function exposed to Python, giving
   {python name, C implementation, calling convention, docstring}.
   Both functions receive their arguments as a positional tuple (METH_VARARGS). */
static PyMethodDef objWriteMethods[] = {
    {"write", methodWriteIter, METH_VARARGS, "write numpy array to a text file in .obj format"},
    {"writeFast", methodWriteIterFast, METH_VARARGS, "write numpy array to a text file in .obj format"},
    {NULL, NULL, 0, NULL}  /* Sentinel - marks the end of this structure */
};

/* Set up module definition: describes the module handed to PyModule_Create */
static struct PyModuleDef objWriteModule = {
    PyModuleDef_HEAD_INIT,
    "ObjWrite",  /* module name as seen from Python */
    "write numpy array to a text file in .obj format",  /* module docstring */
    -1,  /* size of per-module state; -1 means the module keeps no separate per-interpreter state */
    objWriteMethods  /* functions exported by the module */
};

/* module init function */
PyMODINIT_FUNC PyInit_ObjWrite(void) {
    import_array();
    return PyModule_Create(&objWriteModule);
}

setup.py

from distutils.core import setup, Extension
import numpy

def main():
    """Run the setup script that builds the ObjWrite C extension.

    The extension is compiled from ``ObjWrite.c`` with the NumPy headers on
    the include path.
    """
    obj_write_extension = Extension(
        "ObjWrite",
        ["ObjWrite.c"],
        include_dirs=[numpy.get_include()],
    )
    metadata = dict(
        name="ObjWrite",
        version="1.0.0",
        description="Python interface for the function to write numpy array to a file",
        author="Shobhit Vashistha",
        author_email="shobhit.v87@gmail.com",
    )
    setup(ext_modules=[obj_write_extension], **metadata)

# Run the build only when executed as a script (e.g. `python setup.py build`),
# not when this file is imported.
if __name__ == "__main__":
    main()

I am aware that this is probably overkill, but I had great fun diving into C and Python/Numpy C-Extension world, and hopefully someone else will find this useful in the future.

Why is True printed with leading whitespace in numpy array

This happens for float and bool arrays.

For float arrays with negative values the extra space ensures alignment (which is nice for multidimensional arrays) leaving space for the negative sign.
```
>>> np.array([-4, 1, 2, -3], dtype=np.float64).reshape(2, 2) 
array([[-4.,  1.],
       [ 2., -3.]])
```

For bool arrays with multiple values it ensures alignment between True and False, which are different lengths.

>>> np.array([0, 1, 1, 0], dtype=np.bool_).reshape(2, 2) 
array([[False,  True],
       [ True, False]], dtype=bool)

When the extra space is not needed (i.e. all positive float values, or a single-element bool array) there is no particular reason for it, and in those cases it could be omitted.

>>> np.array([4, 1, 2, 3], dtype=np.float64).reshape(2, 2)
array([[ 4.,  1.],
       [ 2.,  3.]])

There's an open PR #9130 on GitHub, which has some support, to make this print spacing more consistent when the additional spacing is not needed, so this may change in an upcoming version.

Python print aligned numpy array

NumPy provides a function for that: np.array2string

Use it like this to specify your prefix (length):

>>> print('Array:', np.array2string(A, prefix='Array: '))
Array: [[0.     0.5   ]
        [0.     0.3996]]

To understand what this function does, look at its output alone:

>>> print(np.array2string(A, prefix='Array: '))
[[0.     0.5   ]
        [0.     0.3996]]

So it simply indents the lines after the first one with the length of the prefix. The prefix itself is not printed.

justify data from right to left

One possible approach is to use Series.str.len to calculate the maximum length of the lists in the value column (lmax), and then use a list comprehension to left-pad each list with NaN up to lmax:

# Length of the longest entry in the 'value' column
# (assumes every entry of df['value'] is a list -- TODO confirm against the caller's data)
lmax = df['value'].str.len().max()
# Prepend NaN to every list until it has lmax items, so the values end up
# right-aligned; the resulting columns are labelled 1..lmax
df1 = pd.DataFrame([[np.nan] * (lmax - len(s)) + s
                    for s in df['value']], columns=range(1, lmax + 1))

Result:

print(df1)
     1    2    3    4    5    6    7    8    9  10
0  1.0  2.0  3.0  4.0  5.0  6.0  7.0  8.0  9.0   0
1  NaN  1.0  2.0  3.0  4.0  5.0  6.0  7.0  8.0   9
2  NaN  NaN  1.0  2.0  3.0  4.0  5.0  6.0  7.0   8
3  NaN  NaN  NaN  1.0  2.0  3.0  4.0  5.0  6.0   7
4  NaN  NaN  NaN  NaN  1.0  2.0  3.0  4.0  5.0   6
5  NaN  NaN  NaN  NaN  NaN  1.0  2.0  3.0  4.0   5
6  NaN  NaN  NaN  NaN  NaN  NaN  1.0  2.0  3.0   4
7  NaN  NaN  NaN  NaN  NaN  NaN  NaN  1.0  2.0   3
8  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  1.0   2
9  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   1

Getting first/second/third... value in row of numpy array after nan using vectorization

Numpy approach

We can define a function first_valid which takes a 2D array and an offset (n) as input arguments and returns a 1D array. Basically, for each row it returns the value n positions after the first non-NaN value (NaN when the row has no valid value or the position falls outside the row).

def first_valid(arr, offset=0):
    """
    Return, for each row of a 2D float array, the element located ``offset``
    columns after the first non-NaN element.

    Rows without any valid element, and rows where that position falls
    beyond the last column, yield NaN. ``arr`` itself is not modified.
    """
    n_rows = arr.shape[0]
    n_cols = arr.shape[1]

    valid = ~np.isnan(arr)
    # argmax on booleans gives the column of the first True in each row
    target_col = valid.argmax(axis=1) + offset
    # clip only to keep the lookup in bounds; bad positions are blanked below
    lookup_col = np.clip(target_col, 0, n_cols - 1)

    vals = arr[np.r_[:n_rows], lookup_col]
    row_has_no_valid = ~valid.any(axis=1)
    past_last_col = target_col >= n_cols
    vals[row_has_no_valid | past_last_col] = np.nan
    return vals

Pandas approach

We can stack the dataframe to reshape then group the dataframe on level=0 and aggregate using nth, then reindex to conform the index of aggregated frame according to original frame

def first_valid(df, offset=0):
    """
    Return, for each row of ``df``, its ``offset``-th non-NaN value.

    Parameters
    ----------
    df : DataFrame
        Input frame; each row is scanned from left to right.
    offset : int
        Zero-based position among the non-NaN values of the row. Negative
        values count from the end (-1 is the last valid value).

    Returns
    -------
    Series
        Indexed like ``df``; NaN for rows with too few valid values.
    """
    # Explicit dropna(): the result must not depend on whether stack()
    # drops missing values by itself.
    stacked = df.stack().dropna()
    per_row = stacked.groupby(level=0)
    # Number the valid values within each row. cumcount() is used instead of
    # nth() because nth() returns a differently indexed result depending on
    # the pandas version.
    if offset >= 0:
        position, wanted = per_row.cumcount(), offset
    else:
        position, wanted = per_row.cumcount(ascending=False), -offset - 1
    picked = stacked[position == wanted]
    # Drop the column level so the result is indexed by row label only.
    return picked.droplevel(-1).reindex(df.index)

Sample run

>>> first_valid(df, 0)
Date
1     25.0
2     29.0
3     33.0
4     31.0
5     30.0
6     35.0
7     31.0
8     33.0
9     26.0
10    27.0
11    35.0
12    33.0
13    28.0
14    25.0
15    25.0
16    26.0
17    34.0
18    28.0
19    34.0
20    28.0
dtype: float64


>>> first_valid(df, 1)
Date
1      NaN
2      NaN
3      NaN
4     35.0
5     34.0
6     34.0
7     26.0
8     25.0
9     31.0
10    26.0
11    25.0
12    35.0
13    25.0
14    25.0
15    26.0
16    31.0
17    29.0
18    29.0
19    26.0
20    30.0
dtype: float64

>>> first_valid(df, 2)
Date
1      NaN
2      NaN
3      NaN
4      NaN
5      NaN
6      NaN
7      NaN
8     31.0
9      NaN
10    28.0
11    29.0
12    28.0
13    35.0
14    28.0
15    31.0
16    27.0
17    25.0
18    31.0
19     NaN
20     NaN
dtype: float64

Performance

# Sample dataframe for testing purpose
df_test = pd.concat([df] * 10000, ignore_index=True)

%%timeit # Numpy approach
_ = first_valid(df_test.to_numpy(), 1)
# 6.9 ms ± 212 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


%%timeit # Pandas approach
_ = first_valid(df_test, 1)
# 90 ms ± 867 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


%%timeit # OP's approach
_ = f(df_test, 1)
# 2.03 s ± 183 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

Numpy based approach is approximately 300x faster than the OP's given approach while pandas based approach is approximately 22x faster

Python: Justifying Numpy Array