Adding Two Pandas Dataframes

Adding two pandas dataframes

How about x.add(y, fill_value=0)?

import pandas as pd

df1 = pd.DataFrame([(1,2),(3,4),(5,6)], columns=['a','b'])
Out: 
   a  b
0  1  2
1  3  4
2  5  6

df2 = pd.DataFrame([(100,200),(300,400),(500,600)], columns=['a','b'])
Out: 
     a    b
0  100  200
1  300  400
2  500  600

df_add = df1.add(df2, fill_value=0)
Out: 
     a    b
0  101  202
1  303  404
2  505  606

how to sum two dataframes python

Try this:

df = pd.concat([df1, df2]).groupby(['dt']).sum().reset_index()

print(df)

PS: This ensures that every datetime present in either dataframe appears in the result.

Pandas sum multiple dataframes

Use the add method with the fill_value=0 parameter.

df1 = pd.DataFrame({'val':{'a': 1, 'b':2, 'c':3}})
df2 = pd.DataFrame({'val':{'a': 1, 'b':2, 'd':3}})

df1.add(df2, fill_value=0)

   val
a  2.0
b  4.0
c  3.0
d  3.0

MultiIndex example

idx1 = pd.MultiIndex.from_tuples([('a', 'A'), ('a', 'B'), ('b', 'A'), ('b', 'D')])
idx2 = pd.MultiIndex.from_tuples([('a', 'A'), ('a', 'C'), ('b', 'A'), ('b', 'C')])

np.random.seed([3,1415])
df1 = pd.DataFrame(np.random.randn(4, 1), idx1, ['val'])
df2 = pd.DataFrame(np.random.randn(4, 1), idx2, ['val'])

df1

          val
a A -2.129724
  B -1.268466
b A -1.970500
  D -2.259055

df2

          val
a A -0.349286
  C -0.026955
b A  0.316236
  C  0.348782

df1.add(df2, fill_value=0)

          val
a A -2.479011
  B -1.268466
  C -0.026955
b A -1.654264
  C  0.348782
  D -2.259055

More than 2 dataframes

from functools import reduce

df1 = pd.DataFrame({'val':{'a': 1, 'b':2, 'c':3}})
df2 = pd.DataFrame({'val':{'a': 1, 'b':2, 'd':3}})
df3 = pd.DataFrame({'val':{'e': 1, 'c':2, 'd':3}})
df4 = pd.DataFrame({'val':{'f': 1, 'a':2, 'd':3}})
df5 = pd.DataFrame({'val':{'g': 1, 'f':2, 'd':3}})

reduce(lambda a, b: a.add(b, fill_value=0), [df1, df2, df3, df4, df5])

    val
a   4.0
b   4.0
c   5.0
d  12.0
e   1.0
f   3.0
g   1.0

Append multiple pandas data frames at once

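Have you simply tried passing a list as the argument of append? Or am I missing something? (Note: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0; on newer versions use pd.concat([df1, df2, df3]) instead.)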

import numpy as np
import pandas as pd

dates = np.asarray(pd.date_range('1/1/2000', periods=8))
df1 = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D'])
df2 = df1.copy()
df3 = df1.copy()
df = df1.append([df2, df3])

print df

Adding two dataframes in pandas with different columns

You can get the union of the columns with Index.union(), then reindex both dataframes with .reindex() using fill_value=0. Finally, .add() the two dataframes and call .reset_index(), as follows:

dt1a = dt1.set_index('index')
dt2a = dt2.set_index('index')
all_cols = dt1a.columns.union(dt2a.columns)

dt1b = dt1a.reindex(all_cols, axis=1, fill_value=0)
dt2b = dt2a.reindex(all_cols, axis=1, fill_value=0)

df_out = dt1b.add(dt2b).reset_index()

Data Input

dt1.at[2, 3] = 200

print(dt1)

   index  1  2    3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19
0    Sun  0  0    0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0
1   Wind  0  0    0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0
2  Water  0  0  200  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0
3   Flow  0  0    0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0

dt2.at[2, 3] = 10

print(dt2)

   index   3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21
0    Sun   0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0
1   Wind   0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0
2  Water  10  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0
3   Flow   0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0

Output

print(df_out)

   index  1  2    3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21
0    Sun  0  0    0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0
1   Wind  0  0    0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0
2  Water  0  0  210  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0
3   Flow  0  0    0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0

how to merge two dataframes and sum the values of columns

I think you need set_index on both DataFrames, then add, and finally reset_index:

df = df1.set_index('Name').add(df2.set_index('Name'), fill_value=0).reset_index()
print (df)
  Name  class  value
0  Ram    2.0    8.0
1  Sri    2.0   10.0
2  viv    7.0    8.0

If the values in Name are not unique, use groupby and aggregate with sum:

df = df1.groupby('Name').sum().add(df2.groupby('Name').sum(), fill_value=0).reset_index()

Merge two dataframes by index

Use merge, which is an inner join by default:

pd.merge(df1, df2, left_index=True, right_index=True)

Or join, which is a left join by default:

df1.join(df2)

Or concat, which is an outer join by default:

pd.concat([df1, df2], axis=1)

Samples:

df1 = pd.DataFrame({'a':range(6),
                    'b':[5,3,6,9,2,4]}, index=list('abcdef'))

print (df1)
   a  b
a  0  5
b  1  3
c  2  6
d  3  9
e  4  2
f  5  4

df2 = pd.DataFrame({'c':range(4),
                    'd':[10,20,30, 40]}, index=list('abhi'))

print (df2)
   c   d
a  0  10
b  1  20
h  2  30
i  3  40

# Default inner join
df3 = pd.merge(df1, df2, left_index=True, right_index=True)
print (df3)
   a  b  c   d
a  0  5  0  10
b  1  3  1  20

# Default left join
df4 = df1.join(df2)
print (df4)
   a  b    c     d
a  0  5  0.0  10.0
b  1  3  1.0  20.0
c  2  6  NaN   NaN
d  3  9  NaN   NaN
e  4  2  NaN   NaN
f  5  4  NaN   NaN

# Default outer join
df5 = pd.concat([df1, df2], axis=1)
print (df5)
     a    b    c     d
a  0.0  5.0  0.0  10.0
b  1.0  3.0  1.0  20.0
c  2.0  6.0  NaN   NaN
d  3.0  9.0  NaN   NaN
e  4.0  2.0  NaN   NaN
f  5.0  4.0  NaN   NaN
h  NaN  NaN  2.0  30.0
i  NaN  NaN  3.0  40.0

combining dataframes and adding values on common date index

We can use pd.concat() on the two dataframes, then df.reset_index() to get a new regular integer index, rename the date column, and then use df.groupby().sum().

df = pd.concat([df1,df2]) # this gives 63 rows by 1 column, where the column is the values and the dates are the index
df = df.reset_index() # moves the dates to a column, now called 'index', and makes a new integer index
df = df.rename(columns={'index':'Date'}) #renames the column
df.groupby('Date').sum()

multiply and sum two columns in two dataframes in Python

You can just use pandas operations for this.

result = df['col1'] * df['col3']

If you then want the sum of those resulting values, you can just do:

sum(results)

Pandas: Adding Two Columns From Two Dataframes With Different Time Series

You can use pandas.concat, and cumsum.

As you already have a cumulative sum as input, you first need to take the diff:

(pd.concat([df1.assign(net=df1['net'].diff().combine_first(df1['net'])),
            df2.assign(net=df2['net'].diff().combine_first(df2['net'])),
           ])
   .sort_values(by='epoch_ns')
   .assign(net=lambda d:d['net'].cumsum())
 )

output:

              epoch_ns     net
0  1635747085588589391  -15.25
0  1635747118006744249   49.50
1  1635747144020144197 -150.52
1  1635747224568306088 -910.56
2  1635747242571214411 -110.61

Intermediate step to undo the cumulative sum of net (example on df2):

>>> df2.assign(raw=df2['net'].diff().combine_first(df2['net']))
              epoch_ns     net     raw
0  1635747085588589391  -15.25  -15.25
1  1635747224568306088 -775.29 -760.04
2  1635747242571214411   24.66  799.95

original answer

(pd.concat([df1,df2])
   .sort_values(by='epoch_ns')
   .assign(net=lambda d:d['net'].cumsum())
 )

Output:

              epoch_ns     net
0  1635747085588589391  -15.25
0  1635747118006744249   49.50
1  1635747144020144197  -85.77
1  1635747224568306088 -861.06
2  1635747242571214411 -836.40

Adding Two Pandas Dataframes