Cumulative Sum of Values by Month, Filling in for Missing Months

Cumulative sum of values by month, filling in for missing months

This is very similar to other questions, but the best query is still tricky.

Basic query to get the running sum quickly:

SELECT to_char(date_trunc('month', date_added), 'Mon YYYY') AS mon_text
     , sum(sum(qty)) OVER (ORDER BY date_trunc('month', date_added)) AS running_sum
FROM   tbl
GROUP  BY date_trunc('month', date_added)
ORDER  BY date_trunc('month', date_added);

The tricky part is to fill in for missing months:

WITH cte AS (
   SELECT date_trunc('month', date_added) AS mon, sum(qty) AS mon_sum
   FROM   tbl
   GROUP  BY 1
   )
SELECT to_char(mon, 'Mon YYYY') AS mon_text
     , sum(c.mon_sum) OVER (ORDER BY mon) AS running_sum
FROM  (SELECT min(mon) AS min_mon FROM cte) init
     , generate_series(init.min_mon, now(), interval '1 month') mon
LEFT   JOIN cte c USING (mon)
ORDER  BY mon;

The implicit CROSS JOIN LATERAL requires Postgres 9.3+. This starts with the first month in the table.

To start with a given month:

WITH cte AS (
   SELECT date_trunc('month', date_added) AS mon, sum(qty) AS mon_sum
   FROM   tbl
   GROUP  BY 1
   )
SELECT to_char(mon, 'Mon YYYY') AS mon_text
     , COALESCE(sum(c.mon_sum) OVER (ORDER BY mon), 0) AS running_sum
FROM   generate_series('2015-01-01'::date, now(), interval '1 month') mon
LEFT   JOIN cte c USING (mon)
ORDER  BY mon;

db<>fiddle here

_{Old sqlfiddle}

Keeping months from different years apart. You did not ask for that, but you'll most likely want it.

Note that the "month" to some degree depends on the time zone setting of the current session! Details:

Ignoring time zones altogether in Rails and PostgreSQL

Calculating Cumulative Sum in PostgreSQL
PostgreSQL: running count of rows for a query 'by minute'
Postgres window function and group by exception

cumsum() by month but repeat the values if there is no data in that month

Use:

#create month period column  for correct ordering
df['months'] = df['date'].dt.to_period('m')
#aggregate month
df1 = df.groupby(['months', 'col1'])['col2'].sum()

#MultiIndex with all possible combinations
mux = pd.MultiIndex.from_product([pd.period_range(df['months'].min(),
                                                  df['months'].max(), freq='M'),
                                  df['col1'].unique()], names=df1.index.names)

#add missing values with reindex reshape, cumulative sum
#forward fill missing values and reshape back
df2 = (df1.reindex(mux)
          .unstack()
          .cumsum()
          .ffill()
          .stack()
          .astype(int)
          .reset_index(name='cumsum')
         )
print (df2)
     months   col1  cumsum
0   2016-01  apple      20
1   2016-02  apple      60
2   2016-02   pear      60
3   2016-03  apple      70
4   2016-03   pear      60
5   2016-04  apple      70
6   2016-04   pear      60
7   2016-05  apple     120
8   2016-05   pear      60
9   2016-06  apple     120
10  2016-06   pear      65

Last if necessary convert datetimes to custom strings:

df2['months'] = df2['months'].dt.strftime('%b-%y')
print (df2)
    months   col1  cumsum
0   Jan-16  apple      20
1   Feb-16  apple      60
2   Feb-16   pear      60
3   Mar-16  apple      70
4   Mar-16   pear      60
5   Apr-16  apple      70
6   Apr-16   pear      60
7   May-16  apple     120
8   May-16   pear      60
9   Jun-16  apple     120
10  Jun-16   pear      65

Fill Missing Dates for Running Total

You need a calendar table here containing all dates. Consider the following approach:

WITH dates AS (
    SELECT '2021-05-01' AS Date UNION ALL
    SELECT '2021-05-02' UNION ALL
    SELECT '2021-05-03' UNION ALL
    SELECT '2021-05-04' UNION ALL
    SELECT '2021-05-05'
)

SELECT
    u.UserID,
    d.Date,
    SUM(t.Sale) AS DailySale,
    SUM(COALESCE(SUM(t.Sale), 0)) OVER (PARTITION BY u.UserID ORDER BY d.Date) AS RunningSale
FROM (SELECT DISTINCT UserID FROM yourTable) u
CROSS JOIN dates d
LEFT JOIN yourTable t
    ON t.UserID = u.UserID AND t.Date = d.Date
GROUP BY
    u.UserID,
    d.Date
ORDER BY
    u.UserID,
    d.Date

screen capture from demo link below

Demo

Running sum with missing dates

To solve this problem you need to create separate derived tables of all the PeriodYearMonth values (generated using a recursive CTE) and the distinct Reg and PartNo pairs, and CROSS JOIN them to each other to get all combinations of the columns. This combined table can then be LEFT JOINed to the original table to get the ComponentRemovals for each Reg, PartNo and PeriodYearMonth, and these can then be summed using a window function:

WITH CTE AS (
  SELECT CONVERT(DATE, CONCAT(REPLACE(MIN(PeriodYearMonth), ' ', '-'), '-01'), 23)  AS date, 
         CONVERT(DATE, CONCAT(REPLACE(MAX(PeriodYearMonth), ' ', '-'), '-01'), 23)  AS max_date
  FROM vtRelRepComponentsRemovalsByPartNo
  UNION ALL
  SELECT DATEADD(MONTH, 1, date), max_date
  FROM CTE
  WHERE date < max_date
)
SELECT FORMAT(CTE.date, 'yyyy MM') AS PeriodYearMonth
     , rp.Reg
     , rp.PartNo
     , COALESCE(v.ComponentRemovals, 0) AS ComponentRemovals
     , SUM(COALESCE(v.ComponentRemovals, 0)) OVER (PARTITION BY rp.Reg, rp.PartNo ORDER BY FORMAT(CTE.date, 'yyyy MM')
                                                   ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS RunningRemovals
FROM CTE
CROSS JOIN (SELECT DISTINCT Reg, PartNo FROM vtRelRepComponentsRemovalsByPartNo) rp
LEFT JOIN vtRelRepComponentsRemovalsByPartNo v
       ON v.PeriodYearMonth = FORMAT(CTE.date, 'yyyy MM')
      AND v.Reg = rp.Reg
      AND v.PartNo = rp.PartNo
ORDER BY rp.Reg, rp.PartNo, CTE.date

Output

PeriodYearMonth     Reg     PartNo              ComponentRemovals   RunningRemovals
2019 09             G-NHVP  109-0740V01-137     0                   0
2019 10             G-NHVP  109-0740V01-137     1                   1
2019 11             G-NHVP  109-0740V01-137     1                   2
2019 12             G-NHVP  109-0740V01-137     1                   3
2020 01             G-NHVP  109-0740V01-137     1                   4
2019 09             G-NHVR  11-13354P           2                   2
2019 10             G-NHVR  11-13354P           0                   2
2019 11             G-NHVR  11-13354P           0                   2
2019 12             G-NHVR  11-13354P           0                   2
2020 01             G-NHVR  11-13354P           0                   2
2019 09             OO-NSF  11-13354P           0                   0
2019 10             OO-NSF  11-13354P           1                   1
2019 11             OO-NSF  11-13354P           0                   1
2019 12             OO-NSF  11-13354P           0                   1
2020 01             OO-NSF  11-13354P           0                   1
2019 09             OY-HMV  11-13354P           0                   0
2019 10             OY-HMV  11-13354P           1                   1
2019 11             OY-HMV  11-13354P           0                   1
2019 12             OY-HMV  11-13354P           0                   1
2020 01             OY-HMV  11-13354P           0                   1

Demo on SQLFiddle

Note that if all PeriodYearMonth values of interest are present in vtRelRepComponentsRemovalsByPartNo you can simply use a SELECT DISTINCT ... subquery to get those values rather than the recursive CTE e.g.

SELECT d.PeriodYearMonth
     , rp.Reg
     , rp.PartNo
     , COALESCE(v.ComponentRemovals, 0) AS ComponentRemovals
     , SUM(COALESCE(v.ComponentRemovals, 0)) OVER (PARTITION BY rp.Reg, rp.PartNo ORDER BY d.PeriodYearMonth
                                                   ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS RunningRemovals
FROM (SELECT DISTINCT PeriodYearMonth FROM vtRelRepComponentsRemovalsByPartNo) d
CROSS JOIN (SELECT DISTINCT Reg, PartNo FROM vtRelRepComponentsRemovalsByPartNo) rp
LEFT JOIN vtRelRepComponentsRemovalsByPartNo v
       ON v.PeriodYearMonth = d.PeriodYearMonth
      AND v.Reg = rp.Reg
      AND v.PartNo = rp.PartNo
ORDER BY rp.Reg, rp.PartNo, d.PeriodYearMonth

Demo on SQLFiddle

Cumulative sum for months that do and dont exist in a Snowflake table

This is a request that's overly complicated by arbitrary rules, but I'm going to give you an answer that's 90% there.

It uses cross joins to generate the combinations with no data, and a UDF to generate the missing months. Then window functions to get the desired values:

with data as (
    select x[4]::string id, x[5]::date action_date, x[6]::string data
    from (
    select split(value, ' ') x
    from table(split_to_table(
$$    001 2021-01-20 jams
    002 2021-01-23 orange
    003 2021-01-19 banana
    001 2021-04-11 pineap
    002 2021-03-01 grape
    004 2021-03-01 apple$$, '\n'
)))), range_months as (
    select date_trunc(month, min(action_date)) since, max(action_date) until
    from data
), all_months as (
    select value::date m
    from range_months, table(flatten(list_months_between(since, until)))
), all_ids as (
    select distinct id
    from data
), all_crossed as (
    select *
    from all_months, all_ids
), left_joined as (
    select m, a.id, data, b.action_date action_date
    from all_crossed a
    left join data b
    on a.id=b.id
    and date_trunc(month, b.action_date)=a.m
), almost_there as (
    select m, id, count(action_date) action_taken_this_month, any_value(data) recent_data
        , lag(action_taken_this_month) over(partition by id order by m) previous_action_taken_this_month
    from left_joined
    group by 1, 2
)

select id, last_day(m) month_end
    , action_taken_this_month
    , ifnull(sum(previous_action_taken_this_month) over(partition by id order by m), 0) total_actions
    , lag(recent_data, 1) over(partition by id order by m) most_recent_data
from almost_there
order by 1, month_end

I wrote the UDF to generate a list of months in Python, but you can rewrite in your favorite language if desired:

create or replace function list_months_between(since date, until date)
returns array
language python
runtime_version='3.8'
packages=('pandas')
handler = 'x'
as 
$$
import pandas as pd
def x(since, until):
    return pd.date_range(since, until, freq='MS').strftime("%Y-%m-%d").tolist()
$$
;

select list_months_between('2020-01-01', '2020-03-10');

At the end of this series of sub-queries, we get results 90% like desired:

Sample Image

After all this work, I hope you can take this queries and add an extra join to replace some of the nulls on most_recent_data with the last lookup value.

SQL Cumulative Sum not showing when group by values are missing

Use a cross join to generate the rows and then left join to bring in the values:

select m.month, c.category, t.monthly_value,
       sum(t.monthly_value) over (partition by c.category order by m.month) as running_monthly_value
from (select distinct month from t) m cross join
     (select distinct category from t) c left join
     t
     on t.month = m.month and t.category = c.category;

SQL Server : cumulative sum, missing dates

This was harder than I thought it would be. Someone may have a simpler solution. Need to fill in the artcode and also consider different ranges on different artcodes.

declare @T table (artcode varchar(10), transdate date, Qty smallmoney, transactionvalue smallmoney);
insert into @T values 
       ('M100', '2010-11-24', 6.00, 179.40)
     , ('M100', '2010-11-24', -6.00, -179.4)
     , ('M100', '2010-11-25', 100.00, 2900.00)
     , ('M100', '2010-11-26', -1.00, -29)
     , ('M100', '2010-11-26', -5.00, -145)
     , ('M100', '2010-11-26', -1.00, -29)
     , ('M100', '2010-11-29', -5.00, -145)
     , ('M100', '2010-11-29', -3.00, -87)
     , ('M100', '2010-11-29', -1.00, -29)
     , ('M101', '2010-11-23', 6.00, 179.40)
     , ('M101', '2010-11-25', 100.00, 2900.00)
     , ('M101', '2010-11-26', -1.00, -29)
     , ('M101', '2010-11-26', -5.00, -145)
     , ('M101', '2010-11-26', -1.00, -29)
     , ('M101', '2010-11-30', -5.00, -145)
     , ('M101', '2010-11-30', -3.00, -87)
     , ('M101', '2010-11-30', -1.00, -29);
with limits as 
( select t.artcode, min(t.transdate) as startDate, max(t.transdate) as endtDate 
  from @T t 
  group by t.artcode
)
, dts as 
( select l.artcode, l.startDate as dt, l.startDate, l.endtDate
    from limits l 
  union all 
  select l.artcode, dateadd(day, 1, l.dt), l.startDate, l.endtDate
  from dts l 
  where dateadd(day, 1, l.dt) <= l.endtDate
)
select distinct dts.artcode, dts.dt
     , sum(isnull(t.Qty, 0))              over (partition by dts.artcode order by dts.dt) as Qty
     , sum(isnull(t.transactionvalue, 0)) over (partition by dts.artcode order by dts.dt) as transactionvalue
from dts 
left join @T t 
  on t.transdate = dts.dt 
 and t.artcode = dts.artcode
order by dts.artcode, dts.dt;

artcode    dt         Qty                   transactionvalue
---------- ---------- --------------------- ---------------------
M100       2010-11-24 0.00                  0.00
M100       2010-11-25 100.00                2900.00
M100       2010-11-26 93.00                 2697.00
M100       2010-11-27 93.00                 2697.00
M100       2010-11-28 93.00                 2697.00
M100       2010-11-29 84.00                 2436.00
M101       2010-11-23 6.00                  179.40
M101       2010-11-24 6.00                  179.40
M101       2010-11-25 106.00                3079.40
M101       2010-11-26 99.00                 2876.40
M101       2010-11-27 99.00                 2876.40
M101       2010-11-28 99.00                 2876.40
M101       2010-11-29 99.00                 2876.40
M101       2010-11-30 90.00                 2615.40

Cumulative Sum of Values by Month, Filling in for Missing Months