Wednesday, December 1, 2021

How to calculate running totals by group in Power Query

   Check list of all posts

Sample requirement

Input:

  1. Dim1,Dim2,Date,M1
  2. Dim11,Dim21,2021-02-11,1
  3. Dim11,Dim21,2021-02-12,2
  4. Dim11,Dim21,2021-02-13,3
  5. Dim11,Dim22,2021-02-11,11
  6. Dim11,Dim22,2021-02-12,12
  7. Dim11,Dim22,2021-02-13,13

Result 1 without group:


Result 2 with group:


Solution 1: Basic logic without subcategory 

Power Query

let

    Source = Csv.Document(File.Contents("C:\Charles\My\Study\T1.csv"),[Delimiter=",",  Encoding=1252, QuoteStyle=QuoteStyle.None]),

    #"Promoted Headers" = Table.PromoteHeaders(Source, [PromoteAllScalars=true]),

    #"Changed Type" = Table.TransformColumnTypes(#"Promoted Headers",{{"Dim1", type text}, {"Dim2", type text}, {"Date", type date}, {"M1", type number}}),

    #"RT1 Sorted table" = Table.Sort(#"Changed Type",{{"Dim1", Order.Ascending},{"Dim2", Order.Ascending},{"Date", Order.Ascending}}),

    #"RT2 Added Index" = Table.AddIndexColumn(#"RT1 Sorted table", "Index", 1, 1, Int64.Type),

    #"RT3 Added Custom" = Table.AddColumn(#"RT2 Added Index", "RunningTotal", each List.Sum(List.Range(#"RT2 Added Index"[M1],0,[Index])))

in

    #"RT3 Added Custom"


Solution 2: Basic logic by subcategory without buffer

Function fxRunningTotalWithoutBuffer

(RunTable as table) as table=>

    let 

        #"Added Index" = Table.AddIndexColumn(RunTable, "Index", 1, 1, Int64.Type),

        #"Added Custom" = Table.AddColumn(#"Added Index", "ValueRT", each List.Sum(List.Range(#"Added Index"[Value],0,[Index])))

    in 

        #"Added Custom"

Power Query

let

    Source = Csv.Document(File.Contents("C:\Charles\My\Study\T1.csv"),[Delimiter=",",  Encoding=1252, QuoteStyle=QuoteStyle.None]),

    #"Promoted Headers" = Table.PromoteHeaders(Source, [PromoteAllScalars=true]),

    #"Changed Type" = Table.TransformColumnTypes(#"Promoted Headers",{{"Dim1", type text}, {"Dim2", type text}, {"Date", type date}, {"M1", type number}}),

    #"RT1 Sorted table" = Table.Sort(#"Changed Type",{{"Dim1", Order.Ascending},{"Dim2", Order.Ascending},{"Date", Order.Ascending}}),

    #"RT2 Renamed Columns" = Table.RenameColumns(#"RT1 Sorted table",{{"M1", "Value"}}),

    #"RT3 Grouped Rows" = Table.Group(#"RT2 Renamed Columns", {"Dim1", "Dim2"}, {{"GroupBy", each _, type table [Dim1=nullable text, Dim2=nullable text, Date=nullable date, M1=nullable number]}}),

    #"RT4 Invoked Custom Function" = Table.AddColumn(#"RT3 Grouped Rows", "RT", each fxRunningTotalWithoutBuffer([GroupBy])),

    #"RT5 Removed Other Columns" = Table.SelectColumns(#"RT4 Invoked Custom Function",{"RT"}),

    #"RT6 Expanded RT" = Table.ExpandTableColumn(#"RT5 Removed Other Columns", "RT", {"Dim1", "Dim2", "Date", "Value", "Index", "ValueRT"}, {"Dim1.1", "Dim2.1", "Date", "Value", "Index", "ValueRT"})

in

    #"RT6 Expanded RT"


Solution 3: Basic logic by subcategory with buffer

Function  fxRunningTotalWithBuffer

(RunTable as table) as table=>

    let 

        #"Added Index" = Table.AddIndexColumn(RunTable, "Index", 1, 1, Int64.Type),

        BufferedValues = List.Buffer(#"Added Index"[Value]),

        #"Added Custom" = Table.AddColumn(#"Added Index", "ValueRT", each List.Sum(List.Range(BufferedValues,0,[Index])))

    in 

        #"Added Custom"

Power Query is same as above


Solution 4:  Advance logic by subcategory with List.Generate

Function 1 fxRT

(values as list) as list =>

  let

    RT = List.Generate(

      () => [RT = values{0}, counter = 0], 

      each [counter] < List.Count(values), 

      each [RT = [RT] + values{[counter] + 1}, counter = [counter] + 1], 

      each [RT]

    )

  in

    RT

Function 2 fxRunningTotal

(RunTable as table) as table =>

  let

    #"Added Index" = Table.AddIndexColumn(RunTable, "Index", 1, 1, Int64.Type), 

    BufferedValues = List.Buffer(#"Added Index"[Value]), 

    RT = Table.FromList(fxRT(BufferedValues), Splitter.SplitByNothing(), {"RT"}), 

    Columns = List.Combine({Table.ToColumns(#"Added Index"), Table.ToColumns(RT)}), 

    #"Converted to table" = Table.FromColumns(

      Columns, 

      List.Combine({Table.ColumnNames(#"Added Index"), {"ValueRT"}})

    )

  in

    #"Converted to table"

Power Query

let

    Source = Csv.Document(File.Contents("C:\Charles\My\Study\T1.csv"),[Delimiter=",",  Encoding=1252, QuoteStyle=QuoteStyle.None]),

    #"Promoted Headers" = Table.PromoteHeaders(Source, [PromoteAllScalars=true]),

    #"Changed Type" = Table.TransformColumnTypes(#"Promoted Headers",{{"Dim1", type text}, {"Dim2", type text}, {"Date", type date}, {"M1", type number}}),

    #"RT1 Sorted table" = Table.Sort(#"Changed Type",{{"Dim1", Order.Ascending},{"Dim2", Order.Ascending},{"Date", Order.Ascending}}),

    #"RT2 Renamed Columns" = Table.RenameColumns(#"RT1 Sorted table",{{"M1", "Value"}}),

    #"RT3 Grouped Rows" = Table.Group(#"RT2 Renamed Columns", {"Dim1", "Dim2"}, {{"GroupBy", each _, type table [Dim1=nullable text, Dim2=nullable text, Date=nullable date, M1=nullable number]}}),

    #"RT4 Invoked fxRunningTotal" = Table.AddColumn(#"RT3 Grouped Rows", "RT", each fxRunningTotal([GroupBy])),

    #"RT5 Removed Other Columns" = Table.SelectColumns(#"RT4 Invoked fxRunningTotal",{"RT"}),

    #"RT6 Expanded RT" = Table.ExpandTableColumn(#"RT5 Removed Other Columns", "RT", {"Dim1", "Dim2", "Date", "Value", "Index", "ValueRT"}, {"Dim1", "Dim2", "Date", "Value", "Index", "ValueRT"})

in

    #"RT6 Expanded RT"


Solution 5:  Improved solution 4 with dynamic column as a parameter

Function 1 fxRT

(values as list) as list =>

  let

    RT = List.Generate(

      () => [RT = values{0}, counter = 0], 

      each [counter] < List.Count(values), 

      each [RT = [RT] + values{[counter] + 1}, counter = [counter] + 1], 

      each [RT]

    )

  in

    RT

Function 2 fxRunningTotal

(RunTable as table, InputColName as text, OutputColName as text) as table =>

  let

    #"RunTable1" = Table.RenameColumns(RunTable,{{InputColName, "Value"}}),

    #"Added Index" = Table.AddIndexColumn(#"RunTable1", "Index", 1, 1, Int64.Type), 

    BufferedValues = List.Buffer(#"Added Index"[Value]), 

    RT = Table.FromList(fxRT(BufferedValues), Splitter.SplitByNothing(), {"RT"}), 

    Columns = List.Combine({Table.ToColumns(#"Added Index"), Table.ToColumns(RT)}), 

    #"Converted to table" = Table.FromColumns(

      Columns, 

      List.Combine({Table.ColumnNames(#"Added Index"), {OutputColName}})

    )

  in

    #"Converted to table"


Power Query

let

    Source = Csv.Document(File.Contents("C:\Charles\My\Study\T1.csv"),[Delimiter=",",  Encoding=1252, QuoteStyle=QuoteStyle.None]),

    #"Promoted Headers" = Table.PromoteHeaders(Source, [PromoteAllScalars=true]),

    #"Changed Type" = Table.TransformColumnTypes(#"Promoted Headers",{{"Dim1", type text}, {"Dim2", type text}, {"Date", type date}, {"M1", type number}}),

    #"RT1 Sorted table" = Table.Sort(#"Changed Type",{{"Dim1", Order.Ascending},{"Dim2", Order.Ascending},{"Date", Order.Ascending}}),

    #"RT2 Grouped Rows" = Table.Group(#"RT1 Sorted table", {"Dim1", "Dim2"}, {{"GroupBy", each _, type table [Dim1=nullable text, Dim2=nullable text, Date=nullable date, M1=nullable number]}}),

    #"RT3 Invoked fxRunningTotal" = Table.AddColumn(#"RT2 Grouped Rows", "RT", each fxRunningTotal([GroupBy], "M1", "OutputCol")),

    #"RT4 Removed Other Columns" = Table.SelectColumns(#"RT3 Invoked fxRunningTotal",{"RT"}),

    #"RT5 Expanded RT" = Table.ExpandTableColumn(#"RT4 Removed Other Columns", "RT", {"Dim1", "Dim2", "Date", "Value", "Index", "OutputCol"}, {"Dim1", "Dim2", "Date", "Value", "Index", "OutputCol"})

in

    #"RT5 Expanded RT"

Reference

https://www.youtube.com/watch?v=uX3_dnb5on0

https://www.youtube.com/watch?v=EFQBMJ6JyCQ

https://www.youtube.com/watch?v=7kFZw8zimCU


No comments:

Post a Comment