library(tidyverse)
library(readxl)
path = "files/CH-129 Custom Grouping.xlsx"
input = read_excel(path, range = "B2:C26") %>% janitor::clean_names()
test = read_excel(path, range = "G2:I4")
result = input %>%
mutate(a = cumsum(lag(stock_price, default = 0) > stock_price),
d = cumsum(lag(stock_price, default = 0) < stock_price)) %>%
mutate(a_n = ifelse(n() > 2, a, NA), .by = a) %>%
mutate(d_n = ifelse(n() > 2, d, NA), .by = d) %>%
mutate(check = case_when(
!is.na(a_n) & is.na(d_n) ~ a_n,
!is.na(d_n) & is.na(a_n) ~ d_n,
!is.na(a_n) & !is.na(d_n) ~ pmin(a_n, d_n),
TRUE ~ NA_real_
)) %>%
mutate(diff = stock_price - lag(stock_price, default = 0)) %>%
mutate(sign = sign(median(diff)),
Group = ifelse(sign == 1, "Upward", "Downward"),
.by = check) %>%
summarise(n = n(), .by = c(Group,check)) %>%
summarise(`AVG Lenght` = mean(n), `Max Length` = max(n), .by = Group)
all.equal(result, test, check.attributes = FALSE)
#> [1] TRUEOmid - Challenge 129
data-challenges
advanced-exercises
🔰 Group Challenge 129: Custom Grouping!

Challenge Description
🔰 Group Challenge 129: Custom Grouping!
Solutions
Logic:
Reads the workbook ranges needed for the challenge
Aggregates or ranks values at the relevant grouping level
Builds the intermediate columns that drive the final result
Strengths:
- The R solution stays close to the workbook rule and keeps the transformation compact.
Areas for Improvement:
- The code assumes the sheet structure and source ranges remain stable.
Gem:
- The strongest part of the solution is choosing the right intermediate representation before shaping the final output.
import pandas as pd
import numpy as np
path = "CH-129 Custom Grouping.xlsx"
input = pd.read_excel(path, usecols="B:C", skiprows=1, nrows=25).rename(columns={'Date': 'date', 'Stock price': 'stock_price'})
test = pd.read_excel(path, usecols="G:I", skiprows=1, nrows=2)
test['Group'] = test['Group'].str.strip()
input['a'] = (input['stock_price'].shift() > input['stock_price']).cumsum()
input['d'] = (input['stock_price'].shift() < input['stock_price']).cumsum()
input['a_n'] = input.groupby('a')['a'].transform(lambda x: x if len(x) > 2 else pd.NA)
input['d_n'] = input.groupby('d')['d'].transform(lambda x: x if len(x) > 2 else pd.NA)
input['check'] = input.apply(lambda row: row['a_n'] if pd.notna(row['a_n']) and pd.isna(row['d_n']) else (
row['d_n'] if pd.notna(row['d_n']) and pd.isna(row['a_n']) else (
min(row['a_n'], row['d_n']) if pd.notna(row['a_n']) and pd.notna(row['d_n']) else pd.NA
)
), axis=1)
input['diff'] = input['stock_price'] - input['stock_price'].shift(1, fill_value=0)
input['sign'] = input.groupby('check')['diff'].transform(lambda x: 1 if x.gt(0).sum() > x.lt(0).sum() else -1)
input['Group'] = input['sign'].apply(lambda x: "Upward" if x == 1 else "Downward")
result = input.groupby(['Group', 'check']).size().reset_index(name='n_of_days')
result = result.groupby('Group')['n_of_days'].agg(['mean', 'max']).sort_values(by='Group', ascending=False).reset_index()
result.columns = test.columns
print(result.equals(test)) # TrueLogic:
Reads the workbook ranges needed for the challenge
Aggregates or ranks values at the relevant grouping level
Strengths:
- The Python version follows the same rule in a direct dataframe-oriented implementation.
Areas for Improvement:
- The code assumes the workbook layout remains stable, so any sheet redesign would require small adjustments.
Gem:
- The implementation stays close to the original workbook rule instead of adding unnecessary abstraction.
Difficulty Level
This task is moderate:
- The business rule is readable, but the workbook still requires careful implementation to reach the expected layout.