Time Intervals¶
Process Time Intervals and Group by Task Code
This script is used to process a list of time intervals, load the data into a Pandas DataFrame, calculate the duration of each interval, and group the intervals by task code.
Input Data Format¶
The input data is a string containing a list of time intervals. Each time interval is formatted as follows:
{end_date_and_time}
; {task_code} {comment}
{start_date_and_time}
Where:
{end_date_and_time}: The date and time when the task ended, inDD.MM.YYYY HH:MM:SSformat (e.g.,24.03.2025 23:29:38).{task_code}: A unique identifier for the task (e.g.,TASK-1234).{comment}: A brief description of the task (e.g.,Estimate new features).{start_date_and_time}: The date and time when the task started, inDD.MM.YYYY HH:MM:SSformat (e.g.,24.03.2025 22:50:13).
Example Input¶
24.03.2025 23:29:38
; TASK-1234 Estimate new features
24.03.2025 22:50:13
25.03.2025 10:15:00
; TASK-1234 Implement feature A
25.03.2025 09:00:00
26.03.2025 12:00:00
; TASK-5678 Bug fixing
26.03.2025 11:00:00
Expected Output¶
The expected output is a Pandas DataFrame grouped by task_code. Optionally, the output can include aggregated duration statistics for each task code.
Processing Steps¶
import streamlit as st
import pandas as pd
import re
from datetime import datetime
import yaml
import os
st.set_page_config(
page_title="T-Int",
layout="wide",
)
Print banner
@st.cache_data
def print_banner():
print("""
.___________. __ .__ __. .___________.
| | | | | \\ | | | |
`---| |----`______| | | \\| | `---| |----`
| | |______| | | . ` | | |
| | | | | |\\ | | |
|__| |__| |__| \\__| |__|
""")
return 1
print_banner()
Input data
data = st.text_area("Time Intervals", height=300)
Regular expression pattern to extract intervals
pattern = re.compile(r'(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2})\n; ([\w\-]+) (.*?)\n(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}:\d{2})', re.DOTALL)
Highlight task
def load_task_colors(path: str = "task_colors.yml") -> dict:
if not os.path.exists(path):
return {}
with open(path, "r", encoding="utf-8") as f:
data = yaml.safe_load(f) or []
task_colors = {}
for item in data:
color = str(item["color"]).strip().upper()
task_colors[str(item["task"]).strip()] = color
return task_colors
task_colors = load_task_colors()
def highlight_task(val):
color = task_colors.get(val)
if color:
return f"background-color: #{color}; color: white;"
return ""
Process input
def process():
# Extract matches
matches = pattern.findall(data)
if len(matches) == 0:
st.error('Time Intervals not found', icon='❌')
# st.stop()
return
Convert extracted data into a DataFrame
records = []
for end_dt, task_code, comment, start_dt in matches:
start = datetime.strptime(start_dt, '%d.%m.%Y %H:%M:%S')
end = datetime.strptime(end_dt, '%d.%m.%Y %H:%M:%S')
duration = (end - start).total_seconds() / 3600
records.append({
'start_datetime': start,
'end_datetime': end,
'Task': task_code,
'Comment': comment,
'Hours': duration
})
# Create DataFrame
df = pd.DataFrame(records)
Group by task_code, sum durations, and join comments
grouped_df = df.groupby('Task', as_index=False).agg({
'Hours': 'sum',
'Comment': lambda x: ' // '.join(dict.fromkeys(x))
})
grouped_df['Hours'] = grouped_df['Hours'].round(1)
Display results
st.write("### Duration by Task")
st.dataframe(grouped_df.style.applymap(highlight_task, subset=["Task"]))
# Calculate total hours
total_hours = grouped_df['Hours'].sum()
st.write(f"**Total Hours: {total_hours}**")
Click button
if st.button("Process", type='primary', use_container_width=True):
process()