![[Pasted image 20250101103706.png]]
## Summary
- A [WHOOP](https://www.whoop.com/us/en/) is a wearable fitness tracker that monitors health metrics such as:
- Sleep quality
- Respiratory Rate
- Physical Strain
- Heart rate variability (HRV) [(still experimental?)](https://www.health.harvard.edu/blog/heart-rate-variability-new-way-track-well-2017112212789)
- This project uses request interception to grab the data that's displayed on WHOOP's web app.
- This allows us to perform our own analysis on our collected health data.
- This opens the door for us to do things like:
- Experiment with interaction features
- Explore cause-and-effect relationships between daily activities and recovery patterns
- "How does my daily activity level affect my sleeping patterns?"
- "How do sleep patterns affect my overall recovery?"
- Join with health data from other devices
![[Pasted image 20250101104953.png]]
## Code
### xhr_scrape.py
- This is the script that uses mitmproxy to intercept requests to WHOOP's API.
- The typical workflow here is to:
- Run a browser that uses a proxy on the same port that this script runs on (8080).
- Navigate to the WHOOP web app and view your dashboard.
- The matching API responses are intercepted, flattened, and saved as CSV files.
```python
#xhr_scrape.py
import json
from mitmproxy import http
import pandas as pd
from datetime import datetime
import os
# Flattened records collected from every intercepted response; the list lives
# at module level so it persists between calls to the response() hook.
whoop_data = []
def extract_nested_data(data, prefix=''):
    """Flatten a nested mapping into a single-level dict.

    Args:
        data: Mapping to flatten. Anything that is not a dict (for example the
            ``None`` produced by a JSON ``null``) is treated as empty.
        prefix: String prepended to every generated key.

    Returns:
        A new dict. Keys of nested dicts are joined as
        ``{prefix}{outer}_{inner}``; list values are stored as JSON strings;
        every other value is copied unchanged.
    """
    if not isinstance(data, dict):
        # A null/missing section contributes no columns instead of raising
        # AttributeError on .items().
        return {}

    extracted = {}
    for key, value in data.items():
        column = f"{prefix}{key}"
        if isinstance(value, dict):
            extracted.update(extract_nested_data(value, f"{column}_"))
        elif isinstance(value, list):
            # Handle lists (e.g., zone_durations): serialize so the whole list
            # fits in one cell.
            extracted[column] = json.dumps(value)
        else:
            extracted[column] = value
    return extracted
def response(flow: http.HTTPFlow) -> None:
    """Capture WHOOP cycle records from an intercepted response and export them.

    Responses whose request URL does not contain the cycles aggregate-range
    endpoint are ignored. For a matching response, each entry of ``records``
    is flattened into one row (cycle, first sleep, recovery, first workout),
    appended to the module-level ``whoop_data`` list, and the full accumulated
    list is written to a new timestamped CSV under ``whoop_data/``.

    Args:
        flow: The intercepted HTTP flow; its request URL and response body are
            read.

    Errors are reported with ``print`` rather than raised, so a bad response
    does not propagate out of the hook.
    """
    # not a huge fan of using global variables, but it allows us to share data between requests here
    global whoop_data

    # filter by this endpoint
    if "api.prod.whoop.com/activities-service/v1/cycles/aggregate/range" not in flow.request.url:
        return

    try:
        response_data = json.loads(flow.response.content)
        records = response_data.get('records') or []

        for record in records:
            # `.get(key, default)` still returns None when the key is present
            # with a JSON null, so fall back with `or` instead.
            sleeps = record.get('sleeps') or [{}]
            workouts = record.get('workouts') or [{}]

            extracted_data = {}
            extracted_data.update(extract_nested_data(record.get('cycle') or {}, 'cycle_'))
            # Only the first sleep and the first workout of a record are kept.
            extracted_data.update(extract_nested_data(sleeps[0] or {}, 'sleep_'))
            extracted_data.update(extract_nested_data(record.get('recovery') or {}, 'recovery_'))
            extracted_data.update(extract_nested_data(workouts[0] or {}, 'workout_'))
            whoop_data.append(extracted_data)

        print(f"Extracted data from {len(records)} records")

        if not whoop_data:
            # Nothing captured so far: there is no cycle_created_at column to
            # sort on and no point writing an empty file.
            print("No records captured yet; skipping CSV export")
            return

        df = pd.DataFrame(whoop_data)
        if 'cycle_created_at' in df.columns:
            df['cycle_created_at'] = pd.to_datetime(df['cycle_created_at'])
            df = df.sort_values('cycle_created_at')

        output_dir = 'whoop_data'
        os.makedirs(output_dir, exist_ok=True)

        output_file = os.path.join(output_dir, f"whoop_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
        df.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
    except json.JSONDecodeError:
        print("Failed to parse JSON response")
    except Exception as e:
        # Top-level boundary of the hook: report and keep the proxy running.
        print(f"Error processing response: {str(e)}")
# To use this script with mitmproxy, run:
# mitmproxy -s xhr_scrape.py
# or
# mitmdump -s xhr_scrape.py
```
### analysis.py
- Processes the CSV files created by the interception script.
- Typical pandas DataFrame analysis.
```python
# analysis.py
#%%
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
#%%
def load_and_process_data(file_path):
    """Load an exported CSV, keep one row per cycle and add next-cycle sleep features.

    Args:
        file_path: Path (or file-like object) accepted by ``pandas.read_csv``.
            The data must contain a ``cycle_id`` column.

    Returns:
        A DataFrame with one row per ``cycle_id``. For each sleep metric that
        is present, a ``next_cycle_<metric>`` column holds the value from the
        chronologically following cycle. When ``cycle_created_at`` exists it
        becomes the (sorted) index.
    """
    df = pd.read_csv(file_path)

    date_columns = [
        'cycle_created_at', 'cycle_updated_at',
        'sleep_created_at', 'sleep_updated_at',
        'recovery_created_at', 'recovery_updated_at',
        'workout_created_at', 'workout_updated_at',
    ]
    for col in date_columns:
        if col in df.columns:
            df[col] = pd.to_datetime(df[col])

    has_timestamp = 'cycle_created_at' in df.columns

    # Collapse repeated captures of the same cycle: after sorting, last()
    # keeps the last non-null value of every column within each cycle_id.
    sort_keys = ['cycle_id', 'cycle_created_at'] if has_timestamp else ['cycle_id']
    df = df.sort_values(sort_keys)
    df = df.groupby('cycle_id').last().reset_index()

    if has_timestamp:
        # groupby() leaves the rows ordered by cycle_id; "next cycle" must
        # mean the next cycle in time, so order chronologically before shifting.
        df = df.sort_values('cycle_created_at').reset_index(drop=True)

    # lag features: pull the following cycle's sleep metrics onto this row
    lag_sources = (
        'sleep_slow_wave_sleep_duration',
        'sleep_rem_sleep_duration',
        'sleep_respiratory_rate',
    )
    for source in lag_sources:
        if source in df.columns:
            df[f'next_cycle_{source}'] = df[source].shift(-1)

    if has_timestamp:
        df.set_index('cycle_created_at', inplace=True)
        df.sort_index(inplace=True)
    return df
def calculate_daily_data(df):
    """Average every numeric column per calendar day.

    Args:
        df: DataFrame whose index exposes ``.date`` (a datetime-like index).

    Returns:
        A DataFrame of per-day means of the numeric columns, indexed by the
        day converted back to pandas datetimes.
    """
    numeric_only = df.select_dtypes(include=[np.number])
    calendar_days = numeric_only.index.date
    per_day = numeric_only.groupby(calendar_days).mean()
    per_day.index = pd.to_datetime(per_day.index)
    return per_day
def plot_trends(df, columns, title):
    """Plot the requested columns against the frame's index on one figure.

    Args:
        df: DataFrame to plot from; its index is used for the x axis.
        columns: Column names to draw; names not present in ``df`` are skipped.
        title: Figure title.
    """
    plt.figure(figsize=(12, 6))
    drawable = [name for name in columns if name in df.columns]
    for name in drawable:
        plt.plot(df.index, df[name], label=name)
    plt.title(title)
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
def plot_correlations(df, columns):
    """Show an annotated heatmap of the pairwise correlations of the given columns.

    Args:
        df: DataFrame holding the data.
        columns: Column names to correlate; names not present in ``df`` are
            dropped before the correlation matrix is computed.
    """
    present = list(filter(lambda name: name in df.columns, columns))
    matrix = df[present].corr()

    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, annot=True, cmap='coolwarm')
    plt.title('Correlation Heatmap')
    plt.tight_layout()
    plt.show()
def process_cycle_data(df):
    """Reduce a frame to one row per cycle and add next-cycle sleep features.

    Args:
        df: DataFrame with ``cycle_id`` and ``cycle_created_at`` columns.

    Returns:
        A new DataFrame with one row per ``cycle_id`` in chronological order
        and a fresh integer index. For each sleep metric that is present, a
        ``next_cycle_<metric>`` column holds the value from the following cycle.
    """
    print(df.columns)

    # After sorting, last() keeps the last non-null value of every column
    # within each cycle_id.
    df = df.sort_values(['cycle_id', 'cycle_created_at'])
    last_records = df.groupby('cycle_id').last().reset_index()

    # groupby() leaves the rows ordered by cycle_id; "next cycle" must mean
    # the next cycle in time, so order chronologically before shifting.
    last_records = last_records.sort_values('cycle_created_at').reset_index(drop=True)

    lag_sources = (
        'sleep_slow_wave_sleep_duration',
        'sleep_rem_sleep_duration',
        'sleep_respiratory_rate',
    )
    for source in lag_sources:
        if source in last_records.columns:
            last_records[f'next_cycle_{source}'] = last_records[source].shift(-1)
    return last_records
#%%
# Directory holding the CSV exports (trailing separator included).
data_dir = 'whoop_data/'
# Main processing
# data_dir already ends with a separator, so the file name is appended
# directly instead of producing a doubled "//" in the path.
file_path = f'{data_dir}whoop_data_20241021_151310.csv'
df = load_and_process_data(file_path)
#%%
# Per-day means of the numeric columns; every later cell works on this frame.
daily_data = calculate_daily_data(df)
#%%
# on the fly preprocessing
#daily_data = process_cycle_data(daily_data)
#%%
# Strain, sleep and recovery scores alongside the next-cycle sleep features.
trend_columns = [
    'cycle_day_strain',
    'sleep_score',
    'recovery_recovery_score',
    'next_cycle_sleep_slow_wave_sleep_duration',
    'next_cycle_sleep_rem_sleep_duration',
    'next_cycle_sleep_respiratory_rate',
]
plot_trends(daily_data, trend_columns, 'Daily Trends')
#%%
correlation_columns = [
    'cycle_day_strain',
    'sleep_score',
    'recovery_recovery_score',
    'workout_intensity_score',
    'recovery_resting_heart_rate',
    'recovery_hrv_rmssd',
    'next_cycle_sleep_slow_wave_sleep_duration',
    'next_cycle_sleep_rem_sleep_duration',
    'next_cycle_sleep_respiratory_rate',
]
plot_correlations(daily_data, correlation_columns)
#%%
# Summarize only the columns that exist: workout_intensity_score is treated as
# optional further down, so selecting it unconditionally here could raise KeyError.
summary_columns = [
    col
    for col in ['cycle_day_strain', 'sleep_score', 'recovery_recovery_score', 'workout_intensity_score']
    if col in daily_data.columns
]
if summary_columns:
    print(daily_data[summary_columns].describe())
else:
    print("\nNo summary columns available")
#%%
# Days whose strain is above the 90th percentile of all days.
strain_cutoff = daily_data['cycle_day_strain'].quantile(0.9)
high_strain_days = daily_data[daily_data['cycle_day_strain'] > strain_cutoff]
print("\nDays with highest strain:")
high_strain_columns = [
    'cycle_day_strain',
    'sleep_score',
    'recovery_recovery_score',
    'next_cycle_sleep_slow_wave_sleep_duration',
    'next_cycle_sleep_rem_sleep_duration',
    'next_cycle_sleep_respiratory_rate',
]
print(high_strain_days[high_strain_columns])
#%%
# Sleep duration summary, only when the source column was exported.
if 'sleep_quality_duration' not in daily_data.columns:
    print("\nSleep quality duration data not available")
else:
    daily_data['sleep_duration'] = daily_data['sleep_quality_duration'] / 3600000  # Convert to hours
    sleep_hours = daily_data['sleep_duration']
    print("\nAverage sleep duration:", sleep_hours.mean())
    print("Nights with less than 7 hours of sleep:", (sleep_hours < 7).sum())
#%%
# Workout summary over the days that have an intensity score.
if 'workout_intensity_score' not in daily_data.columns:
    print("\nWorkout intensity score data not available")
else:
    workout_days = daily_data.dropna(subset=['workout_intensity_score'])
    print("\nNumber of workout days:", len(workout_days))
    print("Average workout intensity:", workout_days['workout_intensity_score'].mean())
# %%
def scatter_plot(x_column, y_column, x_label, y_label):
    """Scatter two daily_data columns against each other on a new 12x6 figure.

    Args:
        x_column: Name of the daily_data column plotted on the x axis.
        y_column: Name of the daily_data column plotted on the y axis.
        x_label: Axis label for x.
        y_label: Axis label for y.

    The figure title is always "<x_label> vs <y_label>".
    """
    plt.figure(figsize=(12, 6))
    plt.scatter(daily_data[x_column], daily_data[y_column])
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(f'{x_label} vs {y_label}')
    plt.tight_layout()
    plt.show()


# NOTE: the cells below call scatter_plot, so this cell must be run first.
# explore relationship between next-cycle slow wave sleep duration and cycle_day_strain
scatter_plot('next_cycle_sleep_slow_wave_sleep_duration', 'cycle_day_strain',
             'Slow Wave Sleep Duration', 'Cycle Day Strain')
# %%
# same but using cycle_day_avg_heart_rate
scatter_plot('cycle_day_avg_heart_rate', 'next_cycle_sleep_slow_wave_sleep_duration',
             'Average Heart Rate', 'Slow Wave Sleep Duration')
# %%
# explore relationship between cycle_day_max_heart_rate and next-cycle slow wave sleep duration
scatter_plot('cycle_day_max_heart_rate', 'next_cycle_sleep_slow_wave_sleep_duration',
             'Max Heart Rate', 'Slow Wave Sleep Duration')
# %%
# explore relationship between cycle_day_max_heart_rate and next-cycle REM sleep duration
scatter_plot('cycle_day_max_heart_rate', 'next_cycle_sleep_rem_sleep_duration',
             'Max Heart Rate', 'REM Sleep Duration')
# %%
# explore relationship between cycle_day_max_heart_rate and next-cycle respiratory rate
scatter_plot('cycle_day_max_heart_rate', 'next_cycle_sleep_respiratory_rate',
             'Max Heart Rate', 'Respiratory Rate')
# The respiratory rate is recorded on the day after the cycle's max heart rate,
# so the shifted next_cycle_sleep_respiratory_rate column is the one to compare.
# NOTE(review): the plot above already uses the shifted column, so the cell
# below draws exactly the same figure again — confirm whether one of the two
# was meant to use the unshifted sleep_respiratory_rate.
#%%
scatter_plot('cycle_day_max_heart_rate', 'next_cycle_sleep_respiratory_rate',
             'Max Heart Rate', 'Respiratory Rate')
# %%
# cycle_scaled_strain and next_cycle_sleep_slow_wave_sleep_duration
scatter_plot('cycle_scaled_strain', 'next_cycle_sleep_slow_wave_sleep_duration',
             'Scaled Strain', 'Slow Wave Sleep Duration')
# %%
# recovery_spo2 vs next-cycle respiratory rate
scatter_plot('recovery_spo2', 'next_cycle_sleep_respiratory_rate',
             'SpO2', 'Respiratory Rate')
# %%
# Analyze sleep efficiency
scatter_plot('sleep_in_sleep_efficiency', 'sleep_score',
             'Sleep Efficiency', 'Sleep Score')
# Analyze impact of sleep consistency
scatter_plot('sleep_sleep_consistency', 'recovery_recovery_score',
             'Sleep Consistency', 'Recovery Score')
# %%
```