![[Pasted image 20250101103706.png]]

## Summary
- A [WHOOP](https://www.whoop.com/us/en/) is a wearable fitness tracker that monitors health metrics such as:
    - Sleep quality
    - Respiratory rate
    - Physical strain
    - HRV [(still experimental?)](https://www.health.harvard.edu/blog/heart-rate-variability-new-way-track-well-2017112212789)
- This project uses request interception to grab the data that's displayed on WHOOP's web app.
    - This allows us to perform our own analysis on our collected health data.
    - This opens the door for us to do things like:
        - Experiment with interaction features
        - Explore cause-and-effect relationships between daily activities and recovery patterns
            - "How does my daily activity level affect my sleeping patterns?"
            - "How do sleep patterns affect my overall recovery?"
        - Join with health data from other devices

![[Pasted image 20250101104953.png]]

## Code
### xhr_scrape.py
- This is the script that uses mitmproxy to intercept requests to WHOOP's API.
- The typical workflow here is to:
    - Run a browser that uses a proxy on the same port that this script runs on (8080).
    - Navigate to the WHOOP web app and view your dashboard.
    - The API responses are intercepted, flattened, and saved as timestamped CSV files in `whoop_data/`.

```python
# xhr_scrape.py
"""mitmproxy addon that captures WHOOP cycle data and snapshots it to CSV.

Every response from the cycles "aggregate/range" endpoint is flattened into
one row per record, appended to an in-memory list, and the whole list is
written to a new timestamped CSV file under ``whoop_data/``.
"""
import json
import os
from datetime import datetime

import pandas as pd
from mitmproxy import http

# URL fragment identifying the only endpoint this addon processes.
TARGET_ENDPOINT = "api.prod.whoop.com/activities-service/v1/cycles/aggregate/range"

# Directory (relative to the working directory) that receives the CSV files.
OUTPUT_DIR = "whoop_data"

# Flattened records accumulated across all intercepted responses. The list is
# only mutated (never rebound), so no `global` statement is required.
whoop_data = []


def extract_nested_data(data, prefix=''):
    """Flatten a nested dict into a single-level dict.

    Nested dict keys are joined with underscores (``{"a": {"b": 1}}`` with
    prefix ``"x_"`` becomes ``{"x_a_b": 1}``). Lists (e.g. zone_durations)
    are stored as JSON strings so they fit into one CSV cell.

    Args:
        data: The dict to flatten. ``None`` (a JSON ``null``) or an empty
            dict yields an empty result instead of raising.
        prefix: String prepended to every generated key.

    Returns:
        A new flat dict; *data* is not modified.
    """
    extracted = {}
    for key, value in (data or {}).items():
        if isinstance(value, dict):
            extracted.update(extract_nested_data(value, f"{prefix}{key}_"))
        elif isinstance(value, list):
            # Handle lists (e.g., zone_durations)
            extracted[f"{prefix}{key}"] = json.dumps(value)
        else:
            extracted[f"{prefix}{key}"] = value
    return extracted


def _first_or_empty(items):
    """Return the first element of *items*, or {} if it is missing, null or empty."""
    return items[0] if items else {}


def _flatten_record(record):
    """Turn one API record into a flat row with cycle_/sleep_/recovery_/workout_ keys.

    Only the first entry of ``sleeps`` and ``workouts`` is kept; additional
    entries in the same record are ignored.
    """
    row = {}
    row.update(extract_nested_data(record.get('cycle'), 'cycle_'))
    row.update(extract_nested_data(_first_or_empty(record.get('sleeps')), 'sleep_'))
    row.update(extract_nested_data(record.get('recovery'), 'recovery_'))
    row.update(extract_nested_data(_first_or_empty(record.get('workouts')), 'workout_'))
    return row


def _save_snapshot(rows):
    """Write *rows* to a new timestamped CSV in OUTPUT_DIR and return its path."""
    df = pd.DataFrame(rows)
    if 'cycle_created_at' in df.columns:
        df['cycle_created_at'] = pd.to_datetime(df['cycle_created_at'])
        df = df.sort_values('cycle_created_at')

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    output_file = os.path.join(
        OUTPUT_DIR, f"whoop_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    )
    df.to_csv(output_file, index=False)
    return output_file


def response(flow: http.HTTPFlow) -> None:
    """mitmproxy response hook: capture WHOOP cycle records and save them.

    Responses from other URLs are ignored. Problems are reported with
    ``print`` rather than raised, so a bad payload never interrupts the
    proxied browsing session.

    Args:
        flow: The intercepted HTTP flow; its request URL selects the
            endpoint and its response body is parsed as JSON.
    """
    # filter by this endpoint
    if TARGET_ENDPOINT not in flow.request.url or flow.response is None:
        return

    try:
        response_data = json.loads(flow.response.content)
    except (ValueError, TypeError):
        # ValueError covers JSONDecodeError and undecodable bytes;
        # TypeError covers a response whose content is missing (None).
        print("Failed to parse JSON response")
        return

    try:
        records = response_data.get('records') or []
        # Flatten everything first so a failure halfway through a batch does
        # not leave partial rows in the shared list.
        new_rows = [_flatten_record(record) for record in records]
        print(f"Extracted data from {len(new_rows)} records")
        if not new_rows:
            # Nothing new to persist; skip writing a duplicate snapshot.
            return

        whoop_data.extend(new_rows)
        output_file = _save_snapshot(whoop_data)
        print(f"Data saved to {output_file}")
    except Exception as e:  # hook boundary: report and keep the proxy running
        print(f"Error processing response: {e}")


# To use this script with
# mitmproxy, run:
#   mitmproxy -s xhr_scrape.py
# or
#   mitmdump -s xhr_scrape.py
```

### analysis.py
- Processes the CSV files created by the interception script.
- Typical pandas DataFrame analysis.

```python
# analysis.py
#%%
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Timestamp columns that are parsed to datetimes when present in the CSV.
DATE_COLUMNS = ['cycle_created_at', 'cycle_updated_at',
                'sleep_created_at', 'sleep_updated_at',
                'recovery_created_at', 'recovery_updated_at',
                'workout_created_at', 'workout_updated_at']

# Sleep metrics for which a "next_cycle_<name>" lag feature is generated.
LAG_SOURCE_COLUMNS = ['sleep_slow_wave_sleep_duration',
                      'sleep_rem_sleep_duration',
                      'sleep_respiratory_rate']


#%%
def _existing_columns(df, columns):
    """Return the entries of *columns* that are actual columns of *df*."""
    return [col for col in columns if col in df.columns]


def process_cycle_data(df):
    """Collapse *df* to one row per cycle and add next-cycle sleep features.

    Args:
        df: DataFrame with at least ``cycle_id`` and ``cycle_created_at``
            columns. The same cycle may appear on several rows (the scraper
            appends every intercepted response to its output).

    Returns:
        A new DataFrame with one row per ``cycle_id``, ordered by
        ``cycle_created_at``, plus a ``next_cycle_<col>`` column for every
        column of LAG_SOURCE_COLUMNS that is present. The last row has NaN
        in those columns because it has no successor.
    """
    df = df.sort_values(['cycle_id', 'cycle_created_at'])
    # GroupBy.last() keeps the last non-null value of each column per cycle.
    last_records = df.groupby('cycle_id').last().reset_index()

    # Order by time (not by id) so that shift(-1) really is the following cycle.
    last_records = (last_records
                    .sort_values('cycle_created_at', kind='stable')
                    .reset_index(drop=True))

    #lag features
    for col in _existing_columns(last_records, LAG_SOURCE_COLUMNS):
        last_records[f'next_cycle_{col}'] = last_records[col].shift(-1)

    return last_records


def load_and_process_data(file_path):
    """Load a scraped WHOOP CSV into a per-cycle, time-indexed DataFrame.

    Args:
        file_path: Path of a CSV file written by xhr_scrape.py.

    Returns:
        DataFrame with one row per cycle, indexed and sorted by
        ``cycle_created_at``, including the ``next_cycle_*`` lag features
        added by process_cycle_data.

    Raises:
        KeyError: If the CSV lacks ``cycle_id`` or ``cycle_created_at``.
    """
    df = pd.read_csv(file_path)

    for col in _existing_columns(df, DATE_COLUMNS):
        df[col] = pd.to_datetime(df[col])

    df = process_cycle_data(df)
    return df.set_index('cycle_created_at').sort_index()


def calculate_daily_data(df):
    """Average every numeric column per calendar day.

    Args:
        df: DataFrame with a DatetimeIndex (as returned by
            load_and_process_data).

    Returns:
        DataFrame with one row per date (DatetimeIndex) holding the mean of
        each numeric column; non-numeric columns are dropped.
    """
    numeric_df = df.select_dtypes(include=[np.number])
    daily_data = numeric_df.groupby(numeric_df.index.date).mean()
    daily_data.index = pd.to_datetime(daily_data.index)
    return daily_data


def plot_trends(df, columns, title):
    """Plot each of *columns* that exists in *df* against the index on one figure."""
    plt.figure(figsize=(12, 6))
    for col in _existing_columns(df, columns):
        plt.plot(df.index, df[col], label=col)
    plt.title(title)
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


def plot_correlations(df, columns):
    """Draw an annotated correlation heatmap of those *columns* present in *df*."""
    existing_columns = _existing_columns(df, columns)
    if not existing_columns:
        # An empty correlation matrix cannot be drawn as a heatmap.
        print("No requested columns available for the correlation heatmap")
        return
    corr = df[existing_columns].corr()
    plt.figure(figsize=(10, 8))
    sns.heatmap(corr, annot=True, cmap='coolwarm')
    plt.title('Correlation Heatmap')
    plt.tight_layout()
    plt.show()


def plot_scatter(df, x_col, y_col, x_label, y_label):
    """Scatter *y_col* against *x_col*, titled "<x_label> vs <y_label>".

    Prints a notice and draws nothing when either column is missing.
    """
    missing = [col for col in (x_col, y_col) if col not in df.columns]
    if missing:
        print(f"Skipping '{x_label} vs {y_label}': missing columns {missing}")
        return
    plt.figure(figsize=(12, 6))
    plt.scatter(df[x_col], df[y_col])
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(f'{x_label} vs {y_label}')
    plt.tight_layout()
    plt.show()


#%%
data_dir = 'whoop_data'

# Main processing
file_path = f'{data_dir}/whoop_data_20241021_151310.csv'
df = load_and_process_data(file_path)

#%%
daily_data = calculate_daily_data(df)

#%%
# The next_cycle_* features are already added by load_and_process_data,
# so no further on-the-fly preprocessing is needed here.

#%%
plot_trends(daily_data,
            ['cycle_day_strain', 'sleep_score', 'recovery_recovery_score',
             'next_cycle_sleep_slow_wave_sleep_duration',
             'next_cycle_sleep_rem_sleep_duration',
             'next_cycle_sleep_respiratory_rate'],
            'Daily Trends')

#%%
correlation_columns = ['cycle_day_strain', 'sleep_score', 'recovery_recovery_score',
                       'workout_intensity_score', 'recovery_resting_heart_rate',
                       'recovery_hrv_rmssd',
                       'next_cycle_sleep_slow_wave_sleep_duration',
                       'next_cycle_sleep_rem_sleep_duration',
                       'next_cycle_sleep_respiratory_rate']
plot_correlations(daily_data, correlation_columns)

#%%
# Optional columns (e.g. workout_intensity_score) may be absent from a scrape,
# so only describe the ones that exist.
summary_columns = _existing_columns(
    daily_data,
    ['cycle_day_strain', 'sleep_score', 'recovery_recovery_score',
     'workout_intensity_score'])
print(daily_data[summary_columns].describe())

#%%
strain_cutoff = daily_data['cycle_day_strain'].quantile(0.9)
high_strain_days = daily_data[daily_data['cycle_day_strain'] > strain_cutoff]
print("\nDays with highest strain:")
print(high_strain_days[_existing_columns(
    high_strain_days,
    ['cycle_day_strain', 'sleep_score', 'recovery_recovery_score',
     'next_cycle_sleep_slow_wave_sleep_duration',
     'next_cycle_sleep_rem_sleep_duration',
     'next_cycle_sleep_respiratory_rate'])])

#%%
if 'sleep_quality_duration' in daily_data.columns:
    # Convert to hours (assumes the raw value is in milliseconds - TODO confirm)
    daily_data['sleep_duration'] = daily_data['sleep_quality_duration'] / 3600000
    print("\nAverage sleep duration:", daily_data['sleep_duration'].mean())
    print("Nights with less than 7 hours of sleep:", (daily_data['sleep_duration'] < 7).sum())
else:
    print("\nSleep quality duration data not available")

#%%
if 'workout_intensity_score' in daily_data.columns:
    workout_days = daily_data[daily_data['workout_intensity_score'].notnull()]
    print("\nNumber of workout days:", len(workout_days))
    print("Average workout intensity:", workout_days['workout_intensity_score'].mean())
else:
    print("\nWorkout intensity score data not available")

# %%
# Sleep is recorded on the cycle after the activity that might influence it,
# so the plots below pair each day's activity with the *next* cycle's sleep
# metrics (the next_cycle_* columns).

# explore relationship between next-cycle slow wave sleep duration and cycle_day_strain
plot_scatter(daily_data,
             'next_cycle_sleep_slow_wave_sleep_duration', 'cycle_day_strain',
             'Slow Wave Sleep Duration', 'Cycle Day Strain')

# %%
# same but using cycle_day_avg_heart_rate
plot_scatter(daily_data,
             'cycle_day_avg_heart_rate', 'next_cycle_sleep_slow_wave_sleep_duration',
             'Average Heart Rate', 'Slow Wave Sleep Duration')

# %%
# explore relationship between cycle_day_max_heart_rate and next-cycle slow wave sleep duration
plot_scatter(daily_data,
             'cycle_day_max_heart_rate', 'next_cycle_sleep_slow_wave_sleep_duration',
             'Max Heart Rate', 'Slow Wave Sleep Duration')

# %%
# explore relationship between cycle_day_max_heart_rate and next-cycle REM sleep duration
plot_scatter(daily_data,
             'cycle_day_max_heart_rate', 'next_cycle_sleep_rem_sleep_duration',
             'Max Heart Rate', 'REM Sleep Duration')

# %%
# explore relationship between cycle_day_max_heart_rate and respiratory rate.
# The respiratory rate is recorded on the day after the max heart rate, so the
# shifted (next-cycle) respiratory rate is used.
plot_scatter(daily_data,
             'cycle_day_max_heart_rate', 'next_cycle_sleep_respiratory_rate',
             'Max Heart Rate', 'Respiratory Rate')

# %%
# cycle_scaled_strain and next_cycle_sleep_slow_wave_sleep_duration
plot_scatter(daily_data,
             'cycle_scaled_strain', 'next_cycle_sleep_slow_wave_sleep_duration',
             'Scaled Strain', 'Slow Wave Sleep Duration')

# %%
# recovery_spo2 vs next-cycle respiratory rate
plot_scatter(daily_data,
             'recovery_spo2', 'next_cycle_sleep_respiratory_rate',
             'SpO2', 'Respiratory Rate')

# %%
# Analyze sleep efficiency
plot_scatter(daily_data,
             'sleep_in_sleep_efficiency', 'sleep_score',
             'Sleep Efficiency', 'Sleep Score')

# Analyze impact of sleep consistency
plot_scatter(daily_data,
             'sleep_sleep_consistency', 'recovery_recovery_score',
             'Sleep Consistency', 'Recovery Score')
```