MacBook: Script to monitor the top disk reads and writes

The script below tracks disk usage on a MacBook for 20 seconds and then shows the processes with the highest disk utilisation.

#!/bin/bash

# Disk I/O Monitor for macOS
# Shows which processes are using disk I/O the most with full paths
#
# Usage: sudo <this script> [seconds]
#   seconds  how long to capture fs_usage output (default: 20)

# Capture window in seconds; an optional first argument overrides the default.
DURATION="${1:-20}"
# Width of the progress bar, in characters.
BAR_WIDTH=20

echo "Disk I/O Monitor for macOS"
echo "========================================"
echo ""

# Check for sudo
if [[ $EUID -ne 0 ]]; then
    echo "ERROR: This script requires sudo privileges" >&2
    echo "Please run: sudo $0" >&2
    exit 1
fi

# Reject anything that is not a positive whole number before it reaches the
# arithmetic below (a zero duration would divide by zero).
if ! [[ "$DURATION" =~ ^[1-9][0-9]*$ ]]; then
    echo "ERROR: duration must be a positive whole number of seconds (got '$DURATION')" >&2
    exit 2
fi

# Create temp file.  The script runs as root, so a predictable name in /tmp
# could be pre-created or symlinked by another user; mktemp picks an
# unpredictable name and creates the file itself.
TEMP_FILE="$(mktemp "${TMPDIR:-/tmp}/disk_io.XXXXXX")" || {
    echo "ERROR: could not create a temporary file" >&2
    exit 1
}
FS_PID=""

# Stop the background fs_usage (if it is still ours to stop) and remove the
# capture file.  Runs on every exit path, including Ctrl-C.
cleanup() {
    if [[ -n "$FS_PID" ]]; then
        kill "$FS_PID" 2>/dev/null
        wait "$FS_PID" 2>/dev/null
        FS_PID=""
    fi
    rm -f -- "$TEMP_FILE"
}
trap cleanup EXIT
# Turn signals into a normal exit so the EXIT trap above still fires.
trap 'exit 130' INT
trap 'exit 143' TERM

# Collect data
echo "Collecting disk I/O data for $DURATION seconds..."
fs_usage -w -f filesys 2>/dev/null > "$TEMP_FILE" &
FS_PID=$!

# Progress bar: one tick per second, filled proportionally to elapsed time.
for ((i = 1; i <= DURATION; i++)); do
    filled=$((i * BAR_WIDTH / DURATION))
    # Build a run of $filled spaces, then turn each space into '#'.
    # (Avoids `seq 1 0`, which counts down on BSD seq and over-fills the bar.)
    printf -v bar '%*s' "$filled" ''
    printf '\rProgress: [%-*s] %d/%d seconds' "$BAR_WIDTH" "${bar// /#}" "$i" "$DURATION"
    sleep 1
done
echo ""

# Stop collection
kill "$FS_PID" 2>/dev/null
wait "$FS_PID" 2>/dev/null
# Cleared so the EXIT trap does not signal a PID that is no longer ours.
FS_PID=""

echo ""
echo "Processing data..."

# Parse with Python - pass temp file as argument
python3 - "$TEMP_FILE" << 'PYTHON_END'
import re
import os
import sys
from collections import defaultdict
import subprocess

# Get temp file from argument
temp_file = sys.argv[1] if len(sys.argv) > 1 else '/tmp/disk_io_temp.txt'

# Storage for process stats, keyed by "name|number" as parsed from each line
stats = defaultdict(lambda: {'reads': 0, 'writes': 0, 'process_name': '', 'pid': ''})

# Trailing "processname.number" token at the end of an event line.
# NOTE(review): fs_usage(1) describes the number printed after the process
# name in -w mode as a thread ID rather than a PID - confirm. It is stored
# under 'pid' because that is the key the rest of the script reads.
PROCESS_SUFFIX = re.compile(r'(\S+)\.(\d+)\s*$')

# Leading time-of-day stamp; used to recognise the expected column layout.
TIMESTAMP = re.compile(r'\d{1,2}:\d{2}:\d{2}')

# Substrings that mark an operation as read-like / write-like.
READ_MARKERS = ('RdData', 'read', 'READ', 'getattrlist', 'stat64', 'lstat64', 'open')
WRITE_MARKERS = ('WrData', 'write', 'WRITE', 'close', 'fsync')


def classify_operation(line):
    """Return 'reads', 'writes' or None for one line of captured output.

    When the line starts with a timestamp, only the second column (the
    operation name) is inspected, so a file path or process name that merely
    contains "read", "open", "close", ... no longer changes the category.
    Lines without that layout fall back to matching against the whole line.
    Read markers are checked before write markers.
    """
    fields = line.split(None, 2)
    if len(fields) >= 2 and TIMESTAMP.match(fields[0]):
        subject = fields[1]
    else:
        subject = line

    if any(marker in subject for marker in READ_MARKERS):
        return 'reads'
    if any(marker in subject for marker in WRITE_MARKERS):
        return 'writes'
    return None


# Parse fs_usage output
try:
    # errors='replace': one undecodable byte in a path must not abort the run.
    with open(temp_file, 'r', errors='replace') as f:
        for line in f:
            # Look for lines with process info (format: processname.number at end of line)
            match = PROCESS_SUFFIX.search(line)
            if not match:
                continue

            process_name, pid = match.group(1), match.group(2)
            key = f"{process_name}|{pid}"

            # Store process info (an entry is created even if the operation
            # below turns out to be neither read-like nor write-like)
            stats[key]['process_name'] = process_name
            stats[key]['pid'] = pid

            # Categorize operation
            category = classify_operation(line)
            if category is not None:
                stats[key][category] += 1
except Exception as e:
    print(f"Error reading file: {e}", file=sys.stderr)
    sys.exit(1)

# Calculate totals
total_ops = sum(s['reads'] + s['writes'] for s in stats.values())

# Get executable paths
def _run_quiet(cmd):
    """Run cmd and return its stdout as text, or '' if it could not be run.

    stderr is discarded. stdout/stderr are passed explicitly because
    subprocess.run() rejects capture_output=True combined with stderr=...
    (it raises ValueError), which previously made every lookup fail.
    """
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.DEVNULL, text=True)
    except (OSError, ValueError, TypeError, subprocess.SubprocessError):
        # Tool missing, bad argument, etc. - treat as "no output".
        return ''
    return result.stdout or ''


def get_exe_path(process_name, pid):
    """Best-effort lookup of the executable path for a process.

    Args:
        process_name: name as parsed from the captured output; also the
            final fallback return value.
        pid: process id as a string, passed to `lsof -p` and `ps -p`.
            NOTE(review): the caller takes this number from the end of the
            fs_usage line; fs_usage(1) describes it as a thread ID in -w
            mode - confirm. If so, these lookups will often find nothing.

    Returns:
        The first of: a matching path reported by lsof, the first word of
        `ps -o command=` if it exists on disk, the output of `ps -o comm=`,
        or process_name. Never raises for lookup failures.
    """
    # Method 1: Try lsof with format output ('n' lines carry file names)
    lsof_out = _run_quiet(['lsof', '-p', pid, '-Fn'])
    paths = [line[1:].strip() for line in lsof_out.split('\n') if line.startswith('n')]

    # Look for the best path: first entry, in lsof order, matching any rule
    for path in paths:
        if '/Contents/MacOS/' in path and process_name in path:
            return path
        elif path.endswith('.app'):
            return path
        elif any(p in path for p in ['/bin/', '/sbin/', '/usr/']) and not path.endswith(('.dylib', '.so')):
            return path

    # Method 2: Try ps - accept the first word of the command line only if
    # it names an existing file
    command_line = _run_quiet(['ps', '-p', pid, '-o', 'command=']).strip()
    if command_line:
        cmd = command_line.split()[0]
        if os.path.exists(cmd):
            return cmd

    # Method 3: Return command name from ps
    comm = _run_quiet(['ps', '-p', pid, '-o', 'comm=']).strip()
    if comm:
        return comm

    # Last resort: return process name
    return process_name

# Order processes from most to least active (reads and writes combined)
sorted_stats = sorted(
    stats.items(),
    key=lambda item: item[1]['reads'] + item[1]['writes'],
    reverse=True,
)

# Table layout shared by the header, the rows and the separator rules
HEADER_FORMAT = "\n%-30s %-8s %-45s %8s %8s %8s %7s %7s"
ROW_FORMAT = "%-30s %-8s %-45s %8d %8d %8d %6.1f%% %6.1f%%"
RULE = "=" * 140
MAX_ROWS = 20
PATH_WIDTH = 45

# Column titles followed by a separator rule
print(HEADER_FORMAT %
      ("Process Name", "PID", "Executable Path", "Reads", "Writes", "Total", "Read%", "Write%"))
print(RULE)

# Drop processes with no counted operations, then report the first MAX_ROWS
active = [entry for _, entry in sorted_stats if entry['reads'] + entry['writes'] != 0]
for entry in active[:MAX_ROWS]:
    reads = entry['reads']
    writes = entry['writes']

    # Percentages are relative to all captured operations, not to this row
    if total_ops > 0:
        read_pct = reads * 100.0 / total_ops
        write_pct = writes * 100.0 / total_ops
    else:
        read_pct = write_pct = 0

    # Resolve the executable path; over-long paths keep their tail after "..."
    exe_path = get_exe_path(entry['process_name'], entry['pid'])
    if len(exe_path) > PATH_WIDTH:
        exe_path = "..." + exe_path[-42:]

    print(ROW_FORMAT %
          (entry['process_name'][:30],
           entry['pid'],
           exe_path,
           reads,
           writes,
           reads + writes,
           read_pct,
           write_pct))

print(RULE)
print(f"Total I/O operations captured: {total_ops}")

PYTHON_END

# Remove the capture file now that the report has been printed
rm -f "$TEMP_FILE"

# Blank line, then the closing status message
printf '\n%s\n' "Monitoring complete."

Example output:

Disk I/O Monitor for macOS
========================================

Collecting disk I/O data for 20 seconds...
Progress: [####################] 20/20 seconds

Processing data...

Process Name                   PID      Executable Path                                  Reads   Writes    Total   Read%  Write%
============================================================================================================================================
Chrome                         4719678  Chrome                                             427      811     1238    3.1%    5.9%
UPMServiceController           4644625  UPMServiceController                               423      587     1010    3.1%    4.3%
UPMServiceController           4014337  UPMServiceController                               468      309      777    3.4%    2.2%
wsdlpd                         3060029  wsdlpd                                             154      370      524    1.1%    2.7%
tccd                           4743441  tccd                                               359       48      407    2.6%    0.3%
tccd                           4742031  tccd                                               358       48      406    2.6%    0.3%
com.crowdstrike.falcon.Agent   6174     com.crowdstrike.falcon.Agent                       301        5      306    2.2%    0.0%
UPMServiceContro               4644625  UPMServiceContro                                    12      285      297    0.1%    2.1%
mds_stores                     4736869  mds_stores                                         204       71      275    1.5%    0.5%
EndPointClassifier             6901     EndPointClassifier                                  40      231      271    0.3%    1.7%

Leave a Reply

Your email address will not be published. Required fields are marked *