The script below tracks disk I/O activity on a MacBook for 20 seconds and then shows the processes with the highest disk utilisation.
#!/bin/bash
# Disk I/O Monitor for macOS
# Shows which processes are using disk I/O the most with full paths
#
# Usage: sudo <this script> [seconds]
#   seconds  how long to sample fs_usage for (positive integer, default 20)

DURATION=${1:-20}
BAR_WIDTH=20 # number of '#' cells; must match the %-20s field in the progress line

echo "Disk I/O Monitor for macOS"
echo "========================================"
echo ""

# Check for sudo
if [[ $EUID -ne 0 ]]; then
  echo "ERROR: This script requires sudo privileges" >&2
  echo "Please run: sudo $0" >&2
  exit 1
fi

if ! [[ $DURATION =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: duration must be a positive integer (got '$DURATION')" >&2
  exit 1
fi

# fs_usage's stderr is discarded below, so detect a missing tool up front
# instead of silently producing an empty report.
if ! command -v fs_usage >/dev/null 2>&1; then
  echo "ERROR: fs_usage not found (this script only works on macOS)" >&2
  exit 1
fi

# Create temp file. mktemp picks an unpredictable name and creates the file
# exclusively; a fixed /tmp/disk_io_<pid>.txt written as root could be
# pre-created as a symlink by another local user.
TEMP_FILE=$(mktemp /tmp/disk_io.XXXXXX) || {
  echo "ERROR: could not create temporary file" >&2
  exit 1
}
export TEMP_FILE

FS_PID=""

# Runs on every exit path: stop the collector if it is still alive and remove
# the capture file.
cleanup() {
  if [[ -n "$FS_PID" ]]; then
    kill "$FS_PID" 2>/dev/null
  fi
  rm -f -- "$TEMP_FILE"
}
trap cleanup EXIT
# Turn signals into a normal exit so the EXIT trap above always fires.
trap 'exit 130' INT
trap 'exit 143' TERM

# Collect data
echo "Collecting disk I/O data for $DURATION seconds..."
fs_usage -w -f filesys 2>/dev/null >"$TEMP_FILE" &
FS_PID=$!

# Progress bar. The bar is built with printf padding rather than `seq`,
# because BSD seq counts *down* for `seq 1 0` and would draw two cells when
# zero are due (which happens whenever DURATION > BAR_WIDTH).
for ((i = 1; i <= DURATION; i++)); do
  filled=$((i * BAR_WIDTH / DURATION))
  printf -v bar '%*s' "$filled" ''
  printf '\rProgress: [%-20s] %d/%d seconds' "${bar// /#}" "$i" "$DURATION"
  sleep 1
done
echo ""

# Stop collection
kill "$FS_PID" 2>/dev/null
wait "$FS_PID" 2>/dev/null
FS_PID=""

echo ""
echo "Processing data..."
# Parse with Python - pass temp file as argument.
# The heredoc delimiter is quoted, so the shell performs no expansion inside it.
python3 - "$TEMP_FILE" << 'PYTHON_END'
"""Summarise captured fs_usage output as a per-process table of I/O call counts."""
import os
import re
import subprocess
import sys
from collections import defaultdict

# Get temp file from argument (the literal path is only a fallback).
temp_file = sys.argv[1] if len(sys.argv) > 1 else '/tmp/disk_io_temp.txt'

# Call-name fragments used to bucket each event as a read or a write.
READ_OPS = ('RdData', 'read', 'READ', 'getattrlist', 'stat64', 'lstat64', 'open')
WRITE_OPS = ('WrData', 'write', 'WRITE', 'close', 'fsync')

# Trailing "<name>.<digits>" token at the end of a line.
# NOTE(review): the fs_usage man page says wide mode appends the *thread id*
# to the process name, so the digits are probably a TID rather than a PID -
# confirm. The "PID" column label is kept unchanged; get_exe_path() below
# guards against treating an unrelated id as a pid.
PROC_RE = re.compile(r'(\S+)\.(\d+)\s*$')

TOP_N = 20

# Storage for process stats, keyed by "<name>|<id>".
stats = defaultdict(lambda: {'reads': 0, 'writes': 0, 'process_name': '', 'pid': ''})

# Parse fs_usage output
try:
    # errors='replace': path names in the capture may not be valid UTF-8, and a
    # single bad byte should not abort the whole report.
    with open(temp_file, 'r', errors='replace') as f:
        for line in f:
            match = PROC_RE.search(line)
            if not match:
                continue
            process_name = match.group(1)
            pid = match.group(2)
            entry = stats[f"{process_name}|{pid}"]
            entry['process_name'] = process_name
            entry['pid'] = pid

            # Categorize on the call column only (second field, after the
            # timestamp). Matching against the whole line would also hit path
            # names, e.g. any path containing "thread" or "open".
            fields = line.split()
            call = fields[1] if len(fields) > 2 else ''
            if any(op in call for op in READ_OPS):
                entry['reads'] += 1
            elif any(op in call for op in WRITE_OPS):
                entry['writes'] += 1
except OSError as e:
    print(f"Error reading file: {e}", file=sys.stderr)
    sys.exit(1)

# Calculate totals
total_ops = sum(s['reads'] + s['writes'] for s in stats.values())


def _run(cmd):
    """Return the stdout of cmd as text, or '' if it cannot be run.

    stderr is discarded. stdout/stderr are passed explicitly because
    subprocess.run() raises ValueError when capture_output=True is combined
    with a stderr argument.
    """
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.DEVNULL, text=True)
    except (OSError, subprocess.SubprocessError):
        return ''
    return result.stdout


_paths_by_name = None


def _lookup_by_name(process_name):
    """Best-effort executable path for a process name, or None.

    Builds (once) a map from executable basename to the value `ps` reports in
    its comm column for every running process.
    """
    global _paths_by_name
    if _paths_by_name is None:
        _paths_by_name = {}
        for path in _run(['ps', '-axo', 'comm=']).splitlines():
            path = path.strip()
            if path:
                _paths_by_name.setdefault(os.path.basename(path), path)
    if process_name in _paths_by_name:
        return _paths_by_name[process_name]
    # Names in the capture can be cut short (presumably at 16 characters -
    # verify), so allow a prefix match for names of at least that length.
    if len(process_name) >= 16:
        for base, path in _paths_by_name.items():
            if base.startswith(process_name):
                return path
    return None


# Get executable paths
def get_exe_path(process_name, pid):
    """Return the best available executable path for a captured process.

    process_name: name token parsed from the capture.
    pid: numeric suffix parsed from the capture, as a string.
    Falls back to process_name itself when nothing better can be found.
    """
    comm = _run(['ps', '-p', pid, '-o', 'comm=']).strip()
    # Only trust id-based lookups when that id really is a live process whose
    # executable name contains the captured name; otherwise the id may be a
    # thread id that merely collides with an unrelated pid.
    if comm and process_name in os.path.basename(comm):
        # Method 1: Try lsof with format output
        paths = [entry[1:].strip()
                 for entry in _run(['lsof', '-p', pid, '-Fn']).split('\n')
                 if entry.startswith('n')]
        # Look for the best path
        for path in paths:
            if '/Contents/MacOS/' in path and process_name in path:
                return path
            elif path.endswith('.app'):
                return path
            elif (any(p in path for p in ['/bin/', '/sbin/', '/usr/'])
                  and not any(path.endswith(ext) for ext in ['.dylib', '.so'])):
                return path

        # Method 2: Try ps (first word of the full command line, if it exists)
        command = _run(['ps', '-p', pid, '-o', 'command=']).strip()
        if command:
            cmd = command.split()[0]
            if os.path.exists(cmd):
                return cmd

        # Method 3: Return command name from ps
        return comm

    # Method 4: look the process up by name instead of by id
    by_name = _lookup_by_name(process_name)
    if by_name:
        return by_name

    # Last resort: return process name
    return process_name


# Sort by total operations
sorted_stats = sorted(stats.items(),
                      key=lambda x: x[1]['reads'] + x[1]['writes'],
                      reverse=True)

# Print header
print("\n%-30s %-8s %-45s %8s %8s %8s %7s %7s" %
      ("Process Name", "PID", "Executable Path", "Reads", "Writes", "Total", "Read%", "Write%"))
print("=" * 140)

# Print top 20 processes
count = 0
for key, data in sorted_stats:
    total = data['reads'] + data['writes']
    if total == 0:
        continue

    # Percentages are shares of ALL captured operations, not of this row.
    read_pct = (data['reads'] * 100.0 / total_ops) if total_ops > 0 else 0
    write_pct = (data['writes'] * 100.0 / total_ops) if total_ops > 0 else 0

    # Get executable path; keep the tail when it is too long for the column
    exe_path = get_exe_path(data['process_name'], data['pid'])
    if len(exe_path) > 45:
        exe_path = "..." + exe_path[-42:]

    print("%-30s %-8s %-45s %8d %8d %8d %6.1f%% %6.1f%%" %
          (data['process_name'][:30],
           data['pid'],
           exe_path,
           data['reads'],
           data['writes'],
           total,
           read_pct,
           write_pct))

    count += 1
    if count >= TOP_N:
        break

print("=" * 140)
print(f"Total I/O operations captured: {total_ops}")
PYTHON_END
# Remove the capture file (-f: no complaint if it is already gone), then
# print a blank line followed by the closing message.
rm -f "$TEMP_FILE"
printf '\n%s\n' "Monitoring complete."
Example output:
Disk I/O Monitor for macOS
========================================
Collecting disk I/O data for 20 seconds...
Progress: [####################] 20/20 seconds
Processing data...
Process Name PID Executable Path Reads Writes Total Read% Write%
============================================================================================================================================
Chrome 4719678 Chrome 427 811 1238 3.1% 5.9%
UPMServiceController 4644625 UPMServiceController 423 587 1010 3.1% 4.3%
UPMServiceController 4014337 UPMServiceController 468 309 777 3.4% 2.2%
wsdlpd 3060029 wsdlpd 154 370 524 1.1% 2.7%
tccd 4743441 tccd 359 48 407 2.6% 0.3%
tccd 4742031 tccd 358 48 406 2.6% 0.3%
com.crowdstrike.falcon.Agent 6174 com.crowdstrike.falcon.Agent 301 5 306 2.2% 0.0%
UPMServiceContro 4644625 UPMServiceContro 12 285 297 0.1% 2.1%
mds_stores 4736869 mds_stores 204 71 275 1.5% 0.5%
EndPointClassifier 6901 EndPointClassifier 40 231 271 0.3% 1.7%