Macbook: Script to monitor the top disk reads and writes

The script below tracks the disk usage of a MacBook for 20 seconds and then shows the processes with the highest disk utilisation.

#!/bin/bash

# Disk I/O Monitor for macOS
# Shows which processes are using disk I/O the most with full paths
#
# Usage: sudo <this script>
#
# This first stage records raw `fs_usage` output for DURATION seconds into a
# private temp file (path exported as TEMP_FILE) for the analysis stage below.

DURATION=20     # seconds of activity to sample
BAR_WIDTH=20    # width of the progress bar, in characters

echo "Disk I/O Monitor for macOS"
echo "========================================"
echo ""

# Check for sudo (fs_usage needs root to trace other processes)
if [[ $EUID -ne 0 ]]; then
    echo "ERROR: This script requires sudo privileges" >&2
    echo "Please run: sudo $0" >&2
    exit 1
fi

# Fail before sampling rather than after a 20 second wait
if ! command -v python3 >/dev/null 2>&1; then
    echo "ERROR: python3 is required to process the captured data" >&2
    exit 1
fi

# Create temp file. mktemp gives an unpredictable, freshly created file; a
# guessable /tmp name written to by root can be pre-created or symlinked.
TEMP_FILE=$(mktemp "${TMPDIR:-/tmp}/disk_io.XXXXXX") || {
    echo "ERROR: could not create a temporary file" >&2
    exit 1
}
export TEMP_FILE

FS_PID=""

# Runs on every exit path (including Ctrl+C) so that neither the background
# fs_usage nor the capture file is left behind.
cleanup() {
    if [[ -n "$FS_PID" ]]; then
        kill "$FS_PID" 2>/dev/null
    fi
    rm -f -- "$TEMP_FILE"
}
trap cleanup EXIT
trap 'exit 130' INT TERM

# Collect data
echo "Collecting disk I/O data for $DURATION seconds..."
fs_usage -w -f filesys 2>/dev/null > "$TEMP_FILE" &
FS_PID=$!

# Progress bar. The bar is built with printf padding instead of `seq`, because
# BSD seq counts downwards for `seq 1 0` and would draw hashes for an empty bar.
for ((i = 1; i <= DURATION; i++)); do
    filled=$((i * BAR_WIDTH / DURATION))
    printf -v bar '%*s' "$filled" ''
    printf '\rProgress: [%-*s] %d/%d seconds' "$BAR_WIDTH" "${bar// /#}" "$i" "$DURATION"
    sleep 1
done
echo ""

# Stop collection
kill "$FS_PID" 2>/dev/null
wait "$FS_PID" 2>/dev/null
FS_PID=""

# fs_usage's stderr is discarded above, so an empty capture is the only sign
# that it failed to start.
if [[ ! -s "$TEMP_FILE" ]]; then
    echo "WARNING: fs_usage produced no output; the report will be empty" >&2
fi

echo ""
echo "Processing data..."

# Parse with Python - pass temp file as argument

#######################################
# Summarise an fs_usage capture as a per-process table of read/write counts.
# Arguments: $1 - path to a file containing `fs_usage -w -f filesys` output
# Outputs:   the top 20 processes by operation count, written to stdout
# Returns:   python3's exit status (1 if the capture cannot be read)
#######################################
analyze_disk_io() {
    python3 - "$1" << 'PYTHON_END'
import re
import os
import sys
from collections import defaultdict
import subprocess

# Get temp file from argument
temp_file = sys.argv[1] if len(sys.argv) > 1 else '/tmp/disk_io_temp.txt'

# Trailing "processname.number" column of an fs_usage line.
# NOTE(review): the fs_usage man page says wide mode (-w) appends the *thread*
# id, not the PID, and the 7-digit values in real output agree with that. If
# so, the lsof/ps lookups below cannot match and rows are per thread - confirm.
PROCESS_RE = re.compile(r'(\S+)\.(\d+)\s*$')

# Call-name fragments counted as reads / writes (substring match on the call).
READ_OPS = ('RdData', 'read', 'READ', 'getattrlist', 'stat64', 'lstat64', 'open')
WRITE_OPS = ('WrData', 'write', 'WRITE', 'close', 'fsync')


def classify(call):
    """Return 'reads', 'writes' or None for an fs_usage call name."""
    if any(op in call for op in READ_OPS):
        return 'reads'
    if any(op in call for op in WRITE_OPS):
        return 'writes'
    return None


# Storage for process stats
stats = defaultdict(lambda: {'reads': 0, 'writes': 0, 'process_name': '', 'pid': ''})

# Parse fs_usage output
try:
    # errors='replace': file paths in the trace are not guaranteed to be UTF-8
    with open(temp_file, 'r', errors='replace') as f:
        for line in f:
            # Look for lines with process info (format: processname.pid at end of line)
            match = PROCESS_RE.search(line)
            if not match:
                continue
            fields = line.split()
            # Expect at least: timestamp, call name, process column
            if len(fields) < 3:
                continue

            process_name = match.group(1)
            pid = match.group(2)
            key = f"{process_name}|{pid}"

            # Store process info
            stats[key]['process_name'] = process_name
            stats[key]['pid'] = pid

            # Categorize on the call column only. Matching the whole line would
            # count e.g. every call made by "opendirectoryd", or any write to
            # "thread.log", as a read.
            kind = classify(fields[1])
            if kind:
                stats[key][kind] += 1
except Exception as e:
    print(f"Error reading file: {e}")
    sys.exit(1)

# Calculate totals
total_ops = sum(s['reads'] + s['writes'] for s in stats.values())


def run_quiet(cmd):
    """Run cmd and return its stdout as text ('' if it cannot be started).

    stderr is discarded. stdout/stderr are passed explicitly because combining
    capture_output=True with stderr= makes subprocess.run raise ValueError.
    """
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.DEVNULL, text=True)
    except OSError:
        return ''
    return result.stdout


# Get executable paths
def get_exe_path(process_name, pid):
    """Best-effort executable path for a process; falls back to process_name.

    process_name -- name as printed by fs_usage
    pid          -- numeric id (string) taken from the fs_usage process column
    """
    # Method 1: Try lsof with format output ('n' lines carry file names)
    paths = [line[1:].strip()
             for line in run_quiet(['lsof', '-p', pid, '-Fn']).split('\n')
             if line.startswith('n')]

    # Look for the best path
    for path in paths:
        if '/Contents/MacOS/' in path and process_name in path:
            return path
        elif path.endswith('.app'):
            return path
        elif any(p in path for p in ['/bin/', '/sbin/', '/usr/']) and not any(path.endswith(ext) for ext in ['.dylib', '.so']):
            return path

    # Method 2: Try ps (first word of the command line, if it is a real file)
    command = run_quiet(['ps', '-p', pid, '-o', 'command=']).strip()
    if command:
        cmd = command.split()[0]
        if os.path.exists(cmd):
            return cmd

    # Method 3: Return command name from ps
    comm = run_quiet(['ps', '-p', pid, '-o', 'comm=']).strip()
    if comm:
        return comm

    # Last resort: return process name
    return process_name


# Sort by total operations
sorted_stats = sorted(stats.items(), key=lambda x: x[1]['reads'] + x[1]['writes'], reverse=True)

# Print header
print("\n%-30s %-8s %-45s %8s %8s %8s %7s %7s" %
      ("Process Name", "PID", "Executable Path", "Reads", "Writes", "Total", "Read%", "Write%"))
print("=" * 140)

# Print top 20 processes
count = 0
for key, data in sorted_stats:
    total = data['reads'] + data['writes']
    if total == 0:
        continue

    # Percentages are shares of ALL captured operations, not of this process
    read_pct = (data['reads'] * 100.0 / total_ops) if total_ops > 0 else 0
    write_pct = (data['writes'] * 100.0 / total_ops) if total_ops > 0 else 0

    # Get executable path (keep the tail, which is the informative part)
    exe_path = get_exe_path(data['process_name'], data['pid'])
    if len(exe_path) > 45:
        exe_path = "..." + exe_path[-42:]

    print("%-30s %-8s %-45s %8d %8d %8d %6.1f%% %6.1f%%" %
          (data['process_name'][:30],
           data['pid'],
           exe_path,
           data['reads'],
           data['writes'],
           total,
           read_pct,
           write_pct))

    count += 1
    if count >= 20:
        break

print("=" * 140)
print(f"Total I/O operations captured: {total_ops}")

PYTHON_END
}

analyze_disk_io "$TEMP_FILE"

# Cleanup
rm -f "$TEMP_FILE"

echo ""
echo "Monitoring complete."

Example output:

Disk I/O Monitor for macOS
========================================

Collecting disk I/O data for 20 seconds...
Progress: [####################] 20/20 seconds

Processing data...

Process Name                   PID      Executable Path                                  Reads   Writes    Total   Read%  Write%
============================================================================================================================================
Chrome                         4719678  Chrome                                             427      811     1238    3.1%    5.9%
UPMServiceController           4644625  UPMServiceController                               423      587     1010    3.1%    4.3%
UPMServiceController           4014337  UPMServiceController                               468      309      777    3.4%    2.2%
wsdlpd                         3060029  wsdlpd                                             154      370      524    1.1%    2.7%
tccd                           4743441  tccd                                               359       48      407    2.6%    0.3%
tccd                           4742031  tccd                                               358       48      406    2.6%    0.3%
com.crowdstrike.falcon.Agent   6174     com.crowdstrike.falcon.Agent                       301        5      306    2.2%    0.0%
UPMServiceContro               4644625  UPMServiceContro                                    12      285      297    0.1%    2.1%
mds_stores                     4736869  mds_stores                                         204       71      275    1.5%    0.5%
EndPointClassifier             6901     EndPointClassifier                                  40      231      271    0.3%    1.7%

MacOs: How to see which processes are using a specific port (eg 443)

Below is a useful script when you want to see which processes are using a specific port.

#!/bin/bash

# Port Monitor Script for macOS
# Usage: ./port_monitor.sh <port_number>
#
# Validates the single argument and stores it, normalised to plain decimal,
# in PORT for the rest of the script.

# Check if port number is provided
if [[ $# -eq 0 ]]; then
    echo "Usage: $0 <port_number>" >&2
    echo "Example: $0 8080" >&2
    exit 1
fi

PORT=$1

# Validate port number
# - at most 5 digits: a longer digit string makes the integer comparison itself
#   fail, which the old `[ -lt ] || [ -gt ]` chain treated as "valid"
# - 10# forces base 10 so a value such as 0080 is not read as octal
if ! [[ "$PORT" =~ ^[0-9]{1,5}$ ]] || ((10#$PORT < 1 || 10#$PORT > 65535)); then
    echo "Error: Please provide a valid port number (1-65535)" >&2
    exit 1
fi

# Strip any leading zeros so later pattern matches against netstat/lsof work
PORT=$((10#$PORT))

# Function to display processes using the port

#######################################
# Clear the screen and print one snapshot of everything using the port.
# Globals:   PORT (read) - numeric port to inspect
# Arguments: none
# Outputs:   lsof process list, netstat TCP/UDP connections and the
#            non-listening lsof entries, written to stdout
#######################################
show_port_usage() {
    local timestamp lsof_out netstat_result
    local proto local_addr foreign_addr state
    # ".PORT" followed by whitespace, as netstat prints addresses on macOS
    local port_re="\\.${PORT}[[:space:]]"

    timestamp=$(date "+%Y-%m-%d %H:%M:%S")

    # Clear screen for better readability
    clear

    echo "=================================="
    echo "Port Monitor - Port $PORT"
    echo "Last updated: $timestamp"
    echo "Press Ctrl+C to exit"
    echo "=================================="
    echo

    # Query lsof once (TCP and UDP) and reuse the output for every section.
    # -P -n skip port-name and DNS lookups, which otherwise slow each refresh.
    lsof_out=$(lsof -i ":${PORT}" -P -n 2>/dev/null)

    if [[ -n "$lsof_out" ]] ||
        netstat -an 2>/dev/null | grep -Eq "[:.]${PORT}[[:space:]]"; then
        echo "Processes using port $PORT:"
        echo
        if [[ -n "$lsof_out" ]]; then
            printf '%s\n' "$lsof_out" | head -n 1
        fi
        echo "--------------------------------------------------------------------------------"
        if [[ -n "$lsof_out" ]]; then
            printf '%s\n' "$lsof_out" | tail -n +2
        fi
        echo

        # Also show netstat information for additional context
        echo "Network connections on port $PORT:"
        echo
        printf "%-6s %-30s %-30s %-12s\n" "PROTO" "LOCAL ADDRESS" "FOREIGN ADDRESS" "STATE"
        echo "--------------------------------------------------------------------------------------------"

        # Show all TCP connections (LISTEN, ESTABLISHED, etc.).
        # netstat columns: proto recv-q send-q local foreign state
        netstat -anp tcp 2>/dev/null | grep -E "$port_re" |
            while read -r proto _ _ local_addr foreign_addr state _; do
                # Only print if we have valid data
                if [[ -n "$proto" && -n "$local_addr" ]]; then
                    printf "%-6s %-30s %-30s %-12s\n" \
                        "$proto" "$local_addr" "$foreign_addr" "$state"
                fi
            done

        # Also check UDP sockets (UDP has no connection state, so print "-")
        netstat -anp udp 2>/dev/null | grep -E "$port_re" |
            while read -r proto _ _ local_addr foreign_addr _; do
                printf "%-6s %-30s %-30s %-12s\n" \
                    "$proto" "$local_addr" "$foreign_addr" "-"
            done

        # Also check for any established connections using lsof:
        # drop the header line and every LISTEN entry
        echo
        echo "Active connections with processes:"
        echo "--------------------------------------------------------------------------------------------"
        printf '%s\n' "$lsof_out" | awk 'NR > 1 && NF && !/LISTEN/'

    else
        echo "No processes found using port $PORT"
        echo

        # Check if the port might be in use but not showing up in lsof
        netstat_result=$(netstat -anv 2>/dev/null | grep -E "\.${PORT} ")
        if [[ -n "$netstat_result" ]]; then
            echo "However, netstat shows activity on port $PORT:"
            echo "$netstat_result"
        fi
    fi

    echo
    echo "Refreshing in 20 seconds... (Press Ctrl+C to exit)"
}

# On Ctrl+C, print a farewell on its own line and leave with a success status
handle_interrupt() {
    echo -e "\n\nExiting port monitor..."
    exit 0
}
trap handle_interrupt INT

# Redraw the report, pause 20 seconds, repeat until interrupted
while :; do
    show_port_usage
    sleep 20
done

Mac OSX: Altering the OS route table to re-direct the traffic of a website to a different interface (eg re-routing whatsapp traffic to en0)

This was a hard article to figure out the title for! Put simply, your MacBook has a route table, and if you want to move the traffic for a specific IP address or DNS name from one interface to another, then follow the steps below:

First find the IP address of the website that you want to re-route the traffic for:

$ nslookup web.whatsapp.com
Server:		100.64.0.1
Address:	100.64.0.1#53

Non-authoritative answer:
web.whatsapp.com	canonical name = mmx-ds.cdn.whatsapp.net.
Name:	mmx-ds.cdn.whatsapp.net
Address: 102.132.99.60

We want to re-route the traffic for 102.132.99.60 to the default interface. So first let's find out which interface this traffic is currently being routed to:

$ route -n get web.whatsapp.com
   route to: 102.132.99.60
destination: 102.132.99.60
    gateway: 100.64.0.1
  interface: utun0
      flags: <UP,GATEWAY,HOST,DONE,WASCLONED,IFSCOPE,IFREF>
 recvpipe  sendpipe  ssthresh  rtt,msec    rttvar  hopcount      mtu     expire
       0         0         0        34        21         0      1400         0

So this is currently going to a tunnelled interface called utun0 on gateway 100.64.0.1.

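Ok, so I want to move it off this tunnelled interface. So let's first display the kernel routing table. The -n option forces netstat to print the IP addresses. Without this option, netstat attempts to display the host names. (Note: the flags must be typed as -rn with no space after the dash; with the space, as in the transcript below, netstat prints the active connections rather than the routing table.)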

$ netstat - rn | head -n 5
Active Internet connections
Proto Recv-Q Send-Q  Local Address          Foreign Address        (state)
tcp4       0    126  100.64.0.1.64770       136.226.216.14.https   ESTABLISHED
tcp4       0      0  100.64.0.1.64768       whatsapp-cdn-shv.https ESTABLISHED
tcp4       0      0  100.64.0.1.64766       52.178.17.3.https      ESTABLISHED

Now we want to re-route whatsapp to the default interface. So lets get the IP address of the default interface.

$ netstat -nr | grep default
default            192.168.8.1        UGScg                 en0
default                                 fe80::%utun1                            UGcIg               utun1
default                                 fe80::%utun2                            UGcIg               utun2
default                                 fe80::%utun3                            UGcIg               utun3
default                                 fe80::%utun4                            UGcIg               utun4
default                                 fe80::%utun5                            UGcIg               utun5
default                                 fe80::%utun0                            UGcIg               utun0

We can see that the default route on our en0 interface uses the gateway 192.168.8.1. So let's re-route the traffic for WhatsApp’s IP address via this gateway:

$ sudo route add 102.132.99.60 192.168.8.1
route: writing to routing socket: File exists
add host 102.132.99.60: gateway 192.168.8.1: File exists

Now lets test if we are routing via the correct interface:

$ route -n get 102.132.99.60
   route to: 102.132.99.60
destination: 102.132.99.60
    gateway: 192.168.8.1
  interface: utun6
      flags: <UP,GATEWAY,HOST,DONE,STATIC>
 recvpipe  sendpipe  ssthresh  rtt,msec    rttvar  hopcount      mtu     expire
       0         0         0         0         0         0      1400         0

Finally delete the route and recheck the routing:

$ sudo route delete 102.132.99.60
delete host 102.132.99.60

$ route -n get 102.132.99.60
   route to: 102.132.99.60
destination: 102.132.99.60
    gateway: 100.64.0.1
  interface: utun6
      flags: <UP,GATEWAY,HOST,DONE,WASCLONED,IFSCOPE,IFREF>
 recvpipe  sendpipe  ssthresh  rtt,msec    rttvar  hopcount      mtu     expire
       0         0         0         0         0         0      1400         0

Macbook OSX: Using Touch ID / fingerprints to enable SUDO and permanently enabling this after Mac OSX updates

Each day that I wake up I try and figure out if I can do less work than yesterday. With this in mind I was playing around to see if there is a way to save me typing my password each time I SUDO. It turns out this is quite a simple change…

Open Terminal and run the following to edit sudo's behaviour:

sudo nano /etc/pam.d/sudo

Next add the following to the top of the file:

auth       sufficient     pam_tid.so

The only issue with this is that /etc/pam.d/sudo is overwritten on every macOS update (major, minor or patch – it is always overwritten and reset back to its default state).

MacOS: Sonoma

In their “What’s new for enterprise in macOS Sonoma” document Apple listed the following in the “Bug fixes and other improvements” section:

Touch ID can be allowed for sudo with a configuration that persists across software updates using /etc/pam.d/sudo_local. See /etc/pam.d/sudo_local.template for details.

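So let's create /etc/pam.d/sudo_local by copying the template, and then edit the copy (the template itself is only an example; it is /etc/pam.d/sudo_local that sudo reads):

sudo cp /etc/pam.d/sudo_local.template /etc/pam.d/sudo_local
sudo nano /etc/pam.d/sudo_local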

Next uncomment the auth line, as per:

# sudo_local: local config file which survives system update and is included fo$
# uncomment following line to enable Touch ID for sudo
auth       sufficient     pam_tid.so

This should mean that Touch ID for sudo now survives system updates!

Quick tests:

sudo ls
# exit sudo
sudo -k
sudo ls

To enable Touch ID access in iTerm2, you need to do the following: go to Prefs -> Advanced -> "Allow sessions to survive logging out and back in" and set the value to No. Restart iTerm2 and Touch ID authentication will work in iTerm2.

Macbook OSX: Change the default image type of your screenshots from PNG to JPEG, GIF or PDF

There are a few things that I tweak when I get a new MacBook, one of which is the screenshot format (mainly because PNG doesn't natively render in WhatsApp). So I thought I would share the code snippet that you can run in Terminal to alter the default image type of your screenshots:

For JPEG use:

$ defaults write com.apple.screencapture type JPG

For GIF use:

$ defaults write com.apple.screencapture type GIF

For PDF use:

$ defaults write com.apple.screencapture type PDF

For PNG use:

$ defaults write com.apple.screencapture type PNG

Mac OSX : Tracing which network interface will be used to route traffic to an IP/DNS address

If you have multiple connections on your device (and maybe you have a zero trust client installed); how do you find out which network interface on your device will be used to route the traffic?

Below is a route get request for googles DNS service:

$ route get 8.8.8.8

   route to: dns.google
destination: dns.google
    gateway: 100.64.0.1
  interface: utun3
      flags: <UP,GATEWAY,HOST,DONE,WASCLONED,IFSCOPE,IFREF>
 recvpipe  sendpipe  ssthresh  rtt,msec    rttvar  hopcount      mtu     expire
       0         0         0         0         0         0      1400         0

If you have multiple interfaces enabled, then the first item in the Service Order will be used. If you want to see the default interface for your device:

$ route -n get 0.0.0.0 | grep interface
  interface: en0

Let's go and see what's going on over the utun3 interface (the interface that the route lookup above resolved to):

$ netstat utun3 | grep ESTABLISHED
tcp4       0      0  100.64.0.1.65271       jnb02s11-in-f4.1.https ESTABLISHED
tcp4       0      0  100.64.0.1.65269       jnb02s02-in-f14..https ESTABLISHED
tcp4       0      0  100.64.0.1.65262       192.0.73.2.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65261       192.0.73.2.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65260       192.0.73.2.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65259       192.0.73.2.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65258       192.0.73.2.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65257       192.0.73.2.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65256       192.0.73.2.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65255       192.0.73.2.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65254       192.0.78.23.https      ESTABLISHED
tcp4       0      0  100.64.0.1.65253       192.0.76.3.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65252       192.0.78.23.https      ESTABLISHED
tcp4       0      0  100.64.0.1.65251       192.0.76.3.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65250       192.0.78.23.https      ESTABLISHED
tcp4       0      0  100.64.0.1.65249       192.0.76.3.https       ESTABLISHED
tcp4       0      0  100.64.0.1.65248       ec2-13-244-140-3.https ESTABLISHED
tcp4       0      0  100.64.0.1.65247       192.0.73.2.https       ESTABLISHED

Finding and Setting the Maximum Transmission Unit (MTU) on a Windows Machine

If you have just changed ISPs or moved house and your internet suddenly starts misbehaving the likelihood is your Maximum Transmission Unit (MTU) is set too high for your ISP. The default internet facing MTU is 1500 bytes, BUT depending on your setup, this often needs to be set much lower.

Step 1:

First check your current MTU across all your ipv4 interfaces using netsh:

netsh interface ipv4 show subinterfaces
   MTU  MediaSenseState   Bytes In  Bytes Out  Interface
------  ---------------  ---------  ---------  -------------
4294967295                1          0          0  Loopback Pseudo-Interface 1
  1492                1        675        523  Local Area Connection

As you can see, the Local Area Connection interface is set to a 1492 bytes MTU. So how do we find out what it should be? We are going to send a fixed size Echo packet out, and tell the network not to fragment this packet. If somewhere along the line this packet is too big then this request will fail.

Next enter (if it fails then you know your MTU is too high):

ping 8.8.8.8 -f -l 1492

Procedure to find optimal MTU:

For PPPoE, your Max MTU should be no more than 1492 to allow space for the 8 byte PPPoE “wrapper”. 1492 + 8 = 1500. The ping test we will be doing does not include the IP/ICMP header of 28 bytes. 1500 – 28 = 1472. Include the 8 byte PPPoE wrapper if your ISP uses PPPoE and you get 1500 – 28 – 8 = 1464.

The best value for MTU is that value just before your packets get fragmented. Add 28 to the largest packet size that does not result in fragmenting the packets (since the ping command specifies the ping packet size, not including the IP/ICMP header of 28 bytes), and this is your Max MTU setting.

The below is an automated ping sweep, that tests various packet sizes until it fails (increasing in 10 bytes per iteration):

C:\Windows\system32>for /l %i in (1360,10,1500) do @ping -n 1 -w 8.8.8.8 -l %i -f

Pinging 8.8.8.8. with 1400 bytes of data:
Reply from 8.8.8.8: bytes=1400 time=6ms TTL=64

Ping statistics for 8.8.8.8:
Packets: Sent = 1, Received = 1, Lost = 0 (0% loss),
Approximate round trip times in milli-seconds:
Minimum = 6ms, Maximum = 6ms, Average = 6ms

Pinging 8.8.8.8 with 1401 bytes of data:
Reply from 8.8.8.8: bytes=1401 time<1ms TTL=64

Ping statistics for 8.8.8.8:
Packets: Sent = 1, Received = 1, Lost = 0 (0% loss),
Approximate round trip times in milli-seconds:
Minimum = 0ms, Maximum = 0ms, Average = 0ms

Pinging 8.8.8.8 with 1402 bytes of data:
Reply from 8.8.8.8: bytes=1402 time<1ms TTL=64

Ping statistics for 8.8.8.8:
Packets: Sent = 1, Received = 1, Lost = 0 (0% loss),
Approximate round trip times in milli-seconds:
Minimum = 0ms, Maximum = 0ms, Average = 0ms

Pinging 8.8.8.8 with 1403 bytes of data:
Reply from 8.8.8.8: bytes=1403 time<1ms TTL=64

Ping statistics for 8.8.8.8:
Packets: Sent = 1, Received = 1, Lost = 0 (0% loss),
Approximate round trip times in milli-seconds:
Minimum = 0ms, Maximum = 0ms, Average = 0ms 

Once you find the MTU, you can set it as per below:

netsh interface ipv4 set subinterface "Local Area Connection" mtu=1360 store=persistent

Finding and Setting the Maximum Transmission Unit (MTU) on Mac/OSX

If you have just changed ISPs or moved house and your internet suddenly starts misbehaving the likelihood is your Maximum Transmission Unit (MTU) is set too high for your ISP. The default internet facing MTU is 1500 bytes, BUT depending on your setup, this often needs to be set much lower.

Step 1:

First check your current MTU.

$ networksetup -getMTU en0
Active MTU: 1500 (Current Setting: 1500)

As you can see, the Mac is set to 1500 bytes MTU. So how do we find out what it should be? We are going to send a fixed size Echo packet out, and tell the network not to fragment this packet. If somewhere along the line this packet is too big then this request will fail.

Next enter:

$ ping -D -s 1500 www.google.com
PING www.google.com (172.217.170.100): 1500 data bytes
ping: sendto: Message too long
ping: sendto: Message too long
Request timeout for icmp_seq 0
ping: sendto: Message too long
Request timeout for icmp_seq 1
ping: sendto: Message too long

Ok, so our MTU is too high.

Procedure to find optimal MTU:

Hint: For PPPoE, your Max MTU should be no more than 1492 to allow space for the 8 byte PPPoE “wrapper”. 1492 + 8 = 1500. The ping test we will be doing does not include the IP/ICMP header of 28 bytes. 1500 – 28 = 1472. Include the 8 byte PPPoE wrapper if your ISP uses PPPoE and you get 1500 – 28 – 8 = 1464.

The best value for MTU is that value just before your packets get fragmented. Add 28 to the largest packet size that does not result in fragmenting the packets (since the ping command specifies the ping packet size, not including the IP/ICMP header of 28 bytes), and this is your Max MTU setting.

The below is an automated ping sweep, that tests various packet sizes until it fails (increasing in 10 bytes per iteration):

$ ping -g 1300 -G 1600 -h 10 -D www.google.com
PING www.google.com (172.217.170.100): (1300 ... 1600) data bytes
Request timeout for icmp_seq 0
Request timeout for icmp_seq 1
Request timeout for icmp_seq 2
Request timeout for icmp_seq 3
Request timeout for icmp_seq 4
Request timeout for icmp_seq 5
Request timeout for icmp_seq 6
ping: sendto: Message too long
Request timeout for icmp_seq 7

As you can see it failed on the 7th attempt (giving you a 1300 + 60 MTU).

Once you find the MTU, you can set it as per below:

$ ping -D -s 1360 www.google.com
PING www.google.com (172.217.170.100): 1370 data bytes
Request timeout for icmp_seq 0

So I can set my MTU as 1360 + 28 = 1388:

networksetup -setMTU en0 1388

Macbook Tip: iTerm2 clearing your scroll back history

I frequently forget this command shortcut, so this post is simply because I am lazy. To clear your history in iTerm press Command + K. Control + L only clears the screen, so as soon as you run the next command you will see the scroll back again.

If you want to view your command history (for terminal) type:

$ ls -a ~ | grep hist
.zsh_history
$ cat .zsh_history

Macbook: Check a DNS (web site) to see if basic email security has been setup (SPF, DKIM and DMARC)

There are three basic ways to secure email, these are: Sender Policy Framework (SPF), Domain Keys Identified Mail (DKIM), Domain-based Message Authentication, Reporting & Conformance (DMARC) definitions. Lets quickly discuss these before we talk about how to check if they have been setup:

SPF helps prevent spoofing by verifying the sender’s IP address

SPF (Sender Policy Framework) is a DNS record containing information about servers allowed to send emails from a specific domain (eg which servers can send emails from andrewbaker.ninja). 

With it, you can verify that messages coming from your domain are sent by mail servers and IP addresses authorized by you. This might be your email servers or servers of another company you use for your email sending. If SPF isn’t set, scammers can take advantage of it and send fake messages that look like they come from you. 

It’s important to remember that there can be only one SPF record for one domain. Within one SPF record, however, there can be several servers and IP addresses mentioned (for instance, if emails are sent from several mailing platforms).

DKIM shows that the email hasn’t been tampered with

DKIM (DomainKeys Identified Mail) adds a digital signature to the header of your email message, which the receiving email servers then check to ensure that the email content hasn’t changed. Like SPF, a DKIM record exists in the DNS.

DMARC provides reporting visibility on the prior controls

DMARC (Domain-based Message Authentication, Reporting & Conformance) defines how the recipient’s mail server should process incoming emails if they don’t pass the authentication check (either SPF, DKIM, or both).

Basically, if there’s a DKIM signature, and the sending server is found in the SPF records, the email is sent to the recipient’s inbox. 

If the message fails authentication, it’s processed according to the selected DMARC policy: none, reject, or quarantine.

  • Under the “none” policy, the receiving server doesn’t take any action if your emails fail authentication. It doesn’t impact your deliverability. But it also doesn’t protect you from scammers, so we don’t recommend setting it. Only by introducing stricter policies can you block them in the very beginning and let the world know you care about your customers and brand. 
  • Under the “quarantine” policy, messages that come from your domain but don’t pass the DMARC check are quarantined. In such a case, the provider is advised to send your email to the spam folder. 
  • Under the “reject” policy, the receiving server rejects all messages that don’t pass email authentication. This means such emails won’t reach an addressee and will result in a bounce.

The “reject” option is the most effective, but it’s better to choose it only if you are sure that everything is configured correctly.

Now that we’ve clarified all the terms, let’s see how you can check if you have an existing SPF record, DKIM record, and DMARC policy set in place.

1. First Lets Check if SPF is setup

$ dig txt google.com | grep "v=spf"
google.com.		3600	IN	TXT	"v=spf1 include:_spf.google.com ~all"

How to read SPF correctly

  • The “v=spf1” part shows that the record is of SPF type (version 1). 
  • The “include” part lists servers allowed to send emails for the domain. 
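  • The “~all” part is a “soft fail”: mail from any server not matched by the record should be treated as suspicious (it is typically accepted but marked or sent to spam). A strict “-all” would instead tell the recipient server to reject it.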

2. Next Lets Check if DKIM is setup

What is a DKIM record?

A DKIM record stores the DKIM public key — a randomized string of characters that is used to verify anything signed with the private key. Email servers query the domain’s DNS records to see the DKIM record and view the public key.

A DKIM record is really a DNS TXT (“text”) record. TXT records can be used to store any text that a domain administrator wants to associate with their domain. DKIM is one of many uses for this type of DNS record. (In some cases, domains have stored their DKIM records as CNAME records that point to the key instead; however, the official RFC requires these records to be TXT.)

Here is an example of a DKIM DNS TXT record:

Name | Type | Content | TTL
big-email._domainkey.example.com | TXT | v=DKIM1; p=76E629F05F709EF665853333EEC3F5ADE69A2362BECE40658267AB2FC3CB6CBE | 6000

Name

Unlike most DNS TXT records, DKIM records are stored under a specialized name, not just the name of the domain. DKIM record names follow this format:

[selector]._domainkey.[domain]

The selector is a specialized value issued by the email service provider used by the domain. It is included in the DKIM header to enable an email server to perform the required DKIM lookup in the DNS. The domain is the email domain name. ._domainkey. is included in all DKIM record names.

If you want to find the value of the selector, you can view this by selecting “Show Original” when you have the email open in gmail:

Once you are able to view the original email, perform a text search for “DKIM-Signature”. This DKIM-Signature contains an attribute ‘s=’, this is the DKIM selector being used for this domain. In the example below (an amazon email), we can see the DKIM selector is “jvxsykglqiaiibkijmhy37vqxh4mzqr6”. 

DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/simple; s=jvxsykglqiaiibkijmhy37vqxh4mzqr6; d=amazon.com; t=1675842267; h=Date:From:Reply-To:To:Message-ID:Subject:MIME-Version:Content-Type; bh=BJxF0PCdQ4TBdiPcAK83Ah0Z65hMjsvFIWVgzM0O8b0=; b=NUSl8nwZ2aF6ULhIFOJPCANWEeuQNUrnym4hobbeNsB6PPTs2/9jJPFCEEjAh8/q s1l53Vv5qAGx0zO4PTjASyB/UVOZj5FF+LEgDJtUclQcnlNVegRSodaJUHRL3W2xNxa ckDYAnSPr8fTNLG287LPrtxvIL2n8LPOTZWclaGg=
DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/simple; s=6gbrjpgwjskckoa6a5zn6fwqkn67xbtw; d=amazonses.com; t=1675842267; h=Date:From:Reply-To:To:Message-ID:Subject:MIME-Version:Content-Type:Feedback-ID; bh=BJxF0PCdQ4TBdiPcAK83Ah0Z65hMjsvFIWVgzM0O8b0=; b=ivBW6HbegrrlOj7BIB293ZNNy6K8D008I3+wwXoNvZdrBI6SBhL+QmCvCE3Sx0Av qh2hWMJyJBkVVcVwJns8cq8sn6l3NTY7nfN0H5RmuFn/MK4UHJw1vkkzEKKWSDncgf9 6K3DyNhKooBGopkxDOhg/nU8ZX8paHKlD67q7klc=
Date: Wed, 8 Feb 2023 07:44:27 +0000

To look up the DKIM record, email servers use the DKIM selector provided by the email service provider, not just the domain name. Suppose example.com uses Big Email as their email service provider, and suppose Big Email uses the DKIM selector big-email. Most of example.com’s DNS records would be named example.com, but their DKIM DNS record would be under the name big-email._domainkey.example.com, which is listed in the example above.

Content

This is the part of the DKIM DNS record that lists the public key. In the example above, v=DKIM1 indicates that this TXT record should be interpreted as DKIM, and the public key is everything after p=.

Below we query the linuxincluded.com domain using the “dkim” selector.

$ dig TXT dkim._domainkey.linuxincluded.com

; <<>> DiG 9.10.6 <<>> TXT dkim._domainkey.linuxincluded.com
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 45496
;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1

;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 512
;; QUESTION SECTION:
;dkim._domainkey.linuxincluded.com. IN	TXT

;; ANSWER SECTION:
dkim._domainkey.linuxincluded.com. 3600	IN TXT	"v=DKIM1; k=rsa; p=MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdLyUk58Chz538ZQE4PnZ1JqBiYkSVWp8F77QpVF2onPCM4W4BnVJWXDSCC+yn747XFKv+XkVwayLexUkiAga7hIw6GwOj0gplVjv2dirFCoKecS2jvvqXc6/O0hjVqYlTYXwiYFJMSptaBWoHEEOvpS7VWelnQB+1m3UHHPJRiQIDAQAB; s=email"

;; Query time: 453 msec
;; SERVER: 100.64.0.1#53(100.64.0.1)
;; WHEN: Thu Feb 02 13:39:40 SAST 2023
;; MSG SIZE  rcvd: 318

3. Finally Lets Check if DMARC is setup

What is a DMARC record?

A DMARC record stores a domain’s DMARC policy. DMARC records are stored in the Domain Name System (DNS) as DNS TXT records. A DNS TXT record can contain almost any text a domain administrator wants to associate with their domain. One of the ways DNS TXT records are used is to store DMARC policies.

(Note that a DMARC record is a DNS TXT record that contains a DMARC policy, not a specialized type of DNS record.)

Example.com’s DMARC policy might look like this:

Name | Type | Content | TTL
example.com | TXT | v=DMARC1; p=quarantine; adkim=r; aspf=r; rua=mailto:example@third-party-example.com; | 3260

$ dig txt _dmarc.google.com

; <<>> DiG 9.10.6 <<>> txt _dmarc.google.com
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 16231
;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1

;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 512
;; QUESTION SECTION:
;_dmarc.google.com.		IN	TXT

;; ANSWER SECTION:
_dmarc.google.com.	300	IN	TXT	"v=DMARC1; p=reject; rua=mailto:mailauth-reports@google.com"

;; Query time: 209 msec
;; SERVER: 100.64.0.1#53(100.64.0.1)
;; WHEN: Thu Feb 02 13:42:03 SAST 2023
;; MSG SIZE  rcvd: 117