Trie — Hands-On Tasks¶

Audience: Engineers who learn by building. Each task is a self-contained engineering exercise. After each statement, a complete reference solution is provided in one or two languages — implement first, then compare.

The tasks scale from "first trie" to "production radix tree", "Aho-Corasick automaton", and "frequency-ranked autocomplete". By the end you will have a personal library of working trie code.

Table of Contents¶

Array-children trie for lowercase English
HashMap-children trie for arbitrary Unicode
Implement delete with subtree cleanup
Enumerate all keys with a given prefix
Count words and prefixes
Binary trie for integer XOR queries
Radix tree (compressed trie) with edge labels
Aho-Corasick automaton — failure links
Autocomplete with frequency ranking
Memory profiling — bytes per key
Bonus: serialize and load a trie

Task 1 — Array-children trie for lowercase English¶

Goal: Implement insert, search, startsWith for words in a..z. Array-based children, fail loudly on out-of-range chars.

Reference solution (Go)¶

package trie

import "fmt"

const ALPHA = 26

type Node struct {
    children [ALPHA]*Node
    isEnd    bool
}

type Trie struct{ root *Node }

func New() *Trie { return &Trie{root: &Node{}} }

func idx(b byte) int {
    if b < 'a' || b > 'z' {
        panic(fmt.Sprintf("trie: character %q outside [a..z]", b))
    }
    return int(b - 'a')
}

func (t *Trie) Insert(word string) {
    cur := t.root
    for i := 0; i < len(word); i++ {
        k := idx(word[i])
        if cur.children[k] == nil { cur.children[k] = &Node{} }
        cur = cur.children[k]
    }
    cur.isEnd = true
}

func (t *Trie) Search(word string) bool {
    n := t.walk(word)
    return n != nil && n.isEnd
}

func (t *Trie) StartsWith(p string) bool { return t.walk(p) != nil }

func (t *Trie) walk(s string) *Node {
    cur := t.root
    for i := 0; i < len(s); i++ {
        k := idx(s[i])
        if cur.children[k] == nil { return nil }
        cur = cur.children[k]
    }
    return cur
}

Test (Go)¶

func TestTrie(t *testing.T) {
    tr := New()
    for _, w := range []string{"cat", "car", "card", "dog"} { tr.Insert(w) }
    if !tr.Search("cat") || !tr.Search("card") { t.Fatal("expected hits") }
    if tr.Search("ca") { t.Fatal("ca is prefix only") }
    if !tr.StartsWith("ca") { t.Fatal("ca prefix should exist") }
    if tr.StartsWith("z") { t.Fatal("z should not exist") }
}

Task 2 — HashMap-children trie for arbitrary Unicode¶

Goal: Same API as Task 1, but children are stored in a hash map keyed by Unicode rune. Handle UTF-8 correctly and normalize input to NFC.

Reference solution (Python)¶

import unicodedata

class UnicodeTrie:
    __slots__ = ("children", "is_end")
    def __init__(self):
        self.children: dict[str, "UnicodeTrie"] = {}
        self.is_end = False

    @staticmethod
    def _norm(s: str) -> str:
        return unicodedata.normalize("NFC", s)

    def insert(self, word: str) -> None:
        word = self._norm(word)
        cur = self
        for ch in word:
            cur = cur.children.setdefault(ch, UnicodeTrie())
        cur.is_end = True

    def search(self, word: str) -> bool:
        word = self._norm(word)
        n = self._walk(word)
        return n is not None and n.is_end

    def starts_with(self, prefix: str) -> bool:
        prefix = self._norm(prefix)
        return self._walk(prefix) is not None

    def _walk(self, s: str):
        cur = self
        for ch in s:
            cur = cur.children.get(ch)
            if cur is None: return None
        return cur

# Demo
t = UnicodeTrie()
t.insert("café")
t.insert("日本語")
assert t.search("café")
assert t.search("café".encode().decode())     # NFC round-trip
assert t.starts_with("日本")

Task 3 — Implement delete with subtree cleanup¶

Goal: Delete a word; when a subtree becomes empty and is non-terminal, free its nodes upward.

Reference solution (Python, recursive)¶

class Trie:
    __slots__ = ("children", "is_end")
    def __init__(self):
        self.children = {}
        self.is_end = False

    def insert(self, word):
        cur = self
        for ch in word: cur = cur.children.setdefault(ch, Trie())
        cur.is_end = True

    def search(self, word):
        cur = self
        for ch in word:
            cur = cur.children.get(ch)
            if cur is None: return False
        return cur.is_end

    def delete(self, word) -> bool:
        """Returns True if `word` was present and removed."""
        return self._del(word, 0)

    def _del(self, word, i):
        if i == len(word):
            if not self.is_end: return False
            self.is_end = False
            return True
        ch = word[i]
        child = self.children.get(ch)
        if child is None: return False
        removed = child._del(word, i + 1)
        if removed and not child.is_end and not child.children:
            del self.children[ch]
        return removed

Test¶

t = Trie()
for w in ["cat", "car", "card"]: t.insert(w)
assert t.delete("car")             # True
assert t.search("card")            # Still True — chain not pruned
assert not t.search("car")
assert not t.delete("car")         # Already gone

# Delete only one in a shared chain:
t2 = Trie(); t2.insert("dog")
assert t2.delete("dog")
assert not t2.children              # all cleaned up

Task 4 — Enumerate all keys with a given prefix¶

Goal: Given a prefix, return all stored keys starting with it in lexicographic order.

Reference solution (Go)¶

package trie

import "sort"

func (t *Trie) KeysWithPrefix(prefix string) []string {
    cur := t.root
    for i := 0; i < len(prefix); i++ {
        k := idx(prefix[i])
        if cur.children[k] == nil { return nil }
        cur = cur.children[k]
    }
    var out []string
    collect(cur, []byte(prefix), &out)
    return out
}

func collect(n *Node, path []byte, out *[]string) {
    if n.isEnd { *out = append(*out, string(path)) }
    for i := 0; i < ALPHA; i++ {
        if n.children[i] != nil {
            path = append(path, byte('a' + i))
            collect(n.children[i], path, out)
            path = path[:len(path)-1]
        }
    }
}

// Demo
func ExampleKeysWithPrefix() {
    tr := New()
    for _, w := range []string{"cat", "car", "card", "carpet", "cars", "dog"} {
        tr.Insert(w)
    }
    sort.Strings(tr.KeysWithPrefix("car"))
    // car, card, carpet, cars
}

Task 5 — Count words and prefixes¶

Goal: Augment the trie so countWordsEqualTo(word) returns the number of times word was inserted and countWordsStartingWith(prefix) returns the number of stored keys that start with prefix. Insertion duplicates are now meaningful.

Reference solution (Java)¶

public class CountingTrie {
    private static class Node {
        Node[] children = new Node[26];
        int wordCount;       // how many times this word was inserted
        int prefixCount;     // how many keys pass through this node
    }
    private final Node root = new Node();

    public void insert(String word) {
        Node cur = root;
        for (char c : word.toCharArray()) {
            int i = c - 'a';
            if (cur.children[i] == null) cur.children[i] = new Node();
            cur = cur.children[i];
            cur.prefixCount++;
        }
        cur.wordCount++;
    }

    public int countWordsEqualTo(String word) {
        Node n = walk(word); return n == null ? 0 : n.wordCount;
    }
    public int countWordsStartingWith(String prefix) {
        Node n = walk(prefix); return n == null ? 0 : n.prefixCount;
    }
    public void erase(String word) {
        if (countWordsEqualTo(word) == 0) return;
        Node cur = root;
        for (char c : word.toCharArray()) {
            cur = cur.children[c - 'a'];
            cur.prefixCount--;
        }
        cur.wordCount--;
    }
    private Node walk(String s) {
        Node cur = root;
        for (char c : s.toCharArray()) {
            int i = c - 'a';
            if (cur.children[i] == null) return null;
            cur = cur.children[i];
        }
        return cur;
    }
}

This is LeetCode 1804 — Implement Trie II.

Task 6 — Binary trie for integer XOR queries¶

Goal: Insert N 32-bit integers; for a query Q, return max(Q XOR x) over inserted x in O(32). Used in LC 421 (interview.md §8).

Reference solution (Python)¶

class BinaryTrie:
    BITS = 32
    def __init__(self):
        self.root = [None, None]

    def insert(self, n):
        cur = self.root
        for i in range(self.BITS - 1, -1, -1):
            b = (n >> i) & 1
            if cur[b] is None: cur[b] = [None, None]
            cur = cur[b]

    def max_xor(self, q):
        if self.root == [None, None]: return 0
        cur, x = self.root, 0
        for i in range(self.BITS - 1, -1, -1):
            b = (q >> i) & 1
            want = 1 - b
            if cur[want] is not None:
                x |= 1 << i; cur = cur[want]
            else:
                cur = cur[b]
        return x

# Demo
bt = BinaryTrie()
for n in [3, 10, 5, 25, 2, 8]: bt.insert(n)
assert bt.max_xor(25) == 31      # 25 ^ 8 = 17? actually max is 5^25=28, 10^25=19, 25^8=17
# Verify by brute force
nums = [3, 10, 5, 25, 2, 8]
brute = max(25 ^ n for n in nums)
assert bt.max_xor(25) == brute

Extensions: the same shape solves "k-th maximum XOR pair" (descend two paths in parallel) and "min XOR over a range" (combined with persistent tries).

Task 7 — Radix tree (compressed trie) with edge labels¶

Goal: Implement a radix tree that merges chains of single-child nodes. Edge labels are arbitrary strings. Support insert and search. Demonstrate it uses dramatically less memory than a plain trie on real English words.

Reference solution (Go)¶

package radix

type Node struct {
    label    string
    children map[byte]*Node
    isEnd    bool
}

type Tree struct{ root *Node }

func New() *Tree {
    return &Tree{root: &Node{children: map[byte]*Node{}}}
}

func commonPrefixLen(a, b string) int {
    n := len(a); if len(b) < n { n = len(b) }
    for i := 0; i < n; i++ {
        if a[i] != b[i] { return i }
    }
    return n
}

func (t *Tree) Insert(word string) {
    insert(t.root, word)
}

func insert(n *Node, word string) {
    if word == "" { n.isEnd = true; return }
    head := word[0]
    child, ok := n.children[head]
    if !ok {
        n.children[head] = &Node{
            label:    word,
            isEnd:    true,
            children: map[byte]*Node{},
        }
        return
    }
    cp := commonPrefixLen(child.label, word)
    if cp == len(child.label) {
        insert(child, word[cp:])
        return
    }
    // Split the edge
    split := &Node{
        label:    child.label[:cp],
        children: map[byte]*Node{},
    }
    child.label = child.label[cp:]
    split.children[child.label[0]] = child
    n.children[head] = split
    if cp == len(word) {
        split.isEnd = true
    } else {
        split.children[word[cp]] = &Node{
            label:    word[cp:],
            isEnd:    true,
            children: map[byte]*Node{},
        }
    }
}

func (t *Tree) Search(word string) bool {
    return search(t.root, word)
}

func search(n *Node, word string) bool {
    if word == "" { return n.isEnd }
    child, ok := n.children[word[0]]
    if !ok { return false }
    if len(word) < len(child.label) { return false }
    for i := 0; i < len(child.label); i++ {
        if word[i] != child.label[i] { return false }
    }
    return search(child, word[len(child.label):])
}

// CountNodes returns the number of nodes (excluding root) — for memory comparison.
func (t *Tree) CountNodes() int { return count(t.root) - 1 }

func count(n *Node) int {
    s := 1
    for _, c := range n.children { s += count(c) }
    return s
}

Demo: compare against plain trie¶

words := []string{"international", "internet", "internal", "intern", "interlude"}
plain := trie.New()
rad := radix.New()
for _, w := range words {
    plain.Insert(w); rad.Insert(w)
}
fmt.Println("plain trie nodes:", plain.CountNodes())   // ~40
fmt.Println("radix tree nodes:", rad.CountNodes())     // ~7-8

Run on /usr/share/dict/words (~235k entries) and you'll see ~3–10× node-count reduction. With pointer-array savings on top, memory drops dramatically.

Task 8 — Aho-Corasick automaton with failure links¶

Goal: Build the Aho-Corasick automaton from a set of patterns. Implement searchAll(text) returning all (position, pattern) matches in O(|text| + matches).

Reference solution (Python)¶

from collections import deque

class AhoCorasick:
    def __init__(self, patterns):
        # State 0 is the root.
        self.go = [{}]     # go[state][char] -> next state
        self.fail = [0]    # failure link
        self.out = [[]]    # patterns ending at this state (incl. via failure chain)

        for p in patterns:
            self._add(p)
        self._build_links()

    def _add(self, p):
        cur = 0
        for ch in p:
            if ch not in self.go[cur]:
                self.go.append({}); self.fail.append(0); self.out.append([])
                self.go[cur][ch] = len(self.go) - 1
            cur = self.go[cur][ch]
        self.out[cur].append(p)

    def _build_links(self):
        q = deque()
        for ch, nxt in self.go[0].items():
            self.fail[nxt] = 0
            q.append(nxt)
        while q:
            r = q.popleft()
            for ch, nxt in self.go[r].items():
                # Find the longest proper suffix of (path to nxt) that is in trie
                f = self.fail[r]
                while f != 0 and ch not in self.go[f]:
                    f = self.fail[f]
                self.fail[nxt] = self.go[f].get(ch, 0)
                if self.fail[nxt] == nxt:
                    self.fail[nxt] = 0
                # Merge outputs along the failure chain
                self.out[nxt] = self.out[nxt] + self.out[self.fail[nxt]]
                q.append(nxt)

    def search_all(self, text):
        cur, hits = 0, []
        for i, ch in enumerate(text):
            while cur != 0 and ch not in self.go[cur]:
                cur = self.fail[cur]
            cur = self.go[cur].get(ch, 0)
            for p in self.out[cur]:
                hits.append((i - len(p) + 1, p))
        return hits

# Demo: classic literature search
patterns = ["he", "she", "his", "hers"]
ac = AhoCorasick(patterns)
print(ac.search_all("ushers"))
# [(2, 'she'), (1, 'he'), (2, 'hers')]

Verify the output matches a naive O(|text| · |patterns|) substring search on a few inputs.

Task 9 — Autocomplete with frequency ranking¶

Goal: Implement an autocomplete that, given a prefix, returns the top-k completions by frequency. Frequencies are set when each word is inserted. Aim for O(L + k) per query, independent of dictionary size.

Approach¶

Each node keeps a sorted list (or small heap) of "best k strings in subtree". On insert, update along the path. On query, walk to the prefix node and return its top-k.

Reference solution (Python)¶

import heapq

class AutocompleteTrie:
    K = 5
    __slots__ = ("children", "top")
    def __init__(self):
        self.children = {}
        self.top = []           # min-heap of (freq, word), size <= K

    def insert(self, word: str, freq: int) -> None:
        nodes = [self]
        cur = self
        for ch in word:
            cur = cur.children.setdefault(ch, AutocompleteTrie())
            nodes.append(cur)
        for n in nodes:
            heapq.heappush(n.top, (freq, word))
            if len(n.top) > self.K:
                heapq.heappop(n.top)

    def suggest(self, prefix: str) -> list[str]:
        cur = self
        for ch in prefix:
            cur = cur.children.get(ch)
            if cur is None: return []
        # Return sorted by freq desc, then lex asc.
        return [w for _, w in sorted(cur.top, key=lambda x: (-x[0], x[1]))]

# Demo
ac = AutocompleteTrie()
for word, freq in [("merge", 80), ("mercury", 65), ("mercy", 50),
                   ("merit", 40), ("meringue", 15), ("metric", 30)]:
    ac.insert(word, freq)

print(ac.suggest("mer"))
# ['merge', 'mercury', 'mercy', 'merit', 'meringue']

Note: if words can be re-inserted with new frequencies (updates), use a delta scheme similar to LC 677 to avoid double-counting.

Task 10 — Memory profiling — bytes per key¶

Goal: Measure the actual memory footprint of your trie on the system English dictionary /usr/share/dict/words (~235,000 words on macOS/Linux). Compare with a set[str]. Report bytes per key.

Reference (Python)¶

import sys, tracemalloc

def measure(build_fn, words):
    tracemalloc.start()
    obj = build_fn(words)
    snap_before_gc = tracemalloc.take_snapshot()
    total = sum(stat.size for stat in snap_before_gc.statistics('filename'))
    tracemalloc.stop()
    return obj, total

def build_set(words):
    return set(words)

def build_trie(words):
    t = UnicodeTrie()              # from Task 2
    for w in words: t.insert(w)
    return t

words = [w.strip() for w in open("/usr/share/dict/words") if w.strip()]
_, set_bytes = measure(build_set, words)
_, trie_bytes = measure(build_trie, words)

print(f"set:  {set_bytes/1024/1024:.1f} MB,  {set_bytes/len(words):.1f} B/key")
print(f"trie: {trie_bytes/1024/1024:.1f} MB,  {trie_bytes/len(words):.1f} B/key")

Expected outcome (macOS, Python 3.11, ~235k words):

set: ~25–30 MB, ~120 B/key
HashMap-trie: ~150–250 MB, ~700–1000 B/key

The trie is 5–10× larger for plain English. The trade-off: it supports prefix queries.

Then, build a radix tree version (Task 7) and remeasure — you should see the trie shrink to ~30–40 MB. The radix-tree compression is where most of the memory savings live.

Task 11 — Bonus: serialize and load a trie¶

Goal: Serialize your trie to a byte stream (your choice of format — Protocol Buffers, custom, or pickle), so you can build it once and reload in milliseconds.

Reference (Python, simple binary format)¶

import struct

def serialize(node) -> bytes:
    """Format: u1 isEnd | u2 num_children | (u4 char_codepoint + recursive child bytes)*"""
    buf = bytearray()
    buf.append(1 if node.is_end else 0)
    items = sorted(node.children.items())
    buf += struct.pack("<H", len(items))
    for ch, child in items:
        buf += struct.pack("<I", ord(ch))
        child_bytes = serialize(child)
        buf += struct.pack("<I", len(child_bytes))
        buf += child_bytes
    return bytes(buf)

def deserialize(buf: bytes, offset: int = 0):
    node = UnicodeTrie()
    node.is_end = buf[offset] == 1
    offset += 1
    (n_children,) = struct.unpack_from("<H", buf, offset); offset += 2
    for _ in range(n_children):
        (cp,)  = struct.unpack_from("<I", buf, offset); offset += 4
        (sz,)  = struct.unpack_from("<I", buf, offset); offset += 4
        child, _ = deserialize(buf, offset)
        offset += sz
        node.children[chr(cp)] = child
    return node, offset

# Demo round-trip
t = UnicodeTrie()
for w in ["cat", "car", "card"]: t.insert(w)
blob = serialize(t)
t2, _ = deserialize(blob)
assert t2.search("car") and t2.search("card") and not t2.search("ca")

In production, use a length-prefixed protobuf or a mmap-able format. Avoid Python's pickle for security-sensitive deployments — it executes arbitrary code on load.

Self-Check¶

After completing these tasks you should be able to:

Trie — Hands-On Tasks¶

Table of Contents¶

Task 1 — Array-children trie for lowercase English¶

Reference solution (Go)¶

Test (Go)¶

Task 2 — HashMap-children trie for arbitrary Unicode¶

Reference solution (Python)¶

Task 3 — Implement delete with subtree cleanup¶

Reference solution (Python, recursive)¶

Test¶

Task 4 — Enumerate all keys with a given prefix¶

Reference solution (Go)¶

Task 5 — Count words and prefixes¶

Reference solution (Java)¶

Task 6 — Binary trie for integer XOR queries¶

Reference solution (Python)¶

Task 7 — Radix tree (compressed trie) with edge labels¶

Reference solution (Go)¶

Demo: compare against plain trie¶

Task 8 — Aho-Corasick automaton with failure links¶

Reference solution (Python)¶

Task 9 — Autocomplete with frequency ranking¶

Approach¶

Reference solution (Python)¶

Task 10 — Memory profiling — bytes per key¶

Reference (Python)¶

Task 11 — Bonus: serialize and load a trie¶

Reference (Python, simple binary format)¶

Self-Check¶

Further Reading¶