Iterating Strings — Optimization Exercises
Exercise 1 🟢 — String Concatenation in Range Loop
package main
import "fmt"
func toUppercase(s string) string {
result := ""
for _, r := range s {
if r >= 'a' && r <= 'z' {
result += string(r - 32) // new allocation each iteration!
} else {
result += string(r)
}
}
return result
}
func main() {
long := "the quick brown fox jumps over the lazy dog"
fmt.Println(toUppercase(long))
}
Optimized Solution
import (
"strings"
"unicode"
)
// Option 1: strings.Builder
func toUppercase(s string) string {
var sb strings.Builder
sb.Grow(len(s)) // pre-allocate exact byte count
for _, r := range s {
sb.WriteRune(unicode.ToUpper(r))
}
return sb.String()
}
// Option 2: Use standard library (best for this case)
func toUppercase(s string) string {
return strings.ToUpper(s) // SIMD-optimized internally
}
// Benchmark: strings.Builder is 5-10x faster for large strings
// strings.ToUpper is 2x faster than Builder (SIMD)
Exercise 2 🟢 — Unnecessary []rune Conversion
package main
import "fmt"
func countLetters(s string) int {
runes := []rune(s) // allocates 4 bytes per char!
count := 0
for _, r := range runes {
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
count++
}
}
return count
}
func main() {
fmt.Println(countLetters("Hello, 世界! 123"))
}
Optimized Solution
func countLetters(s string) int {
count := 0
for _, r := range s { // range directly — no allocation!
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') {
count++
}
}
return count
}
// Saves: len([]rune(s)) * 4 bytes allocation
// For 1000-char string: saves 4KB allocation per call
Exercise 3 🟢 — Splitting String with String Function
package main
import (
"fmt"
"strings"
)
func splitWords(s string) []string {
words := []string{}
var current []rune
for _, r := range s {
if r == ' ' || r == '\t' || r == '\n' {
if len(current) > 0 {
words = append(words, string(current))
current = current[:0]
}
} else {
current = append(current, r)
}
}
if len(current) > 0 {
words = append(words, string(current))
}
return words
}
func main() {
words := splitWords("hello world foo bar")
fmt.Println(len(words))
}
Optimized Solution
import "strings"
// strings.Fields is highly optimized (uses byte-level scan internally)
func splitWords(s string) []string {
return strings.Fields(s)
}
// If custom logic is needed, avoid []rune buffer:
func splitWordsOpt(s string) []string {
var words []string
start := -1
for i, r := range s {
isSpace := r == ' ' || r == '\t' || r == '\n'
if !isSpace && start == -1 {
start = i // byte position of word start
} else if isSpace && start >= 0 {
words = append(words, s[start:i]) // substring = no copy
start = -1
}
}
if start >= 0 { words = append(words, s[start:]) }
return words
}
// Key: use byte positions from range to slice the original string (zero-copy)
// Avoid: accumulating runes into []rune then converting to string
Exercise 4 🟡 — Repeated Rune Count for Same String
package main
import (
"fmt"
"unicode/utf8"
)
type TextProcessor struct {
text string
}
func (tp *TextProcessor) Length() int {
return utf8.RuneCountInString(tp.text) // O(n) each call!
}
func (tp *TextProcessor) FirstN(n int) string {
runes := []rune(tp.text) // O(n) allocation each call!
if n > len(runes) { n = len(runes) }
return string(runes[:n])
}
func main() {
tp := &TextProcessor{text: "Hello, 世界!"}
for i := 0; i < 10000; i++ {
_ = tp.Length() // 10000 O(n) scans
_ = tp.FirstN(3) // 10000 allocations
}
fmt.Println("done")
}
Optimized Solution
type TextProcessor struct {
text string
runes []rune // cached once
runeLen int // cached
}
func NewTextProcessor(text string) *TextProcessor {
runes := []rune(text) // compute ONCE
return &TextProcessor{
text: text,
runes: runes,
runeLen: len(runes),
}
}
func (tp *TextProcessor) Length() int { return tp.runeLen } // O(1)
func (tp *TextProcessor) FirstN(n int) string {
if n > tp.runeLen { n = tp.runeLen }
return string(tp.runes[:n]) // no re-compute
}
// If text is immutable: compute runes once, cache forever
// If text changes: invalidate cache on Set()
Exercise 5 🟡 — Unnecessary UTF-8 Validation in Hot Path
package main
import (
"fmt"
"unicode/utf8"
)
func processToken(token string) {
if !utf8.ValidString(token) {
panic("invalid UTF-8")
}
for _, r := range token { // validates UTF-8 AGAIN internally!
_ = r
}
}
func main() {
for i := 0; i < 1_000_000; i++ {
processToken("token")
}
fmt.Println("done")
}
Optimized Solution
// Option 1: Validate ONCE at ingress, trust internally
// Validate at API boundary (HTTP handler, file reader), not in hot paths
// Option 2: If validation is needed, it's already done by for range
// for range does NOT panic on invalid UTF-8 — it yields RuneError
// Check RuneError if you need to detect invalid bytes:
func processToken(token string) {
for _, r := range token {
if r == utf8.RuneError {
panic("invalid UTF-8")
}
_ = r
}
// Single pass: validates AND processes
}
// Option 3: For pure ASCII tokens (common in web APIs):
func processTokenASCII(token string) {
for i := 0; i < len(token); i++ { // byte loop: no UTF-8 decode
if token[i] >= 0x80 { panic("non-ASCII") }
_ = token[i]
}
}
package main
import "fmt"
func maskEmail(email string) string {
runes := []rune(email)
atIdx := -1
for i, r := range runes {
if r == '@' { atIdx = i; break }
}
if atIdx <= 0 { return email }
result := make([]rune, len(runes))
copy(result, runes)
for i := 1; i < atIdx-1; i++ {
result[i] = '*'
}
return string(result) // extra allocation
}
func main() {
fmt.Println(maskEmail("alice@example.com")) // a***e@example.com
}
Optimized Solution
import "strings"
func maskEmail(email string) string {
// Find @ using strings.IndexByte (SIMD-optimized, no UTF-8 decode)
atIdx := strings.IndexByte(email, '@')
if atIdx <= 1 { return email }
var sb strings.Builder
sb.Grow(len(email))
count := 0
for i, r := range email {
if count == 0 || i >= atIdx {
sb.WriteRune(r) // keep first char and everything from @
} else if count == atIdx-1 {
sb.WriteRune(r) // keep char before @
} else {
sb.WriteByte('*') // mask middle (all ASCII * so 1 byte)
}
count++
}
return sb.String()
}
// Uses strings.IndexByte for fast @ search
// strings.Builder avoids intermediate []rune
Exercise 7 🟡 — Reading Characters Multiple Times
package main
import "fmt"
func validate(s string) (bool, bool, bool) {
// Three separate passes — reads string 3 times
hasUpper := false
for _, r := range s { if r >= 'A' && r <= 'Z' { hasUpper = true; break } }
hasDigit := false
for _, r := range s { if r >= '0' && r <= '9' { hasDigit = true; break } }
hasSpecial := false
for _, r := range s {
if r == '!' || r == '@' || r == '#' || r == '$' {
hasSpecial = true; break
}
}
return hasUpper, hasDigit, hasSpecial
}
func main() {
u, d, s := validate("Hello World! 123")
fmt.Println(u, d, s) // true true true
}
Optimized Solution
func validate(s string) (hasUpper, hasDigit, hasSpecial bool) {
// Single pass
for _, r := range s {
switch {
case r >= 'A' && r <= 'Z': hasUpper = true
case r >= '0' && r <= '9': hasDigit = true
case r == '!' || r == '@' || r == '#' || r == '$': hasSpecial = true
}
if hasUpper && hasDigit && hasSpecial { break } // early exit
}
return
}
// 1 pass instead of 3
// Early exit when all conditions satisfied
// For 100K-char string with all conditions in first 10 chars: 99.99% fewer operations
Exercise 8 🔴 — String to []rune to String Round-trip
package main
import "fmt"
func transformText(s string) string {
runes := []rune(s) // allocation 1
for i, r := range runes {
runes[i] = unicode.ToUpper(r)
}
return string(runes) // allocation 2
}
func batchTransform(texts []string) []string {
result := make([]string, len(texts))
for i, t := range texts {
result[i] = transformText(t) // 2 allocations per string!
}
return result
}
func main() {
texts := make([]string, 10000)
for i := range texts { texts[i] = "hello world" }
_ = batchTransform(texts)
fmt.Println("done")
}
Optimized Solution
import (
"strings"
"unicode"
)
// Option 1: Use strings.Map (optimized internally, 1 allocation)
func transformText(s string) string {
return strings.Map(unicode.ToUpper, s)
}
// Option 2: Use strings.ToUpper (SIMD-optimized for common cases)
func transformText(s string) string {
return strings.ToUpper(s)
}
// Option 3: For ASCII-only: direct byte manipulation
func transformTextASCII(s string) string {
b := []byte(s)
for i, c := range b {
if c >= 'a' && c <= 'z' { b[i] = c - 32 }
}
return string(b) // only 1 allocation ([]byte) vs 2 for []rune approach
}
// strings.ToUpper: 1 alloc + SIMD = fastest
// strings.Map: 1 alloc = good
// []rune approach: 2 allocs = avoid
Exercise 9 🔴 — Counting Occurrences Without Index
package main
import "fmt"
func countOccurrences(s string, target rune) int {
runes := []rune(s) // unnecessary allocation!
count := 0
for _, r := range runes {
if r == target { count++ }
}
return count
}
func main() {
text := "Hello, 世界 — hello again!"
fmt.Println(countOccurrences(text, 'l')) // 4
fmt.Println(countOccurrences(text, '世')) // 1
}
Optimized Solution
import "strings"
// Option 1: range directly (no allocation)
func countOccurrences(s string, target rune) int {
count := 0
for _, r := range s { // no []rune needed
if r == target { count++ }
}
return count
}
// Option 2: For single-byte runes (ASCII), use strings.Count (SIMD)
func countByte(s string, b byte) int {
return strings.Count(s, string(b))
}
// Option 3: For multi-byte runes, still faster without []rune:
func countRune(s string, target rune) int {
return strings.Count(s, string(target))
// strings.Count is optimized with Rabin-Karp or Boyer-Moore
}
Exercise 10 🔴 — Greedy Rune Slice Pre-allocation
package main
import "fmt"
func parseCSV(line string) []string {
var result []string
var field []rune // re-allocated per call
for _, r := range line {
if r == ',' {
result = append(result, string(field))
field = field[:0] // clear but keep memory
} else {
field = append(field, r)
}
}
result = append(result, string(field))
return result
}
func main() {
for i := 0; i < 100000; i++ {
_ = parseCSV("alice,30,engineer,new york,active")
}
fmt.Println("done")
}
Optimized Solution
import "strings"
// Option 1: Use strings.Split (highly optimized)
func parseCSV(line string) []string {
return strings.Split(line, ",")
}
// Option 2: Use byte positions to avoid rune buffer
func parseCSVOpt(line string) []string {
var result []string
start := 0
for i, r := range line {
if r == ',' {
result = append(result, line[start:i]) // substring = zero copy!
start = i + 1 // comma is 1 byte, safe
}
}
result = append(result, line[start:])
return result
}
// Key optimizations:
// 1. No []rune buffer — use byte positions from range
// 2. Substring (zero copy) instead of string([]rune{...})
// 3. strings.Split uses this approach internally + pre-counts commas
Exercise 11 🔴 — Parallel String Processing for Large Corpus
package main
import (
"fmt"
"strings"
)
func wordCount(corpus []string) map[string]int {
result := map[string]int{}
for _, doc := range corpus {
for _, word := range strings.Fields(doc) {
result[strings.ToLower(word)]++
}
}
return result
}
func main() {
corpus := make([]string, 10000)
for i := range corpus {
corpus[i] = "The quick brown fox jumps over the lazy dog"
}
_ = wordCount(corpus)
fmt.Println("done")
}
Optimized Solution
import (
"runtime"
"strings"
"sync"
)
func wordCount(corpus []string) map[string]int {
n := runtime.GOMAXPROCS(0)
chunkSize := (len(corpus) + n - 1) / n
shards := make([]map[string]int, n)
var wg sync.WaitGroup
for i := 0; i < n; i++ {
start := i * chunkSize
end := start + chunkSize
if end > len(corpus) { end = len(corpus) }
if start >= len(corpus) { break }
shards[i] = map[string]int{}
wg.Add(1)
go func(shard map[string]int, docs []string) {
defer wg.Done()
for _, doc := range docs {
for _, word := range strings.Fields(doc) {
shard[strings.ToLower(word)]++
}
}
}(shards[i], corpus[start:end])
}
wg.Wait()
// Merge (single goroutine — map access is fast for merge)
result := make(map[string]int, 100)
for _, shard := range shards {
for k, v := range shard {
result[k] += v
}
}
return result
}
// Speedup: ~N× for CPU-bound text processing (N = GOMAXPROCS)
// For 10K docs: ~8x faster on 8-core machine
Summary Table
| Exercise | Problem | Fix | Impact |
| 1 | String += in range | strings.Builder / strings.ToUpper | 5-100x |
| 2 | []rune when range suffices | for range string (0 alloc) | 0 alloc per call |
| 3 | Manual split with []rune | strings.Fields / byte position substring | 3-5x |
| 4 | Repeated RuneCount | Cache runes once in struct | O(n)→O(1) |
| 5 | Double UTF-8 validation | Validate at boundary, check RuneError inline | 2x |
| 6 | Intermediate []rune for masking | strings.Builder + byte positions | 50% fewer allocs |
| 7 | 3 passes over string | Single pass with early exit | 3x + early exit |
| 8 | []rune→string round-trip | strings.Map / strings.ToUpper | 2 allocs→1 |
| 9 | []rune for counting | for range string directly | 0 alloc |
| 10 | []rune buffer for CSV | Byte positions + substrings | 0 alloc for fields |
| 11 | Serial corpus processing | Parallel goroutines + merge | N× speedup |