Iterating Strings — Find the Bug
Bug 1 🟢 — Using Byte Length as Character Count
package main
import "fmt"
// displayName prints "Hello, <name>", or "Name too long!" when len(name)
// exceeds 20. len applied to a string counts bytes, not characters.
func displayName(name string) {
if len(name) > 20 { // byte length of name
fmt.Println("Name too long!")
return
}
fmt.Println("Hello,", name)
}
// main calls displayName with an ASCII name and two Japanese names to show
// that the length limit is applied to bytes rather than characters.
func main() {
	displayName("Alice")   // OK: 5 chars, 5 bytes
	displayName("山田太郎")    // OK here: 4 chars but 12 bytes — still under the 20-byte limit
	displayName("山田太郎左衛門") // prints "Name too long!" for a 7-char name (7 × 3 = 21 bytes)!
}
Solution
`len("山田太郎")` returns 12 (4 Japanese chars × 3 bytes each = 12 bytes), not 4. The validation uses bytes instead of characters. **Fix:** import "unicode/utf8"
// displayName greets name, unless name is longer than 20 characters (runes),
// in which case it prints "Name too long!" instead.
func displayName(name string) {
	const maxChars = 20
	if n := utf8.RuneCountInString(name); n <= maxChars {
		fmt.Println("Hello,", name)
		return
	}
	fmt.Println("Name too long!")
}
Bug 2 🟢 — Byte Access Returns Wrong Character
package main
import "fmt"
// getInitial returns string(name[0]): the first byte of name converted to a
// string. Indexing a string yields a byte, not a character.
func getInitial(name string) string {
return string(name[0]) // get first character (actually: first byte)
}
// main prints the result of getInitial for an ASCII and a non-ASCII name.
func main() {
fmt.Println(getInitial("Alice")) // "A" — correct
fmt.Println(getInitial("Étienne")) // "Ã" — wrong! É is U+00C9 (UTF-8: 0xC3 0x89), first byte is 0xC3
}
Solution
`name[0]` returns the first **byte** (0xC3), not the first character. Converting that byte with `string(name[0])` treats its value as a code point, U+00C3, which is "Ã", not 'É'. **Fix:** func getInitial(name string) string {
for _, r := range name {
return string(r) // returns first rune
}
return ""
}
// Or:
runes := []rune(name)
if len(runes) == 0 { return "" }
return string(runes[0])
Bug 3 🟢 — String Slicing Mid-Rune
package main
import "fmt"
// preview returns s unchanged when len(s) <= n; otherwise it returns the
// first n bytes of s followed by "...". Both len(s) and s[:n] work in bytes.
func preview(s string, n int) string {
if len(s) <= n {
return s
}
return s[:n] + "..." // byte slice: can end in the middle of a multi-byte character
}
// main prints preview for an ASCII string and for a string containing CJK characters.
func main() {
fmt.Println(preview("Hello, World!", 7)) // "Hello, ..." — correct
fmt.Println(preview("Hello, 世界!", 8)) // garbled! — 世 occupies bytes 7-9, so s[:8] keeps only its first byte
}
Solution
`s[:8]` cuts inside the 3-byte character '世' (bytes 7-9), producing invalid UTF-8. **Fix:** func preview(s string, n int) string {
count := 0
for i := range s {
if count == n { return s[:i] + "..." }
count++
}
return s // n or fewer chars: nothing to truncate
}
Bug 4 🟡 — Reversing String by Bytes
package main
import "fmt"
// reverseString converts s to a byte slice, reverses the bytes in place,
// and converts the result back to a string.
func reverseString(s string) string {
b := []byte(s) // one element per byte, not per character
for i, j := 0, len(b)-1; i < j; i, j = i+1, j-1 {
b[i], b[j] = b[j], b[i] // swap the outermost pair, then move inward
}
return string(b)
}
// main prints reverseString for an ASCII string and for a two-character CJK string.
func main() {
fmt.Println(reverseString("Hello")) // "olleH" — OK for ASCII (one byte per character)
fmt.Println(reverseString("世界")) // garbled! the 6 bytes are reversed, not the 2 runes
}
Solution
Reversing bytes of a multi-byte UTF-8 string scrambles the encoding. Each multi-byte character's bytes are reversed individually AND their positions relative to each other are reversed, producing invalid UTF-8. **Fix:** func reverseString(s string) string {
runes := []rune(s)
for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
runes[i], runes[j] = runes[j], runes[i]
}
return string(runes)
}
Bug 5 🟡 — Treating Byte Index as Character Index
package main
import "fmt"
// splitAt returns s[:pos] and s[pos:]. pos is used directly as a byte offset
// into s, although the comment below describes it as a character position.
func splitAt(s string, pos int) (string, string) {
// Split string at character position pos
return s[:pos], s[pos:] // string slicing is by byte offset
}
// main splits "Hello, 世界" at offsets 7 and 8 and prints the pieces.
func main() {
s := "Hello, 世界"
left, right := splitAt(s, 7)
fmt.Println(left) // expected "Hello, "
fmt.Println(right) // expected "世界"
// But with pos=7, s[7] is the start of '世' (correct only by luck!)
// Try with pos=8: no panic (8 is within len(s) == 13), but the split lands
// mid-rune and both halves contain invalid UTF-8
left2, right2 := splitAt(s, 8) // garbled: '世' is cut after its first byte
fmt.Println(left2, right2)
}
Solution
`pos=7` happens to work here because '世' starts at byte 7. But `pos=8` splits inside '世'. The function uses byte positions, not character positions. **Fix:** func splitAt(s string, charPos int) (string, string) {
count := 0
for i := range s {
if count == charPos {
return s[:i], s[i:]
}
count++
}
return s, ""
}
Bug 6 🟡 — Invalid UTF-8 Goes Undetected (Without Range)
package main
import (
"fmt"
"unicode/utf8"
)
// processRunes decodes s one rune at a time with utf8.DecodeRuneInString and
// prints each decoded rune with %c, advancing by the decoded size.
func processRunes(s string) {
i := 0
for i < len(s) {
r, size := utf8.DecodeRuneInString(s[i:])
if r == utf8.RuneError && size == 0 {
break // WRONG: size==0 only when s[i:] is empty, which i < len(s) already rules out
// For invalid UTF-8: size==1, not 0! So this check never detects an invalid byte.
}
fmt.Printf("%c", r) // for an invalid byte, r is utf8.RuneError (U+FFFD)
i += size
}
}
// main runs processRunes on a string containing one invalid UTF-8 byte.
func main() {
processRunes("A\xffB") // \xff is invalid UTF-8
// Expected: A?B
// Actual: A, U+FFFD (the replacement character), B — the invalid byte decodes
// as (RuneError, 1), the size==0 check never fires, and the loop still ends
}
Solution
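`utf8.DecodeRuneInString` returns `(RuneError, 1)` for invalid UTF-8 (not `size=0`). `size==0` only when the input is empty, which the loop condition `i < len(s)` already rules out. The check is therefore dead code: the loop still terminates (every step advances by at least 1 byte), but invalid bytes are never detected and are printed as U+FFFD instead of "?". (A variant that skipped `i += size` on errors would loop forever.) **Fix:** func processRunes(s string) {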
for i := 0; i < len(s); {
r, size := utf8.DecodeRuneInString(s[i:])
if r == utf8.RuneError && size == 1 {
fmt.Print("?") // invalid byte
} else {
fmt.Printf("%c", r)
}
i += size // always advances by at least 1
}
}
// Or simply use for range (handles this correctly automatically)
Bug 7 🟡 — String Concatenation in Range (Quadratic)
package main
import "fmt"
// removeSpaces returns s without its ' ' runes, building the result by
// repeated string concatenation inside the loop.
func removeSpaces(s string) string {
result := ""
for _, r := range s {
if r != ' ' {
result += string(r) // O(n) per concatenation!
}
}
return result
}
// main runs removeSpaces on a sample sentence and prints the byte length of the result.
func main() {
long := "This is a very long string with many spaces in it " +
"and it goes on and on " +
"making this O(n^2)"
fmt.Println(len(removeSpaces(long)))
}
Solution
Each `result += string(r)` creates a new string and copies all previous characters. For n characters, this is O(n²) total work. **Fix:** import "strings"
// removeSpaces returns a copy of s with every ' ' (U+0020) rune removed.
// Runes are appended to a strings.Builder, so the pass does O(n) total work.
func removeSpaces(s string) string {
	var out strings.Builder
	out.Grow(len(s)) // pre-allocate room for the input's byte length
	for _, ch := range s {
		if ch == ' ' {
			continue
		}
		out.WriteRune(ch)
	}
	return out.String()
}
// O(n) total work
Bug 8 🔴 — Incorrect Rune Comparison Using Byte Value
package main
import "fmt"
// containsNonASCII reports whether s contains at least one rune whose value
// is greater than 127, i.e. outside the ASCII range.
func containsNonASCII(s string) bool {
	const maxASCII = 127
	for _, ch := range s {
		if ch <= maxASCII {
			continue
		}
		return true
	}
	return false
}
// This function looks correct... but consider:
// filterASCII builds a string from the bytes of s whose value is <= 127,
// appending them one at a time with string concatenation.
func filterASCII(s string) string {
result := ""
for _, b := range []byte(s) { // iterates BYTES not runes
if b <= 127 {
result += string(b) // fragile: b is a byte, not a rune — string(b) is the same character only because b <= 127 here
}
}
return result
}
// main prints filterASCII applied to a string with one non-ASCII character.
func main() {
s := "Héllo"
fmt.Println(filterASCII(s)) // prints "Hllo": both bytes of 'é' (0xC3, 0xA9) are > 127 and dropped
}
Solution
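Iterating `[]byte(s)` gives raw bytes. For 'é' (U+00E9, encoded as 0xC3 0xA9), byte 0xC3 (195) > 127, so it's dropped. Byte 0xA9 (169) > 127, also dropped. Because every byte of a multi-byte UTF-8 sequence is ≥ 0x80, the output ("Hllo") happens to match the rune-based version — but only for this exact threshold. `string(b)` turns a byte into the code point with that value, so any filter that let a byte above 127 through would emit the wrong character, and `result +=` in a loop is quadratic. Iterating runes states the intent directly and stays correct if the condition changes. **Fix:** func filterASCII(s string) string {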
var sb strings.Builder
for _, r := range s { // range over string gives runes
if r <= 127 { // filter non-ASCII runes
sb.WriteRune(r)
}
}
return sb.String()
}
Bug 9 🔴 — Memory Leak via String Substring
package main
import "fmt"
// cache accumulates the headers collected by processLargeFiles.
var cache []string
// processLargeFiles calls readFile for each path and appends the first
// 1024 bytes of the returned content to cache.
func processLargeFiles(filePaths []string) {
for _, path := range filePaths {
content := readFile(path) // reads ~100MB file into string
header := content[:1024] // extract first 1024 bytes
cache = append(cache, header)
// BUG: header shares backing array with 100MB content!
// content can't be GC'd as long as header is in cache!
}
}
// readFile is a stand-in for real file I/O: it ignores path and returns a
// 100 MiB string of zero bytes.
func readFile(path string) string {
	const size = 100 << 20 // 100 * 1024 * 1024 bytes
	buf := make([]byte, size)
	return string(buf)
}
// main processes three placeholder paths and prints how many headers were cached.
func main() {
processLargeFiles([]string{"a", "b", "c"})
fmt.Println("Cache entries:", len(cache))
// 300MB of file data still in memory!
}
Solution
`content[:1024]` is a substring sharing the same backing array. The 100MB backing array is kept alive as long as `header` is referenced. **Fix:** import "strings"
// processLargeFiles reads every file in filePaths and appends a private copy
// of its header (at most the first 1024 bytes) to cache.
//
// strings.Clone gives the header its own backing array, so the large content
// string is not kept alive by the cache entry.
func processLargeFiles(filePaths []string) {
	const headerSize = 1024
	for _, path := range filePaths {
		content := readFile(path)
		// Clamp the slice bound: content[:1024] would panic for content
		// shorter than 1024 bytes.
		n := len(content)
		if n > headerSize {
			n = headerSize
		}
		header := strings.Clone(content[:n]) // new allocation of n bytes
		cache = append(cache, header)
		// content can now be GC'd — header has its own backing array
	}
}
Bug 10 🔴 — Rune vs String Comparison
package main
import "fmt"
// splitOnDelimiter is meant to split s at every occurrence of delim and
// return the pieces. It ranges over s rune by rune, comparing each rune with
// delim, and tracks the start of the current piece as a byte offset.
func splitOnDelimiter(s string, delim string) []string {
var parts []string
start := 0 // byte offset where the current piece begins
for i, r := range s {
if r == delim { // BUG: r is rune, delim is string — compile error!
parts = append(parts, s[start:i])
start = i + len(delim)
}
}
parts = append(parts, s[start:]) // trailing piece after the last delimiter
return parts
}
// main prints the result of splitting a comma-separated string that
// contains an empty field.
func main() {
fmt.Println(splitOnDelimiter("a,b,,c", ","))
}
Solution
`r == delim` is a type mismatch — `r` is `rune` and `delim` is `string`. This won't compile. Additionally, the loop inspects one rune at a time, so it can only ever match a delimiter that is a single rune; `i + len(delim)` itself is fine, because `start` is a byte offset and `len(delim)` is the delimiter's length in bytes. **Fix:** func splitOnDelimiter(s string, delim string) []string {
delimRunes := []rune(delim)
if len(delimRunes) == 1 {
delimRune := delimRunes[0]
var parts []string
start := 0
for i, r := range s {
if r == delimRune { // compare rune to rune
parts = append(parts, s[start:i])
start = i + len(string(delimRune)) // byte length of delim rune
}
}
return append(parts, s[start:])
}
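// Multi-rune (or empty) delimiter: use strings.Split.
// NOTE: this needs `import "strings"` at the top of the file — an import
// declaration is not allowed inside a function body.
return strings.Split(s, delim)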
}
Bug 11 🔴 — Goroutine Leak on String Reader
package main
import (
"fmt"
"io"
"strings"
"time"
)
// asyncProcess starts a goroutine that reads s rune by rune and sends each
// rune on result, closing result when the reader reports io.EOF.
// The goroutine has no cancellation path: the only way it exits is by
// reaching io.EOF, so every send must eventually succeed.
func asyncProcess(s string, result chan<- rune) {
r := strings.NewReader(s)
go func() {
for {
ch, _, err := r.ReadRune()
if err == io.EOF {
close(result) // the sender closes the channel once all runes are sent
return
}
result <- ch // blocks while the channel has no free slot and no receiver
}
}()
}
// main consumes only the first few runes produced by asyncProcess and then
// stops receiving, leaving the producer goroutine blocked on a send.
func main() {
	// The buffer holds 10 runes; the input below has 25, so the producer
	// cannot finish on buffer space alone once main stops receiving.
	result := make(chan rune, 10)
	asyncProcess("Hello, leaking goroutine!", result)
	// Only read first 3 runes, then stop.
	// Ranging over a channel yields a single value per iteration, so the
	// position is counted by hand.
	count := 0
	for r := range result {
		if count >= 3 {
			break
		}
		fmt.Println(string(r))
		count++
	}
	time.Sleep(time.Second)
	// Goroutine is blocked trying to send to full channel — LEAKED!
}
Solution
The goroutine in `asyncProcess` tries to send all runes to `result`. When the caller stops reading after 3 items, the channel buffer fills up and the goroutine blocks forever — a goroutine leak. **Fix:** Use `context.Context` for cancellation or simply use `for range` directly: func main() {
count := 0
for _, r := range "Hello" {
if count >= 3 { break }
fmt.Println(string(r))
count++
}
}
// No goroutine needed at all for simple string processing
Bug 12 🔴 — Wrong String Position After Multi-byte Rune
package main
import "fmt"
// replaceFirst is meant to replace the first occurrence of the rune target
// in s with the string replacement, returning s unchanged when target does
// not occur. i is the byte offset at which the matching rune starts.
func replaceFirst(s string, target rune, replacement string) string {
for i, r := range s {
if r == target {
// Take everything before i, add replacement, add everything after i
return s[:i] + replacement + s[i+1:] // BUG: i+1 is wrong for multi-byte!
}
}
return s
}
// main prints replaceFirst for a single-byte target and for a multi-byte target.
func main() {
fmt.Println(replaceFirst("Hello", 'l', "L")) // "HeLlo" — seems correct ('l' is 1 byte, so i+1 is right)
fmt.Println(replaceFirst("Hello, 世界", '世', "WORLD"))
// BUG: 世 is 3 bytes, but we skip only 1 byte with s[i+1:]
// s[i+1:] starts at byte i+1, which is inside '世'!
// Output: "Hello, WORLD", then the 2 leftover bytes of '世' (invalid UTF-8), then "界"
}
Solution
`s[i+1:]` skips only 1 byte after position `i`, but multi-byte runes take 2-4 bytes. We must skip the entire rune's bytes. **Fix:** import "unicode/utf8"
// replaceFirst returns s with the first occurrence of the rune target
// replaced by replacement. If target does not occur, s is returned unchanged.
//
// The number of bytes to skip is measured from the encoding actually present
// in s at the match rather than from utf8.RuneLen(target). The two differ
// when target is utf8.RuneError and the match is an invalid byte: range
// reports U+FFFD for that single byte, while RuneLen(U+FFFD) is 3, which
// would skip too far and could slice past the end of s.
func replaceFirst(s string, target rune, replacement string) string {
	for i, r := range s {
		if r != target {
			continue
		}
		_, size := utf8.DecodeRuneInString(s[i:]) // width of the rune as stored in s: 1-4 bytes
		return s[:i] + replacement + s[i+size:]
	}
	return s
}
// replaceFirst("Hello, 世界", '世', "WORLD") -> "Hello, WORLD界"