Multiset / Bag — Practice Tasks¶
All tasks must be solved in Go, Java, and Python.
Beginner Tasks¶
Task 1: Character Frequency Counter¶
Implement char_frequency(s) that returns a map/dict from each character to its count in s.
Input: "banana" Output: {b: 1, a: 3, n: 2}
Hint: Iterate the string and increment per character.
Go¶
func charFrequency(s string) map[rune]int {
freq := map[rune]int{}
for _, r := range s {
freq[r]++
}
return freq
}
Java¶
import java.util.HashMap;
import java.util.Map;
public static Map<Character, Integer> charFrequency(String s) {
Map<Character, Integer> freq = new HashMap<>();
for (char c : s.toCharArray()) freq.merge(c, 1, Integer::sum);
return freq;
}
Python¶
from collections import Counter
def char_frequency(s: str) -> dict[str, int]:
return dict(Counter(s))
Task 2: Anagram Detection¶
Given two strings, return true iff one is an anagram of the other.
Input: "listen", "silent" Output: true
Hint: Two strings are anagrams iff their character multisets are equal.
Go¶
func isAnagram(a, b string) bool {
if len(a) != len(b) {
return false
}
counts := map[rune]int{}
for _, r := range a {
counts[r]++
}
for _, r := range b {
counts[r]--
if counts[r] < 0 {
return false
}
}
return true
}
Java¶
public static boolean isAnagram(String a, String b) {
if (a.length() != b.length()) return false;
int[] counts = new int[26];
for (int i = 0; i < a.length(); i++) {
counts[a.charAt(i) - 'a']++;
counts[b.charAt(i) - 'a']--;
}
for (int c : counts) if (c != 0) return false;
return true;
}
Python¶
from collections import Counter
def is_anagram(a: str, b: str) -> bool:
return Counter(a) == Counter(b)
Task 3: First Non-Repeating Character¶
Return the index of the first character in s whose count is exactly 1. Return -1 if no such character exists.
Input: "leetcode" Output: 0 (the 'l').
Hint: Count pass, then linear scan.
Go¶
func firstUnique(s string) int {
counts := map[rune]int{}
for _, r := range s {
counts[r]++
}
for i, r := range s {
if counts[r] == 1 {
return i
}
}
return -1
}
Java¶
public static int firstUnique(String s) {
int[] counts = new int[26];
for (char c : s.toCharArray()) counts[c - 'a']++;
for (int i = 0; i < s.length(); i++) {
if (counts[s.charAt(i) - 'a'] == 1) return i;
}
return -1;
}
Python¶
from collections import Counter
def first_unique(s: str) -> int:
counts = Counter(s)
for i, ch in enumerate(s):
if counts[ch] == 1:
return i
return -1
Task 4: Multiset Equality¶
Implement multiset_equal(a, b) for two lists of integers without sorting them.
Input: [3,1,3,2], [2,3,1,3] Output: true
Hint: Build two counters and compare.
Go¶
func multisetEqual(a, b []int) bool {
if len(a) != len(b) {
return false
}
counts := map[int]int{}
for _, v := range a {
counts[v]++
}
for _, v := range b {
counts[v]--
if counts[v] < 0 {
return false
}
}
return true
}
Java¶
import java.util.HashMap;
import java.util.Map;
public static boolean multisetEqual(int[] a, int[] b) {
if (a.length != b.length) return false;
Map<Integer, Integer> counts = new HashMap<>();
for (int v : a) counts.merge(v, 1, Integer::sum);
for (int v : b) {
int next = counts.getOrDefault(v, 0) - 1;
if (next < 0) return false;
counts.put(v, next);
}
return true;
}
Python¶
from collections import Counter
def multiset_equal(a: list[int], b: list[int]) -> bool:
return Counter(a) == Counter(b)
Task 5: Total vs Distinct¶
Implement two helpers: total(counter) returns the sum of counts; distinct(counter) returns the number of keys with count > 0.
Input: {a:3, b:0, c:2} Output: total = 5, distinct = 2
Hint: Iterate values; skip zeros for distinct.
Go¶
func total(c map[string]int) int {
s := 0
for _, v := range c {
s += v
}
return s
}
func distinct(c map[string]int) int {
d := 0
for _, v := range c {
if v > 0 {
d++
}
}
return d
}
Java¶
public static int total(Map<String, Integer> c) {
return c.values().stream().mapToInt(Integer::intValue).sum();
}
public static int distinct(Map<String, Integer> c) {
return (int) c.values().stream().filter(v -> v > 0).count();
}
Python¶
def total(c: dict[str, int]) -> int:
return sum(c.values())
def distinct(c: dict[str, int]) -> int:
return sum(1 for v in c.values() if v > 0)
Intermediate Tasks¶
Task 6: Group Anagrams¶
Group a list of strings such that all anagrams land in the same group.
Input: ["eat","tea","tan","ate","nat","bat"] Output: [["eat","tea","ate"],["tan","nat"],["bat"]]
Hint: Use the sorted string or a frozenset of Counter.items() as the signature.
Go¶
import "sort"
func groupAnagrams(strs []string) [][]string {
groups := map[string][]string{}
for _, s := range strs {
runes := []rune(s)
sort.Slice(runes, func(i, j int) bool { return runes[i] < runes[j] })
key := string(runes)
groups[key] = append(groups[key], s)
}
out := make([][]string, 0, len(groups))
for _, g := range groups {
out = append(out, g)
}
return out
}
Java¶
import java.util.*;
public static List<List<String>> groupAnagrams(String[] strs) {
Map<String, List<String>> groups = new HashMap<>();
for (String s : strs) {
char[] chars = s.toCharArray();
Arrays.sort(chars);
groups.computeIfAbsent(new String(chars), k -> new ArrayList<>()).add(s);
}
return new ArrayList<>(groups.values());
}
Python¶
from collections import defaultdict
def group_anagrams(strs: list[str]) -> list[list[str]]:
groups: dict[str, list[str]] = defaultdict(list)
for s in strs:
groups["".join(sorted(s))].append(s)
return list(groups.values())
Task 7: Find All Anagrams in a String¶
Return all start indices of p's anagrams in s.
Input: s = "cbaebabacd", p = "abc" Output: [0, 6]
Hint: Sliding window of size len(p) with two counters and a matched key count. See middle.md for the full pattern.
Go¶
(See middle.md for full implementation of the sliding-window approach.)
Java¶
(See middle.md.)
Python¶
from collections import Counter
def find_anagrams(s: str, p: str) -> list[int]:
if len(s) < len(p):
return []
need = Counter(p)
have: Counter[str] = Counter()
matched = 0
result: list[int] = []
for r, ch in enumerate(s):
have[ch] += 1
if have[ch] == need[ch]:
matched += 1
if r >= len(p):
left = s[r - len(p)]
if have[left] == need[left]:
matched -= 1
have[left] -= 1
if matched == len(need):
result.append(r - len(p) + 1)
return result
Task 8: Longest Substring with At Most K Distinct Characters¶
Return the length of the longest substring of s with at most k distinct characters.
Input: s = "eceba", k = 2 Output: 3 ("ece")
Hint: Sliding window with a multiset; shrink until distinct <= k.
Go¶
func lengthOfLongestSubstringKDistinct(s string, k int) int {
counts := map[byte]int{}
best, l := 0, 0
for r := 0; r < len(s); r++ {
counts[s[r]]++
for len(counts) > k {
counts[s[l]]--
if counts[s[l]] == 0 {
delete(counts, s[l])
}
l++
}
if r-l+1 > best {
best = r - l + 1
}
}
return best
}
Java¶
import java.util.HashMap;
import java.util.Map;
public static int lengthOfLongestSubstringKDistinct(String s, int k) {
Map<Character, Integer> counts = new HashMap<>();
int best = 0, l = 0;
for (int r = 0; r < s.length(); r++) {
counts.merge(s.charAt(r), 1, Integer::sum);
while (counts.size() > k) {
char c = s.charAt(l++);
if (counts.merge(c, -1, Integer::sum) == 0) counts.remove(c);
}
best = Math.max(best, r - l + 1);
}
return best;
}
Python¶
from collections import defaultdict
def length_of_longest_substring_k_distinct(s: str, k: int) -> int:
counts: dict[str, int] = defaultdict(int)
best = l = 0
for r, ch in enumerate(s):
counts[ch] += 1
while len(counts) > k:
counts[s[l]] -= 1
if counts[s[l]] == 0:
del counts[s[l]]
l += 1
best = max(best, r - l + 1)
return best
Task 9: Subarray Sum Equals K¶
Return the number of contiguous subarrays whose sum is exactly k.
Input: nums = [1, 1, 1], k = 2 Output: 2
Hint: Prefix sums with a counter of seen prefix sums.
Go¶
func subarraySum(nums []int, k int) int {
prefixCounts := map[int]int{0: 1}
curSum, result := 0, 0
for _, v := range nums {
curSum += v
if c, ok := prefixCounts[curSum-k]; ok {
result += c
}
prefixCounts[curSum]++
}
return result
}
Java¶
import java.util.HashMap;
import java.util.Map;
public static int subarraySum(int[] nums, int k) {
Map<Integer, Integer> prefixCounts = new HashMap<>();
prefixCounts.put(0, 1);
int curSum = 0, result = 0;
for (int v : nums) {
curSum += v;
result += prefixCounts.getOrDefault(curSum - k, 0);
prefixCounts.merge(curSum, 1, Integer::sum);
}
return result;
}
Python¶
from collections import Counter
def subarray_sum(nums: list[int], k: int) -> int:
prefix_counts: Counter[int] = Counter({0: 1})
cur_sum = result = 0
for v in nums:
cur_sum += v
result += prefix_counts[cur_sum - k]
prefix_counts[cur_sum] += 1
return result
Task 10: Multiset Algebra Library¶
Implement sum, diff, intersect, union on two multisets. Verify with sample inputs.
Input: A = {x:3, y:1}, B = {x:1, z:2} Output: sum = {x:4, y:1, z:2}, diff = {x:2, y:1}, intersect = {x:1}, union = {x:3, y:1, z:2}
Hint: See middle.md. In Python this is one-line per op using Counter operators.
Go¶
type MS map[string]int
func (a MS) Union(b MS) MS {
out := MS{}
for k, v := range a {
out[k] = v
}
for k, v := range b {
if v > out[k] {
out[k] = v
}
}
return out
}
Java¶
public static Map<String, Integer> union(Map<String, Integer> a, Map<String, Integer> b) {
Map<String, Integer> out = new HashMap<>(a);
b.forEach((k, v) -> out.merge(k, v, Math::max));
return out;
}
Python¶
# All four operators built into Counter:
a + b # sum
a - b # difference (drops <=0)
a & b # intersection
a | b # union
Advanced Tasks¶
Task 11: Top-K Frequent Elements (Bucket Sort)¶
Given nums and k, return the k most frequent elements in O(n) time.
Input: [1,1,1,2,2,3], k = 2 Output: [1, 2]
Hint: Bucket sort by count. Full solution in interview.md.
Task 12: Sliding-Window Median¶
Given an array nums and window size k, return the median of every window.
Input: [1,3,-1,-3,5,3,6,7], k = 3 Output: [1, -1, -1, 3, 5, 6]
Hint: Two sorted multisets (low half, high half), rebalance on insert/remove. O(n log k).
Python¶
from sortedcontainers import SortedList
def median_sliding_window(nums: list[int], k: int) -> list[float]:
window = SortedList(nums[:k])
result = []
for i in range(k, len(nums) + 1):
if k % 2:
result.append(float(window[k // 2]))
else:
result.append((window[k // 2 - 1] + window[k // 2]) / 2)
if i < len(nums):
window.remove(nums[i - k])
window.add(nums[i])
return result
(Go and Java solutions follow the same recipe — manage two TreeMultisets or balanced BSTs; full multi-page implementations omitted here.)
Task 13: Minimum Window Substring¶
Return the smallest substring of s containing every character of t (with multiplicity).
Input: s = "ADOBECODEBANC", t = "ABC" Output: "BANC"
Hint: Two counters (need, have) plus a matched integer for matched keys. O(n).
Python¶
from collections import Counter
def min_window(s: str, t: str) -> str:
if not t or not s:
return ""
need = Counter(t)
have: Counter[str] = Counter()
matched, required = 0, len(need)
best = (float("inf"), 0, 0)
l = 0
for r, ch in enumerate(s):
have[ch] += 1
if have[ch] == need[ch]:
matched += 1
while matched == required:
if r - l + 1 < best[0]:
best = (r - l + 1, l, r)
have[s[l]] -= 1
if have[s[l]] < need[s[l]]:
matched -= 1
l += 1
return "" if best[0] == float("inf") else s[best[1]:best[2] + 1]
(Go and Java mirror this with explicit maps and the same matched invariant.)
Task 14: Misra-Gries Heavy Hitters¶
Implement Misra-Gries with parameter k: after streaming all items, return every element appearing more than N/k times (after a verification pass).
Input: stream of 1M ints, k = 10 Output: every value with > 100K occurrences.
Hint: Maintain at most k-1 candidate counters; on a miss when full, decrement all. Then verify by re-streaming.
(Full implementations in senior.md.)
Task 15: Distributed Counter with Sharded HINCRBY¶
Design (pseudocode + skeleton client code) a multi-worker view-counter that: 1. Buffers per-key counts in memory for at most 1 second. 2. Flushes a single pipelined HINCRBY per key per worker. 3. Reads the global value with a single HGET (or HGETALL for small hashes).
Input: 100 workers, 10M events/sec aggregate. Output: per-key counts in Redis within ~1 second of each event.
Hint: Local map[string]int64 flushed by a goroutine/thread every second; one pipelined Redis call per flush. Watch out for memory growth on workers — bound the local map size and force-flush on hitting the cap.
Benchmark Task¶
Compare hand-rolled map[T]int vs. the language's idiomatic library counter on a stream of 1M random integers drawn from a Zipf distribution. Report wall-clock time and peak memory.
Go¶
package main
import (
"fmt"
"math/rand"
"time"
)
func main() {
n := 1_000_000
data := make([]int, n)
for i := range data {
data[i] = rand.Intn(10_000)
}
start := time.Now()
counts := map[int]int{}
for _, v := range data {
counts[v]++
}
fmt.Printf("Go map[int]int: %.3f ms, distinct=%d\n",
float64(time.Since(start).Microseconds())/1000, len(counts))
}
Java¶
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
public class Bench {
public static void main(String[] args) {
int n = 1_000_000;
int[] data = new int[n];
Random r = new Random(0);
for (int i = 0; i < n; i++) data[i] = r.nextInt(10_000);
long start = System.nanoTime();
Map<Integer, Integer> counts = new HashMap<>();
for (int v : data) counts.merge(v, 1, Integer::sum);
long elapsed = System.nanoTime() - start;
System.out.printf("Java HashMap: %.3f ms, distinct=%d%n",
elapsed / 1_000_000.0, counts.size());
}
}
Python¶
import random, time
from collections import Counter
random.seed(0)
n = 1_000_000
data = [random.randrange(10_000) for _ in range(n)]
start = time.perf_counter()
counts = Counter(data)
elapsed = (time.perf_counter() - start) * 1000
print(f"Python Counter: {elapsed:.3f} ms, distinct={len(counts)}")
Expect: Python's C-implemented Counter ~2-3x faster than a hand-rolled for v in data: counts[v] += 1 loop. Go and Java map increments are already optimal for this workload. Zipf distributions stress the hash hot-key path — useful to confirm that hash spreading is doing its job.