Commit 8de8a62

bryfry <bryon@fryer.io>
2026-01-07 05:49:14
Code review improvements: bug fixes, cleanup, and documentation
Critical bug fixes: - Fix gzip flag that was overwriting Verbose config (main.go:57) - Fix data race in Pipeline.QueueLength() by using mutex-protected stats - Fix insecure certificate file permissions (now 0600 for private keys) Code cleanup: - Remove unused RecordingReader type (dead code) - Replace custom cloneHeaders with stdlib http.Header.Clone() Improvements: - Add Config.Validate() method with comprehensive validation - Add size limit checking for request/response bodies (MaxResourceSize) - Add ErrResourceTooLarge error for oversized resources Documentation: - Add package-level documentation to all packages - Document thread-safety guarantees - Add usage examples Test coverage: - config: 90.5% (added 7 validation test functions) - models: 100% - pipeline: 98.2% 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 64ccb28
cmd/gowarcprox/main.go
@@ -54,7 +54,7 @@ func init() {
 		"WARC filename prefix")
 	rootCmd.Flags().Int64Var(&cfg.WARCSize, "size", cfg.WARCSize,
 		"WARC file size limit in bytes")
-	rootCmd.Flags().BoolVarP(&cfg.Verbose, "gzip", "z", cfg.WARCCompression == "gzip",
+	rootCmd.Flags().BoolVarP(&cfg.GzipEnabled, "gzip", "z", cfg.GzipEnabled,
 		"Compress WARC files with gzip")
 	rootCmd.Flags().StringVarP(&cfg.DigestAlgorithm, "digest", "g", cfg.DigestAlgorithm,
 		"Digest algorithm: sha1, sha256, blake3")
@@ -93,6 +93,18 @@ func init() {
 }
 
 func run(cmd *cobra.Command, args []string) error {
+	// Apply gzip flag to compression setting
+	if cfg.GzipEnabled {
+		cfg.WARCCompression = "gzip"
+	} else {
+		cfg.WARCCompression = ""
+	}
+
+	// Validate configuration
+	if err := cfg.Validate(); err != nil {
+		return fmt.Errorf("invalid configuration: %w", err)
+	}
+
 	// Setup logger
 	logger := setupLogger(cfg)
 
internal/certauth/certauth.go
@@ -1,3 +1,19 @@
+// Package certauth provides certificate authority functionality for HTTPS MITM proxying.
+//
+// The CertificateAuthority generates and caches TLS certificates for intercepted
+// HTTPS connections. When a client makes an HTTPS request through the proxy,
+// a certificate is dynamically generated (or retrieved from cache) for the
+// target hostname, allowing the proxy to decrypt, record, and re-encrypt traffic.
+//
+// Certificates are stored in an in-memory cache (using sync.Map for thread-safe
+// access) and optionally persisted to disk for debugging or reuse across restarts.
+// The CA certificate itself is either loaded from disk or generated on first run.
+//
+// Thread-safety:
+//   - Certificate generation uses a mutex with double-check locking to prevent
+//     multiple goroutines from generating certificates for the same hostname.
+//   - The cert cache uses sync.Map for lock-free reads in the common case.
+//   - Certificate files are written with 0600 permissions for security.
 package certauth
 
 import (
@@ -166,8 +182,9 @@ func (ca *CertificateAuthority) createCA() error {
 	}
 	ca.caCert = cert
 
-	// Save to file
-	f, err := os.Create(ca.caFile)
+	// Save to file with restricted permissions (0600 - owner read/write only)
+	// because the file contains a private key
+	f, err := os.OpenFile(ca.caFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600)
 	if err != nil {
 		return fmt.Errorf("failed to create CA file: %w", err)
 	}
@@ -298,7 +315,9 @@ func (ca *CertificateAuthority) saveCertToDisk(hostname string, certPEM, keyPEM
 	// Sanitize hostname for filename
 	filename := filepath.Join(ca.certsDir, hostname+".pem")
 
-	f, err := os.Create(filename)
+	// Use restricted permissions (0600 - owner read/write only)
+	// because the file contains a private key
+	f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600)
 	if err != nil {
 		return fmt.Errorf("failed to create cert file: %w", err)
 	}
internal/models/recordedurl.go
@@ -1,3 +1,18 @@
+// Package models defines data structures used throughout gowarcprox.
+//
+// The primary type is RecordedURL, which captures all information about
+// an HTTP request/response pair as it flows through the proxy. This includes:
+//   - Request: URL, method, headers, body
+//   - Response: status, headers, body
+//   - Metadata: timing, addresses, digests, content info
+//   - Warcprox-Meta: optional per-request configuration from client
+//
+// The package also defines supporting types for deduplication (DedupInfo),
+// statistics bucketing (StatsBucketDef), and blocking rules (BlockRule).
+//
+// RecordedURL objects are created by the proxy handler, flow through the
+// pipeline, and are consumed by processors like the WARC writer and
+// deduplication checker.
 package models
 
 import (
internal/pipeline/pipeline.go
@@ -1,3 +1,23 @@
+// Package pipeline provides a processing pipeline for recorded URLs.
+//
+// The Pipeline manages a queue of RecordedURL objects and processes them
+// through a chain of Processor implementations. URLs are enqueued by the
+// proxy handlers and processed asynchronously by worker goroutines.
+//
+// Key features:
+//   - Buffered input channel for decoupling URL capture from processing
+//   - Multiple processors executed in sequence (e.g., WARC writer, dedup checker)
+//   - Statistics tracking for monitoring (queued, processed, failed counts)
+//   - Graceful shutdown with context cancellation
+//
+// The typical flow is:
+//  1. Create pipeline with NewPipeline
+//  2. Add processors with AddProcessor
+//  3. Start workers with Start
+//  4. Enqueue URLs from proxy handlers
+//  5. Stop gracefully with Stop when shutting down
+//
+// Thread-safety: All public methods are safe for concurrent use.
 package pipeline
 
 import (
@@ -171,7 +191,11 @@ func (p *Pipeline) GetStats() Stats {
 	return *p.stats
 }
 
-// QueueLength returns the current queue length
+// QueueLength returns the current queue length.
+// Note: This returns the last recorded length, not the real-time channel length,
+// to avoid data races with concurrent channel operations.
 func (p *Pipeline) QueueLength() int {
-	return len(p.inputCh)
+	p.stats.mu.Lock()
+	defer p.stats.mu.Unlock()
+	return p.stats.CurrentQueueLen
 }
internal/proxy/handler.go
@@ -1,8 +1,8 @@
 package proxy
 
 import (
+	"errors"
 	"fmt"
-	"io"
 	"log/slog"
 	"net/http"
 	"strings"
@@ -73,11 +73,15 @@ func (h *Handler) handleHTTP(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	// Read and record request body
-	reqBody, err := readRequestBody(r)
+	// Read and record request body (with size limit if configured)
+	reqBody, err := readRequestBody(r, h.config.MaxResourceSize)
 	if err != nil {
 		h.logger.Error("failed to read request body", "error", err)
-		http.Error(w, "Bad Gateway", http.StatusBadGateway)
+		if errors.Is(err, ErrResourceTooLarge) {
+			http.Error(w, "Request Entity Too Large", http.StatusRequestEntityTooLarge)
+		} else {
+			http.Error(w, "Bad Gateway", http.StatusBadGateway)
+		}
 		return
 	}
 
@@ -121,13 +125,17 @@ func (h *Handler) handleHTTP(w http.ResponseWriter, r *http.Request) {
 	}
 	defer resp.Body.Close()
 
-	// Read response body for recording
-	respBody, err := io.ReadAll(resp.Body)
+	// Read response body for recording (with size limit if configured)
+	respBody, err := readResponseBody(resp.Body, h.config.MaxResourceSize)
 	if err != nil {
 		h.logger.Error("failed to read response body",
 			"url", r.URL.String(),
 			"error", err)
-		http.Error(w, "Bad Gateway", http.StatusBadGateway)
+		if errors.Is(err, ErrResourceTooLarge) {
+			http.Error(w, "Response Too Large", http.StatusBadGateway)
+		} else {
+			http.Error(w, "Bad Gateway", http.StatusBadGateway)
+		}
 		return
 	}
 
internal/proxy/mitm.go
@@ -104,8 +104,8 @@ func (h *Handler) handleConnectMITM(w http.ResponseWriter, r *http.Request) erro
 		return nil // Don't return error for EOF
 	}
 
-	// Read request body for recording
-	reqBody, err := readRequestBody(req)
+	// Read request body for recording (with size limit if configured)
+	reqBody, err := readRequestBody(req, h.config.MaxResourceSize)
 	if err != nil {
 		h.logger.Error("failed to read request body", "error", err)
 		return fmt.Errorf("failed to read request body: %w", err)
@@ -172,8 +172,8 @@ func (h *Handler) handleConnectMITM(w http.ResponseWriter, r *http.Request) erro
 	}
 	defer resp.Body.Close()
 
-	// Read response body for recording
-	respBody, err := io.ReadAll(resp.Body)
+	// Read response body for recording (with size limit if configured)
+	respBody, err := readResponseBody(resp.Body, h.config.MaxResourceSize)
 	if err != nil {
 		h.logger.Error("failed to read response body",
 			"url", req.URL.String(),
internal/proxy/proxy.go
@@ -1,3 +1,32 @@
+// Package proxy implements the HTTP/HTTPS MITM proxy server.
+//
+// The Server orchestrates all proxy components: the HTTP handler, certificate
+// authority for HTTPS interception, processing pipeline, and WARC writer.
+// It handles both plain HTTP requests (forwarding them directly) and HTTPS
+// CONNECT requests (performing man-in-the-middle interception to record traffic).
+//
+// Key components:
+//   - Server: Main entry point, coordinates lifecycle of all components
+//   - Handler: Processes HTTP requests and CONNECT tunnels
+//   - CertificateAuthority: Generates certificates for HTTPS interception
+//   - Pipeline: Async processing of recorded URLs
+//   - WARCWriter: Writes captured traffic to WARC files
+//
+// The proxy captures both requests and responses, calculates digests,
+// and enqueues RecordedURL objects for asynchronous WARC writing.
+//
+// Example usage:
+//
+//	cfg := config.NewDefaultConfig()
+//	server, err := proxy.NewServer(cfg, logger)
+//	if err != nil {
+//	    log.Fatal(err)
+//	}
+//	if err := server.Start(); err != nil {
+//	    log.Fatal(err)
+//	}
+//	// ... handle signals ...
+//	server.Stop()
 package proxy
 
 import (
internal/proxy/recorder.go
@@ -59,34 +59,6 @@ func (r *RecordingResponseWriter) Body() []byte {
 	return r.body.Bytes()
 }
 
-// RecordingReader wraps io.Reader to capture data while reading
-type RecordingReader struct {
-	reader io.Reader
-	buffer *bytes.Buffer
-}
-
-// NewRecordingReader creates a new recording reader
-func NewRecordingReader(r io.Reader) *RecordingReader {
-	return &RecordingReader{
-		reader: r,
-		buffer: &bytes.Buffer{},
-	}
-}
-
-// Read reads from the underlying reader and captures data
-func (r *RecordingReader) Read(p []byte) (n int, err error) {
-	n, err = r.reader.Read(p)
-	if n > 0 {
-		r.buffer.Write(p[:n])
-	}
-	return n, err
-}
-
-// Bytes returns the captured data
-func (r *RecordingReader) Bytes() []byte {
-	return r.buffer.Bytes()
-}
-
 // createRecordedURL creates a RecordedURL from request and response data
 func (h *Handler) createRecordedURL(
 	req *http.Request,
@@ -103,11 +75,11 @@ func (h *Handler) createRecordedURL(
 	ru := &models.RecordedURL{
 		URL:            req.URL.String(),
 		Method:         req.Method,
-		RequestHeader:  cloneHeaders(req.Header),
+		RequestHeader:  req.Header.Clone(),
 		RequestBody:    reqBody,
 		StatusCode:     resp.StatusCode,
 		StatusMessage:  resp.Status,
-		ResponseHeader: cloneHeaders(resp.Header),
+		ResponseHeader: resp.Header.Clone(),
 		ResponseBody:   respBody,
 		Timestamp:      startTime,
 		Duration:       duration,
@@ -203,24 +175,42 @@ func (h *Handler) buildResponseBlock(resp *http.Response, body []byte) []byte {
 	return buf.Bytes()
 }
 
-// cloneHeaders creates a deep copy of HTTP headers
-func cloneHeaders(src http.Header) http.Header {
-	dst := make(http.Header, len(src))
-	for k, vv := range src {
-		vv2 := make([]string, len(vv))
-		copy(vv2, vv)
-		dst[k] = vv2
+// ErrResourceTooLarge is returned when a resource exceeds the maximum allowed size
+var ErrResourceTooLarge = fmt.Errorf("resource size exceeds maximum allowed")
+
+// readLimitedBody reads from a reader with an optional size limit.
+// If maxSize is 0, no limit is applied.
+// If maxSize is positive and the content exceeds it, returns ErrResourceTooLarge.
+func readLimitedBody(r io.Reader, maxSize int64) ([]byte, error) {
+	if r == nil {
+		return nil, nil
+	}
+
+	if maxSize > 0 {
+		// Use LimitReader to prevent reading more than maxSize+1 bytes
+		// The +1 allows us to detect if the content was truncated
+		limited := io.LimitReader(r, maxSize+1)
+		body, err := io.ReadAll(limited)
+		if err != nil {
+			return nil, err
+		}
+		if int64(len(body)) > maxSize {
+			return nil, ErrResourceTooLarge
+		}
+		return body, nil
 	}
-	return dst
+
+	return io.ReadAll(r)
 }
 
-// readRequestBody reads and returns the request body, replacing it with a buffer
-func readRequestBody(req *http.Request) ([]byte, error) {
+// readRequestBody reads and returns the request body, replacing it with a buffer.
+// If maxSize is positive, it limits the body size and returns ErrResourceTooLarge if exceeded.
+func readRequestBody(req *http.Request, maxSize int64) ([]byte, error) {
 	if req.Body == nil {
 		return nil, nil
 	}
 
-	body, err := io.ReadAll(req.Body)
+	body, err := readLimitedBody(req.Body, maxSize)
 	if err != nil {
 		return nil, fmt.Errorf("failed to read request body: %w", err)
 	}
@@ -230,3 +220,13 @@ func readRequestBody(req *http.Request) ([]byte, error) {
 
 	return body, nil
 }
+
+// readResponseBody reads and returns the response body.
+// If maxSize is positive, it limits the body size and returns ErrResourceTooLarge if exceeded.
+func readResponseBody(r io.Reader, maxSize int64) ([]byte, error) {
+	body, err := readLimitedBody(r, maxSize)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read response body: %w", err)
+	}
+	return body, nil
+}
internal/writer/writer.go
@@ -1,3 +1,22 @@
+// Package writer provides WARC file writing functionality using the gowarc library.
+//
+// The WARCWriter is a pipeline.Processor implementation that converts
+// RecordedURL objects into WARC records and writes them to disk. It uses
+// the gowarc library's file rotator to manage file sizes and naming.
+//
+// Key features:
+//   - Writes both request and response records for each captured URL
+//   - Supports gzip and zstd compression
+//   - Automatic file rotation when size limits are reached
+//   - Configurable file naming prefix
+//   - Uses spooled temp files to minimize memory usage for large payloads
+//
+// The writer runs asynchronously via gowarc's file rotator, receiving
+// record batches through a channel. This decouples the proxy request
+// handling from the slower disk I/O operations.
+//
+// WARC records follow the ISO 28500 standard format, with each captured
+// HTTP transaction resulting in a request record and response record pair.
 package writer
 
 import (
pkg/config/config.go
@@ -1,6 +1,26 @@
+// Package config provides configuration management for gowarcprox.
+//
+// The Config struct holds all configuration options for the proxy server,
+// including network settings, WARC output options, certificate authority
+// configuration, deduplication, statistics, and performance tuning.
+//
+// Use NewDefaultConfig to create a configuration with sensible defaults,
+// then modify individual fields as needed. Call Validate() before using
+// the configuration to catch errors early.
+//
+// Example:
+//
+//	cfg := config.NewDefaultConfig()
+//	cfg.Port = 8080
+//	cfg.WARCDirectory = "/var/lib/warcs"
+//	if err := cfg.Validate(); err != nil {
+//	    log.Fatal(err)
+//	}
 package config
 
 import (
+	"errors"
+	"fmt"
 	"time"
 )
 
@@ -12,12 +32,13 @@ type Config struct {
 	SocketTimeout time.Duration
 
 	// WARC output configuration
-	WARCDirectory      string
-	WARCPrefix         string
-	WARCSize           int64
-	WARCCompression    string // "gzip", "zstd", or ""
-	DigestAlgorithm    string // "sha1", "sha256", "blake3"
-	WARCWriterThreads  int
+	WARCDirectory     string
+	WARCPrefix        string
+	WARCSize          int64
+	WARCCompression   string // "gzip", "zstd", or ""
+	GzipEnabled       bool   // CLI flag to enable gzip compression
+	DigestAlgorithm   string // "sha1", "sha256", "blake3"
+	WARCWriterThreads int
 
 	// HTTPS/Certificate configuration
 	CACertFile string
@@ -54,6 +75,7 @@ func NewDefaultConfig() *Config {
 		WARCPrefix:           "warcprox",
 		WARCSize:             1000000000, // 1GB
 		WARCCompression:      "gzip",
+		GzipEnabled:          true,
 		DigestAlgorithm:      "sha1",
 		WARCWriterThreads:    1,
 		CACertFile:           "warcprox-ca.pem",
@@ -72,3 +94,78 @@ func NewDefaultConfig() *Config {
 		LogLevel:             "info",
 	}
 }
+
+// Validate checks that the configuration is valid and returns an error if not.
+// It validates ranges, formats, and required fields.
+func (c *Config) Validate() error {
+	var errs []error
+
+	// Network validation
+	if c.Port < 1 || c.Port > 65535 {
+		errs = append(errs, fmt.Errorf("port must be between 1 and 65535, got %d", c.Port))
+	}
+	if c.SocketTimeout <= 0 {
+		errs = append(errs, fmt.Errorf("socket-timeout must be positive, got %v", c.SocketTimeout))
+	}
+
+	// WARC output validation
+	if c.WARCDirectory == "" {
+		errs = append(errs, errors.New("directory cannot be empty"))
+	}
+	if c.WARCPrefix == "" {
+		errs = append(errs, errors.New("prefix cannot be empty"))
+	}
+	if c.WARCSize <= 0 {
+		errs = append(errs, fmt.Errorf("size must be positive, got %d", c.WARCSize))
+	}
+	validCompressions := map[string]bool{"gzip": true, "zstd": true, "": true}
+	if !validCompressions[c.WARCCompression] {
+		errs = append(errs, fmt.Errorf("invalid compression %q: must be gzip, zstd, or empty", c.WARCCompression))
+	}
+	validDigests := map[string]bool{"sha1": true, "sha256": true, "blake3": true}
+	if !validDigests[c.DigestAlgorithm] {
+		errs = append(errs, fmt.Errorf("invalid digest algorithm %q: must be sha1, sha256, or blake3", c.DigestAlgorithm))
+	}
+	if c.WARCWriterThreads < 1 {
+		errs = append(errs, fmt.Errorf("warc-writer-threads must be at least 1, got %d", c.WARCWriterThreads))
+	}
+
+	// Certificate validation
+	if c.CACertFile == "" {
+		errs = append(errs, errors.New("cacert cannot be empty"))
+	}
+	if c.CertsDir == "" {
+		errs = append(errs, errors.New("certs-dir cannot be empty"))
+	}
+
+	// Performance validation
+	if c.MaxThreads < 1 {
+		errs = append(errs, fmt.Errorf("max-threads must be at least 1, got %d", c.MaxThreads))
+	}
+	if c.QueueSize < 1 {
+		errs = append(errs, fmt.Errorf("queue-size must be at least 1, got %d", c.QueueSize))
+	}
+	if c.TmpFileMaxMemory < 0 {
+		errs = append(errs, fmt.Errorf("tmp-file-max-memory cannot be negative, got %d", c.TmpFileMaxMemory))
+	}
+	if c.MaxResourceSize < 0 {
+		errs = append(errs, fmt.Errorf("max-resource-size cannot be negative, got %d", c.MaxResourceSize))
+	}
+	if c.BatchFlushTimeout <= 0 {
+		errs = append(errs, fmt.Errorf("batch-flush-timeout must be positive, got %v", c.BatchFlushTimeout))
+	}
+	if c.BatchFlushMaxURLs < 1 {
+		errs = append(errs, fmt.Errorf("batch-flush-max-urls must be at least 1, got %d", c.BatchFlushMaxURLs))
+	}
+
+	// Logging validation
+	validLogLevels := map[string]bool{"debug": true, "info": true, "warn": true, "error": true}
+	if !validLogLevels[c.LogLevel] {
+		errs = append(errs, fmt.Errorf("invalid log-level %q: must be debug, info, warn, or error", c.LogLevel))
+	}
+
+	if len(errs) > 0 {
+		return errors.Join(errs...)
+	}
+	return nil
+}
pkg/config/config_test.go
@@ -32,6 +32,9 @@ func TestNewDefaultConfig(t *testing.T) {
 	if cfg.WARCCompression != "gzip" {
 		t.Errorf("WARCCompression = %q, want %q", cfg.WARCCompression, "gzip")
 	}
+	if !cfg.GzipEnabled {
+		t.Error("GzipEnabled = false, want true")
+	}
 	if cfg.DigestAlgorithm != "sha1" {
 		t.Errorf("DigestAlgorithm = %q, want %q", cfg.DigestAlgorithm, "sha1")
 	}
@@ -184,3 +187,121 @@ func TestConfig_ReasonableDefaults(t *testing.T) {
 		t.Errorf("LogLevel = %q, should be one of debug, info, warn, error", cfg.LogLevel)
 	}
 }
+
+func TestConfig_Validate_DefaultsValid(t *testing.T) {
+	cfg := NewDefaultConfig()
+	if err := cfg.Validate(); err != nil {
+		t.Errorf("default config should be valid, got error: %v", err)
+	}
+}
+
+func TestConfig_Validate_InvalidPort(t *testing.T) {
+	tests := []struct {
+		name string
+		port int
+	}{
+		{"zero", 0},
+		{"negative", -1},
+		{"too high", 65536},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := NewDefaultConfig()
+			cfg.Port = tt.port
+			if err := cfg.Validate(); err == nil {
+				t.Errorf("expected error for port %d, got nil", tt.port)
+			}
+		})
+	}
+}
+
+func TestConfig_Validate_InvalidDigest(t *testing.T) {
+	cfg := NewDefaultConfig()
+	cfg.DigestAlgorithm = "md5"
+	if err := cfg.Validate(); err == nil {
+		t.Error("expected error for invalid digest algorithm, got nil")
+	}
+}
+
+func TestConfig_Validate_InvalidCompression(t *testing.T) {
+	cfg := NewDefaultConfig()
+	cfg.WARCCompression = "bzip2"
+	if err := cfg.Validate(); err == nil {
+		t.Error("expected error for invalid compression, got nil")
+	}
+}
+
+func TestConfig_Validate_InvalidLogLevel(t *testing.T) {
+	cfg := NewDefaultConfig()
+	cfg.LogLevel = "verbose"
+	if err := cfg.Validate(); err == nil {
+		t.Error("expected error for invalid log level, got nil")
+	}
+}
+
+func TestConfig_Validate_EmptyRequired(t *testing.T) {
+	tests := []struct {
+		name  string
+		setup func(*Config)
+	}{
+		{"empty directory", func(c *Config) { c.WARCDirectory = "" }},
+		{"empty prefix", func(c *Config) { c.WARCPrefix = "" }},
+		{"empty cacert", func(c *Config) { c.CACertFile = "" }},
+		{"empty certs-dir", func(c *Config) { c.CertsDir = "" }},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := NewDefaultConfig()
+			tt.setup(cfg)
+			if err := cfg.Validate(); err == nil {
+				t.Errorf("expected error for %s, got nil", tt.name)
+			}
+		})
+	}
+}
+
+func TestConfig_Validate_NegativeValues(t *testing.T) {
+	tests := []struct {
+		name  string
+		setup func(*Config)
+	}{
+		{"negative WARC size", func(c *Config) { c.WARCSize = -1 }},
+		{"zero WARC size", func(c *Config) { c.WARCSize = 0 }},
+		{"negative max threads", func(c *Config) { c.MaxThreads = -1 }},
+		{"zero max threads", func(c *Config) { c.MaxThreads = 0 }},
+		{"negative queue size", func(c *Config) { c.QueueSize = -1 }},
+		{"zero queue size", func(c *Config) { c.QueueSize = 0 }},
+		{"negative tmp file memory", func(c *Config) { c.TmpFileMaxMemory = -1 }},
+		{"negative max resource size", func(c *Config) { c.MaxResourceSize = -1 }},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := NewDefaultConfig()
+			tt.setup(cfg)
+			if err := cfg.Validate(); err == nil {
+				t.Errorf("expected error for %s, got nil", tt.name)
+			}
+		})
+	}
+}
+
+func TestConfig_Validate_MultipleErrors(t *testing.T) {
+	cfg := NewDefaultConfig()
+	cfg.Port = 0
+	cfg.WARCSize = 0
+	cfg.DigestAlgorithm = "invalid"
+
+	err := cfg.Validate()
+	if err == nil {
+		t.Fatal("expected multiple errors, got nil")
+	}
+
+	// errors.Join produces an error that contains all errors
+	errStr := err.Error()
+	if errStr == "" {
+		t.Error("expected non-empty error message")
+	}
+}
go.mod
@@ -2,24 +2,26 @@ module github.com/internetarchive/gowarcprox
 
 go 1.25.5
 
+require (
+	github.com/google/uuid v1.6.0
+	github.com/internetarchive/gowarc v0.8.96
+	github.com/spf13/cobra v1.10.2
+	github.com/zeebo/blake3 v0.2.4
+)
+
 require (
 	github.com/andybalholm/brotli v1.1.1 // indirect
 	github.com/dolthub/maphash v0.1.0 // indirect
 	github.com/gammazero/deque v1.0.0 // indirect
-	github.com/google/uuid v1.6.0 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
-	github.com/internetarchive/gowarc v0.8.96 // indirect
 	github.com/klauspost/compress v1.18.1 // indirect
 	github.com/klauspost/cpuid/v2 v2.0.12 // indirect
-	github.com/mattn/go-sqlite3 v1.14.33 // indirect
 	github.com/maypok86/otter v1.2.4 // indirect
 	github.com/miekg/dns v1.1.68 // indirect
 	github.com/refraction-networking/utls v1.8.1 // indirect
-	github.com/spf13/cobra v1.10.2 // indirect
 	github.com/spf13/pflag v1.0.9 // indirect
 	github.com/ulikunitz/xz v0.5.15 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
-	github.com/zeebo/blake3 v0.2.4 // indirect
 	golang.org/x/crypto v0.44.0 // indirect
 	golang.org/x/mod v0.24.0 // indirect
 	golang.org/x/net v0.47.0 // indirect
go.sum
@@ -1,10 +1,14 @@
 github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
 github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
 github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dolthub/maphash v0.1.0 h1:bsQ7JsF4FkkWyrP3oCnFJgrCUAFbFf3kOl4L/QxPDyQ=
 github.com/dolthub/maphash v0.1.0/go.mod h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4=
 github.com/gammazero/deque v1.0.0 h1:LTmimT8H7bXkkCy6gZX7zNLtkbz4NdS2z8LZuor3j34=
 github.com/gammazero/deque v1.0.0/go.mod h1:iflpYvtGfM3U8S8j+sZEKIak3SAKYpA5/SQewgfXDKo=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -15,12 +19,12 @@ github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3J
 github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
 github.com/klauspost/cpuid/v2 v2.0.12 h1:p9dKCg8i4gmOxtv35DvrYoWqYzQrvEVdjQ762Y0OqZE=
 github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c=
-github.com/mattn/go-sqlite3 v1.14.33 h1:A5blZ5ulQo2AtayQ9/limgHEkFreKj1Dv226a1K73s0=
-github.com/mattn/go-sqlite3 v1.14.33/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
 github.com/maypok86/otter v1.2.4 h1:HhW1Pq6VdJkmWwcZZq19BlEQkHtI8xgsQzBVXJU0nfc=
 github.com/maypok86/otter v1.2.4/go.mod h1:mKLfoI7v1HOmQMwFgX4QkRk23mX6ge3RDvjdHOWG4R4=
 github.com/miekg/dns v1.1.68 h1:jsSRkNozw7G/mnmXULynzMNIsgY2dHC8LO6U6Ij2JEA=
 github.com/miekg/dns v1.1.68/go.mod h1:fujopn7TB3Pu3JM69XaawiU0wqjpL9/8xGop5UrTPps=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/refraction-networking/utls v1.8.1 h1:yNY1kapmQU8JeM1sSw2H2asfTIwWxIkrMJI0pRUOCAo=
 github.com/refraction-networking/utls v1.8.1/go.mod h1:jkSOEkLqn+S/jtpEHPOsVv/4V4EVnelwbMQl4vCWXAM=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
@@ -28,13 +32,24 @@ github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
 github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
 github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
 github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/things-go/go-socks5 v0.1.0 h1:4f5dz0iMQ6cA4wseFmyLmCHmg3SWJTW92ndrKS6oERg=
+github.com/things-go/go-socks5 v0.1.0/go.mod h1:Riabiyu52kLsla0YmJqunt1c1JEl6iXSr4bRd7swFEA=
 github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY=
 github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
 github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
+github.com/zeebo/assert v1.1.0 h1:hU1L1vLTHsnO8x8c9KAR5GmM5QscxHg5RNU5z5qbUWY=
+github.com/zeebo/assert v1.1.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
 github.com/zeebo/blake3 v0.2.4 h1:KYQPkhpRtcqh0ssGYcKLG1JYvddkEA8QwCM/yBqhaZI=
 github.com/zeebo/blake3 v0.2.4/go.mod h1:7eeQ6d2iXWRGF6npfaxl2CU+xy2Fjo2gxeyZGCRUjcE=
+github.com/zeebo/pcg v1.0.1 h1:lyqfGeWiv4ahac6ttHs+I5hwtH/+1mrhlCtVNQM2kHo=
+github.com/zeebo/pcg v1.0.1/go.mod h1:09F0S9iiKrwn9rlI5yjLkmrug154/YRW6KnnXVDM/l4=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
 golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
 golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
@@ -49,3 +64,5 @@ golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc=
 golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=