main
Raw Download raw file
// Package config provides configuration management for gowarcprox.
//
// The Config struct holds all configuration options for the proxy server,
// including network settings, WARC output options, certificate authority
// configuration, deduplication, statistics, and performance tuning.
//
// Use NewDefaultConfig to create a configuration with sensible defaults,
// then modify individual fields as needed. Call Validate() before using
// the configuration to catch errors early.
//
// Example:
//
//	cfg := config.NewDefaultConfig()
//	cfg.Port = 8080
//	cfg.WARCDirectory = "/var/lib/warcs"
//	if err := cfg.Validate(); err != nil {
//	    log.Fatal(err)
//	}
 19package config
 20
 21import (
 22	"errors"
 23	"fmt"
 24	"time"
 25)
 26
 27// Config holds the configuration for gowarcprox
 28type Config struct {
 29	// Network configuration
 30	Address       string
 31	Port          int
 32	SocketTimeout time.Duration
 33
 34	// WARC output configuration
 35	WARCDirectory     string
 36	WARCPrefix        string
 37	WARCSize          int64
 38	WARCCompression   string // "gzip", "zstd", or ""
 39	GzipEnabled       bool   // CLI flag to enable gzip compression
 40	DigestAlgorithm   string // "sha1", "sha256", "blake3"
 41	WARCWriterThreads int
 42
 43	// HTTPS/Certificate configuration
 44	CACertFile string
 45	CertsDir   string
 46
 47	// Deduplication configuration
 48	DedupEnabled bool
 49	DedupDBFile  string
 50
 51	// Statistics configuration
 52	StatsEnabled bool
 53	StatsDBFile  string
 54
 55	// Performance configuration
 56	MaxThreads             int
 57	QueueSize              int
 58	TmpFileMaxMemory       int
 59	MaxResourceSize        int64
 60	BatchFlushTimeout      time.Duration
 61	BatchFlushMaxURLs      int
 62
 63	// Logging configuration
 64	Verbose  bool
 65	LogLevel string
 66}
 67
 68// NewDefaultConfig returns a Config with default values
 69func NewDefaultConfig() *Config {
 70	return &Config{
 71		Address:              "localhost",
 72		Port:                 8000,
 73		SocketTimeout:        60 * time.Second,
 74		WARCDirectory:        "./warcs",
 75		WARCPrefix:           "warcprox",
 76		WARCSize:             1000000000, // 1GB
 77		WARCCompression:      "gzip",
 78		GzipEnabled:          true,
 79		DigestAlgorithm:      "sha1",
 80		WARCWriterThreads:    1,
 81		CACertFile:           "warcprox-ca.pem",
 82		CertsDir:             "./warcprox-ca",
 83		DedupEnabled:         true,
 84		DedupDBFile:          "warcprox.sqlite",
 85		StatsEnabled:         true,
 86		StatsDBFile:          "warcprox.sqlite",
 87		MaxThreads:           100,
 88		QueueSize:            1000,
 89		TmpFileMaxMemory:     524288, // 512KB
 90		MaxResourceSize:      0,      // unlimited
 91		BatchFlushTimeout:    10 * time.Second,
 92		BatchFlushMaxURLs:    500,
 93		Verbose:              false,
 94		LogLevel:             "info",
 95	}
 96}
 97
 98// Validate checks that the configuration is valid and returns an error if not.
 99// It validates ranges, formats, and required fields.
100func (c *Config) Validate() error {
101	var errs []error
102
103	// Network validation
104	if c.Port < 1 || c.Port > 65535 {
105		errs = append(errs, fmt.Errorf("port must be between 1 and 65535, got %d", c.Port))
106	}
107	if c.SocketTimeout <= 0 {
108		errs = append(errs, fmt.Errorf("socket-timeout must be positive, got %v", c.SocketTimeout))
109	}
110
111	// WARC output validation
112	if c.WARCDirectory == "" {
113		errs = append(errs, errors.New("directory cannot be empty"))
114	}
115	if c.WARCPrefix == "" {
116		errs = append(errs, errors.New("prefix cannot be empty"))
117	}
118	if c.WARCSize <= 0 {
119		errs = append(errs, fmt.Errorf("size must be positive, got %d", c.WARCSize))
120	}
121	validCompressions := map[string]bool{"gzip": true, "zstd": true, "": true}
122	if !validCompressions[c.WARCCompression] {
123		errs = append(errs, fmt.Errorf("invalid compression %q: must be gzip, zstd, or empty", c.WARCCompression))
124	}
125	validDigests := map[string]bool{"sha1": true, "sha256": true, "blake3": true}
126	if !validDigests[c.DigestAlgorithm] {
127		errs = append(errs, fmt.Errorf("invalid digest algorithm %q: must be sha1, sha256, or blake3", c.DigestAlgorithm))
128	}
129	if c.WARCWriterThreads < 1 {
130		errs = append(errs, fmt.Errorf("warc-writer-threads must be at least 1, got %d", c.WARCWriterThreads))
131	}
132
133	// Certificate validation
134	if c.CACertFile == "" {
135		errs = append(errs, errors.New("cacert cannot be empty"))
136	}
137	if c.CertsDir == "" {
138		errs = append(errs, errors.New("certs-dir cannot be empty"))
139	}
140
141	// Performance validation
142	if c.MaxThreads < 1 {
143		errs = append(errs, fmt.Errorf("max-threads must be at least 1, got %d", c.MaxThreads))
144	}
145	if c.QueueSize < 1 {
146		errs = append(errs, fmt.Errorf("queue-size must be at least 1, got %d", c.QueueSize))
147	}
148	if c.TmpFileMaxMemory < 0 {
149		errs = append(errs, fmt.Errorf("tmp-file-max-memory cannot be negative, got %d", c.TmpFileMaxMemory))
150	}
151	if c.MaxResourceSize < 0 {
152		errs = append(errs, fmt.Errorf("max-resource-size cannot be negative, got %d", c.MaxResourceSize))
153	}
154	if c.BatchFlushTimeout <= 0 {
155		errs = append(errs, fmt.Errorf("batch-flush-timeout must be positive, got %v", c.BatchFlushTimeout))
156	}
157	if c.BatchFlushMaxURLs < 1 {
158		errs = append(errs, fmt.Errorf("batch-flush-max-urls must be at least 1, got %d", c.BatchFlushMaxURLs))
159	}
160
161	// Logging validation
162	validLogLevels := map[string]bool{"debug": true, "info": true, "warn": true, "error": true}
163	if !validLogLevels[c.LogLevel] {
164		errs = append(errs, fmt.Errorf("invalid log-level %q: must be debug, info, warn, or error", c.LogLevel))
165	}
166
167	if len(errs) > 0 {
168		return errors.Join(errs...)
169	}
170	return nil
171}