main
1// Package config provides configuration management for gowarcprox.
2//
3// The Config struct holds all configuration options for the proxy server,
4// including network settings, WARC output options, certificate authority
5// configuration, deduplication, statistics, and performance tuning.
6//
7// Use NewDefaultConfig to create a configuration with sensible defaults,
8// then modify individual fields as needed. Call Validate() before using
9// the configuration to catch errors early.
10//
11// Example:
12//
13// cfg := config.NewDefaultConfig()
14// cfg.Port = 8080
15// cfg.WARCDirectory = "/var/lib/warcs"
16// if err := cfg.Validate(); err != nil {
17// log.Fatal(err)
18// }
19package config
20
21import (
22 "errors"
23 "fmt"
24 "time"
25)
26
// Config gathers every tunable setting of gowarcprox in a single value.
//
// The fields are grouped by concern: network, WARC output, certificates,
// deduplication, statistics, performance and logging. The constraints quoted
// on individual fields are the ones enforced by Validate.
type Config struct {
	// ---- Network ----

	// Address is the address part of the proxy's network settings.
	// Validate does not check it.
	Address string
	// Port must lie in the range 1-65535.
	Port int
	// SocketTimeout must be strictly positive.
	SocketTimeout time.Duration

	// ---- WARC output ----

	// WARCDirectory is the WARC output directory; it must not be empty.
	WARCDirectory string
	// WARCPrefix is the WARC prefix; it must not be empty.
	WARCPrefix string
	// WARCSize must be strictly positive.
	// NOTE(review): presumably a per-file size limit in bytes - confirm
	// against the WARC writer.
	WARCSize int64
	// WARCCompression is one of "gzip", "zstd", or "".
	WARCCompression string
	// GzipEnabled is the CLI flag to enable gzip compression. Validate does
	// not cross-check it against WARCCompression.
	GzipEnabled bool
	// DigestAlgorithm is one of "sha1", "sha256", or "blake3".
	DigestAlgorithm string
	// WARCWriterThreads must be at least 1.
	WARCWriterThreads int

	// ---- HTTPS / certificates ----

	// CACertFile is the CA certificate file; it must not be empty.
	CACertFile string
	// CertsDir is the certificates directory; it must not be empty.
	CertsDir string

	// ---- Deduplication (not checked by Validate) ----

	// DedupEnabled switches deduplication on or off.
	DedupEnabled bool
	// DedupDBFile names the deduplication database file.
	DedupDBFile string

	// ---- Statistics (not checked by Validate) ----

	// StatsEnabled switches statistics on or off.
	StatsEnabled bool
	// StatsDBFile names the statistics database file.
	StatsDBFile string

	// ---- Performance ----

	// MaxThreads must be at least 1.
	MaxThreads int
	// QueueSize must be at least 1.
	QueueSize int
	// TmpFileMaxMemory must not be negative.
	TmpFileMaxMemory int
	// MaxResourceSize must not be negative; the default of 0 is documented
	// in NewDefaultConfig as "unlimited".
	MaxResourceSize int64
	// BatchFlushTimeout must be strictly positive.
	BatchFlushTimeout time.Duration
	// BatchFlushMaxURLs must be at least 1.
	BatchFlushMaxURLs int

	// ---- Logging ----

	// Verbose is a boolean logging switch; Validate does not check it.
	Verbose bool
	// LogLevel is one of "debug", "info", "warn", or "error".
	LogLevel string
}
67
68// NewDefaultConfig returns a Config with default values
69func NewDefaultConfig() *Config {
70 return &Config{
71 Address: "localhost",
72 Port: 8000,
73 SocketTimeout: 60 * time.Second,
74 WARCDirectory: "./warcs",
75 WARCPrefix: "warcprox",
76 WARCSize: 1000000000, // 1GB
77 WARCCompression: "gzip",
78 GzipEnabled: true,
79 DigestAlgorithm: "sha1",
80 WARCWriterThreads: 1,
81 CACertFile: "warcprox-ca.pem",
82 CertsDir: "./warcprox-ca",
83 DedupEnabled: true,
84 DedupDBFile: "warcprox.sqlite",
85 StatsEnabled: true,
86 StatsDBFile: "warcprox.sqlite",
87 MaxThreads: 100,
88 QueueSize: 1000,
89 TmpFileMaxMemory: 524288, // 512KB
90 MaxResourceSize: 0, // unlimited
91 BatchFlushTimeout: 10 * time.Second,
92 BatchFlushMaxURLs: 500,
93 Verbose: false,
94 LogLevel: "info",
95 }
96}
97
98// Validate checks that the configuration is valid and returns an error if not.
99// It validates ranges, formats, and required fields.
100func (c *Config) Validate() error {
101 var errs []error
102
103 // Network validation
104 if c.Port < 1 || c.Port > 65535 {
105 errs = append(errs, fmt.Errorf("port must be between 1 and 65535, got %d", c.Port))
106 }
107 if c.SocketTimeout <= 0 {
108 errs = append(errs, fmt.Errorf("socket-timeout must be positive, got %v", c.SocketTimeout))
109 }
110
111 // WARC output validation
112 if c.WARCDirectory == "" {
113 errs = append(errs, errors.New("directory cannot be empty"))
114 }
115 if c.WARCPrefix == "" {
116 errs = append(errs, errors.New("prefix cannot be empty"))
117 }
118 if c.WARCSize <= 0 {
119 errs = append(errs, fmt.Errorf("size must be positive, got %d", c.WARCSize))
120 }
121 validCompressions := map[string]bool{"gzip": true, "zstd": true, "": true}
122 if !validCompressions[c.WARCCompression] {
123 errs = append(errs, fmt.Errorf("invalid compression %q: must be gzip, zstd, or empty", c.WARCCompression))
124 }
125 validDigests := map[string]bool{"sha1": true, "sha256": true, "blake3": true}
126 if !validDigests[c.DigestAlgorithm] {
127 errs = append(errs, fmt.Errorf("invalid digest algorithm %q: must be sha1, sha256, or blake3", c.DigestAlgorithm))
128 }
129 if c.WARCWriterThreads < 1 {
130 errs = append(errs, fmt.Errorf("warc-writer-threads must be at least 1, got %d", c.WARCWriterThreads))
131 }
132
133 // Certificate validation
134 if c.CACertFile == "" {
135 errs = append(errs, errors.New("cacert cannot be empty"))
136 }
137 if c.CertsDir == "" {
138 errs = append(errs, errors.New("certs-dir cannot be empty"))
139 }
140
141 // Performance validation
142 if c.MaxThreads < 1 {
143 errs = append(errs, fmt.Errorf("max-threads must be at least 1, got %d", c.MaxThreads))
144 }
145 if c.QueueSize < 1 {
146 errs = append(errs, fmt.Errorf("queue-size must be at least 1, got %d", c.QueueSize))
147 }
148 if c.TmpFileMaxMemory < 0 {
149 errs = append(errs, fmt.Errorf("tmp-file-max-memory cannot be negative, got %d", c.TmpFileMaxMemory))
150 }
151 if c.MaxResourceSize < 0 {
152 errs = append(errs, fmt.Errorf("max-resource-size cannot be negative, got %d", c.MaxResourceSize))
153 }
154 if c.BatchFlushTimeout <= 0 {
155 errs = append(errs, fmt.Errorf("batch-flush-timeout must be positive, got %v", c.BatchFlushTimeout))
156 }
157 if c.BatchFlushMaxURLs < 1 {
158 errs = append(errs, fmt.Errorf("batch-flush-max-urls must be at least 1, got %d", c.BatchFlushMaxURLs))
159 }
160
161 // Logging validation
162 validLogLevels := map[string]bool{"debug": true, "info": true, "warn": true, "error": true}
163 if !validLogLevels[c.LogLevel] {
164 errs = append(errs, fmt.Errorf("invalid log-level %q: must be debug, info, warn, or error", c.LogLevel))
165 }
166
167 if len(errs) > 0 {
168 return errors.Join(errs...)
169 }
170 return nil
171}