main
1// Package proxy implements the HTTP/HTTPS MITM proxy server.
2//
3// The Server orchestrates all proxy components: the HTTP handler, certificate
4// authority for HTTPS interception, processing pipeline, and WARC writer.
5// It handles both plain HTTP requests (forwarding them directly) and HTTPS
6// CONNECT requests (performing man-in-the-middle interception to record traffic).
7//
8// Key components:
9// - Server: Main entry point, coordinates lifecycle of all components
10// - Handler: Processes HTTP requests and CONNECT tunnels
11// - CertificateAuthority: Generates certificates for HTTPS interception
12// - Pipeline: Async processing of recorded URLs
13// - WARCWriter: Writes captured traffic to WARC files
14//
15// The proxy captures both requests and responses, calculates digests,
16// and enqueues RecordedURL objects for asynchronous WARC writing.
17//
// Example usage:
//
//	cfg := config.NewDefaultConfig()
//	server, err := proxy.NewServer(cfg, logger)
//	if err != nil {
//		log.Fatal(err)
//	}
//	if err := server.Start(); err != nil {
//		log.Fatal(err)
//	}
//	// ... handle signals ...
//	server.Stop()
30package proxy
31
32import (
33 "context"
34 "fmt"
35 "log/slog"
36 "net"
37 "net/http"
38 "sync"
39 "time"
40
41 "github.com/internetarchive/gowarcprox/internal/certauth"
42 "github.com/internetarchive/gowarcprox/internal/pipeline"
43 "github.com/internetarchive/gowarcprox/internal/writer"
44 "github.com/internetarchive/gowarcprox/pkg/config"
45)
46
47// Server represents the proxy server
48type Server struct {
49 config *config.Config
50 listener net.Listener
51 server *http.Server
52 handler *Handler
53 certAuth *certauth.CertificateAuthority
54 pipeline *pipeline.Pipeline
55 warcWriter *writer.WARCWriter
56 wg sync.WaitGroup
57 ctx context.Context
58 cancel context.CancelFunc
59 logger *slog.Logger
60}
61
62// NewServer creates a new proxy server
63func NewServer(cfg *config.Config, logger *slog.Logger) (*Server, error) {
64 if logger == nil {
65 logger = slog.Default()
66 }
67
68 ctx, cancel := context.WithCancel(context.Background())
69
70 s := &Server{
71 config: cfg,
72 ctx: ctx,
73 cancel: cancel,
74 logger: logger,
75 }
76
77 // Initialize certificate authority
78 certAuth, err := certauth.NewCertificateAuthority(
79 cfg.CACertFile,
80 cfg.CertsDir,
81 "gowarcprox CA",
82 logger,
83 )
84 if err != nil {
85 cancel()
86 return nil, fmt.Errorf("failed to initialize certificate authority: %w", err)
87 }
88 s.certAuth = certAuth
89
90 // Initialize processing pipeline
91 s.pipeline = pipeline.NewPipeline(cfg, logger)
92
93 // Initialize WARC writer
94 warcWriter, err := writer.NewWARCWriter(cfg, logger)
95 if err != nil {
96 cancel()
97 return nil, fmt.Errorf("failed to initialize WARC writer: %w", err)
98 }
99 s.warcWriter = warcWriter
100
101 // Add WARC writer to pipeline
102 s.pipeline.AddProcessor(warcWriter)
103
104 // Create the HTTP handler
105 s.handler = NewHandler(s, cfg, logger)
106
107 // Create the HTTP server
108 s.server = &http.Server{
109 Handler: s.handler,
110 ReadTimeout: cfg.SocketTimeout,
111 WriteTimeout: cfg.SocketTimeout,
112 IdleTimeout: cfg.SocketTimeout,
113 }
114
115 return s, nil
116}
117
118// Start starts the proxy server
119func (s *Server) Start() error {
120 addr := fmt.Sprintf("%s:%d", s.config.Address, s.config.Port)
121
122 listener, err := net.Listen("tcp", addr)
123 if err != nil {
124 return fmt.Errorf("failed to listen on %s: %w", addr, err)
125 }
126 s.listener = listener
127
128 s.logger.Info("proxy server starting",
129 "address", s.config.Address,
130 "port", s.config.Port)
131
132 // Start the processing pipeline
133 if err := s.pipeline.Start(); err != nil {
134 return fmt.Errorf("failed to start pipeline: %w", err)
135 }
136
137 // Start accepting connections
138 s.wg.Add(1)
139 go func() {
140 defer s.wg.Done()
141 if err := s.server.Serve(s.listener); err != nil && err != http.ErrServerClosed {
142 s.logger.Error("server error", "error", err)
143 }
144 }()
145
146 return nil
147}
148
149// Stop gracefully stops the proxy server
150func (s *Server) Stop() error {
151 s.logger.Info("proxy server stopping")
152
153 // Cancel context to signal all goroutines to stop
154 s.cancel()
155
156 // Give the server a grace period to finish in-flight requests
157 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
158 defer cancel()
159
160 // Shutdown the HTTP server
161 if err := s.server.Shutdown(ctx); err != nil {
162 s.logger.Error("server shutdown error", "error", err)
163 return fmt.Errorf("server shutdown failed: %w", err)
164 }
165
166 // Wait for all goroutines to finish
167 s.wg.Wait()
168
169 // Stop the pipeline
170 if err := s.pipeline.Stop(); err != nil {
171 s.logger.Error("pipeline stop error", "error", err)
172 }
173
174 // Close the WARC writer
175 if err := s.warcWriter.Close(); err != nil {
176 s.logger.Error("WARC writer close error", "error", err)
177 }
178
179 s.logger.Info("proxy server stopped")
180 return nil
181}
182
183// Addr returns the address the server is listening on
184func (s *Server) Addr() net.Addr {
185 if s.listener != nil {
186 return s.listener.Addr()
187 }
188 return nil
189}