main
Raw Download raw file
  1// Package proxy implements the HTTP/HTTPS MITM proxy server.
  2//
  3// The Server orchestrates all proxy components: the HTTP handler, certificate
  4// authority for HTTPS interception, processing pipeline, and WARC writer.
  5// It handles both plain HTTP requests (forwarding them directly) and HTTPS
  6// CONNECT requests (performing man-in-the-middle interception to record traffic).
  7//
  8// Key components:
  9//   - Server: Main entry point, coordinates lifecycle of all components
 10//   - Handler: Processes HTTP requests and CONNECT tunnels
 11//   - CertificateAuthority: Generates certificates for HTTPS interception
 12//   - Pipeline: Async processing of recorded URLs
 13//   - WARCWriter: Writes captured traffic to WARC files
 14//
 15// The proxy captures both requests and responses, calculates digests,
 16// and enqueues RecordedURL objects for asynchronous WARC writing.
 17//
 18// Example usage:
 19//
 20//	cfg := config.NewDefaultConfig()
 21//	server, err := proxy.NewServer(cfg, logger)
 22//	if err != nil {
 23//	    log.Fatal(err)
 24//	}
 25//	if err := server.Start(); err != nil {
 26//	    log.Fatal(err)
 27//	}
 28//	// ... handle signals ...
 29//	server.Stop()
 30package proxy
 31
 32import (
 33	"context"
 34	"fmt"
 35	"log/slog"
 36	"net"
 37	"net/http"
 38	"sync"
 39	"time"
 40
 41	"github.com/internetarchive/gowarcprox/internal/certauth"
 42	"github.com/internetarchive/gowarcprox/internal/pipeline"
 43	"github.com/internetarchive/gowarcprox/internal/writer"
 44	"github.com/internetarchive/gowarcprox/pkg/config"
 45)
 46
 47// Server represents the proxy server
 48type Server struct {
 49	config     *config.Config
 50	listener   net.Listener
 51	server     *http.Server
 52	handler    *Handler
 53	certAuth   *certauth.CertificateAuthority
 54	pipeline   *pipeline.Pipeline
 55	warcWriter *writer.WARCWriter
 56	wg         sync.WaitGroup
 57	ctx        context.Context
 58	cancel     context.CancelFunc
 59	logger     *slog.Logger
 60}
 61
 62// NewServer creates a new proxy server
 63func NewServer(cfg *config.Config, logger *slog.Logger) (*Server, error) {
 64	if logger == nil {
 65		logger = slog.Default()
 66	}
 67
 68	ctx, cancel := context.WithCancel(context.Background())
 69
 70	s := &Server{
 71		config: cfg,
 72		ctx:    ctx,
 73		cancel: cancel,
 74		logger: logger,
 75	}
 76
 77	// Initialize certificate authority
 78	certAuth, err := certauth.NewCertificateAuthority(
 79		cfg.CACertFile,
 80		cfg.CertsDir,
 81		"gowarcprox CA",
 82		logger,
 83	)
 84	if err != nil {
 85		cancel()
 86		return nil, fmt.Errorf("failed to initialize certificate authority: %w", err)
 87	}
 88	s.certAuth = certAuth
 89
 90	// Initialize processing pipeline
 91	s.pipeline = pipeline.NewPipeline(cfg, logger)
 92
 93	// Initialize WARC writer
 94	warcWriter, err := writer.NewWARCWriter(cfg, logger)
 95	if err != nil {
 96		cancel()
 97		return nil, fmt.Errorf("failed to initialize WARC writer: %w", err)
 98	}
 99	s.warcWriter = warcWriter
100
101	// Add WARC writer to pipeline
102	s.pipeline.AddProcessor(warcWriter)
103
104	// Create the HTTP handler
105	s.handler = NewHandler(s, cfg, logger)
106
107	// Create the HTTP server
108	s.server = &http.Server{
109		Handler:      s.handler,
110		ReadTimeout:  cfg.SocketTimeout,
111		WriteTimeout: cfg.SocketTimeout,
112		IdleTimeout:  cfg.SocketTimeout,
113	}
114
115	return s, nil
116}
117
118// Start starts the proxy server
119func (s *Server) Start() error {
120	addr := fmt.Sprintf("%s:%d", s.config.Address, s.config.Port)
121
122	listener, err := net.Listen("tcp", addr)
123	if err != nil {
124		return fmt.Errorf("failed to listen on %s: %w", addr, err)
125	}
126	s.listener = listener
127
128	s.logger.Info("proxy server starting",
129		"address", s.config.Address,
130		"port", s.config.Port)
131
132	// Start the processing pipeline
133	if err := s.pipeline.Start(); err != nil {
134		return fmt.Errorf("failed to start pipeline: %w", err)
135	}
136
137	// Start accepting connections
138	s.wg.Add(1)
139	go func() {
140		defer s.wg.Done()
141		if err := s.server.Serve(s.listener); err != nil && err != http.ErrServerClosed {
142			s.logger.Error("server error", "error", err)
143		}
144	}()
145
146	return nil
147}
148
149// Stop gracefully stops the proxy server
150func (s *Server) Stop() error {
151	s.logger.Info("proxy server stopping")
152
153	// Cancel context to signal all goroutines to stop
154	s.cancel()
155
156	// Give the server a grace period to finish in-flight requests
157	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
158	defer cancel()
159
160	// Shutdown the HTTP server
161	if err := s.server.Shutdown(ctx); err != nil {
162		s.logger.Error("server shutdown error", "error", err)
163		return fmt.Errorf("server shutdown failed: %w", err)
164	}
165
166	// Wait for all goroutines to finish
167	s.wg.Wait()
168
169	// Stop the pipeline
170	if err := s.pipeline.Stop(); err != nil {
171		s.logger.Error("pipeline stop error", "error", err)
172	}
173
174	// Close the WARC writer
175	if err := s.warcWriter.Close(); err != nil {
176		s.logger.Error("WARC writer close error", "error", err)
177	}
178
179	s.logger.Info("proxy server stopped")
180	return nil
181}
182
183// Addr returns the address the server is listening on
184func (s *Server) Addr() net.Addr {
185	if s.listener != nil {
186		return s.listener.Addr()
187	}
188	return nil
189}