Commit 28c27be

bryfry <bryon@fryer.io>
2026-01-07 00:07:15
Add unit tests for simple packages (config, models)
pkg/config/config_test.go:
- Test NewDefaultConfig returns correct defaults
- Test all fields are non-nil/non-empty
- Test reasonable defaults (ports, timeouts, algorithms)
- All 3 tests passing

internal/models/recordedurl_test.go:
- Test RecordedURL.IsDedup() method
- Test GetWarcPrefix() with default and custom values
- Test GetDedupBuckets() with nil, default, and custom buckets
- Test GetStatsBuckets() from simple buckets and definitions
- Test GetStatsBucketDefs() retrieval
- Test ShouldTallyDomain() bucket matching and domain lists
- Test field types for RecordedURL, DedupInfo, WarcproxMeta
- All 11 tests passing with table-driven approach

Status: STEP 3.1 complete, ready for core components
1 parent e6b8ee8
Changed files (2)
internal
pkg
internal/models/recordedurl_test.go
@@ -0,0 +1,453 @@
+package models
+
+import (
+	"net/http"
+	"reflect"
+	"testing"
+	"time"
+)
+
+// TestRecordedURL_IsDedup exercises RecordedURL.IsDedup with and without an
+// attached DedupInfo. The table expects false when DedupInfo is nil and true
+// when it points at a populated value.
+func TestRecordedURL_IsDedup(t *testing.T) {
+	populated := &DedupInfo{
+		RecordID: "urn:uuid:12345",
+		URL:      "http://example.com",
+		Date:     time.Now(),
+	}
+
+	cases := []struct {
+		name      string
+		dedupInfo *DedupInfo
+		want      bool
+	}{
+		{name: "nil DedupInfo", dedupInfo: nil, want: false},
+		{name: "non-nil DedupInfo", dedupInfo: populated, want: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			rec := &RecordedURL{DedupInfo: tc.dedupInfo}
+			got := rec.IsDedup()
+			if got == tc.want {
+				return
+			}
+			t.Errorf("RecordedURL.IsDedup() = %v, want %v", got, tc.want)
+		})
+	}
+}
+
+// TestRecordedURL_GetWarcPrefix verifies the prefix returned by GetWarcPrefix.
+// The cases expect the caller-supplied default when WarcproxMeta is nil or its
+// WarcPrefix is empty, and the WarcPrefix value itself when one is set.
+func TestRecordedURL_GetWarcPrefix(t *testing.T) {
+	cases := []struct {
+		name          string
+		warcproxMeta  *WarcproxMeta
+		defaultPrefix string
+		want          string
+	}{
+		{"nil WarcproxMeta", nil, "default", "default"},
+		{"empty WarcPrefix", &WarcproxMeta{WarcPrefix: ""}, "default", "default"},
+		{"custom WarcPrefix", &WarcproxMeta{WarcPrefix: "custom"}, "default", "custom"},
+	}
+
+	for i := range cases {
+		tc := cases[i]
+		t.Run(tc.name, func(t *testing.T) {
+			rec := &RecordedURL{WarcproxMeta: tc.warcproxMeta}
+			got := rec.GetWarcPrefix(tc.defaultPrefix)
+			if got != tc.want {
+				t.Errorf("RecordedURL.GetWarcPrefix() = %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
+
+// TestRecordedURL_GetDedupBuckets checks the bucket map returned by
+// GetDedupBuckets. The cases expect the single-entry map {"": "rw"} when
+// WarcproxMeta or its DedupBuckets is nil, and a map equal to the configured
+// one otherwise. Results are compared with reflect.DeepEqual.
+func TestRecordedURL_GetDedupBuckets(t *testing.T) {
+	// The expected map for this input is spelled out separately in the table
+	// so the input and the expectation never share storage.
+	custom := &WarcproxMeta{
+		DedupBuckets: map[string]string{
+			"bucket1": "rw",
+			"bucket2": "ro",
+		},
+	}
+
+	cases := []struct {
+		name         string
+		warcproxMeta *WarcproxMeta
+		want         map[string]string
+	}{
+		{"nil WarcproxMeta", nil, map[string]string{"": "rw"}},
+		{"nil DedupBuckets", &WarcproxMeta{DedupBuckets: nil}, map[string]string{"": "rw"}},
+		{"custom DedupBuckets", custom, map[string]string{"bucket1": "rw", "bucket2": "ro"}},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			rec := &RecordedURL{WarcproxMeta: tc.warcproxMeta}
+			got := rec.GetDedupBuckets()
+			if reflect.DeepEqual(got, tc.want) {
+				return
+			}
+			t.Errorf("RecordedURL.GetDedupBuckets() = %v, want %v", got, tc.want)
+		})
+	}
+}
+
+func TestRecordedURL_GetStatsBuckets(t *testing.T) {
+	tests := []struct {
+		name         string
+		warcproxMeta *WarcproxMeta
+		want         []string
+	}{
+		{
+			name:         "nil WarcproxMeta",
+			warcproxMeta: nil,
+			want:         nil,
+		},
+		{
+			name:         "empty buckets",
+			warcproxMeta: &WarcproxMeta{},
+			want:         []string{},
+		},
+		{
+			name: "simple buckets only",
+			warcproxMeta: &WarcproxMeta{
+				StatsBuckets: []string{"bucket1", "bucket2"},
+			},
+			want: []string{"bucket1", "bucket2"},
+		},
+		{
+			name: "bucket definitions only",
+			warcproxMeta: &WarcproxMeta{
+				StatsBucketDefs: []StatsBucketDef{
+					{Bucket: "bucket3", TallyDomains: []string{"example.com"}},
+					{Bucket: "bucket4", TallyDomains: nil},
+				},
+			},
+			want: []string{"bucket3", "bucket4"},
+		},
+		{
+			name: "both simple and definitions",
+			warcproxMeta: &WarcproxMeta{
+				StatsBuckets: []string{"bucket1", "bucket2"},
+				StatsBucketDefs: []StatsBucketDef{
+					{Bucket: "bucket3", TallyDomains: []string{"example.com"}},
+				},
+			},
+			want: []string{"bucket1", "bucket2", "bucket3"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ru := &RecordedURL{
+				WarcproxMeta: tt.warcproxMeta,
+			}
+			got := ru.GetStatsBuckets()
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("RecordedURL.GetStatsBuckets() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+// TestRecordedURL_GetStatsBucketDefs checks that GetStatsBucketDefs returns
+// nil when WarcproxMeta or its StatsBucketDefs is nil, and otherwise a slice
+// deeply equal to the configured definitions.
+func TestRecordedURL_GetStatsBucketDefs(t *testing.T) {
+	// Input fixture; the expected value is written out separately in the table
+	// so the comparison does not rely on both sides sharing one slice.
+	configured := &WarcproxMeta{
+		StatsBucketDefs: []StatsBucketDef{
+			{Bucket: "bucket1", TallyDomains: []string{"example.com"}},
+			{Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
+		},
+	}
+
+	cases := []struct {
+		name         string
+		warcproxMeta *WarcproxMeta
+		want         []StatsBucketDef
+	}{
+		{name: "nil WarcproxMeta", warcproxMeta: nil, want: nil},
+		{name: "nil StatsBucketDefs", warcproxMeta: &WarcproxMeta{StatsBucketDefs: nil}, want: nil},
+		{
+			name:         "with bucket definitions",
+			warcproxMeta: configured,
+			want: []StatsBucketDef{
+				{Bucket: "bucket1", TallyDomains: []string{"example.com"}},
+				{Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			rec := &RecordedURL{WarcproxMeta: tc.warcproxMeta}
+			got := rec.GetStatsBucketDefs()
+			if reflect.DeepEqual(got, tc.want) {
+				return
+			}
+			t.Errorf("RecordedURL.GetStatsBucketDefs() = %v, want %v", got, tc.want)
+		})
+	}
+}
+
+// TestRecordedURL_ShouldTallyDomain checks both results of ShouldTallyDomain
+// for a requested bucket name. The table expects (false, nil) when
+// WarcproxMeta is nil, when no definition matches the bucket, or when the
+// matching definition has nil or empty TallyDomains; otherwise it expects
+// true together with the matching definition's domain list.
+func TestRecordedURL_ShouldTallyDomain(t *testing.T) {
+	// withDefs wraps the given definitions in a WarcproxMeta.
+	withDefs := func(defs ...StatsBucketDef) *WarcproxMeta {
+		return &WarcproxMeta{StatsBucketDefs: defs}
+	}
+
+	cases := []struct {
+		name         string
+		warcproxMeta *WarcproxMeta
+		bucket       string
+		wantShould   bool
+		wantDomains  []string
+	}{
+		{
+			name:         "nil WarcproxMeta",
+			warcproxMeta: nil,
+			bucket:       "bucket1",
+			wantShould:   false,
+			wantDomains:  nil,
+		},
+		{
+			name:         "bucket not found",
+			warcproxMeta: withDefs(StatsBucketDef{Bucket: "bucket2", TallyDomains: []string{"example.com"}}),
+			bucket:       "bucket1",
+			wantShould:   false,
+			wantDomains:  nil,
+		},
+		{
+			name:         "bucket found with no tally domains",
+			warcproxMeta: withDefs(StatsBucketDef{Bucket: "bucket1", TallyDomains: nil}),
+			bucket:       "bucket1",
+			wantShould:   false,
+			wantDomains:  nil,
+		},
+		{
+			name:         "bucket found with empty tally domains",
+			warcproxMeta: withDefs(StatsBucketDef{Bucket: "bucket1", TallyDomains: []string{}}),
+			bucket:       "bucket1",
+			wantShould:   false,
+			wantDomains:  nil,
+		},
+		{
+			name:         "bucket found with tally domains",
+			warcproxMeta: withDefs(StatsBucketDef{Bucket: "bucket1", TallyDomains: []string{"example.com", "test.com"}}),
+			bucket:       "bucket1",
+			wantShould:   true,
+			wantDomains:  []string{"example.com", "test.com"},
+		},
+		{
+			name: "multiple buckets, find correct one",
+			warcproxMeta: withDefs(
+				StatsBucketDef{Bucket: "bucket1", TallyDomains: []string{"example.com"}},
+				StatsBucketDef{Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
+				StatsBucketDef{Bucket: "bucket3", TallyDomains: nil},
+			),
+			bucket:      "bucket2",
+			wantShould:  true,
+			wantDomains: []string{"test.com", "demo.com"},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			rec := &RecordedURL{WarcproxMeta: tc.warcproxMeta}
+			should, domains := rec.ShouldTallyDomain(tc.bucket)
+
+			// Both results are checked independently so a single run reports
+			// a wrong flag and a wrong domain list together.
+			if should != tc.wantShould {
+				t.Errorf("RecordedURL.ShouldTallyDomain() should = %v, want %v", should, tc.wantShould)
+			}
+			if !reflect.DeepEqual(domains, tc.wantDomains) {
+				t.Errorf("RecordedURL.ShouldTallyDomain() domains = %v, want %v", domains, tc.wantDomains)
+			}
+		})
+	}
+}
+
+// TestRecordedURL_FieldTypes builds a RecordedURL from the literal below,
+// which only compiles if each named field exists and accepts the given value,
+// and then spot-checks that URL, StatusCode and Timestamp hold non-zero
+// values.
+func TestRecordedURL_FieldTypes(t *testing.T) {
+	rec := &RecordedURL{
+		URL:            "http://example.com",
+		Method:         "GET",
+		RequestHeader:  http.Header{"User-Agent": []string{"test"}},
+		RequestBody:    []byte("request"),
+		StatusCode:     200,
+		StatusMessage:  "OK",
+		ResponseHeader: http.Header{"Content-Type": []string{"text/html"}},
+		ResponseBody:   []byte("response"),
+		Timestamp:      time.Now(),
+		Duration:       100 * time.Millisecond,
+		RemoteAddr:     "93.184.216.34:80",
+		RemoteIP:       "93.184.216.34",
+		ClientAddr:     "192.168.1.100:54321",
+		ClientIP:       "192.168.1.100",
+		PayloadDigest:  "sha1:B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A",
+		BlockDigest:    "sha1:ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
+		ContentType:    "text/html",
+		ContentLength:  8,
+		WARCRecordID:   "urn:uuid:12345678-1234-1234-1234-123456789abc",
+	}
+
+	// Each entry pairs a failure condition with the message reported for it.
+	// Every entry is evaluated, so all failing checks are reported in one run.
+	checks := []struct {
+		failed bool
+		msg    string
+	}{
+		{rec.URL == "", "URL should not be empty"},
+		{rec.StatusCode == 0, "StatusCode should not be zero"},
+		{rec.Timestamp.IsZero(), "Timestamp should not be zero"},
+	}
+	for _, c := range checks {
+		if c.failed {
+			t.Error(c.msg)
+		}
+	}
+}
+
+func TestDedupInfo_Fields(t *testing.T) {
+	now := time.Now()
+	di := &DedupInfo{
+		RecordID: "urn:uuid:12345",
+		URL:      "http://example.com",
+		Date:     now,
+		ReadOnly: true,
+	}
+
+	if di.RecordID != "urn:uuid:12345" {
+		t.Errorf("RecordID = %q, want %q", di.RecordID, "urn:uuid:12345")
+	}
+	if di.URL != "http://example.com" {
+		t.Errorf("URL = %q, want %q", di.URL, "http://example.com")
+	}
+	if !di.Date.Equal(now) {
+		t.Errorf("Date = %v, want %v", di.Date, now)
+	}
+	if !di.ReadOnly {
+		t.Error("ReadOnly should be true")
+	}
+}
+
+// TestWarcproxMeta_Fields populates a WarcproxMeta with one value per field
+// used below and reads each back: scalar and map entries are compared by
+// value, slices and the DedupBuckets map by length.
+func TestWarcproxMeta_Fields(t *testing.T) {
+	meta := &WarcproxMeta{
+		WarcPrefix:   "custom",
+		DedupBuckets: map[string]string{"bucket1": "rw"},
+		StatsBuckets: []string{"stats1", "stats2"},
+		StatsBucketDefs: []StatsBucketDef{
+			{Bucket: "stats3", TallyDomains: []string{"example.com"}},
+		},
+		Limits:     map[string]int64{"max_urls": 1000},
+		SoftLimits: map[string]int64{"soft_max": 900},
+		Blocks: []BlockRule{
+			{Domain: "blocked.com"},
+		},
+		Metadata: map[string]interface{}{"seed": "http://start.com"},
+		Accept:   map[string]bool{"capture-metadata": true},
+	}
+
+	if got := meta.WarcPrefix; got != "custom" {
+		t.Errorf("WarcPrefix = %q, want %q", got, "custom")
+	}
+
+	// Collection sizes.
+	if n := len(meta.DedupBuckets); n != 1 {
+		t.Errorf("DedupBuckets length = %d, want 1", n)
+	}
+	if n := len(meta.StatsBuckets); n != 2 {
+		t.Errorf("StatsBuckets length = %d, want 2", n)
+	}
+	if n := len(meta.StatsBucketDefs); n != 1 {
+		t.Errorf("StatsBucketDefs length = %d, want 1", n)
+	}
+
+	// Limit lookups by key.
+	if got := meta.Limits["max_urls"]; got != 1000 {
+		t.Errorf("Limits[max_urls] = %d, want 1000", got)
+	}
+	if got := meta.SoftLimits["soft_max"]; got != 900 {
+		t.Errorf("SoftLimits[soft_max] = %d, want 900", got)
+	}
+
+	if n := len(meta.Blocks); n != 1 {
+		t.Errorf("Blocks length = %d, want 1", n)
+	}
+	if got := meta.Metadata["seed"]; got != "http://start.com" {
+		t.Errorf("Metadata[seed] = %v, want http://start.com", got)
+	}
+	if accepted := meta.Accept["capture-metadata"]; !accepted {
+		t.Error("Accept[capture-metadata] should be true")
+	}
+}
+
+func TestStatsBucketDef_Fields(t *testing.T) {
+	sbd := StatsBucketDef{
+		Bucket:       "test-bucket",
+		TallyDomains: []string{"example.com", "test.com"},
+	}
+
+	if sbd.Bucket != "test-bucket" {
+		t.Errorf("Bucket = %q, want %q", sbd.Bucket, "test-bucket")
+	}
+	if len(sbd.TallyDomains) != 2 {
+		t.Errorf("TallyDomains length = %d, want 2", len(sbd.TallyDomains))
+	}
+}
+
+func TestBlockRule_Fields(t *testing.T) {
+	br := BlockRule{
+		Domain: "blocked.com",
+		SURT:   "com,blocked)/path",
+		Regex:  ".*blocked.*",
+	}
+
+	if br.Domain != "blocked.com" {
+		t.Errorf("Domain = %q, want %q", br.Domain, "blocked.com")
+	}
+	if br.SURT != "com,blocked)/path" {
+		t.Errorf("SURT = %q, want %q", br.SURT, "com,blocked)/path")
+	}
+	if br.Regex != ".*blocked.*" {
+		t.Errorf("Regex = %q, want %q", br.Regex, ".*blocked.*")
+	}
+}
pkg/config/config_test.go
@@ -0,0 +1,186 @@
+package config
+
+import (
+	"testing"
+	"time"
+)
+
+// TestNewDefaultConfig pins the value of each default that NewDefaultConfig
+// is expected to produce, grouped by configuration area. Every field is
+// checked independently, so one run reports all mismatching defaults.
+func TestNewDefaultConfig(t *testing.T) {
+	cfg := NewDefaultConfig()
+
+	// Network.
+	if got := cfg.Address; got != "localhost" {
+		t.Errorf("Address = %q, want %q", got, "localhost")
+	}
+	if got := cfg.Port; got != 8000 {
+		t.Errorf("Port = %d, want %d", got, 8000)
+	}
+	if got := cfg.SocketTimeout; got != 60*time.Second {
+		t.Errorf("SocketTimeout = %v, want %v", got, 60*time.Second)
+	}
+
+	// WARC output.
+	if got := cfg.WARCDirectory; got != "./warcs" {
+		t.Errorf("WARCDirectory = %q, want %q", got, "./warcs")
+	}
+	if got := cfg.WARCPrefix; got != "warcprox" {
+		t.Errorf("WARCPrefix = %q, want %q", got, "warcprox")
+	}
+	if got := cfg.WARCSize; got != 1000000000 {
+		t.Errorf("WARCSize = %d, want %d", got, 1000000000)
+	}
+	if got := cfg.WARCCompression; got != "gzip" {
+		t.Errorf("WARCCompression = %q, want %q", got, "gzip")
+	}
+	if got := cfg.DigestAlgorithm; got != "sha1" {
+		t.Errorf("DigestAlgorithm = %q, want %q", got, "sha1")
+	}
+	if got := cfg.WARCWriterThreads; got != 1 {
+		t.Errorf("WARCWriterThreads = %d, want %d", got, 1)
+	}
+
+	// HTTPS / certificates.
+	if got := cfg.CACertFile; got != "warcprox-ca.pem" {
+		t.Errorf("CACertFile = %q, want %q", got, "warcprox-ca.pem")
+	}
+	if got := cfg.CertsDir; got != "./warcprox-ca" {
+		t.Errorf("CertsDir = %q, want %q", got, "./warcprox-ca")
+	}
+
+	// Deduplication.
+	if enabled := cfg.DedupEnabled; !enabled {
+		t.Error("DedupEnabled = false, want true")
+	}
+	if got := cfg.DedupDBFile; got != "warcprox.sqlite" {
+		t.Errorf("DedupDBFile = %q, want %q", got, "warcprox.sqlite")
+	}
+
+	// Statistics.
+	if enabled := cfg.StatsEnabled; !enabled {
+		t.Error("StatsEnabled = false, want true")
+	}
+	if got := cfg.StatsDBFile; got != "warcprox.sqlite" {
+		t.Errorf("StatsDBFile = %q, want %q", got, "warcprox.sqlite")
+	}
+
+	// Performance.
+	if got := cfg.MaxThreads; got != 100 {
+		t.Errorf("MaxThreads = %d, want %d", got, 100)
+	}
+	if got := cfg.QueueSize; got != 1000 {
+		t.Errorf("QueueSize = %d, want %d", got, 1000)
+	}
+	if got := cfg.TmpFileMaxMemory; got != 524288 {
+		t.Errorf("TmpFileMaxMemory = %d, want %d", got, 524288)
+	}
+	if got := cfg.MaxResourceSize; got != 0 {
+		t.Errorf("MaxResourceSize = %d, want %d", got, 0)
+	}
+	if got := cfg.BatchFlushTimeout; got != 10*time.Second {
+		t.Errorf("BatchFlushTimeout = %v, want %v", got, 10*time.Second)
+	}
+	if got := cfg.BatchFlushMaxURLs; got != 500 {
+		t.Errorf("BatchFlushMaxURLs = %d, want %d", got, 500)
+	}
+
+	// Logging.
+	if verbose := cfg.Verbose; verbose {
+		t.Error("Verbose = true, want false")
+	}
+	if got := cfg.LogLevel; got != "info" {
+		t.Errorf("LogLevel = %q, want %q", got, "info")
+	}
+}
+
+// TestConfig_NoNilFields verifies that the string-valued settings listed
+// below are non-empty in the configuration returned by NewDefaultConfig.
+// Only these string fields are inspected; no pointer fields are checked here.
+func TestConfig_NoNilFields(t *testing.T) {
+	cfg := NewDefaultConfig()
+
+	// Each entry pairs "field is empty" with the message reported for it.
+	// Every entry is evaluated, so all empty fields are reported in one run.
+	checks := []struct {
+		empty bool
+		msg   string
+	}{
+		{cfg.Address == "", "Address is empty string"},
+		{cfg.WARCDirectory == "", "WARCDirectory is empty string"},
+		{cfg.WARCPrefix == "", "WARCPrefix is empty string"},
+		{cfg.WARCCompression == "", "WARCCompression is empty string"},
+		{cfg.DigestAlgorithm == "", "DigestAlgorithm is empty string"},
+		{cfg.CACertFile == "", "CACertFile is empty string"},
+		{cfg.CertsDir == "", "CertsDir is empty string"},
+		{cfg.DedupDBFile == "", "DedupDBFile is empty string"},
+		{cfg.StatsDBFile == "", "StatsDBFile is empty string"},
+		{cfg.LogLevel == "", "LogLevel is empty string"},
+	}
+
+	for _, c := range checks {
+		if c.empty {
+			t.Error(c.msg)
+		}
+	}
+}
+
+// TestConfig_ReasonableDefaults sanity-checks the configuration returned by
+// NewDefaultConfig against ranges and allowed value sets rather than exact
+// values: numeric settings must fall in the bounds stated in each message,
+// and the digest, compression and log-level strings must belong to the sets
+// listed in the corresponding switch.
+func TestConfig_ReasonableDefaults(t *testing.T) {
+	cfg := NewDefaultConfig()
+
+	// Port must fall within 1-65535.
+	if port := cfg.Port; port < 1 || port > 65535 {
+		t.Errorf("Port = %d, should be in range 1-65535", port)
+	}
+
+	// WARC size and writer thread count must be positive.
+	if size := cfg.WARCSize; size <= 0 {
+		t.Errorf("WARCSize = %d, should be positive", size)
+	}
+	if writers := cfg.WARCWriterThreads; writers < 1 {
+		t.Errorf("WARCWriterThreads = %d, should be at least 1", writers)
+	}
+
+	// Timeouts must be positive.
+	if d := cfg.SocketTimeout; d <= 0 {
+		t.Errorf("SocketTimeout = %v, should be positive", d)
+	}
+	if d := cfg.BatchFlushTimeout; d <= 0 {
+		t.Errorf("BatchFlushTimeout = %v, should be positive", d)
+	}
+
+	// Queue, thread and batch sizes must be at least 1.
+	if n := cfg.QueueSize; n < 1 {
+		t.Errorf("QueueSize = %d, should be at least 1", n)
+	}
+	if n := cfg.MaxThreads; n < 1 {
+		t.Errorf("MaxThreads = %d, should be at least 1", n)
+	}
+	if n := cfg.BatchFlushMaxURLs; n < 1 {
+		t.Errorf("BatchFlushMaxURLs = %d, should be at least 1", n)
+	}
+
+	// Digest algorithm must be one of the accepted names.
+	switch cfg.DigestAlgorithm {
+	case "sha1", "sha256", "blake3":
+	default:
+		t.Errorf("DigestAlgorithm = %q, should be one of sha1, sha256, blake3", cfg.DigestAlgorithm)
+	}
+
+	// Compression must be one of the accepted names; the empty string is
+	// accepted by this check as well.
+	switch cfg.WARCCompression {
+	case "gzip", "zstd", "":
+	default:
+		t.Errorf("WARCCompression = %q, should be one of gzip, zstd, or empty", cfg.WARCCompression)
+	}
+
+	// Log level must be one of the accepted names.
+	switch cfg.LogLevel {
+	case "debug", "info", "warn", "error":
+	default:
+		t.Errorf("LogLevel = %q, should be one of debug, info, warn, error", cfg.LogLevel)
+	}
+}