Commit 28c27be
Changed files (2)
internal
models
pkg
config
internal/models/recordedurl_test.go
@@ -0,0 +1,453 @@
+package models
+
+import (
+ "net/http"
+ "reflect"
+ "testing"
+ "time"
+)
+
+func TestRecordedURL_IsDedup(t *testing.T) {
+ tests := []struct {
+ name string
+ dedupInfo *DedupInfo
+ want bool
+ }{
+ {
+ name: "nil DedupInfo",
+ dedupInfo: nil,
+ want: false,
+ },
+ {
+ name: "non-nil DedupInfo",
+ dedupInfo: &DedupInfo{
+ RecordID: "urn:uuid:12345",
+ URL: "http://example.com",
+ Date: time.Now(),
+ },
+ want: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ru := &RecordedURL{
+ DedupInfo: tt.dedupInfo,
+ }
+ if got := ru.IsDedup(); got != tt.want {
+ t.Errorf("RecordedURL.IsDedup() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestRecordedURL_GetWarcPrefix(t *testing.T) {
+ tests := []struct {
+ name string
+ warcproxMeta *WarcproxMeta
+ defaultPrefix string
+ want string
+ }{
+ {
+ name: "nil WarcproxMeta",
+ warcproxMeta: nil,
+ defaultPrefix: "default",
+ want: "default",
+ },
+ {
+ name: "empty WarcPrefix",
+ warcproxMeta: &WarcproxMeta{WarcPrefix: ""},
+ defaultPrefix: "default",
+ want: "default",
+ },
+ {
+ name: "custom WarcPrefix",
+ warcproxMeta: &WarcproxMeta{WarcPrefix: "custom"},
+ defaultPrefix: "default",
+ want: "custom",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ru := &RecordedURL{
+ WarcproxMeta: tt.warcproxMeta,
+ }
+ if got := ru.GetWarcPrefix(tt.defaultPrefix); got != tt.want {
+ t.Errorf("RecordedURL.GetWarcPrefix() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestRecordedURL_GetDedupBuckets(t *testing.T) {
+ tests := []struct {
+ name string
+ warcproxMeta *WarcproxMeta
+ want map[string]string
+ }{
+ {
+ name: "nil WarcproxMeta",
+ warcproxMeta: nil,
+ want: map[string]string{"": "rw"},
+ },
+ {
+ name: "nil DedupBuckets",
+ warcproxMeta: &WarcproxMeta{DedupBuckets: nil},
+ want: map[string]string{"": "rw"},
+ },
+ {
+ name: "custom DedupBuckets",
+ warcproxMeta: &WarcproxMeta{
+ DedupBuckets: map[string]string{
+ "bucket1": "rw",
+ "bucket2": "ro",
+ },
+ },
+ want: map[string]string{
+ "bucket1": "rw",
+ "bucket2": "ro",
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ru := &RecordedURL{
+ WarcproxMeta: tt.warcproxMeta,
+ }
+ if got := ru.GetDedupBuckets(); !reflect.DeepEqual(got, tt.want) {
+ t.Errorf("RecordedURL.GetDedupBuckets() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestRecordedURL_GetStatsBuckets(t *testing.T) {
+ tests := []struct {
+ name string
+ warcproxMeta *WarcproxMeta
+ want []string
+ }{
+ {
+ name: "nil WarcproxMeta",
+ warcproxMeta: nil,
+ want: nil,
+ },
+ {
+ name: "empty buckets",
+ warcproxMeta: &WarcproxMeta{},
+ want: []string{},
+ },
+ {
+ name: "simple buckets only",
+ warcproxMeta: &WarcproxMeta{
+ StatsBuckets: []string{"bucket1", "bucket2"},
+ },
+ want: []string{"bucket1", "bucket2"},
+ },
+ {
+ name: "bucket definitions only",
+ warcproxMeta: &WarcproxMeta{
+ StatsBucketDefs: []StatsBucketDef{
+ {Bucket: "bucket3", TallyDomains: []string{"example.com"}},
+ {Bucket: "bucket4", TallyDomains: nil},
+ },
+ },
+ want: []string{"bucket3", "bucket4"},
+ },
+ {
+ name: "both simple and definitions",
+ warcproxMeta: &WarcproxMeta{
+ StatsBuckets: []string{"bucket1", "bucket2"},
+ StatsBucketDefs: []StatsBucketDef{
+ {Bucket: "bucket3", TallyDomains: []string{"example.com"}},
+ },
+ },
+ want: []string{"bucket1", "bucket2", "bucket3"},
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ru := &RecordedURL{
+ WarcproxMeta: tt.warcproxMeta,
+ }
+ got := ru.GetStatsBuckets()
+ if !reflect.DeepEqual(got, tt.want) {
+ t.Errorf("RecordedURL.GetStatsBuckets() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestRecordedURL_GetStatsBucketDefs(t *testing.T) {
+ tests := []struct {
+ name string
+ warcproxMeta *WarcproxMeta
+ want []StatsBucketDef
+ }{
+ {
+ name: "nil WarcproxMeta",
+ warcproxMeta: nil,
+ want: nil,
+ },
+ {
+ name: "nil StatsBucketDefs",
+ warcproxMeta: &WarcproxMeta{StatsBucketDefs: nil},
+ want: nil,
+ },
+ {
+ name: "with bucket definitions",
+ warcproxMeta: &WarcproxMeta{
+ StatsBucketDefs: []StatsBucketDef{
+ {Bucket: "bucket1", TallyDomains: []string{"example.com"}},
+ {Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
+ },
+ },
+ want: []StatsBucketDef{
+ {Bucket: "bucket1", TallyDomains: []string{"example.com"}},
+ {Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ru := &RecordedURL{
+ WarcproxMeta: tt.warcproxMeta,
+ }
+ if got := ru.GetStatsBucketDefs(); !reflect.DeepEqual(got, tt.want) {
+ t.Errorf("RecordedURL.GetStatsBucketDefs() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestRecordedURL_ShouldTallyDomain(t *testing.T) {
+ tests := []struct {
+ name string
+ warcproxMeta *WarcproxMeta
+ bucket string
+ wantShould bool
+ wantDomains []string
+ }{
+ {
+ name: "nil WarcproxMeta",
+ warcproxMeta: nil,
+ bucket: "bucket1",
+ wantShould: false,
+ wantDomains: nil,
+ },
+ {
+ name: "bucket not found",
+ warcproxMeta: &WarcproxMeta{
+ StatsBucketDefs: []StatsBucketDef{
+ {Bucket: "bucket2", TallyDomains: []string{"example.com"}},
+ },
+ },
+ bucket: "bucket1",
+ wantShould: false,
+ wantDomains: nil,
+ },
+ {
+ name: "bucket found with no tally domains",
+ warcproxMeta: &WarcproxMeta{
+ StatsBucketDefs: []StatsBucketDef{
+ {Bucket: "bucket1", TallyDomains: nil},
+ },
+ },
+ bucket: "bucket1",
+ wantShould: false,
+ wantDomains: nil,
+ },
+ {
+ name: "bucket found with empty tally domains",
+ warcproxMeta: &WarcproxMeta{
+ StatsBucketDefs: []StatsBucketDef{
+ {Bucket: "bucket1", TallyDomains: []string{}},
+ },
+ },
+ bucket: "bucket1",
+ wantShould: false,
+ wantDomains: nil,
+ },
+ {
+ name: "bucket found with tally domains",
+ warcproxMeta: &WarcproxMeta{
+ StatsBucketDefs: []StatsBucketDef{
+ {Bucket: "bucket1", TallyDomains: []string{"example.com", "test.com"}},
+ },
+ },
+ bucket: "bucket1",
+ wantShould: true,
+ wantDomains: []string{"example.com", "test.com"},
+ },
+ {
+ name: "multiple buckets, find correct one",
+ warcproxMeta: &WarcproxMeta{
+ StatsBucketDefs: []StatsBucketDef{
+ {Bucket: "bucket1", TallyDomains: []string{"example.com"}},
+ {Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
+ {Bucket: "bucket3", TallyDomains: nil},
+ },
+ },
+ bucket: "bucket2",
+ wantShould: true,
+ wantDomains: []string{"test.com", "demo.com"},
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ru := &RecordedURL{
+ WarcproxMeta: tt.warcproxMeta,
+ }
+ gotShould, gotDomains := ru.ShouldTallyDomain(tt.bucket)
+ if gotShould != tt.wantShould {
+ t.Errorf("RecordedURL.ShouldTallyDomain() should = %v, want %v", gotShould, tt.wantShould)
+ }
+ if !reflect.DeepEqual(gotDomains, tt.wantDomains) {
+ t.Errorf("RecordedURL.ShouldTallyDomain() domains = %v, want %v", gotDomains, tt.wantDomains)
+ }
+ })
+ }
+}
+
+// TestRecordedURL_FieldTypes is mostly a compile-time check: it builds a
+// RecordedURL with every field listed below populated, so the test stops
+// compiling if one of those fields is renamed or changes to an incompatible
+// type. At run time it only asserts that URL, StatusCode and Timestamp kept
+// their non-zero values.
+func TestRecordedURL_FieldTypes(t *testing.T) {
+	reqHeader := http.Header{}
+	reqHeader.Set("User-Agent", "test")
+
+	respHeader := http.Header{}
+	respHeader.Set("Content-Type", "text/html")
+
+	capturedAt := time.Now()
+
+	rec := &RecordedURL{
+		// Request side.
+		URL:           "http://example.com",
+		Method:        "GET",
+		RequestHeader: reqHeader,
+		RequestBody:   []byte("request"),
+
+		// Response side.
+		StatusCode:     200,
+		StatusMessage:  "OK",
+		ResponseHeader: respHeader,
+		ResponseBody:   []byte("response"),
+		ContentType:    "text/html",
+		ContentLength:  8,
+
+		// Timing.
+		Timestamp: capturedAt,
+		Duration:  100 * time.Millisecond,
+
+		// Endpoints.
+		RemoteAddr: "93.184.216.34:80",
+		RemoteIP:   "93.184.216.34",
+		ClientAddr: "192.168.1.100:54321",
+		ClientIP:   "192.168.1.100",
+
+		// Digests and WARC bookkeeping.
+		PayloadDigest: "sha1:B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A",
+		BlockDigest:   "sha1:ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
+		WARCRecordID:  "urn:uuid:12345678-1234-1234-1234-123456789abc",
+	}
+
+	// Spot-check a few of the values that were just assigned.
+	if rec.URL == "" {
+		t.Error("URL should not be empty")
+	}
+	if rec.StatusCode == 0 {
+		t.Error("StatusCode should not be zero")
+	}
+	if rec.Timestamp.IsZero() {
+		t.Error("Timestamp should not be zero")
+	}
+}
+
+func TestDedupInfo_Fields(t *testing.T) {
+ now := time.Now()
+ di := &DedupInfo{
+ RecordID: "urn:uuid:12345",
+ URL: "http://example.com",
+ Date: now,
+ ReadOnly: true,
+ }
+
+ if di.RecordID != "urn:uuid:12345" {
+ t.Errorf("RecordID = %q, want %q", di.RecordID, "urn:uuid:12345")
+ }
+ if di.URL != "http://example.com" {
+ t.Errorf("URL = %q, want %q", di.URL, "http://example.com")
+ }
+ if !di.Date.Equal(now) {
+ t.Errorf("Date = %v, want %v", di.Date, now)
+ }
+ if !di.ReadOnly {
+ t.Error("ReadOnly should be true")
+ }
+}
+
+// TestWarcproxMeta_Fields populates every WarcproxMeta field used below and
+// reads each one back, checking either its length or a specific entry.
+func TestWarcproxMeta_Fields(t *testing.T) {
+	meta := &WarcproxMeta{
+		WarcPrefix:   "custom",
+		DedupBuckets: map[string]string{"bucket1": "rw"},
+		StatsBuckets: []string{"stats1", "stats2"},
+		StatsBucketDefs: []StatsBucketDef{
+			{Bucket: "stats3", TallyDomains: []string{"example.com"}},
+		},
+		Limits:     map[string]int64{"max_urls": 1000},
+		SoftLimits: map[string]int64{"soft_max": 900},
+		Blocks: []BlockRule{
+			{Domain: "blocked.com"},
+		},
+		Metadata: map[string]interface{}{"seed": "http://start.com"},
+		Accept:   map[string]bool{"capture-metadata": true},
+	}
+
+	if got := meta.WarcPrefix; got != "custom" {
+		t.Errorf("WarcPrefix = %q, want %q", got, "custom")
+	}
+
+	// Collection sizes.
+	if n := len(meta.DedupBuckets); n != 1 {
+		t.Errorf("DedupBuckets length = %d, want 1", n)
+	}
+	if n := len(meta.StatsBuckets); n != 2 {
+		t.Errorf("StatsBuckets length = %d, want 2", n)
+	}
+	if n := len(meta.StatsBucketDefs); n != 1 {
+		t.Errorf("StatsBucketDefs length = %d, want 1", n)
+	}
+
+	// Individual limit entries.
+	if got := meta.Limits["max_urls"]; got != 1000 {
+		t.Errorf("Limits[max_urls] = %d, want 1000", got)
+	}
+	if got := meta.SoftLimits["soft_max"]; got != 900 {
+		t.Errorf("SoftLimits[soft_max] = %d, want 900", got)
+	}
+
+	if n := len(meta.Blocks); n != 1 {
+		t.Errorf("Blocks length = %d, want 1", n)
+	}
+	if got := meta.Metadata["seed"]; got != "http://start.com" {
+		t.Errorf("Metadata[seed] = %v, want http://start.com", got)
+	}
+	if accepted := meta.Accept["capture-metadata"]; !accepted {
+		t.Error("Accept[capture-metadata] should be true")
+	}
+}
+
+func TestStatsBucketDef_Fields(t *testing.T) {
+ sbd := StatsBucketDef{
+ Bucket: "test-bucket",
+ TallyDomains: []string{"example.com", "test.com"},
+ }
+
+ if sbd.Bucket != "test-bucket" {
+ t.Errorf("Bucket = %q, want %q", sbd.Bucket, "test-bucket")
+ }
+ if len(sbd.TallyDomains) != 2 {
+ t.Errorf("TallyDomains length = %d, want 2", len(sbd.TallyDomains))
+ }
+}
+
+func TestBlockRule_Fields(t *testing.T) {
+ br := BlockRule{
+ Domain: "blocked.com",
+ SURT: "com,blocked)/path",
+ Regex: ".*blocked.*",
+ }
+
+ if br.Domain != "blocked.com" {
+ t.Errorf("Domain = %q, want %q", br.Domain, "blocked.com")
+ }
+ if br.SURT != "com,blocked)/path" {
+ t.Errorf("SURT = %q, want %q", br.SURT, "com,blocked)/path")
+ }
+ if br.Regex != ".*blocked.*" {
+ t.Errorf("Regex = %q, want %q", br.Regex, ".*blocked.*")
+ }
+}
pkg/config/config_test.go
@@ -0,0 +1,186 @@
+package config
+
+import (
+ "testing"
+ "time"
+)
+
+func TestNewDefaultConfig(t *testing.T) {
+ cfg := NewDefaultConfig()
+
+ // Network configuration
+ if cfg.Address != "localhost" {
+ t.Errorf("Address = %q, want %q", cfg.Address, "localhost")
+ }
+ if cfg.Port != 8000 {
+ t.Errorf("Port = %d, want %d", cfg.Port, 8000)
+ }
+ if cfg.SocketTimeout != 60*time.Second {
+ t.Errorf("SocketTimeout = %v, want %v", cfg.SocketTimeout, 60*time.Second)
+ }
+
+ // WARC output configuration
+ if cfg.WARCDirectory != "./warcs" {
+ t.Errorf("WARCDirectory = %q, want %q", cfg.WARCDirectory, "./warcs")
+ }
+ if cfg.WARCPrefix != "warcprox" {
+ t.Errorf("WARCPrefix = %q, want %q", cfg.WARCPrefix, "warcprox")
+ }
+ if cfg.WARCSize != 1000000000 {
+ t.Errorf("WARCSize = %d, want %d", cfg.WARCSize, 1000000000)
+ }
+ if cfg.WARCCompression != "gzip" {
+ t.Errorf("WARCCompression = %q, want %q", cfg.WARCCompression, "gzip")
+ }
+ if cfg.DigestAlgorithm != "sha1" {
+ t.Errorf("DigestAlgorithm = %q, want %q", cfg.DigestAlgorithm, "sha1")
+ }
+ if cfg.WARCWriterThreads != 1 {
+ t.Errorf("WARCWriterThreads = %d, want %d", cfg.WARCWriterThreads, 1)
+ }
+
+ // HTTPS/Certificate configuration
+ if cfg.CACertFile != "warcprox-ca.pem" {
+ t.Errorf("CACertFile = %q, want %q", cfg.CACertFile, "warcprox-ca.pem")
+ }
+ if cfg.CertsDir != "./warcprox-ca" {
+ t.Errorf("CertsDir = %q, want %q", cfg.CertsDir, "./warcprox-ca")
+ }
+
+ // Deduplication configuration
+ if !cfg.DedupEnabled {
+ t.Error("DedupEnabled = false, want true")
+ }
+ if cfg.DedupDBFile != "warcprox.sqlite" {
+ t.Errorf("DedupDBFile = %q, want %q", cfg.DedupDBFile, "warcprox.sqlite")
+ }
+
+ // Statistics configuration
+ if !cfg.StatsEnabled {
+ t.Error("StatsEnabled = false, want true")
+ }
+ if cfg.StatsDBFile != "warcprox.sqlite" {
+ t.Errorf("StatsDBFile = %q, want %q", cfg.StatsDBFile, "warcprox.sqlite")
+ }
+
+ // Performance configuration
+ if cfg.MaxThreads != 100 {
+ t.Errorf("MaxThreads = %d, want %d", cfg.MaxThreads, 100)
+ }
+ if cfg.QueueSize != 1000 {
+ t.Errorf("QueueSize = %d, want %d", cfg.QueueSize, 1000)
+ }
+ if cfg.TmpFileMaxMemory != 524288 {
+ t.Errorf("TmpFileMaxMemory = %d, want %d", cfg.TmpFileMaxMemory, 524288)
+ }
+ if cfg.MaxResourceSize != 0 {
+ t.Errorf("MaxResourceSize = %d, want %d", cfg.MaxResourceSize, 0)
+ }
+ if cfg.BatchFlushTimeout != 10*time.Second {
+ t.Errorf("BatchFlushTimeout = %v, want %v", cfg.BatchFlushTimeout, 10*time.Second)
+ }
+ if cfg.BatchFlushMaxURLs != 500 {
+ t.Errorf("BatchFlushMaxURLs = %d, want %d", cfg.BatchFlushMaxURLs, 500)
+ }
+
+ // Logging configuration
+ if cfg.Verbose {
+ t.Error("Verbose = true, want false")
+ }
+ if cfg.LogLevel != "info" {
+ t.Errorf("LogLevel = %q, want %q", cfg.LogLevel, "info")
+ }
+}
+
+// TestConfig_NoNilFields checks that the string fields listed below are
+// non-empty in the value returned by NewDefaultConfig. Despite the name, no
+// nil checks are performed here; only empty-string checks.
+func TestConfig_NoNilFields(t *testing.T) {
+	cfg := NewDefaultConfig()
+
+	// failIf reports msg when the given emptiness condition holds.
+	failIf := func(empty bool, msg string) {
+		t.Helper()
+		if empty {
+			t.Error(msg)
+		}
+	}
+
+	failIf(cfg.Address == "", "Address is empty string")
+	failIf(cfg.WARCDirectory == "", "WARCDirectory is empty string")
+	failIf(cfg.WARCPrefix == "", "WARCPrefix is empty string")
+	failIf(cfg.WARCCompression == "", "WARCCompression is empty string")
+	failIf(cfg.DigestAlgorithm == "", "DigestAlgorithm is empty string")
+	failIf(cfg.CACertFile == "", "CACertFile is empty string")
+	failIf(cfg.CertsDir == "", "CertsDir is empty string")
+	failIf(cfg.DedupDBFile == "", "DedupDBFile is empty string")
+	failIf(cfg.StatsDBFile == "", "StatsDBFile is empty string")
+	failIf(cfg.LogLevel == "", "LogLevel is empty string")
+}
+
+// TestConfig_ReasonableDefaults sanity-checks the value returned by
+// NewDefaultConfig without pinning exact values: numeric settings must fall
+// in a usable range and enumerated string settings must be one of the values
+// this test accepts.
+func TestConfig_ReasonableDefaults(t *testing.T) {
+	cfg := NewDefaultConfig()
+
+	// Port must be in the range 1-65535.
+	if port := cfg.Port; port < 1 || port > 65535 {
+		t.Errorf("Port = %d, should be in range 1-65535", port)
+	}
+
+	// WARC size must be strictly positive.
+	if size := cfg.WARCSize; size <= 0 {
+		t.Errorf("WARCSize = %d, should be positive", size)
+	}
+
+	// At least one WARC writer thread is required.
+	if threads := cfg.WARCWriterThreads; threads < 1 {
+		t.Errorf("WARCWriterThreads = %d, should be at least 1", threads)
+	}
+
+	// Timeouts must be strictly positive.
+	if timeout := cfg.SocketTimeout; timeout <= 0 {
+		t.Errorf("SocketTimeout = %v, should be positive", timeout)
+	}
+	if timeout := cfg.BatchFlushTimeout; timeout <= 0 {
+		t.Errorf("BatchFlushTimeout = %v, should be positive", timeout)
+	}
+
+	// Queue, thread and batch sizes must be at least 1.
+	if size := cfg.QueueSize; size < 1 {
+		t.Errorf("QueueSize = %d, should be at least 1", size)
+	}
+	if threads := cfg.MaxThreads; threads < 1 {
+		t.Errorf("MaxThreads = %d, should be at least 1", threads)
+	}
+	if urls := cfg.BatchFlushMaxURLs; urls < 1 {
+		t.Errorf("BatchFlushMaxURLs = %d, should be at least 1", urls)
+	}
+
+	// Digest algorithm must be one of the accepted names.
+	switch cfg.DigestAlgorithm {
+	case "sha1", "sha256", "blake3":
+		// accepted
+	default:
+		t.Errorf("DigestAlgorithm = %q, should be one of sha1, sha256, blake3", cfg.DigestAlgorithm)
+	}
+
+	// Compression must be one of the accepted names; the empty string is
+	// accepted as well.
+	switch cfg.WARCCompression {
+	case "gzip", "zstd", "":
+		// accepted
+	default:
+		t.Errorf("WARCCompression = %q, should be one of gzip, zstd, or empty", cfg.WARCCompression)
+	}
+
+	// Log level must be one of the accepted names.
+	switch cfg.LogLevel {
+	case "debug", "info", "warn", "error":
+		// accepted
+	default:
+		t.Errorf("LogLevel = %q, should be one of debug, info, warn, error", cfg.LogLevel)
+	}
+}