main
1package models
2
3import (
4 "net/http"
5 "reflect"
6 "testing"
7 "time"
8)
9
10func TestRecordedURL_IsDedup(t *testing.T) {
11 tests := []struct {
12 name string
13 dedupInfo *DedupInfo
14 want bool
15 }{
16 {
17 name: "nil DedupInfo",
18 dedupInfo: nil,
19 want: false,
20 },
21 {
22 name: "non-nil DedupInfo",
23 dedupInfo: &DedupInfo{
24 RecordID: "urn:uuid:12345",
25 URL: "http://example.com",
26 Date: time.Now(),
27 },
28 want: true,
29 },
30 }
31
32 for _, tt := range tests {
33 t.Run(tt.name, func(t *testing.T) {
34 ru := &RecordedURL{
35 DedupInfo: tt.dedupInfo,
36 }
37 if got := ru.IsDedup(); got != tt.want {
38 t.Errorf("RecordedURL.IsDedup() = %v, want %v", got, tt.want)
39 }
40 })
41 }
42}
43
44func TestRecordedURL_GetWarcPrefix(t *testing.T) {
45 tests := []struct {
46 name string
47 warcproxMeta *WarcproxMeta
48 defaultPrefix string
49 want string
50 }{
51 {
52 name: "nil WarcproxMeta",
53 warcproxMeta: nil,
54 defaultPrefix: "default",
55 want: "default",
56 },
57 {
58 name: "empty WarcPrefix",
59 warcproxMeta: &WarcproxMeta{WarcPrefix: ""},
60 defaultPrefix: "default",
61 want: "default",
62 },
63 {
64 name: "custom WarcPrefix",
65 warcproxMeta: &WarcproxMeta{WarcPrefix: "custom"},
66 defaultPrefix: "default",
67 want: "custom",
68 },
69 }
70
71 for _, tt := range tests {
72 t.Run(tt.name, func(t *testing.T) {
73 ru := &RecordedURL{
74 WarcproxMeta: tt.warcproxMeta,
75 }
76 if got := ru.GetWarcPrefix(tt.defaultPrefix); got != tt.want {
77 t.Errorf("RecordedURL.GetWarcPrefix() = %v, want %v", got, tt.want)
78 }
79 })
80 }
81}
82
83func TestRecordedURL_GetDedupBuckets(t *testing.T) {
84 tests := []struct {
85 name string
86 warcproxMeta *WarcproxMeta
87 want map[string]string
88 }{
89 {
90 name: "nil WarcproxMeta",
91 warcproxMeta: nil,
92 want: map[string]string{"": "rw"},
93 },
94 {
95 name: "nil DedupBuckets",
96 warcproxMeta: &WarcproxMeta{DedupBuckets: nil},
97 want: map[string]string{"": "rw"},
98 },
99 {
100 name: "custom DedupBuckets",
101 warcproxMeta: &WarcproxMeta{
102 DedupBuckets: map[string]string{
103 "bucket1": "rw",
104 "bucket2": "ro",
105 },
106 },
107 want: map[string]string{
108 "bucket1": "rw",
109 "bucket2": "ro",
110 },
111 },
112 }
113
114 for _, tt := range tests {
115 t.Run(tt.name, func(t *testing.T) {
116 ru := &RecordedURL{
117 WarcproxMeta: tt.warcproxMeta,
118 }
119 if got := ru.GetDedupBuckets(); !reflect.DeepEqual(got, tt.want) {
120 t.Errorf("RecordedURL.GetDedupBuckets() = %v, want %v", got, tt.want)
121 }
122 })
123 }
124}
125
126func TestRecordedURL_GetStatsBuckets(t *testing.T) {
127 tests := []struct {
128 name string
129 warcproxMeta *WarcproxMeta
130 want []string
131 }{
132 {
133 name: "nil WarcproxMeta",
134 warcproxMeta: nil,
135 want: nil,
136 },
137 {
138 name: "empty buckets",
139 warcproxMeta: &WarcproxMeta{},
140 want: []string{},
141 },
142 {
143 name: "simple buckets only",
144 warcproxMeta: &WarcproxMeta{
145 StatsBuckets: []string{"bucket1", "bucket2"},
146 },
147 want: []string{"bucket1", "bucket2"},
148 },
149 {
150 name: "bucket definitions only",
151 warcproxMeta: &WarcproxMeta{
152 StatsBucketDefs: []StatsBucketDef{
153 {Bucket: "bucket3", TallyDomains: []string{"example.com"}},
154 {Bucket: "bucket4", TallyDomains: nil},
155 },
156 },
157 want: []string{"bucket3", "bucket4"},
158 },
159 {
160 name: "both simple and definitions",
161 warcproxMeta: &WarcproxMeta{
162 StatsBuckets: []string{"bucket1", "bucket2"},
163 StatsBucketDefs: []StatsBucketDef{
164 {Bucket: "bucket3", TallyDomains: []string{"example.com"}},
165 },
166 },
167 want: []string{"bucket1", "bucket2", "bucket3"},
168 },
169 }
170
171 for _, tt := range tests {
172 t.Run(tt.name, func(t *testing.T) {
173 ru := &RecordedURL{
174 WarcproxMeta: tt.warcproxMeta,
175 }
176 got := ru.GetStatsBuckets()
177 if !reflect.DeepEqual(got, tt.want) {
178 t.Errorf("RecordedURL.GetStatsBuckets() = %v, want %v", got, tt.want)
179 }
180 })
181 }
182}
183
184func TestRecordedURL_GetStatsBucketDefs(t *testing.T) {
185 tests := []struct {
186 name string
187 warcproxMeta *WarcproxMeta
188 want []StatsBucketDef
189 }{
190 {
191 name: "nil WarcproxMeta",
192 warcproxMeta: nil,
193 want: nil,
194 },
195 {
196 name: "nil StatsBucketDefs",
197 warcproxMeta: &WarcproxMeta{StatsBucketDefs: nil},
198 want: nil,
199 },
200 {
201 name: "with bucket definitions",
202 warcproxMeta: &WarcproxMeta{
203 StatsBucketDefs: []StatsBucketDef{
204 {Bucket: "bucket1", TallyDomains: []string{"example.com"}},
205 {Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
206 },
207 },
208 want: []StatsBucketDef{
209 {Bucket: "bucket1", TallyDomains: []string{"example.com"}},
210 {Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
211 },
212 },
213 }
214
215 for _, tt := range tests {
216 t.Run(tt.name, func(t *testing.T) {
217 ru := &RecordedURL{
218 WarcproxMeta: tt.warcproxMeta,
219 }
220 if got := ru.GetStatsBucketDefs(); !reflect.DeepEqual(got, tt.want) {
221 t.Errorf("RecordedURL.GetStatsBucketDefs() = %v, want %v", got, tt.want)
222 }
223 })
224 }
225}
226
227func TestRecordedURL_ShouldTallyDomain(t *testing.T) {
228 tests := []struct {
229 name string
230 warcproxMeta *WarcproxMeta
231 bucket string
232 wantShould bool
233 wantDomains []string
234 }{
235 {
236 name: "nil WarcproxMeta",
237 warcproxMeta: nil,
238 bucket: "bucket1",
239 wantShould: false,
240 wantDomains: nil,
241 },
242 {
243 name: "bucket not found",
244 warcproxMeta: &WarcproxMeta{
245 StatsBucketDefs: []StatsBucketDef{
246 {Bucket: "bucket2", TallyDomains: []string{"example.com"}},
247 },
248 },
249 bucket: "bucket1",
250 wantShould: false,
251 wantDomains: nil,
252 },
253 {
254 name: "bucket found with no tally domains",
255 warcproxMeta: &WarcproxMeta{
256 StatsBucketDefs: []StatsBucketDef{
257 {Bucket: "bucket1", TallyDomains: nil},
258 },
259 },
260 bucket: "bucket1",
261 wantShould: false,
262 wantDomains: nil,
263 },
264 {
265 name: "bucket found with empty tally domains",
266 warcproxMeta: &WarcproxMeta{
267 StatsBucketDefs: []StatsBucketDef{
268 {Bucket: "bucket1", TallyDomains: []string{}},
269 },
270 },
271 bucket: "bucket1",
272 wantShould: false,
273 wantDomains: nil,
274 },
275 {
276 name: "bucket found with tally domains",
277 warcproxMeta: &WarcproxMeta{
278 StatsBucketDefs: []StatsBucketDef{
279 {Bucket: "bucket1", TallyDomains: []string{"example.com", "test.com"}},
280 },
281 },
282 bucket: "bucket1",
283 wantShould: true,
284 wantDomains: []string{"example.com", "test.com"},
285 },
286 {
287 name: "multiple buckets, find correct one",
288 warcproxMeta: &WarcproxMeta{
289 StatsBucketDefs: []StatsBucketDef{
290 {Bucket: "bucket1", TallyDomains: []string{"example.com"}},
291 {Bucket: "bucket2", TallyDomains: []string{"test.com", "demo.com"}},
292 {Bucket: "bucket3", TallyDomains: nil},
293 },
294 },
295 bucket: "bucket2",
296 wantShould: true,
297 wantDomains: []string{"test.com", "demo.com"},
298 },
299 }
300
301 for _, tt := range tests {
302 t.Run(tt.name, func(t *testing.T) {
303 ru := &RecordedURL{
304 WarcproxMeta: tt.warcproxMeta,
305 }
306 gotShould, gotDomains := ru.ShouldTallyDomain(tt.bucket)
307 if gotShould != tt.wantShould {
308 t.Errorf("RecordedURL.ShouldTallyDomain() should = %v, want %v", gotShould, tt.wantShould)
309 }
310 if !reflect.DeepEqual(gotDomains, tt.wantDomains) {
311 t.Errorf("RecordedURL.ShouldTallyDomain() domains = %v, want %v", gotDomains, tt.wantDomains)
312 }
313 })
314 }
315}
316
317func TestRecordedURL_FieldTypes(t *testing.T) {
318 // Verify that RecordedURL has expected field types
319 ru := &RecordedURL{
320 URL: "http://example.com",
321 Method: "GET",
322 RequestHeader: http.Header{"User-Agent": []string{"test"}},
323 RequestBody: []byte("request"),
324 StatusCode: 200,
325 StatusMessage: "OK",
326 ResponseHeader: http.Header{"Content-Type": []string{"text/html"}},
327 ResponseBody: []byte("response"),
328 Timestamp: time.Now(),
329 Duration: 100 * time.Millisecond,
330 RemoteAddr: "93.184.216.34:80",
331 RemoteIP: "93.184.216.34",
332 ClientAddr: "192.168.1.100:54321",
333 ClientIP: "192.168.1.100",
334 PayloadDigest: "sha1:B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A",
335 BlockDigest: "sha1:ABCDEFGHIJKLMNOPQRSTUVWXYZ234567",
336 ContentType: "text/html",
337 ContentLength: 8,
338 WARCRecordID: "urn:uuid:12345678-1234-1234-1234-123456789abc",
339 }
340
341 // Just verify we can create the struct with all fields
342 // and access them without panic
343 if ru.URL == "" {
344 t.Error("URL should not be empty")
345 }
346 if ru.StatusCode == 0 {
347 t.Error("StatusCode should not be zero")
348 }
349 if ru.Timestamp.IsZero() {
350 t.Error("Timestamp should not be zero")
351 }
352}
353
354func TestDedupInfo_Fields(t *testing.T) {
355 now := time.Now()
356 di := &DedupInfo{
357 RecordID: "urn:uuid:12345",
358 URL: "http://example.com",
359 Date: now,
360 ReadOnly: true,
361 }
362
363 if di.RecordID != "urn:uuid:12345" {
364 t.Errorf("RecordID = %q, want %q", di.RecordID, "urn:uuid:12345")
365 }
366 if di.URL != "http://example.com" {
367 t.Errorf("URL = %q, want %q", di.URL, "http://example.com")
368 }
369 if !di.Date.Equal(now) {
370 t.Errorf("Date = %v, want %v", di.Date, now)
371 }
372 if !di.ReadOnly {
373 t.Error("ReadOnly should be true")
374 }
375}
376
377func TestWarcproxMeta_Fields(t *testing.T) {
378 wm := &WarcproxMeta{
379 WarcPrefix: "custom",
380 DedupBuckets: map[string]string{"bucket1": "rw"},
381 StatsBuckets: []string{"stats1", "stats2"},
382 StatsBucketDefs: []StatsBucketDef{
383 {Bucket: "stats3", TallyDomains: []string{"example.com"}},
384 },
385 Limits: map[string]int64{"max_urls": 1000},
386 SoftLimits: map[string]int64{"soft_max": 900},
387 Blocks: []BlockRule{
388 {Domain: "blocked.com"},
389 },
390 Metadata: map[string]interface{}{"seed": "http://start.com"},
391 Accept: map[string]bool{"capture-metadata": true},
392 }
393
394 if wm.WarcPrefix != "custom" {
395 t.Errorf("WarcPrefix = %q, want %q", wm.WarcPrefix, "custom")
396 }
397 if len(wm.DedupBuckets) != 1 {
398 t.Errorf("DedupBuckets length = %d, want 1", len(wm.DedupBuckets))
399 }
400 if len(wm.StatsBuckets) != 2 {
401 t.Errorf("StatsBuckets length = %d, want 2", len(wm.StatsBuckets))
402 }
403 if len(wm.StatsBucketDefs) != 1 {
404 t.Errorf("StatsBucketDefs length = %d, want 1", len(wm.StatsBucketDefs))
405 }
406 if wm.Limits["max_urls"] != 1000 {
407 t.Errorf("Limits[max_urls] = %d, want 1000", wm.Limits["max_urls"])
408 }
409 if wm.SoftLimits["soft_max"] != 900 {
410 t.Errorf("SoftLimits[soft_max] = %d, want 900", wm.SoftLimits["soft_max"])
411 }
412 if len(wm.Blocks) != 1 {
413 t.Errorf("Blocks length = %d, want 1", len(wm.Blocks))
414 }
415 if wm.Metadata["seed"] != "http://start.com" {
416 t.Errorf("Metadata[seed] = %v, want http://start.com", wm.Metadata["seed"])
417 }
418 if !wm.Accept["capture-metadata"] {
419 t.Error("Accept[capture-metadata] should be true")
420 }
421}
422
423func TestStatsBucketDef_Fields(t *testing.T) {
424 sbd := StatsBucketDef{
425 Bucket: "test-bucket",
426 TallyDomains: []string{"example.com", "test.com"},
427 }
428
429 if sbd.Bucket != "test-bucket" {
430 t.Errorf("Bucket = %q, want %q", sbd.Bucket, "test-bucket")
431 }
432 if len(sbd.TallyDomains) != 2 {
433 t.Errorf("TallyDomains length = %d, want 2", len(sbd.TallyDomains))
434 }
435}
436
437func TestBlockRule_Fields(t *testing.T) {
438 br := BlockRule{
439 Domain: "blocked.com",
440 SURT: "com,blocked)/path",
441 Regex: ".*blocked.*",
442 }
443
444 if br.Domain != "blocked.com" {
445 t.Errorf("Domain = %q, want %q", br.Domain, "blocked.com")
446 }
447 if br.SURT != "com,blocked)/path" {
448 t.Errorf("SURT = %q, want %q", br.SURT, "com,blocked)/path")
449 }
450 if br.Regex != ".*blocked.*" {
451 t.Errorf("Regex = %q, want %q", br.Regex, ".*blocked.*")
452 }
453}