Commit 0b031a2

bryfry <bryon@fryer.io>
2025-12-31 15:06:59
init
Changed files (8)
cmd/mill/main.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"mill/internal/content"
+	"mill/internal/fetch/sources/lobsters"
+)
+
+// main runs the program and panics with the error if run reports one.
+func main() {
+	if err := run(); err != nil {
+		panic(err)
+	}
+}
+
+// run fetches the lobste.rs Go-tag RSS feed, writes a metadata file for every
+// item in it, and prints each item to stdout as a markdown link.
+//
+// It stops at the first failure and returns that error wrapped with the step
+// (and, where applicable, the item title) that produced it.
+func run() error {
+	ctx := context.Background()
+	l, err := lobsters.New("https://lobste.rs/t/go.rss")
+	if err != nil {
+		return fmt.Errorf("setting up lobsters go feed: %w", err)
+	}
+
+	items, err := l.Fetch(ctx)
+	if err != nil {
+		return fmt.Errorf("fetching %s: %w", l.Name(), err)
+	}
+	for _, item := range items {
+		c, err := content.NewItem(item.URL, item.Title)
+		if err != nil {
+			return fmt.Errorf("indexing %q: %w", item.Title, err)
+		}
+		if err := c.WriteMetadata(); err != nil {
+			return fmt.Errorf("writing metadata for %q: %w", item.Title, err)
+		}
+		fmt.Printf("[%s](%s)\n", item.Title, item.URL)
+	}
+	return nil
+}
internal/content/content.go
@@ -0,0 +1,123 @@
+package content
+
+import (
+	"encoding/json"
+	"net/url"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+// Filesystem layout used when persisting items.
+//
+//   - _contentDir: relative root directory; WriteMetadata creates one
+//     subdirectory per Item beneath it, named after the Item's Id.
+//   - _dirMode: permission bits passed to os.MkdirAll for those directories.
+//   - _fileMode: permission bits for written files.
+//     NOTE(review): not referenced by the code visible here; WriteMetadata
+//     uses os.Create, which takes no mode argument. Confirm the intent.
+//   - _metadataFilename: name of the JSON file written inside each Item
+//     directory.
+const (
+	_contentDir       = "content"
+	_dirMode          = 0o750
+	_fileMode         = 0o640
+	_metadataFilename = "metadata.json"
+)
+
+// Item is an indexed piece of content, keyed by the URLID that NewItem
+// derives from its canonical URL.
+//
+// Its JSON form is defined by the MarshalJSON and UnmarshalJSON methods,
+// which represent CanonicalURL as a string.
+type Item struct {
+	// Id is the URLID computed from CanonicalURL by NewItem; WriteMetadata
+	// uses it as the item's directory name.
+	Id           URLID    `json:"id"`
+	Title        string   `json:"title"`
+	CanonicalURL *url.URL `json:"canonical_url"`
+
+	// NOTE(review): the fields below are not populated by any code visible
+	// here. Presumably they track when the item was first/last observed and
+	// when its content was fetched; confirm against the callers.
+	FirstSeen    *time.Time            `json:"first_seen,omitempty"`
+	LastSeen     *time.Time            `json:"last_seen,omitempty"`
+	Fetched      *time.Time            `json:"fetched,omitempty"`
+	Observations map[URLID]Observation `json:"observations,omitempty"`
+}
+
+// NewItem builds an Item for the given URL and title. The Item's Id is the
+// URLID computed from url; an error from NewURLID is returned unchanged.
+func NewItem(url *url.URL, title string) (*Item, error) {
+	id, err := NewURLID(url)
+	if err != nil {
+		return nil, err
+	}
+
+	item := new(Item)
+	item.Id = id
+	item.Title = title
+	item.CanonicalURL = url
+	return item, nil
+}
+
+// WriteMetadata encodes the item as JSON into
+// <_contentDir>/<item.Id>/<_metadataFilename>, creating the directory if
+// needed and truncating any existing file.
+//
+// It returns the first error from creating the directory, creating the file,
+// encoding the item, or closing the file.
+//
+// NOTE(review): the file is created with os.Create's default mode; _fileMode
+// is not applied here. Confirm whether that is intended.
+func (item *Item) WriteMetadata() error {
+	itemDir := filepath.Join(_contentDir, string(item.Id))
+	if err := os.MkdirAll(itemDir, _dirMode); err != nil {
+		return err
+	}
+
+	f, err := os.Create(filepath.Join(itemDir, _metadataFilename))
+	if err != nil {
+		return err
+	}
+
+	if err := json.NewEncoder(f).Encode(item); err != nil {
+		// Release the descriptor; the encode error is the one worth reporting.
+		_ = f.Close()
+		return err
+	}
+
+	// Close explicitly rather than via defer so that an error from Close is
+	// returned to the caller instead of being dropped.
+	return f.Close()
+}
+
+// MarshalJSON allows for stringified urls in marshaled Item type
+// satisfies the json.Marshaler interface
+func (item *Item) MarshalJSON() ([]byte, error) {
+	i := struct {
+		Id           URLID  `json:"id"`
+		Title        string `json:"title"`
+		CanonicalURL string `json:"canonical_url"`
+
+		FirstSeen    *time.Time            `json:"first_seen,omitempty"`
+		LastSeen     *time.Time            `json:"last_seen,omitempty"`
+		Fetched      *time.Time            `json:"fetched,omitempty"`
+		Observations map[URLID]Observation `json:"observations,omitempty"`
+	}{
+		Id:           item.Id,
+		Title:        item.Title,
+		CanonicalURL: item.CanonicalURL.String(),
+		FirstSeen:    item.FirstSeen,
+		LastSeen:     item.LastSeen,
+		Fetched:      item.Fetched,
+		Observations: item.Observations,
+	}
+	return json.Marshal(&i)
+}
+
+// UnmarshalJSON parses string urls from the json bytes to url.URL where needed
+// satisfies the json.Unmarshaler interface
+func (item *Item) UnmarshalJSON(data []byte) error {
+	i := struct {
+		Id           URLID  `json:"id"`
+		Title        string `json:"title"`
+		CanonicalURL string `json:"canonical_url"`
+
+		FirstSeen    *time.Time            `json:"first_seen,omitempty"`
+		LastSeen     *time.Time            `json:"last_seen,omitempty"`
+		Fetched      *time.Time            `json:"fetched,omitempty"`
+		Observations map[URLID]Observation `json:"observations,omitempty"`
+	}{}
+	err := json.Unmarshal(data, &i)
+	if err != nil {
+		return err
+	}
+
+	u, err := url.Parse(i.CanonicalURL)
+	if err != nil {
+		return err
+	}
+
+	item.Id = i.Id
+	item.Title = i.Title
+	item.CanonicalURL = u
+	item.FirstSeen = i.FirstSeen
+	item.LastSeen = i.LastSeen
+	item.Fetched = i.Fetched
+	item.Observations = i.Observations
+
+	return nil
+}
internal/content/observations.go
@@ -0,0 +1,68 @@
+package content
+
+import (
+	"encoding/json"
+	"net/url"
+	"time"
+)
+
+// Observation is a specific discussion/permalink that points at an Item's URL.
+//
+// The MarshalJSON and UnmarshalJSON methods define its JSON form: snake_case
+// keys, with SourceURL represented as a string.
+type Observation struct {
+	// NOTE(review): no code visible here constructs an Observation, so the
+	// meaning of Id is not established; presumably it is the URLID of
+	// SourceURL. Confirm against the code that creates observations.
+	Id         URLID
+	SourceName string
+	SourceURL  *url.URL
+	Seen       time.Time
+
+	// Title is omitted from the JSON form when empty.
+	Title string
+	// TODO: Score, Comments, Etc
+}
+
+// MarshalJSON allows for stringified urls in marshaled Observation type
+// satisfies the json.Marshaler interface
+func (obs *Observation) MarshalJSON() ([]byte, error) {
+	o := struct {
+		Id         URLID     `json:"id"`
+		SourceName string    `json:"source_name"`
+		SourceURL  string    `json:"source_url"`
+		Seen       time.Time `json:"seen"`
+
+		Title string `json:"title,omitempty"`
+	}{
+		Id:         obs.Id,
+		SourceName: obs.SourceName,
+		SourceURL:  obs.SourceURL.String(),
+		Seen:       obs.Seen,
+		Title:      obs.Title,
+	}
+	return json.Marshal(&o)
+}
+
+// UnmarshalJSON parses string urls from the json bytes to url.URL where needed
+// satisfies the json.Unmarshaler interface
+func (obs *Observation) UnmarshalJSON(data []byte) error {
+	o := struct {
+		Id         URLID     `json:"id"`
+		SourceName string    `json:"source_name"`
+		SourceURL  string    `json:"source_url"`
+		Seen       time.Time `json:"seen"`
+
+		Title string `json:"title,omitempty"`
+	}{}
+	err := json.Unmarshal(data, &o)
+	if err != nil {
+		return err
+	}
+
+	u, err := url.Parse(o.SourceURL)
+	if err != nil {
+		return err
+	}
+
+	obs.Id = o.Id
+	obs.SourceName = o.SourceName
+	obs.SourceURL = u
+	obs.Seen = o.Seen
+	obs.Title = o.Title
+
+	return nil
+}
internal/content/urlid.go
@@ -0,0 +1,19 @@
+package content
+
+import (
+	"crypto/sha256"
+	"fmt"
+	"net/url"
+)
+
+// URLID identifies a URL by the lowercase hex encoding of the SHA-256 digest
+// of its string form.
+type URLID string // sha256 hex
+
+// NewURLID returns the URLID for u: the SHA-256 digest of u.String(),
+// hex-encoded (64 characters).
+//
+// It returns an error when u is nil. No normalization is applied, so URLs
+// whose string forms differ in any byte get different IDs.
+func NewURLID(u *url.URL) (URLID, error) {
+	if u == nil {
+		return "", fmt.Errorf("url id: nil url")
+	}
+	sum := sha256.Sum256([]byte(u.String()))
+	return URLID(fmt.Sprintf("%x", sum[:])), nil
+}
internal/fetch/sources/lobsters/lobsters.go
@@ -0,0 +1,63 @@
+package lobsters
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+
+	"mill/internal/fetch"
+
+	"github.com/mmcdole/gofeed"
+)
+
+// Source fetches items from a lobste.rs RSS feed.
+type Source struct {
+	// url is the feed location, parsed by New.
+	url  *url.URL
+	// feed is the feed most recently parsed by Fetch; nil until a Fetch
+	// succeeds.
+	feed *gofeed.Feed
+}
+
+// New returns a Source for the feed at feedURL. It fails only when feedURL
+// does not parse as a URL; no network access happens until Fetch.
+func New(feedURL string) (*Source, error) {
+	parsed, err := url.Parse(feedURL)
+	if err != nil {
+		return nil, err
+	}
+
+	src := new(Source)
+	src.url = parsed
+	return src, nil
+}
+
+func (s *Source) Name() string {
+	return "lobste.rs rss"
+}
+
+// Fetch downloads and parses the feed at the Source's URL and returns one
+// fetch.Item per feed entry, in feed order. The parsed feed is retained on
+// the Source.
+//
+// Each entry must carry exactly one link. An entry with any other link count,
+// or whose link does not parse as a URL, fails the whole fetch.
+func (s *Source) Fetch(ctx context.Context) ([]fetch.Item, error) {
+	fp := gofeed.NewParser()
+	feed, err := fp.ParseURLWithContext(s.url.String(), ctx)
+	if err != nil {
+		return nil, fmt.Errorf("rss feed parsing: %w", err)
+	}
+	s.feed = feed
+
+	items := make([]fetch.Item, len(feed.Items))
+	for i, item := range feed.Items {
+		if len(item.Links) != 1 {
+			// Report the entry's own link count (the value just checked),
+			// not the feed-level link count.
+			return nil, fmt.Errorf("unexpected link count: expected 1, got %d", len(item.Links))
+		}
+
+		u, err := url.Parse(item.Links[0])
+		if err != nil {
+			return nil, fmt.Errorf("parsing item link %q: %w", item.Links[0], err)
+		}
+
+		items[i] = fetch.Item{
+			Source: s.url.String(),
+			URL:    u,
+			Title:  item.Title,
+		}
+	}
+
+	return items, nil
+}
internal/fetch/fetch.go
@@ -0,0 +1,17 @@
+package fetch
+
+import (
+	"context"
+	"net/url"
+)
+
+// Item is a single entry returned by a Source: a link and its title, tagged
+// with where it came from.
+type Item struct {
+	// Source identifies the origin of the item. The lobsters source fills it
+	// with the feed URL string.
+	Source string // rss, lobste.rs, etc
+	URL    *url.URL
+	Title  string
+}
+
+// Source is something items can be fetched from, such as an RSS feed.
+type Source interface {
+	// Name returns a human-readable name for the source.
+	Name() string
+	// Fetch retrieves the source's current items, or an error if they could
+	// not be retrieved.
+	Fetch(ctx context.Context) ([]Item, error)
+}
go.mod
@@ -0,0 +1,15 @@
+module mill
+
+go 1.25.5
+
+require (
+	github.com/PuerkitoBio/goquery v1.8.0 // indirect
+	github.com/andybalholm/cascadia v1.3.1 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/mmcdole/gofeed v1.3.0
+	github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	golang.org/x/net v0.4.0 // indirect
+	golang.org/x/text v0.5.0 // indirect
+)
go.sum
@@ -0,0 +1,31 @@
+github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
+github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/mmcdole/gofeed v1.3.0 h1:5yn+HeqlcvjMeAI4gu6T+crm7d0anY85+M+v6fIFNG4=
+github.com/mmcdole/gofeed v1.3.0/go.mod h1:9TGv2LcJhdXePDzxiuMnukhV2/zb6VtnZt1mS+SjkLE=
+github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 h1:Zr92CAlFhy2gL+V1F+EyIuzbQNbSgP4xhTODZtrXUtk=
+github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.4.0 h1:Q5QPcMlvfxFTAPV0+07Xz/MpK9NTXu2VDUuy0FeMfaU=
+golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM=
+golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=