Commit 0b031a2
2025-12-31 15:06:59
Changed files (8)
cmd
mill
internal
content
fetch
sources
lobsters
cmd/mill/main.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+ "context"
+ "fmt"
+ "mill/internal/content"
+ "mill/internal/fetch/sources/lobsters"
+)
+
+// main is the process entry point. Any failure reported by run aborts the
+// program with a panic.
+func main() {
+	if failure := run(); failure != nil {
+		panic(failure)
+	}
+}
+
+// run fetches the lobste.rs Go tag feed, writes a metadata file for every
+// entry, and prints each entry as a markdown link on stdout.
+//
+// It stops at the first failure and returns that error wrapped with the step
+// (and, inside the loop, the item URL) that produced it.
+func run() error {
+	ctx := context.Background()
+
+	l, err := lobsters.New("https://lobste.rs/t/go.rss")
+	if err != nil {
+		return fmt.Errorf("setting up lobsters go feed: %w", err)
+	}
+
+	items, err := l.Fetch(ctx)
+	if err != nil {
+		return fmt.Errorf("fetching lobsters go feed: %w", err)
+	}
+
+	for _, item := range items {
+		c, err := content.NewItem(item.URL, item.Title)
+		if err != nil {
+			return fmt.Errorf("indexing %q: %w", item.URL, err)
+		}
+		if err := c.WriteMetadata(); err != nil {
+			return fmt.Errorf("writing metadata for %q: %w", item.URL, err)
+		}
+		fmt.Printf("[%s](%s)\n", item.Title, item.URL)
+	}
+	return nil
+}
internal/content/content.go
@@ -0,0 +1,123 @@
+package content
+
+import (
+ "encoding/json"
+ "net/url"
+ "os"
+ "path/filepath"
+ "time"
+)
+
+const (
+ // _contentDir is the relative directory under which WriteMetadata creates
+ // one subdirectory per item.
+ _contentDir = "content"
+ // _dirMode is the permission mode passed to os.MkdirAll for item directories.
+ _dirMode = 0o750
+ // _fileMode is a file permission mode: owner read/write, group read.
+ _fileMode = 0o640
+ // _metadataFilename is the name of the JSON file written inside each item
+ // directory.
+ _metadataFilename = "metadata.json"
+)
+
+// Item is an indexed piece of content, identified by the URLID that NewItem
+// derives from its URL. MarshalJSON and UnmarshalJSON store CanonicalURL as a
+// plain string in the JSON form.
+type Item struct {
+ // Id is the URLID computed from the URL passed to NewItem.
+ Id URLID `json:"id"`
+ Title string `json:"title"`
+ // CanonicalURL is the URL the item was created for.
+ CanonicalURL *url.URL `json:"canonical_url"`
+
+ // Optional timestamps; each is omitted from the JSON form when nil.
+ FirstSeen *time.Time `json:"first_seen,omitempty"`
+ LastSeen *time.Time `json:"last_seen,omitempty"`
+ Fetched *time.Time `json:"fetched,omitempty"`
+ // Observations are keyed by URLID — presumably the observation's own Id;
+ // TODO confirm against the code that populates the map.
+ Observations map[URLID]Observation `json:"observations,omitempty"`
+}
+
+func NewItem(
+ url *url.URL,
+ title string,
+) (*Item, error) {
+ id, err := NewURLID(url)
+ if err != nil {
+ return nil, err
+ }
+ return &Item{
+ Id: id,
+ Title: title,
+ CanonicalURL: url,
+ }, nil
+}
+
+// WriteMetadata persists the item as JSON to
+// <_contentDir>/<item.Id>/<_metadataFilename>. The item directory (and any
+// missing parents) is created with _dirMode, and an existing metadata file is
+// truncated and overwritten.
+//
+// It returns the first error from creating the directory, opening the file,
+// encoding the item, or closing the file.
+func (item *Item) WriteMetadata() error {
+	itemDir := filepath.Join(_contentDir, string(item.Id))
+	if err := os.MkdirAll(itemDir, _dirMode); err != nil {
+		return err
+	}
+
+	itemMetadata := filepath.Join(itemDir, _metadataFilename)
+	// Same flags as os.Create, but newly created files get _fileMode instead
+	// of os.Create's 0o666.
+	f, err := os.OpenFile(itemMetadata, os.O_RDWR|os.O_CREATE|os.O_TRUNC, _fileMode)
+	if err != nil {
+		return err
+	}
+
+	if err := json.NewEncoder(f).Encode(item); err != nil {
+		// The encode failure is the error worth reporting; closing here is
+		// best effort and only releases the descriptor.
+		_ = f.Close()
+		return err
+	}
+
+	// Close explicitly rather than via defer so a failed close is reported.
+	return f.Close()
+}
+
+// MarshalJSON allows for stringified urls in marshaled Item type
+// satisfies the json.Marshaler interface
+func (item *Item) MarshalJSON() ([]byte, error) {
+ i := struct {
+ Id URLID `json:"id"`
+ Title string `json:"title"`
+ CanonicalURL string `json:"canonical_url"`
+
+ FirstSeen *time.Time `json:"first_seen,omitempty"`
+ LastSeen *time.Time `json:"last_seen,omitempty"`
+ Fetched *time.Time `json:"fetched,omitempty"`
+ Observations map[URLID]Observation `json:"observations,omitempty"`
+ }{
+ Id: item.Id,
+ Title: item.Title,
+ CanonicalURL: item.CanonicalURL.String(),
+ FirstSeen: item.FirstSeen,
+ LastSeen: item.LastSeen,
+ Fetched: item.Fetched,
+ Observations: item.Observations,
+ }
+ return json.Marshal(&i)
+}
+
+// UnmarshalJSON parses string urls from the json bytes to url.URL where needed
+// satisfies the json.Unmarshaler interface
+func (item *Item) UnmarshalJSON(data []byte) error {
+ i := struct {
+ Id URLID `json:"id"`
+ Title string `json:"title"`
+ CanonicalURL string `json:"canonical_url"`
+
+ FirstSeen *time.Time `json:"first_seen,omitempty"`
+ LastSeen *time.Time `json:"last_seen,omitempty"`
+ Fetched *time.Time `json:"fetched,omitempty"`
+ Observations map[URLID]Observation `json:"observations,omitempty"`
+ }{}
+ err := json.Unmarshal(data, &i)
+ if err != nil {
+ return err
+ }
+
+ u, err := url.Parse(i.CanonicalURL)
+ if err != nil {
+ return err
+ }
+
+ item.Id = i.Id
+ item.Title = i.Title
+ item.CanonicalURL = u
+ item.FirstSeen = i.FirstSeen
+ item.LastSeen = i.LastSeen
+ item.Fetched = i.Fetched
+ item.Observations = i.Observations
+
+ return nil
+}
internal/content/observations.go
@@ -0,0 +1,68 @@
+package content
+
+import (
+ "encoding/json"
+ "net/url"
+ "time"
+)
+
+// Observation is a specific discussion/permalink that points at an Item's URL.
+// Its JSON form (see MarshalJSON/UnmarshalJSON) uses snake_case keys and
+// stores SourceURL as a string.
+type Observation struct {
+ // Id identifies the observation — presumably the URLID of SourceURL;
+ // TODO confirm against the code that creates observations.
+ Id URLID
+ // SourceName is stored under the "source_name" JSON key.
+ SourceName string
+ // SourceURL is stored as a string under the "source_url" JSON key.
+ SourceURL *url.URL
+ // Seen is stored under the "seen" JSON key.
+ Seen time.Time
+
+ // Title is omitted from the JSON form when empty.
+ Title string
+ // TODO: Score, Comments, Etc
+}
+
+// MarshalJSON allows for stringified urls in marshaled Observation type
+// satisfies the json.Marshaler interface
+func (obs *Observation) MarshalJSON() ([]byte, error) {
+ o := struct {
+ Id URLID `json:"id"`
+ SourceName string `json:"source_name"`
+ SourceURL string `json:"source_url"`
+ Seen time.Time `json:"seen"`
+
+ Title string `json:"title,omitempty"`
+ }{
+ Id: obs.Id,
+ SourceName: obs.SourceName,
+ SourceURL: obs.SourceURL.String(),
+ Seen: obs.Seen,
+ Title: obs.Title,
+ }
+ return json.Marshal(&o)
+}
+
+// UnmarshalJSON parses string urls from the json bytes to url.URL where needed
+// satisfies the json.Unmarshaler interface
+func (obs *Observation) UnmarshalJSON(data []byte) error {
+ o := struct {
+ Id URLID `json:"id"`
+ SourceName string `json:"source_name"`
+ SourceURL string `json:"source_url"`
+ Seen time.Time `json:"seen"`
+
+ Title string `json:"title,omitempty"`
+ }{}
+ err := json.Unmarshal(data, &o)
+ if err != nil {
+ return err
+ }
+
+ u, err := url.Parse(o.SourceURL)
+ if err != nil {
+ return err
+ }
+
+ obs.Id = o.Id
+ obs.SourceName = o.SourceName
+ obs.SourceURL = u
+ obs.Seen = o.Seen
+ obs.Title = o.Title
+
+ return nil
+}
internal/content/urlid.go
@@ -0,0 +1,19 @@
+package content
+
+import (
+ "crypto/sha256"
+ "fmt"
+ "net/url"
+)
+
+// URLID identifies a URL by the lowercase hex encoding of the SHA-256 digest
+// of its string form.
+type URLID string // sha256 hex
+
+// NewURLID derives the URLID for u by hashing u.String() with SHA-256.
+//
+// It returns an error, rather than panicking, when u is nil. No other failure
+// is possible: computing the digest cannot fail.
+func NewURLID(u *url.URL) (URLID, error) {
+	if u == nil {
+		return "", fmt.Errorf("deriving url id: nil url")
+	}
+	sum := sha256.Sum256([]byte(u.String()))
+	return URLID(fmt.Sprintf("%x", sum[:])), nil
+}
internal/fetch/sources/lobsters/lobsters.go
@@ -0,0 +1,63 @@
+package lobsters
+
+import (
+ "context"
+ "fmt"
+ "net/url"
+
+ "mill/internal/fetch"
+
+ "github.com/mmcdole/gofeed"
+)
+
+// Source is the lobste.rs feed source. It holds the feed address and the
+// most recently parsed feed.
+type Source struct {
+ // url is the feed address parsed by New.
+ url *url.URL
+ // feed is the parsed feed stored by the last successful Fetch; New leaves
+ // it nil.
+ feed *gofeed.Feed
+}
+
+func New(feedURL string) (*Source, error) {
+ u, err := url.Parse(feedURL)
+ if err != nil {
+ return nil, err
+ }
+
+ return &Source{
+ url: u,
+ }, nil
+}
+
+func (s *Source) Name() string {
+ return "lobste.rs rss"
+}
+
+// Fetch downloads and parses the feed at the Source's URL and converts every
+// entry into a fetch.Item whose Source field is the feed URL. The parsed feed
+// is retained on the Source.
+//
+// Each entry must carry exactly one link. If an entry has a different number
+// of links, or its link is not a parseable URL, Fetch fails and returns no
+// items.
+func (s *Source) Fetch(ctx context.Context) ([]fetch.Item, error) {
+	fp := gofeed.NewParser()
+	feed, err := fp.ParseURLWithContext(s.url.String(), ctx)
+	if err != nil {
+		return nil, fmt.Errorf("rss feed parsing: %w", err)
+	}
+	s.feed = feed
+
+	source := s.url.String()
+	items := make([]fetch.Item, 0, len(feed.Items))
+	for _, entry := range feed.Items {
+		// Report the entry's own link count; the feed-level Links slice is
+		// unrelated to the check that failed.
+		if len(entry.Links) != 1 {
+			return nil, fmt.Errorf("unexpected link count: expected 1, got %d", len(entry.Links))
+		}
+
+		u, err := url.Parse(entry.Links[0])
+		if err != nil {
+			return nil, fmt.Errorf("parsing item link %q: %w", entry.Links[0], err)
+		}
+
+		items = append(items, fetch.Item{
+			Source: source,
+			URL:    u,
+			Title:  entry.Title,
+		})
+	}
+
+	return items, nil
+}
internal/fetch/fetch.go
@@ -0,0 +1,17 @@
+package fetch
+
+import (
+ "context"
+ "net/url"
+)
+
+// Item is a single entry returned by a Source's Fetch.
+type Item struct {
+ Source string // rss, lobste.rs, etc
+ // URL is the link associated with the entry.
+ URL *url.URL
+ // Title is the entry's title.
+ Title string
+}
+
+// Source is anything that can be asked for a list of Items.
+type Source interface {
+ // Name returns a label for the source.
+ Name() string
+ // Fetch returns the source's Items, or an error. ctx is handed to the
+ // implementation.
+ Fetch(ctx context.Context) ([]Item, error)
+}
go.mod
@@ -0,0 +1,15 @@
+module mill
+
+go 1.25.5
+
+require (
+ github.com/PuerkitoBio/goquery v1.8.0 // indirect
+ github.com/andybalholm/cascadia v1.3.1 // indirect
+ github.com/json-iterator/go v1.1.12 // indirect
+ github.com/mmcdole/gofeed v1.3.0
+ github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect
+ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+ github.com/modern-go/reflect2 v1.0.2 // indirect
+ golang.org/x/net v0.4.0 // indirect
+ golang.org/x/text v0.5.0 // indirect
+)
go.sum
@@ -0,0 +1,31 @@
+github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
+github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/mmcdole/gofeed v1.3.0 h1:5yn+HeqlcvjMeAI4gu6T+crm7d0anY85+M+v6fIFNG4=
+github.com/mmcdole/gofeed v1.3.0/go.mod h1:9TGv2LcJhdXePDzxiuMnukhV2/zb6VtnZt1mS+SjkLE=
+github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 h1:Zr92CAlFhy2gL+V1F+EyIuzbQNbSgP4xhTODZtrXUtk=
+github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.4.0 h1:Q5QPcMlvfxFTAPV0+07Xz/MpK9NTXu2VDUuy0FeMfaU=
+golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM=
+golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=