convert some of my feeds to db entries

This commit is contained in:
colin 2023-01-11 13:16:26 +00:00
parent 2413e2eb5f
commit dc6a08a12b
15 changed files with 255 additions and 18 deletions

View File

@ -1,4 +1,4 @@
{ ... }:
{ lib, sane-data, ... }:
let
hourly = { freq = "hourly"; };
daily = { freq = "daily"; };
@ -21,19 +21,39 @@ let
# host-specific helpers
mkSubstack = subdomain: { substack = subdomain; };
# Build a feed attrset from the record stored under `sane-data.feeds.<name>`.
#   url    -> copied verbatim from the record.
#   format -> "podcast" only when the record sets `is_podcast`; otherwise the
#             definition is withheld via `lib.mkIf`.
#   freq   -> bucketed from the record's `velocity` and wrapped in
#             `lib.mkDefault`.
fromDb = name:
  let
    entry = sane-data.feeds."${name}";
    # a record without a `velocity` field is treated as velocity 0.
    velocity = entry.velocity or 0;
    # not sure of the exact mapping with velocity here: presumably entries
    # per day -- TODO confirm.
    cadence =
      if velocity > 2 then "hourly"
      else if velocity > 0.5 then "daily"
      else if velocity > 0.1 then "weekly"
      else "infrequent";
  in {
    inherit (entry) url;
    format = lib.mkIf (entry.is_podcast or false) "podcast";
    freq = lib.mkDefault cadence;
  };
podcasts = [
(mkPod "https://lexfridman.com/feed/podcast/" // rat // weekly)
(fromDb "lexfridman.com/podcast" // rat)
# (mkPod "https://lexfridman.com/feed/podcast/" // rat // weekly)
## Astral Codex Ten
(mkPod "https://sscpodcast.libsyn.com/rss" // rat // daily)
(fromDb "sscpodcast.libsyn.com" // rat)
## Econ Talk
(mkPod "https://feeds.simplecast.com/wgl4xEgL" // rat // daily)
(fromDb "econtalk.org" // rat)
## Cory Doctorow
(mkPod "https://feeds.feedburner.com/doctorow_podcast" // pol // infrequent)
(fromDb "feeds.feedburner.com/doctorow_podcast" // pol)
(mkPod "https://congressionaldish.libsyn.com/rss" // pol // infrequent)
## Civboot
(mkPod "https://anchor.fm/s/34c7232c/podcast/rss" // tech // infrequent)
(mkPod "https://feeds.feedburner.com/80000HoursPodcast" // rat // weekly)
(mkPod "https://allinchamathjason.libsyn.com/rss" // pol // weekly)
(fromDb "allinchamathjason.libsyn.com" // pol)
(mkPod "https://acquired.libsyn.com/rss" // tech // infrequent)
(mkPod "https://rss.acast.com/deconstructed" // pol // infrequent)
## The Daily
@ -61,8 +81,8 @@ let
texts = [
# AGGREGATORS (> 1 post/day)
(mkText "https://www.lesswrong.com/feed.xml" // rat // hourly)
(mkText "http://www.econlib.org/index.xml" // pol // hourly)
(fromDb "lesswrong.com" // rat)
(fromDb "econlib.org" // pol)
# AGGREGATORS (< 1 post/day)
(mkText "https://palladiummag.com/feed" // uncat // weekly)
@ -75,10 +95,10 @@ let
(mkText "https://www.rifters.com/crawl/?feed=rss2" // uncat // weekly)
# DEVELOPERS
(mkText "https://uninsane.org/atom.xml" // infrequent // tech)
(mkText "https://mg.lol/blog/rss/" // infrequent // tech)
(fromDb "uninsane.org" // tech)
(fromDb "mg.lol" // tech)
## Ken Shirriff
(mkText "https://www.righto.com/feeds/posts/default" // tech // infrequent)
(fromDb "righto.com" // tech)
## Vitalik Buterin
(mkText "https://vitalik.ca/feed.xml" // tech // infrequent)
## ian (Sanctuary)
@ -94,7 +114,7 @@ let
(mkText "https://pomeroyb.com/feed.xml" // tech // infrequent)
# (TECH; POL) COMMENTATORS
(mkSubstack "edwardsnowden" // pol // infrequent)
(fromDb "edwardsnowden.substack.com" // pol)
(mkText "http://benjaminrosshoffman.com/feed" // pol // weekly)
## Ben Thompson
(mkText "https://www.stratechery.com/rss" // pol // weekly)

View File

@ -0,0 +1,21 @@
{
"bozo": 0,
"content_length": 1030773,
"content_type": "application/rss+xml; charset=utf-8",
"description": "Industry veterans, degenerate gamblers & besties Chamath Palihapitiya, Jason Calacanis, David Sacks & David Friedberg cover all things economic, tech, political, social & poker.",
"favicon": null,
"hubs": [],
"is_podcast": true,
"is_push": false,
"item_count": 124,
"last_seen": "2023-01-11T12:44:53.606606+00:00",
"last_updated": "2023-01-06T10:51:00+00:00",
"score": 18,
"self_url": "https://allinchamathjason.libsyn.com/rss",
"site_name": "All-In with Chamath, Jason, Sacks & Friedberg",
"site_url": "https://allinchamathjason.libsyn.com",
"title": "All-In with Chamath, Jason, Sacks & Friedberg",
"url": "https://allinchamathjason.libsyn.com/rss",
"velocity": 0.12,
"version": "rss20"
}

View File

@ -0,0 +1,21 @@
{
"bozo": 0,
"content_length": 12669,
"content_type": "application/rss+xml; charset=utf-8",
"description": "The territory is a map of the map.",
"favicon": "http://benjaminrosshoffman.com/favicon.ico",
"hubs": [],
"is_podcast": false,
"is_push": false,
"item_count": 10,
"last_seen": "2023-01-11T12:32:52.176940+00:00",
"last_updated": "2023-01-09T04:33:31+00:00",
"score": -15,
"self_url": "http://benjaminrosshoffman.com/comments/feed/",
"site_name": "Compass Rose",
"site_url": "http://benjaminrosshoffman.com",
"title": "Comments for Compass Rose",
"url": "http://benjaminrosshoffman.com/comments/feed/",
"velocity": 0.312,
"version": "rss20"
}

View File

@ -0,0 +1,21 @@
{
"bozo": 0,
"content_length": 66775,
"content_type": "application/rss+xml; charset=utf-8",
"description": "The Library of Economics and Liberty",
"favicon": null,
"hubs": [],
"is_podcast": false,
"is_push": false,
"item_count": 10,
"last_seen": "2023-01-11T10:46:38.526754+00:00",
"last_updated": "2023-01-10T05:21:31+00:00",
"score": 14,
"self_url": "https://www.econlib.org/feed/",
"site_name": "Econlib",
"site_url": "https://www.econlib.org",
"title": "Econlib",
"url": "https://www.econlib.org/feed/",
"velocity": 2.549,
"version": "rss20"
}

View File

@ -0,0 +1,21 @@
{
"bozo": 0,
"content_length": 27185,
"content_type": "application/rss+xml; charset=utf-8",
"description": "The Library of Economics and Liberty",
"favicon": null,
"hubs": [],
"is_podcast": false,
"is_push": false,
"item_count": 10,
"last_seen": "2023-01-11T13:05:47.318206+00:00",
"last_updated": "2023-01-09T11:30:25+00:00",
"score": 14,
"self_url": "https://www.econtalk.org/feed/",
"site_name": null,
"site_url": null,
"title": "EconTalk Podcast Econlib",
"url": "https://www.econtalk.org/feed",
"velocity": 0.143,
"version": "rss20"
}

View File

@ -0,0 +1,21 @@
{
"bozo": 0,
"content_length": 429348,
"content_type": "application/rss+xml; charset=utf-8",
"description": "The world's most famous whistleblower writes from exile on the intersection of technology, humanity, and power.",
"favicon": "https://bucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com/public/images/2a7d3aa2-3c2f-4196-ab7c-31541be1272e/favicon.ico",
"hubs": [],
"is_podcast": true,
"is_push": false,
"item_count": 16,
"last_seen": "2023-01-11T12:32:02.320483+00:00",
"last_updated": "2022-09-20T13:03:59+00:00",
"score": 14,
"self_url": "https://edwardsnowden.substack.com/feed",
"site_name": "Continuing Ed — with Edward Snowden",
"site_url": "https://edwardsnowden.substack.com",
"title": "Continuing Ed — with Edward Snowden",
"url": "https://edwardsnowden.substack.com/feed",
"velocity": 0.032,
"version": "rss20"
}

View File

@ -0,0 +1,21 @@
{
"bozo": 0,
"content_length": 62633,
"content_type": "text/xml; charset=utf-8",
"description": "Articles, speeches, stories and novels by an award-winning science fiction writer, read aloud in small regular chunks",
"favicon": null,
"hubs": [],
"is_podcast": true,
"is_push": false,
"item_count": 20,
"last_seen": "2023-01-11T12:57:50.103797+00:00",
"last_updated": "2022-12-12T14:46:35+00:00",
"score": 4,
"self_url": "https://craphound.com/category/podcast/feed/",
"site_name": null,
"site_url": null,
"title": "Podcast Cory Doctorow's craphound.com",
"url": "https://feeds.feedburner.com/doctorow_podcast",
"velocity": 0.068,
"version": "rss20"
}

View File

@ -10,7 +10,7 @@
"is_podcast": true,
"is_push": true,
"item_count": 300,
"last_seen": "2023-01-08T23:41:32.928322+00:00",
"last_seen": "2023-01-11T12:40:59.343327+00:00",
"last_updated": "2022-12-29T17:35:50+00:00",
"score": 20,
"self_url": "https://lexfridman.com/feed/podcast/",

View File

@ -0,0 +1,21 @@
{
"bozo": 0,
"content_length": 83074,
"content_type": "text/xml; charset=utf-8",
"description": "projects & research",
"favicon": null,
"hubs": [],
"is_podcast": false,
"is_push": false,
"item_count": 14,
"last_seen": "2023-01-11T12:28:34.383284+00:00",
"last_updated": "2021-07-29T05:10:05+00:00",
"score": 14,
"self_url": "https://mg.lol/blog/rss/",
"site_name": null,
"site_url": "https://mg.lol",
"title": "MG",
"url": "https://mg.lol/blog/rss/",
"velocity": 0.004,
"version": "rss20"
}

View File

@ -0,0 +1,23 @@
{
"bozo": 0,
"content_length": 862917,
"content_type": "application/atom+xml; charset=utf-8",
"description": "Computer history, restoring vintage computers, IC reverse engineering, and whatever",
"favicon": "https://www.blogger.com/about/favicon/favicon.ico",
"hubs": [
"http://pubsubhubbub.appspot.com/"
],
"is_podcast": false,
"is_push": true,
"item_count": 25,
"last_seen": "2023-01-11T12:29:19.820378+00:00",
"last_updated": "2023-01-10T18:21:20.265000+00:00",
"score": -2,
"self_url": "https://www.blogger.com/feeds/6264947694886887540/posts/default",
"site_name": "Blogger.com - Create a unique and beautiful blog easily.",
"site_url": "https://www.blogger.com",
"title": "Ken Shirriff's blog",
"url": "https://www.blogger.com/feeds/6264947694886887540/posts/default",
"velocity": 0.12,
"version": "atom10"
}

View File

@ -0,0 +1,21 @@
{
"bozo": 0,
"content_length": 3905927,
"content_type": "application/rss+xml; charset=utf-8",
"description": "The official audio version of Astral Codex Ten, with an archive of posts from Slate Star Codex. It's just me reading Scott Alexander's blog posts.",
"favicon": null,
"hubs": [],
"is_podcast": true,
"is_push": false,
"item_count": 739,
"last_seen": "2023-01-11T11:05:40.604126+00:00",
"last_updated": "2023-01-11T05:13:00+00:00",
"score": 18,
"self_url": "https://sscpodcast.libsyn.com/rss",
"site_name": "Astral Codex Ten Podcast",
"site_url": "https://sscpodcast.libsyn.com",
"title": "Astral Codex Ten Podcast",
"url": "https://sscpodcast.libsyn.com/rss",
"velocity": 0.384,
"version": "rss20"
}

View File

@ -0,0 +1,21 @@
{
"bozo": 1,
"content_length": 178687,
"content_type": "text/xml; charset=utf-8",
"description": null,
"favicon": null,
"hubs": [],
"is_podcast": false,
"is_push": false,
"item_count": 6,
"last_seen": "2023-01-11T10:51:13.435393+00:00",
"last_updated": "2022-10-13T00:00:00+00:00",
"score": -4,
"self_url": "https://uninsane.org/atom.xml",
"site_name": "Perfectly Sane",
"site_url": "https://uninsane.org",
"title": "Perfectly Sane",
"url": "https://uninsane.org/atom.xml",
"velocity": 0.025,
"version": "atom10"
}

View File

@ -1,4 +1,4 @@
{ lib, ... }:
{ lib, sane-data, ... }:
with lib;
let

View File

@ -26,12 +26,17 @@
passthru.initFeedScript = pkgs.writeShellScript
"init-feed"
''
sources_dir=modules/data/feeds/sources
name="$1"
url="https://$name"
dir="modules/data/feeds/sources/$name"
json_path="$dir/default.json"
mkdir "$dir"
json_path="$sources_dir/$name/default.json"
# the name could have slashes in it, so create every intermediate directory.
# do it inside a subshell, and only once entering "$sources_dir" has succeeded:
# that way nothing is ever created relative to the wrong directory, the working
# directory of the rest of this script is untouched, and no directory-stack
# output from pushd/popd ends up on stdout ahead of the JSON printed below.
# bail out early if the directory could not be created, since the update step
# would have nowhere to write its result.
(cd "$sources_dir" && mkdir -p "$name") || exit 1
${./update.sh} "$url" "$json_path"
cat "$json_path"
'';
}
))

View File

@ -7,4 +7,4 @@ url="$1"
jsonPath="$2"
apiQuery="https://feedsearch.dev/api/v1/search?url=$url"
curl -X GET "$apiQuery" | jq '.[-1]' > "$jsonPath"
curl -X GET "$apiQuery" | jq '.[0]' > "$jsonPath"