START.md LINK TERRORIZER!!

Name: START.md LINK TERRORIZER!!
Creator: troycarboni
Published: 2026-06-12T00:03:09.050Z
License: https://opensource.org/licenses/MIT

v1 · Published

You know a link hates to see me coming!

›Author's sample data

pages
source	https://start.me/pages
pageCount	40
scrapedAt	2026-06-11T23:49:04.000Z
totalLinks	16281

›Publisher

0 subscribers

troycarboni · @troycarboni

Every day at 3:00 AM · 0 runs in 14d · published 4h ago

›Parameters

--max-pages (number): Maximum number of bookmark pages to scrape (BFS-expanded from the directory). Higher values collect more links but take longer. Default: 40 · e.g. 40

›Versions

managed by author

v1 · built · approved · current · 4h ago

Schedules — deploy to enable

Run this collector on a cadence — daily, hourly, your call.

API endpoint — deploy to unlock

POST to run it on demand and get fresh data in the response.

Activity — deploy to track

0 subscriber runs in the last 14 days.

How this script collects data

import Firecrawl from "@mendable/firecrawl-js";
import * as cheerio from "cheerio";
import { parseArgs } from "node:util";

// Read the Firecrawl credential from the environment and stop immediately
// (exit code 1) when it is missing or empty, before any client is created.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (apiKey === undefined || apiKey === "") {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}
// Shared client used for every scrape request in this script.
const firecrawl = new Firecrawl({ apiKey: apiKey });

// Command-line flags. Parsing is strict, so any flag other than --max-pages
// makes parseArgs throw.
const { values: flags } = parseArgs({
  options: {
    // Cap on bookmark pages to scrape; passed as a string and converted below.
    "max-pages": { type: "string" },
  },
  strict: true,
});

// Fall back to 40 when the flag is absent, otherwise convert the given text.
const maxPages =
  flags["max-pages"] === undefined ? 40 : Number(flags["max-pages"]);

// Reject NaN, +/-Infinity and anything below 1.
if (!(Number.isFinite(maxPages) && maxPages >= 1)) {
  console.error("--max-pages must be a positive number");
  process.exit(1);
}

// Matches an absolute https://start.me/p/<id>/<slug> URL. Capture group 1 is
// the alphanumeric page id; the slug must contain at least one character that
// is not a double quote, '#' or '?'.
const PAGE_RE = /^https:\/\/start\.me\/p\/([A-Za-z0-9]+)\/[^"#?]+/;

/**
 * Pull the page id out of a start.me bookmark page URL.
 *
 * @param url URL to inspect.
 * @returns The id captured by PAGE_RE, or null when the URL does not match.
 */
function pageId(url: string): string | null {
  const match = PAGE_RE.exec(url);
  if (match === null) {
    return null;
  }
  return match[1];
}

/**
 * Fetch the raw HTML of a URL through the Firecrawl client.
 *
 * The HTML is read from `rawHtml` on the response itself, falling back to
 * `data.rawHtml` when the first is null or undefined.
 *
 * @param url Page to scrape.
 * @returns The raw HTML string.
 * @throws Error when the response carries no (or empty) raw HTML.
 */
async function getRawHtml(url: string): Promise<string> {
  const response: any = await firecrawl.scrape(url, {
    formats: ["rawHtml"],
    integration: "prometheus",
  });
  const html = response?.rawHtml ?? response?.data?.rawHtml ?? "";
  if (html) {
    return html;
  }
  throw new Error(`no rawHtml returned for ${url}`);
}

/**
 * Gather the bookmark-page URLs linked from a loaded HTML document.
 *
 * Every anchor with an href is checked against PAGE_RE; matching hrefs have
 * their fragment and query string removed and are de-duplicated.
 *
 * @param $ Loaded cheerio document.
 * @returns Unique cleaned URLs, in order of first appearance.
 */
function extractPageLinks($: cheerio.CheerioAPI): string[] {
  const found = new Set<string>();
  $("a[href]").each((_index, anchor) => {
    const href = $(anchor).attr("href") || "";
    if (!PAGE_RE.test(href)) return;
    // Drop "#fragment" first, then "?query".
    const withoutFragment = href.split("#")[0];
    const withoutQuery = withoutFragment.split("?")[0];
    found.add(withoutQuery);
  });
  return Array.from(found);
}

/**
 * Read the bookmark entries from a loaded bookmark page.
 *
 * Considers every `a.bookmark-item__link` element. Entries with an empty href
 * are ignored, and only the first entry for a given URL is kept. The title is
 * the anchor's `title` attribute, or its text content when that is missing or
 * empty.
 *
 * @param $ Loaded cheerio document.
 * @returns Bookmarks in document order, each with a trimmed url and title.
 */
function extractBookmarks($: cheerio.CheerioAPI): { url: string; title: string }[] {
  // Keyed by URL; Map keeps insertion order, so the first occurrence wins.
  const byUrl = new Map<string, { url: string; title: string }>();
  $("a.bookmark-item__link").each((_index, anchor) => {
    const link = $(anchor);
    const url = (link.attr("href") || "").trim();
    if (url === "" || byUrl.has(url)) return;
    const title = (link.attr("title") || link.text() || "").trim();
    byUrl.set(url, { url, title });
  });
  return Array.from(byUrl.values());
}

/**
 * Return the document title without a trailing "- Start.me" suffix.
 *
 * Uses the text of the first <title> element; the suffix match ignores case
 * and surrounding whitespace.
 *
 * @param $ Loaded cheerio document.
 * @returns The trimmed title, or an empty string when there is none.
 */
function pageTitle($: cheerio.CheerioAPI): string {
  const rawTitle = $("title").first().text().trim();
  const withoutSuffix = rawTitle.replace(/\s*-\s*Start\.me\s*$/i, "");
  return withoutSuffix.trim();
}

/**
 * Crawl start.me bookmark pages breadth-first and print the result as JSON.
 *
 * Loads the https://start.me/pages directory and uses the bookmark-page links
 * found there as seeds. Pages are then scraped from the front of the queue
 * until the queue is empty or `maxPages` pages have been collected; bookmark
 * page links found on each scraped page are appended to the queue.
 *
 * Every URL, seed or discovered, is queued at most once per page id, so two
 * URLs that differ only in slug never cause the same page to be scraped twice.
 * A page that fails to scrape is logged to stderr and skipped; it does not
 * count toward `maxPages`.
 *
 * Progress messages go to stderr; the single JSON document goes to stdout.
 *
 * @throws Error when the directory cannot be fetched or contains no bookmark
 *   page links.
 */
async function main(): Promise<void> {
  type ScrapedPage = {
    pageUrl: string;
    title: string;
    linkCount: number;
    links: { url: string; title: string }[];
  };

  const seedUrl = "https://start.me/pages";
  console.error(`Loading directory ${seedUrl}`);
  const dir = cheerio.load(await getRawHtml(seedUrl));
  const seeds = extractPageLinks(dir);
  console.error(`Found ${seeds.length} featured bookmark pages`);
  if (seeds.length === 0) {
    throw new Error("no bookmark page links found on start.me/pages directory");
  }

  // BFS state: URLs waiting to be scraped, plus the ids of every page that has
  // ever been queued.
  const queue: string[] = [];
  const queuedIds = new Set<string>();

  // Queue a URL unless it has no recognisable page id or that id was already
  // queued. Used for seeds and for discovered links alike.
  const enqueue = (candidate: string): void => {
    const id = pageId(candidate);
    if (id === null || queuedIds.has(id)) return;
    queuedIds.add(id);
    queue.push(candidate);
  };

  for (const seed of seeds) {
    enqueue(seed);
  }

  const pages: ScrapedPage[] = [];

  while (queue.length > 0 && pages.length < maxPages) {
    const url = queue.shift()!;
    try {
      console.error(`[${pages.length + 1}/${maxPages}] scraping ${url}`);
      const $ = cheerio.load(await getRawHtml(url));
      const links = extractBookmarks($);
      pages.push({
        pageUrl: url,
        title: pageTitle($),
        linkCount: links.length,
        links,
      });
      // Discover more bookmark pages from this page.
      for (const disc of extractPageLinks($)) {
        enqueue(disc);
      }
    } catch (err) {
      // Best effort: one failing page must not abort the crawl. Non-Error
      // throwables are stringified so the log never prints "undefined".
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`  skipped ${url}: ${reason}`);
    }
  }

  const totalLinks = pages.reduce((s, p) => s + p.linkCount, 0);
  const out = {
    source: seedUrl,
    scrapedAt: new Date().toISOString(),
    pageCount: pages.length,
    totalLinks,
    pages,
  };
  console.error(`Done: ${pages.length} pages, ${totalLinks} links`);
  process.stdout.write(JSON.stringify(out));
}

// Entry point: run the crawl; on any unhandled failure print it to stderr and
// exit with status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});

Build prompt

START.md LINK TERRORIZER!!