{
  "schema_version": "github-machine-beacon/v2",
  "name": "GitHub Machine Beacon",
  "slug": "github-machine-beacon",
  "version": "0.4.0",
  "description": "A transparent GitHub experiment that makes a repository unusually easy for crawlers, search indexes, AI agents, LLM readers, link preview bots, and code indexers to discover and parse.",
  "updated": "2026-06-15",
  "base_url": "https://beacon.ybliterature.com/",
  "repo_url": "https://github.com/Yang1Bai/github-machine-beacon",
  "owner": "Yang1Bai",
  "ethical_boundaries": [
    "Be transparent about the experiment.",
    "Use honest metadata and relevant keywords only.",
    "Publish stable machine-readable entry points.",
    "Respect robots.txt and platform rules.",
    "Measure discovery without generating fake traffic."
  ],
  "machine_entry_points": [
    {
      "title": "GitHub Machine Beacon",
      "url": "https://beacon.ybliterature.com/",
      "summary": "Landing page for the machine-readable GitHub crawler discovery experiment.",
      "type": "text/html",
      "changefreq": "weekly",
      "priority": "1.0"
    },
    {
      "title": "LLM Reader Guide",
      "url": "https://beacon.ybliterature.com/llms.txt",
      "summary": "Compact guide for LLM crawlers, retrieval systems, and AI coding agents.",
      "type": "text/plain",
      "changefreq": "weekly",
      "priority": "0.9"
    },
    {
      "title": "Full LLM Context",
      "url": "https://beacon.ybliterature.com/llms-full.txt",
      "summary": "Extended project context for retrieval-augmented generation and AI readers.",
      "type": "text/plain",
      "changefreq": "weekly",
      "priority": "0.8"
    },
    {
      "title": "Crawler Manifest",
      "url": "https://beacon.ybliterature.com/crawler-manifest.json",
      "summary": "Canonical JSON manifest describing URLs, topics, principles, and measurement fields.",
      "type": "application/json",
      "changefreq": "weekly",
      "priority": "0.8"
    },
    {
      "title": "Keyword Index",
      "url": "https://beacon.ybliterature.com/keyword-index.json",
      "summary": "Structured topic map for machine discovery and search indexing experiments.",
      "type": "application/json",
      "changefreq": "weekly",
      "priority": "0.7"
    },
    {
      "title": "Resource Index",
      "url": "https://beacon.ybliterature.com/resources.json",
      "summary": "Structured JSON index of all human-readable and machine-readable project resources.",
      "type": "application/json",
      "changefreq": "weekly",
      "priority": "0.7"
    },
    {
      "title": "Traffic Snapshot",
      "url": "https://beacon.ybliterature.com/traffic.json",
      "summary": "Latest public traffic snapshot sourced from the GitHub Traffic API.",
      "type": "application/json",
      "changefreq": "hourly",
      "priority": "0.75"
    },
    {
      "title": "Well-Known LLM Reader Guide",
      "url": "https://beacon.ybliterature.com/.well-known/llms.txt",
      "summary": "A compatibility copy of llms.txt for readers that check well-known paths.",
      "type": "text/plain",
      "changefreq": "weekly",
      "priority": "0.5"
    },
    {
      "title": "Web App Manifest",
      "url": "https://beacon.ybliterature.com/manifest.webmanifest",
      "summary": "Basic web manifest that exposes the site name, description, and canonical start URL.",
      "type": "application/manifest+json",
      "changefreq": "monthly",
      "priority": "0.4"
    },
    {
      "title": "Atom Feed",
      "url": "https://beacon.ybliterature.com/feed.xml",
      "summary": "Recrawl-friendly Atom feed for experiment updates.",
      "type": "application/xml",
      "changefreq": "weekly",
      "priority": "0.6"
    },
    {
      "title": "Machine-Readable Repository Checklist",
      "url": "https://beacon.ybliterature.com/machine-readable-repository-checklist.html",
      "summary": "A practical checklist for making a GitHub repository easier for crawlers, code indexes, LLM readers, and AI agents to parse.",
      "type": "text/html",
      "changefreq": "monthly",
      "priority": "0.90"
    },
    {
      "title": "Crawler Surface Map",
      "url": "https://beacon.ybliterature.com/crawler-surface-map.html",
      "summary": "A map of repository and website surfaces that expose the experiment to crawlers, code indexes, LLM readers, and link preview systems.",
      "type": "text/html",
      "changefreq": "monthly",
      "priority": "0.88"
    },
    {
      "title": "AI Agent Entrypoints",
      "url": "https://beacon.ybliterature.com/ai-agent-entrypoints.html",
      "summary": "Recommended routes for LLM crawlers, AI coding assistants, retrieval systems, and autonomous browser agents reading this repository.",
      "type": "text/html",
      "changefreq": "monthly",
      "priority": "0.86"
    },
    {
      "title": "Experiment Protocol",
      "url": "https://beacon.ybliterature.com/experiment-protocol.html",
      "summary": "A reproducible protocol for measuring whether machine-readable repository surfaces increase legitimate GitHub and web discovery.",
      "type": "text/html",
      "changefreq": "weekly",
      "priority": "0.84"
    },
    {
      "title": "Standards and Sources",
      "url": "https://beacon.ybliterature.com/standards-and-sources.html",
      "summary": "Source-backed notes for the project surfaces: GitHub topics, GitHub Pages, Schema.org JSON-LD, llms.txt, sitemap.xml, robots.txt, and Atom feeds.",
      "type": "text/html",
      "changefreq": "monthly",
      "priority": "0.80"
    },
    {
      "title": "Crawlability Audit",
      "url": "https://beacon.ybliterature.com/crawlability-audit.html",
      "summary": "A self-audit of the repository discovery surfaces and machine-readable files currently published by GitHub Machine Beacon.",
      "type": "text/html",
      "changefreq": "weekly",
      "priority": "0.82"
    },
    {
      "title": "Results Log",
      "url": "https://beacon.ybliterature.com/results-log.html",
      "summary": "A public log for launch status, validation checks, and later traffic observations from the machine-readable GitHub discovery experiment.",
      "type": "text/html",
      "changefreq": "weekly",
      "priority": "0.78"
    }
  ],
  "content_resources": [
    {
      "title": "Machine-Readable Repository Checklist",
      "url": "https://beacon.ybliterature.com/machine-readable-repository-checklist.html",
      "summary": "A practical checklist for making a GitHub repository easier for crawlers, code indexes, LLM readers, and AI agents to parse.",
      "keywords": [
        "machine-readable repository checklist",
        "GitHub README structure",
        "crawler-friendly documentation",
        "AI agent documentation",
        "repository metadata"
      ]
    },
    {
      "title": "Crawler Surface Map",
      "url": "https://beacon.ybliterature.com/crawler-surface-map.html",
      "summary": "A map of repository and website surfaces that expose the experiment to crawlers, code indexes, LLM readers, and link preview systems.",
      "keywords": [
        "crawler surface map",
        "crawler entry points",
        "GitHub Pages metadata",
        "repository discovery",
        "web crawler observability"
      ]
    },
    {
      "title": "AI Agent Entrypoints",
      "url": "https://beacon.ybliterature.com/ai-agent-entrypoints.html",
      "summary": "Recommended routes for LLM crawlers, AI coding assistants, retrieval systems, and autonomous browser agents reading this repository.",
      "keywords": [
        "AI agent entrypoints",
        "LLM crawler",
        "llms.txt",
        "agent-readable documentation",
        "RAG source"
      ]
    },
    {
      "title": "Experiment Protocol",
      "url": "https://beacon.ybliterature.com/experiment-protocol.html",
      "summary": "A reproducible protocol for measuring whether machine-readable repository surfaces increase legitimate GitHub and web discovery.",
      "keywords": [
        "repository traffic experiment",
        "GitHub Insights traffic",
        "crawler experiment protocol",
        "public web observability",
        "bot traffic research"
      ]
    },
    {
      "title": "Standards and Sources",
      "url": "https://beacon.ybliterature.com/standards-and-sources.html",
      "summary": "Source-backed notes for the project surfaces: GitHub topics, GitHub Pages, Schema.org JSON-LD, llms.txt, sitemap.xml, robots.txt, and Atom feeds.",
      "keywords": [
        "GitHub topics documentation",
        "GitHub Pages Actions",
        "Schema.org JSON-LD",
        "llms.txt proposal",
        "sitemap robots Atom feed"
      ]
    },
    {
      "title": "Crawlability Audit",
      "url": "https://beacon.ybliterature.com/crawlability-audit.html",
      "summary": "A self-audit of the repository discovery surfaces and machine-readable files currently published by GitHub Machine Beacon.",
      "keywords": [
        "crawlability audit",
        "machine-readable audit",
        "GitHub Pages audit",
        "metadata validation",
        "crawler readiness"
      ]
    },
    {
      "title": "Results Log",
      "url": "https://beacon.ybliterature.com/results-log.html",
      "summary": "A public log for launch status, validation checks, and later traffic observations from the machine-readable GitHub discovery experiment.",
      "keywords": [
        "GitHub traffic log",
        "crawler experiment results",
        "repository views",
        "unique visitors",
        "traffic measurement"
      ]
    }
  ],
  "keyword_groups": [
    {
      "name": "machine-readable web discovery",
      "intent": "Signals for crawlers and search indexes that prefer structured, canonical resources.",
      "terms": [
        "machine-readable repository",
        "crawler-friendly GitHub project",
        "GitHub Pages metadata",
        "sitemap.xml",
        "robots.txt",
        "structured data",
        "JSON-LD",
        "Open Graph metadata",
        "canonical URL",
        "Atom feed",
        "RSS feed",
        "web crawler observability"
      ]
    },
    {
      "name": "AI and LLM discovery",
      "intent": "Signals for retrieval systems, AI coding tools, and agent browsers.",
      "terms": [
        "llms.txt",
        "LLM crawler",
        "AI agent browser",
        "AI search indexing",
        "retrieval augmented generation",
        "RAG source",
        "agent-readable documentation",
        "machine context file",
        "AI code search",
        "LLM metadata",
        "crawler manifest",
        "semantic README"
      ]
    },
    {
      "name": "GitHub repository discovery",
      "intent": "Signals that help repository search, code search, and topic-based browsing.",
      "terms": [
        "GitHub search optimization",
        "GitHub repository metadata",
        "GitHub topics",
        "README structure",
        "code indexing",
        "open source discoverability",
        "repository traffic experiment",
        "GitHub Insights traffic",
        "GitHub Pages deployment",
        "open research repository",
        "software citation",
        "CITATION.cff"
      ]
    },
    {
      "name": "measurement and ethics",
      "intent": "Signals that the project is an observable, non-deceptive experiment.",
      "terms": [
        "crawler experiment",
        "traffic measurement",
        "ethical SEO",
        "transparent metadata",
        "no fake traffic",
        "no cloaking",
        "privacy-preserving analytics",
        "search experiment",
        "bot traffic research",
        "machine traffic benchmark",
        "crawlability audit",
        "public web observability"
      ]
    }
  ],
  "measurement_fields": [
    "repository_views",
    "unique_visitors",
    "referrers",
    "popular_content",
    "clones",
    "unique_cloners",
    "edge_requests",
    "machine_requests",
    "human_requests",
    "unknown_requests",
    "stars",
    "forks",
    "issues_or_discussions",
    "external_citations"
  ],
  "traffic_snapshot": {
    "url": "https://beacon.ybliterature.com/traffic.json",
    "updated_at": "2026-06-12T15:21:59Z",
    "source": "GitHub Traffic API",
    "views": {
      "count": 0,
      "uniques": 0,
      "daily": [
        {
          "timestamp": "2026-05-30T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-05-31T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-01T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-02T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-03T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-04T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-05T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-06T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-07T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-08T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-09T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-10T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-11T00:00:00Z",
          "count": 0,
          "uniques": 0
        },
        {
          "timestamp": "2026-06-12T00:00:00Z",
          "count": 0,
          "uniques": 0
        }
      ]
    },
    "visitor_classification": {
      "machine_visits": null,
      "human_visits": null,
      "status": "not_available_without_request_logs",
      "reason": "The GitHub Traffic API does not expose user-agent-level data, and GitHub Pages does not provide raw request logs for this static site."
    }
  },
  "cloudflare_edge": {
    "url": "https://beacon.ybliterature.com/",
    "traffic_url": "https://beacon.ybliterature.com/cloudflare-traffic.json",
    "classification": "machine/human split via Worker request-header heuristic"
  },
  "recommended_citation": "GitHub Machine Beacon: transparent machine-readable GitHub discovery experiment.",
  "traffic_policy": "Do not generate fake visits. Observe legitimate discovery only.",
  "validation": {
    "json_parseable": true,
    "xml_parseable": true,
    "local_links_checked": true,
    "placeholder_usernames_removed": true
  }
}
