Skip to main content

Command Palette

Search for a command to run...

GitHub Has a Free API — Analyze Any Repository in Seconds (No Auth for Public Data)

Updated
3 min read

title: GitHub Has a Free API — Analyze Any Repository in Seconds (No Auth for Public Data)

tags: python, api, github, webdev

GitHub's REST API gives you structured access to 420M+ repositories, 100M+ developers, and every commit, issue, and pull request — without authentication for public data. Rate limit: 60 requests/hour unauthenticated, 5,000/hour with a free token.

I built a competitive intelligence tool that tracks trending repos in my niche. Here's how.

The API: REST + GraphQL, Your Choice

https://api.github.com/repos/{owner}/{repo}

Returns stars, forks, language, description, last update — everything you need for analysis.

No API key required for public repos (60 req/hour). Free personal token bumps you to 5,000/hour.

Quick Start: Analyze Any Repository

import json
import urllib.parse
import urllib.request

def analyze_repo(owner, repo, timeout=10):
    """Fetch a public repository's metadata from GitHub and print a summary.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The decoded JSON object returned by ``GET /repos/{owner}/{repo}``.

    Raises:
        urllib.error.HTTPError: If GitHub answers with an error status
            (for example 404 for an unknown repository).
        urllib.error.URLError: If the request cannot be completed.
    """
    # Percent-encode each path segment so unusual characters cannot
    # change the shape of the URL.
    owner_part = urllib.parse.quote(owner, safe="")
    repo_part = urllib.parse.quote(repo, safe="")
    url = f"https://api.github.com/repos/{owner_part}/{repo_part}"
    headers = {"User-Agent": "Python-Script"}
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        data = json.loads(resp.read())

    print(f"Repository: {data['full_name']}")
    print(f"Stars: {data['stargazers_count']:,}")
    print(f"Forks: {data['forks_count']:,}")
    print(f"Language: {data['language']}")
    print(f"Open Issues: {data['open_issues_count']:,}")
    # Timestamps are ISO 8601 strings; the first 10 characters are the date.
    print(f"Created: {data['created_at'][:10]}")
    print(f"Last Push: {data['pushed_at'][:10]}")
    return data

# Example: print a summary of microsoft/vscode (performs a live HTTP request).
analyze_repo("microsoft", "vscode")

5 Practical Use Cases

1. Find Trending Repositories by Topic

def search_trending(topic, min_stars=100, sort="stars", timeout=10):
    """Find top repos for any topic and print the first ten matches.

    Args:
        topic: Plain-text search terms (spaces allowed); they are
            URL-encoded here, so do not pre-encode them.
        min_stars: Only repositories with more stars than this are matched.
        sort: Value for the search API's ``sort`` parameter.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The list of repository objects from the response's ``items`` field.

    Raises:
        urllib.error.HTTPError: If GitHub answers with an error status.
        urllib.error.URLError: If the request cannot be completed.
    """
    # urlencode escapes spaces and reserved characters. A raw space in the
    # URL makes http.client reject the request with InvalidURL before it
    # is ever sent.
    query = urllib.parse.urlencode({
        "q": f"{topic} stars:>{min_stars}",
        "sort": sort,
        "per_page": 10,
    })
    url = f"https://api.github.com/search/repositories?{query}"
    req = urllib.request.Request(url, headers={"User-Agent": "Python"})
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        data = json.loads(resp.read())

    print(f"Found {data['total_count']:,} repos for '{topic}':\n")
    for repo in data['items']:
        print(f"[{repo['stargazers_count']:,} ★] {repo['full_name']}")
        # GitHub sends "description": null for repos without one, so the key
        # exists and a .get() default would never be used.
        description = repo.get('description') or 'No description'
        print(f"  {description[:80]}")
        print()
    return data['items']

# Example: search for "web scraping python" with the default 100-star minimum
# (performs a live HTTP request).
search_trending("web scraping python")

2. Analyze Competitor's Tech Stack

def get_languages(owner, repo, timeout=10):
    """Get language breakdown for a repo and print it as percentages.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The decoded response: a mapping of language name to byte count.

    Raises:
        urllib.error.HTTPError: If GitHub answers with an error status.
        urllib.error.URLError: If the request cannot be completed.
    """
    # Percent-encode each path segment so unusual characters cannot
    # change the shape of the URL.
    owner_part = urllib.parse.quote(owner, safe="")
    repo_part = urllib.parse.quote(repo, safe="")
    url = f"https://api.github.com/repos/{owner_part}/{repo_part}/languages"
    req = urllib.request.Request(url, headers={"User-Agent": "Python"})
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        langs = json.loads(resp.read())

    total = sum(langs.values())
    print(f"Languages in {owner}/{repo}:")
    if not total:
        # Nothing to report, and dividing by a zero total would fail.
        return langs
    # Largest share first.
    ranked = sorted(langs.items(), key=lambda item: item[1], reverse=True)
    for lang, bytes_count in ranked:
        pct = (bytes_count / total) * 100
        print(f"  {lang}: {pct:.1f}%")
    return langs

# Example: print the language breakdown of facebook/react
# (performs a live HTTP request).
get_languages("facebook", "react")

3. Monitor Release Activity

def get_releases(owner, repo, limit=5, timeout=10):
    """Track release frequency and changelog by printing recent releases.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.
        limit: Maximum number of releases to request (sent as ``per_page``).
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The decoded list of release objects.

    Raises:
        urllib.error.HTTPError: If GitHub answers with an error status.
        urllib.error.URLError: If the request cannot be completed.
    """
    # Percent-encode each path segment so unusual characters cannot
    # change the shape of the URL.
    owner_part = urllib.parse.quote(owner, safe="")
    repo_part = urllib.parse.quote(repo, safe="")
    query = urllib.parse.urlencode({"per_page": limit})
    url = f"https://api.github.com/repos/{owner_part}/{repo_part}/releases?{query}"
    req = urllib.request.Request(url, headers={"User-Agent": "Python"})
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        releases = json.loads(resp.read())

    for release in releases:
        # A release's name may be null/empty; fall back to a placeholder.
        print(f"[{release['tag_name']}] {release['name'] or 'No title'}")
        print(f"  Published: {release['published_at'][:10]}")
        # Total downloads across every asset attached to the release.
        downloads = sum(asset['download_count'] for asset in release.get('assets', []))
        print(f"  Downloads: {downloads}")
        print()
    return releases

# Example: print up to five releases (the default limit) of python/cpython
# (performs a live HTTP request).
get_releases("python", "cpython")

4. Find Active Contributors

def top_contributors(owner, repo, limit=10, timeout=10):
    """Find who's actually building the project by printing top contributors.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.
        limit: Maximum number of contributors to request (sent as ``per_page``).
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The decoded list of contributor objects (empty if the response
        has no body).

    Raises:
        urllib.error.HTTPError: If GitHub answers with an error status.
        urllib.error.URLError: If the request cannot be completed.
    """
    # Percent-encode each path segment so unusual characters cannot
    # change the shape of the URL.
    owner_part = urllib.parse.quote(owner, safe="")
    repo_part = urllib.parse.quote(repo, safe="")
    query = urllib.parse.urlencode({"per_page": limit})
    url = f"https://api.github.com/repos/{owner_part}/{repo_part}/contributors?{query}"
    req = urllib.request.Request(url, headers={"User-Agent": "Python"})
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        raw = resp.read()
    # GitHub documents a 204 (no body) reply for an empty repository;
    # json.loads would raise on the empty payload.
    contributors = json.loads(raw) if raw else []

    for contributor in contributors:
        print(f"  {contributor['login']}: {contributor['contributions']} commits")
    return contributors

# Example: print up to ten contributors (the default limit) of apify/crawlee
# (performs a live HTTP request).
top_contributors("apify", "crawlee")

5. Issue Sentiment Analysis

def recent_issues(owner, repo, limit=10, timeout=10):
    """Get recent issues — understand user pain points.

    Requests open issues sorted by creation time and prints each one that
    is not a pull request. Pull requests are filtered out after fetching,
    so fewer than ``limit`` issues may be printed.

    Args:
        owner: Account or organization that owns the repository.
        repo: Repository name.
        limit: Maximum number of items to request (sent as ``per_page``).
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The list of decoded issue objects that were printed (pull
        requests excluded).

    Raises:
        urllib.error.HTTPError: If GitHub answers with an error status.
        urllib.error.URLError: If the request cannot be completed.
    """
    # Percent-encode each path segment so unusual characters cannot
    # change the shape of the URL.
    owner_part = urllib.parse.quote(owner, safe="")
    repo_part = urllib.parse.quote(repo, safe="")
    query = urllib.parse.urlencode(
        {"state": "open", "per_page": limit, "sort": "created"}
    )
    url = f"https://api.github.com/repos/{owner_part}/{repo_part}/issues?{query}"
    req = urllib.request.Request(url, headers={"User-Agent": "Python"})
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        items = json.loads(resp.read())

    # Entries carrying a "pull_request" key are PRs, not issues — skip them.
    issues = [item for item in items if "pull_request" not in item]
    for issue in issues:
        labels = ", ".join(label["name"] for label in issue.get("labels", []))
        print(f"[#{issue['number']}] {issue['title'][:70]}")
        if labels:
            print(f"  Labels: {labels}")
        print(f"  Comments: {issue['comments']} | Created: {issue['created_at'][:10]}")
        print()
    return issues

# Example: print recent open issues of scrapy/scrapy
# (performs a live HTTP request).
recent_issues("scrapy", "scrapy")

API Endpoints Cheat Sheet

| Endpoint | What You Get |
|---|---|
| /repos/{owner}/{repo} | Repository details |
| /search/repositories?q=... | Search repos by topic/language |
| /repos/{o}/{r}/languages | Language breakdown |
| /repos/{o}/{r}/releases | Releases with download counts |
| /repos/{o}/{r}/contributors | Top contributors |
| /repos/{o}/{r}/issues | Issues and PRs |
| /repos/{o}/{r}/commits | Commit history |
| /users/{username} | User profile |
| /users/{username}/repos | User's repositories |
| /rate_limit | Your current rate limit status |

Tips for Production Use

  • Authentication — create a free token at github.com/settings/tokens for 5,000 req/hour
  • Pagination — use ?page=2&per_page=100 for large result sets
  • Conditional requests — use If-None-Match header to save rate limit on unchanged data
  • GraphQL API — api.github.com/graphql for complex queries (requires token)
  • Respect rate limits — check X-RateLimit-Remaining header

Need production-scale GitHub data? Check my Apify Store — 78 published scrapers including a GitHub Trending Scraper.


More Free API Tutorials

Need a custom scraper or data pipeline built in 48 hours? Pilot rate: $100/project. 78 published Apify actors, 376+ production runs. Email spinov001@gmail.com — reply by Friday for priority slot.

💡 More scraping & API tips → @scraping_ai