Skip to main content
Sourcebot can sync code from GitHub.com, GitHub Enterprise Server, and GitHub Enterprise Cloud. If you’re not familiar with Sourcebot connections, please read that overview first.

Examples

{
    "type": "github",
    "repos": [
        "sourcebot-dev/sourcebot",
        "getsentry/sentry",
        "torvalds/linux"
    ]
}
{
    "type": "github",
    "orgs": [
        "sourcebot-dev",
        "getsentry",
        "vercel"
    ]
}
{
    "type": "github",
    "users": [
        "torvalds",
        "ggerganov"
    ]
}
{
    "type": "github",
    // Sync all repos in `my-org` that have a topic that...
    "orgs": [
        "my-org"
    ],
    // ...match one of these glob patterns.
    "topics": [
        "test-*",
        "ci-*",
        "k8s"
    ]
}

{
    "type": "github",
    // Include all repos in my-org...
    "orgs": [
        "my-org"
    ],
    // ...except:
    "exclude": {
        // repos that are archived
        "archived": true,
        // repos that are forks
        "forks": true,
        // repos that match these glob patterns
        "repos": [
            "my-org/repo1",
            "my-org/repo2",
            "my-org/sub-org-1/**",
            "my-org/sub-org-*/**"
        ],
        "size": {
            // repos that are less than 1MB (in bytes)...
            "min": 1048576,
            // or repos greater than 100MB (in bytes)
            "max": 104857600 
        },
        // repos with topics that match these glob patterns
        "topics": [
            "test-*",
            "ci"
        ]
    }
}

Authenticating with GitHub

In order to index private repositories, you’ll need to generate an access token and provide it to Sourcebot. GitHub provides two types of access tokens:

Fine-grained personal access tokens

Create a new fine-grained PAT here. First, select the resource owner and the repositories that you want Sourcebot to have access to. Next, under “Repository permissions”, select permissions Contents and Metadata with access Read-only. The permissions should look like the following: (image: GitHub PAT Scope). See the GitHub docs for more details.
Personal access tokens (classic): Create a new PAT here and make sure you select the repo scope: (image: GitHub PAT Scope). See the GitHub docs for more details.
Next, provide the access token via the token property, either as an environment variable or a secret:
  • Environment Variable
  • Secret
Environment variables are only supported in a declarative config and cannot be used in the web UI.
  1. Add the token property to your connection config:
{
    "type": "github",
    "token": {
        // note: this env var can be named anything. It
        // doesn't need to be `GITHUB_TOKEN`.
        "env": "GITHUB_TOKEN"
    }
    // .. rest of config ..
}
  2. Pass this environment variable each time you run Sourcebot:
docker run \
    -e GITHUB_TOKEN=<PAT> \
    /* additional args */ \
    ghcr.io/sourcebot-dev/sourcebot:latest

Connecting to a custom GitHub host

To connect to a GitHub host other than github.com, provide the url property to your config:
{
    "type": "github",
    "url": "https://github.example.com"
    // .. rest of config ..
}

Schema reference

schemas/v3/github.json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "title": "GithubConnectionConfig",
  "properties": {
    "type": {
      "const": "github",
      "description": "GitHub Configuration"
    },
    "token": {
      "description": "A Personal Access Token (PAT).",
      "examples": [
        {
          "secret": "SECRET_KEY"
        }
      ],
      "anyOf": [
        {
          "type": "object",
          "properties": {
            "secret": {
              "type": "string",
              "description": "The name of the secret that contains the token."
            }
          },
          "required": [
            "secret"
          ],
          "additionalProperties": false
        },
        {
          "type": "object",
          "properties": {
            "env": {
              "type": "string",
              "description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
            }
          },
          "required": [
            "env"
          ],
          "additionalProperties": false
        }
      ]
    },
    "url": {
      "type": "string",
      "format": "url",
      "default": "https://github.com",
      "description": "The URL of the GitHub host. Defaults to https://github.com",
      "examples": [
        "https://github.com",
        "https://github.example.com"
      ],
      "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
    },
    "users": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+$"
      },
      "default": [],
      "examples": [
        [
          "torvalds",
          "DHH"
        ]
      ],
      "description": "List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property."
    },
    "orgs": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+$"
      },
      "default": [],
      "examples": [
        [
          "my-org-name"
        ],
        [
          "sourcebot-dev",
          "commaai"
        ]
      ],
      "description": "List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property."
    },
    "repos": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+\\/[\\w.-]+$"
      },
      "default": [],
      "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'."
    },
    "topics": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "minItems": 1,
      "default": [],
      "description": "List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
      "examples": [
        [
          "docs",
          "core"
        ]
      ]
    },
    "exclude": {
      "type": "object",
      "properties": {
        "forks": {
          "type": "boolean",
          "default": false,
          "description": "Exclude forked repositories from syncing."
        },
        "archived": {
          "type": "boolean",
          "default": false,
          "description": "Exclude archived repositories from syncing."
        },
        "repos": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": [],
          "description": "List of individual repositories to exclude from syncing. Glob patterns are supported."
        },
        "topics": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": [],
          "description": "List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
          "examples": [
            [
              "tests",
              "ci"
            ]
          ]
        },
        "size": {
          "type": "object",
          "description": "Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned.",
          "properties": {
            "min": {
              "type": "integer",
              "description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
            },
            "max": {
              "type": "integer",
              "description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
            }
          },
          "additionalProperties": false
        }
      },
      "additionalProperties": false
    },
    "revisions": {
      "type": "object",
      "description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
      "properties": {
        "branches": {
          "type": "array",
          "description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
          "items": {
            "type": "string"
          },
          "examples": [
            [
              "main",
              "release/*"
            ],
            [
              "**"
            ]
          ],
          "default": []
        },
        "tags": {
          "type": "array",
          "description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
          "items": {
            "type": "string"
          },
          "examples": [
            [
              "latest",
              "v2.*.*"
            ],
            [
              "**"
            ]
          ],
          "default": []
        }
      },
      "additionalProperties": false
    }
  },
  "required": [
    "type"
  ],
  "additionalProperties": false
}
I