> ## Documentation Index
> Fetch the complete documentation index at: https://docs.context.dev/llms.txt
> Use this file to discover all available pages before exploring further.

# Crawl Sitemap

> Crawl an entire website's sitemap and return all discovered page URLs.

<Badge color="blue">1 Credit</Badge>


## OpenAPI

````yaml https://app.stainless.com/api/spec/documented/context.dev/openapi.documented.yml get /web/scrape/sitemap
# OpenAPI 3.0 description of the Context API.
openapi: 3.0.0
info:
  title: Context API
  description: API for retrieving context data from any website
  version: 1.0.0
servers:
  # Base URL against which the paths below are resolved.
  - url: https://api.context.dev/v1
# Empty list: no document-wide security requirement is declared here.
security: []
paths:
  /web/scrape/sitemap:
    get:
      summary: Crawl Sitemap
      description: Crawl an entire website's sitemap and return all discovered page URLs.
      tags:
        - Web Scraping
      parameters:
        # Required: the domain to crawl.
        - name: domain
          in: query
          required: true
          description: Domain to build a sitemap for
          schema:
            type: string
        # Optional cap on how many links are returned.
        - name: maxLinks
          in: query
          required: false
          description: >-
            Maximum number of links to return from the sitemap crawl.
            Defaults to 10,000. Minimum is 1, maximum is 100,000.
          schema:
            type: integer
            default: 10000
            minimum: 1
            maximum: 100000
        # Optional filter; only matching URLs are returned and counted.
        - name: urlRegex
          in: query
          required: false
          description: >-
            Optional RE2-compatible regex pattern. Only URLs matching
            this pattern are returned and counted against maxLinks.
          schema:
            type: string
            maxLength: 256
            # Quoted so the regex metacharacters are never read as YAML syntax.
            example: '^https?://[^/]+/blog/'
        # Shared timeout parameter, defined under components.parameters.
        - $ref: '#/components/parameters/TimeoutMS'
      responses:
        # Success payload: the crawled domain, the URLs found, and crawl counters.
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  success:
                    type: boolean
                    # Single-value enum: the schema only admits true here.
                    enum:
                      - true
                    description: Indicates success
                  domain:
                    type: string
                    description: The normalized domain that was crawled
                  urls:
                    type: array
                    items:
                      type: string
                    # The cap comes from the maxLinks query parameter.
                    description: >-
                      Array of discovered page URLs from the sitemap, limited
                      by maxLinks (default 10,000; maximum 100,000)
                  meta:
                    type: object
                    description: Metadata about the sitemap crawl operation
                    properties:
                      sitemapsDiscovered:
                        type: integer
                        description: Total number of sitemap files discovered
                      sitemapsFetched:
                        type: integer
                        description: >-
                          Number of sitemap files successfully fetched and
                          parsed
                      sitemapsSkipped:
                        type: integer
                        description: >-
                          Number of sitemap files skipped (due to errors,
                          timeouts, or limits)
                      errors:
                        type: integer
                        description: Number of errors encountered during crawling
                    required:
                      - sitemapsDiscovered
                      - sitemapsFetched
                      - sitemapsSkipped
                      - errors
                required:
                  - success
                  - domain
                  - urls
                  - meta
        # Covers both error codes in the enum below, not only a bad domain.
        '400':
          description: Bad request - Invalid input or the website could not be accessed
          content:
            application/json:
              schema:
                type: object
                properties:
                  message:
                    type: string
                    description: Error message describing the issue
                  error_code:
                    type: string
                    enum:
                      - INPUT_VALIDATION_ERROR
                      - WEBSITE_ACCESS_ERROR
                    description: Error code indicating the type of error
                required:
                  - message
                  - error_code
        '401':
          description: Unauthorized - Invalid or missing API key
          content:
            application/json:
              schema:
                # NOTE(review): unlike the '400' schema, no `required` list is
                # declared, so both properties are formally optional — confirm
                # whether that is intended.
                type: object
                properties:
                  message:
                    type: string
                    description: Error message
                  error_code:
                    type: string
                    enum:
                      - UNAUTHORIZED
                    description: Error code indicating unauthorized access
        '403':
          description: Forbidden - Insufficient permissions or usage limit exceeded
          content:
            application/json:
              schema:
                # NOTE(review): unlike the '400' schema, no `required` list is
                # declared, so both properties are formally optional — confirm
                # whether that is intended.
                type: object
                properties:
                  message:
                    type: string
                    description: Error message
                  error_code:
                    type: string
                    # Four distinct codes share this one status.
                    enum:
                      - FORBIDDEN
                      - USAGE_EXCEEDED
                      - DISABLED
                      - INSUFFICIENT_PERMISSIONS
                    description: Error code indicating forbidden access
        # The timeoutMS parameter description names this status for aborted requests.
        '408':
          description: Request timeout
          content:
            application/json:
              schema:
                # NOTE(review): unlike the '400' schema, no `required` list is
                # declared, so both properties are formally optional — confirm
                # whether that is intended.
                type: object
                properties:
                  message:
                    type: string
                    description: Timeout error message
                  error_code:
                    type: string
                    enum:
                      - REQUEST_TIMEOUT
                    description: Error code indicating request timeout
        '500':
          description: Internal server error
          content:
            application/json:
              schema:
                # NOTE(review): unlike the '400' schema, no `required` list is
                # declared, so both properties are formally optional — confirm
                # whether that is intended.
                type: object
                properties:
                  message:
                    type: string
                    description: Error message
                  error_code:
                    type: string
                    enum:
                      - INTERNAL_ERROR
                    description: Error code indicating internal server error
      # This operation requires the bearerAuth scheme (HTTP bearer token).
      security:
        - bearerAuth: []
      # SDK usage snippets for this operation, one entry per language.
      x-codeSamples:
        - lang: JavaScript
          # Literal block scalar: each line below appears verbatim in the sample.
          source: |-
            import ContextDev from 'context.dev';

            const client = new ContextDev({
              apiKey: process.env['CONTEXT_DEV_API_KEY'], // This is the default and can be omitted
            });

            const response = await client.web.webScrapeSitemap({ domain: 'domain' });

            console.log(response.domain);
        # NOTE(review): `from context.dev import ...` treats `dev` as a submodule
        # of a `context` package, while the Ruby sample requires "context_dev" —
        # confirm the published Python import path.
        - lang: Python
          source: |-
            import os
            from context.dev import ContextDev

            client = ContextDev(
                api_key=os.environ.get("CONTEXT_DEV_API_KEY"),  # This is the default and can be omitted
            )
            response = client.web.web_scrape_sitemap(
                domain="domain",
            )
            print(response.domain)
        # Reads the API key from CONTEXT_DEV_API_KEY rather than a hard-coded
        # placeholder string.
        - lang: Ruby
          source: |-
            require "context_dev"

            context_dev = ContextDev::Client.new(api_key: ENV["CONTEXT_DEV_API_KEY"])

            response = context_dev.web.web_scrape_sitemap(domain: "domain")

            puts(response)
components:
  parameters:
    # Reusable query parameter; its bounds live in the referenced schema.
    TimeoutMS:
      name: timeoutMS
      in: query
      required: false
      schema:
        $ref: '#/components/schemas/TimeoutMS'
      description: >-
        Optional timeout in milliseconds for the request. If the request takes
        longer than this value, it will be aborted with a 408 status code.
        Allowed range is 1000ms (1 second) to 300000ms (5 minutes).
  schemas:
    # Integer millisecond timeout, bounded to 1000-300000 inclusive.
    TimeoutMS:
      type: integer
      minimum: 1000
      maximum: 300000
      description: >-
        Optional timeout in milliseconds for the request. If the request takes
        longer than this value, it will be aborted with a 408 status code.
        Allowed range is 1000ms (1 second) to 300000ms (5 minutes).
  securitySchemes:
    # HTTP bearer-token authentication scheme.
    bearerAuth:
      type: http
      scheme: bearer

````