> ## Documentation Index
> Fetch the complete documentation index at: https://docs.voiceflow.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Create document

> Upload a file, or provide a URL to scrape, and import it as a knowledge base document. Supports both `application/json` and `multipart/form-data` content types.



## OpenAPI

````yaml /specs/realtime/openapi.public.json post /v1alpha1/public/knowledge-base/document
# Document header: spec version, service metadata, and the base server URL.
# Version-like values are quoted so they can never be read as numbers.
openapi: '3.0.0'
info:
  title: Realtime
  version: '1.0.0'
  description: Realtime gateway API service
  contact: {}
servers:
  - url: 'https://realtime-api.voiceflow.com'
# Empty at the document level; the operation below declares its own
# security requirement.
security: []
paths:
  # The only route documented in this spec excerpt.
  /v1alpha1/public/knowledge-base/document:
    post:
      # Tags attached to this operation.
      tags:
        - KBPublicApiDocument
        - Public-Docs
      summary: Create document
      # Folded scalar: the three lines below are joined into one paragraph.
      description: >-
        Upload or provide a URL to scrape and import as a knowledge base
        document. Supports both `application/json` and `multipart/form-data`
        content types.
      operationId: KnowledgeBaseDocumentApiPublicHTTPController_publicCreateOne
      # Seven query parameters followed by one header parameter; none of them
      # is marked as required.
      parameters:
        # Optional chunk-size setting. The value may be sent as a string or as
        # a number. The numeric branch and the schema default encode the range
        # and default stated in the description, so tooling can see them too.
        - name: maxChunkSize
          required: false
          in: query
          schema:
            description: >-
              Determines how granularly each document is broken up. Range
              available is 500-1500 tokens, default is 1000. Smaller chunk size
              means narrower context, faster response, fewer tokens consumed, and
              greater risk of less accurate answers. Max chunk size affects the
              total amount of chunks parsed from a document - i.e., larger
              chunks means fewer chunks retrieved.
            default: 1000
            anyOf:
              - type: string
              - type: number
                minimum: 500
                maximum: 1500
        # Optional flag, accepted either as a boolean or as one of the literal
        # strings 'true' / 'false'.
        # NOTE(review): the description talks about a "table", but this
        # operation creates a document - confirm the wording was not copied
        # from a table endpoint.
        - name: overwrite
          required: false
          in: query
          schema:
            description: >-
              If set to true, the existing table with the same name will be
              overwritten.
            anyOf:
              - type: boolean
              - type: string
                enum:
                  - 'true'
                  - 'false'
                # Display names for the two string enum values, in order.
                x-enumNames:
                  - 'TRUE'
                  - 'FALSE'
        # Optional HTML-to-markdown toggle; accepts a boolean or the strings
        # 'true' / 'false'.
        - name: markdownConversion
          in: query
          required: false
          schema:
            anyOf:
              - type: boolean
              - type: string
                enum: ['true', 'false']
                x-enumNames: ['TRUE', 'FALSE']
            description: >-
              When enabled, HTML is automatically converted to markdown to
              generate better chunks.
        # Optional flag, accepted either as a boolean or as one of the literal
        # strings 'true' / 'false'.
        # NOTE(review): this is the only query parameter here whose schema has
        # no `description`; add one once the intended behaviour is confirmed.
        - name: llmBasedChunks
          required: false
          in: query
          schema:
            anyOf:
              - type: boolean
              - type: string
                enum:
                  - 'true'
                  - 'false'
                # Display names for the two string enum values, in order.
                x-enumNames:
                  - 'TRUE'
                  - 'FALSE'
        # Optional toggle for LLM-generated questions prepended to chunks;
        # accepts a boolean or the strings 'true' / 'false'.
        - name: llmGeneratedQ
          in: query
          required: false
          schema:
            anyOf:
              - type: boolean
              - type: string
                enum: ['true', 'false']
                x-enumNames: ['TRUE', 'FALSE']
            description: >-
              When enabled, an LLM will be used to generate a question based on
              the document context and specific chunk, then prepend it to the
              chunk. This enhances retrieval by aligning chunks with potential
              user queries.
        # Optional toggle for LLM summarization of the content; accepts a
        # boolean or the strings 'true' / 'false'.
        - name: llmContentSummarization
          in: query
          required: false
          schema:
            anyOf:
              - type: boolean
              - type: string
                enum: ['true', 'false']
                x-enumNames: ['TRUE', 'FALSE']
            description: >-
              When enabled, an LLM summarizes and rewrites the content, removing
              unnecessary information and focusing on important parts to
              optimize for retrieval. Limited to 15 rows per table upload.
        # Optional toggle for an LLM-written context summary prepended to each
        # chunk; accepts a boolean or the strings 'true' / 'false'.
        - name: llmPrependContext
          in: query
          required: false
          schema:
            anyOf:
              - type: boolean
              - type: string
                enum: ['true', 'false']
                x-enumNames: ['TRUE', 'FALSE']
            description: >-
              When enabled, an LLM generates a context summary based on the
              document and chunk context, and prepends it to each chunk. This
              improves retrieval by providing additional context to each chunk.
              Note: If both llmGeneratedQ and llmPrependContext are set to true,
              llmGeneratedQ takes precedence, and the context summarization will
              not be applied.
        # Header parameter with no `required` flag, so it is optional.
        # NOTE(review): OpenAPI 3.0 states that header parameters named
        # Accept, Content-Type or Authorization are ignored; the request media
        # type is taken from the requestBody `content` keys instead. Confirm
        # whether this entry is still needed.
        - name: content-type
          in: header
          schema:
            type: string
      requestBody:
        content:
          # JSON variant: import a document from a URL. The body is an object
          # with a required `data` member; inside it `type` and `url` are
          # required.
          application/json:
            schema:
              type: object
              properties:
                data:
                  type: object
                  properties:
                    # Single-value enum: the only accepted value is `url`.
                    type:
                      type: string
                      enum:
                        - url
                      x-enumNames:
                        - URL
                    url:
                      type: string
                    name:
                      type: string
                    # Points at the shared refresh-rate enum
                    # (daily / weekly / monthly / never).
                    refreshRate:
                      allOf:
                        - $ref: >-
                            #/components/schemas/KnowledgeBaseDocumentRefreshRate
                    folderID:
                      type: string
                    # Array of objects; each entry needs a `key` (at most 255
                    # characters) and a `values` array of strings.
                    documentMetadata:
                      description: >-
                        An array of document metadata fields.


                        > **💡 Tip:** This metadata can be used in agent KB
                        metadata filter conditions. [Learn more about metadata
                        filtering](/documentation/build/querying-the-knowledge-base#meta-data-filtering).
                      type: array
                      items:
                        type: object
                        properties:
                          key:
                            type: string
                            maxLength: 255
                          values:
                            type: array
                            items:
                              type: string
                        required:
                          - key
                          - values
                    # Free-form object: the empty `additionalProperties` schema
                    # puts no constraint on property values.
                    metadata:
                      description: >-
                        Chunk-level metadata that can be used in a KB API query
                        request.


                        > **⚠️ Warning:** This metadata can't be used in agent
                        KB metadata filter conditions. Use `documentMetadata`
                        instead if you need agent-level metadata filtering.
                      type: object
                      additionalProperties: {}
                    projectEnvironmentIDOrAlias:
                      description: >-
                        The alias of the environment to target (ie. `main`). You
                        can find this in the environments page of your agent.


                        > **💡 Tip:** [Learn more about
                        Environments](/documentation/deploy/environments).
                      type: string
                  required:
                    - type
                    - url
              required:
                - data
          # Multipart variant: upload a file. Fields are flat form parts (no
          # `data` wrapper) and only `file` is required.
          multipart/form-data:
            schema:
              type: object
              properties:
                # `format: binary` marks this part as raw file content.
                file:
                  type: string
                  format: binary
                  description: The file to upload (max 10 MB).
                folderID:
                  type: string
                  description: The folder ID to place the document in.
                # Sent as a JSON-encoded string here, unlike the structured
                # array used by the application/json variant.
                documentMetadata:
                  type: string
                  description: >-
                    JSON string of document metadata fields.


                    > **💡 Tip:** This metadata can be used in agent KB metadata
                    filter conditions. [Learn more about metadata
                    filtering](/documentation/build/querying-the-knowledge-base#meta-data-filtering).
                # Also sent as a JSON-encoded string.
                metadata:
                  type: string
                  description: >-
                    JSON string of chunk metadata key-value pairs. Can be used
                    in a KB API query request.


                    > **⚠️ Note:** This metadata can't be used in agent KB
                    metadata filter conditions. Use `documentMetadata` instead
                    if you need agent-level metadata filtering.
                url:
                  type: string
                  description: The source URL of the document.
                # Defaults to false when the part is omitted.
                canEdit:
                  type: boolean
                  description: >-
                    Whether the document can be edited. Applicable only to `TXT`
                    documents
                  default: false
                projectEnvironmentIDOrAlias:
                  description: >-
                    The alias of the environment to target (ie. `main`). You can
                    find this in the environments page of your agent.


                    > **💡 Tip:** [Learn more about
                    Environments](/documentation/deploy/environments).
                  type: string
              required:
                - file
      responses:
        '201':
          description: The document was created successfully.
          content:
            # Keys under a response's `content` name the media type of the
            # RESPONSE body, not the content type of the request that was
            # sent. Both result shapes are plain objects (presumably returned
            # as JSON - confirm against the service), so they are listed as
            # alternatives under application/json instead of documenting a
            # multipart/form-data response body.
            application/json:
              schema:
                # anyOf rather than oneOf: both shapes mark the inner `data`
                # member as nullable, so a null payload could satisfy more
                # than one alternative.
                anyOf:
                  # Result of a URL import (application/json request).
                  - $ref: '#/components/schemas/DocumentCreateOneUrlPublicResponse'
                  # Result of a file upload (multipart/form-data request).
                  - $ref: '#/components/schemas/DocumentCreateOneFilePublicResponse'
      # Requires the `auth` scheme (defined under components.securitySchemes);
      # this replaces the empty document-level `security` list for this
      # operation. The empty array means no scopes are listed.
      security:
        - auth: []
components:
  schemas:
    # Allowed refresh-rate values. x-enumNames lists a display name for each
    # enum value, position by position.
    KnowledgeBaseDocumentRefreshRate:
      type: string
      enum: [daily, weekly, monthly, never]
      x-enumNames: [DAILY, WEEKLY, MONTHLY, NEVER]
    # Response envelope for a URL import: a required `data` object whose four
    # members (documentID, data, updatedAt, status) are all required.
    DocumentCreateOneUrlPublicResponse:
      type: object
      properties:
        data:
          type: object
          properties:
            documentID:
              type: string
            # URL document payload, declared nullable.
            # NOTE(review): `nullable` sits next to `allOf` with no `type` in
            # the same schema; OpenAPI 3.0.3 says nullable only takes effect
            # when `type` is set alongside it - confirm null is really allowed.
            data:
              nullable: true
              allOf:
                - $ref: '#/components/schemas/KBDocumentUrlData'
            # Timestamp string. On top of `format: date-time`, the pattern
            # requires a trailing `Z`, checks the day against the month
            # (Feb 29 only in leap years), and makes the seconds optional.
            updatedAt:
              type: string
              format: date-time
              pattern: >-
                ^(?:(?:\d\d[2468][048]|\d\d[13579][26]|\d\d0[48]|[02468][048]00|[13579][26]00)-02-29|\d{4}-(?:(?:0[13578]|1[02])-(?:0[1-9]|[12]\d|3[01])|(?:0[469]|11)-(?:0[1-9]|[12]\d|30)|(?:02)-(?:0[1-9]|1\d|2[0-8])))T(?:(?:[01]\d|2[0-3]):[0-5]\d(?::[0-5]\d(?:\.\d+)?)?(?:Z))$
            status:
              $ref: '#/components/schemas/KBDocumentStatus'
          required:
            - documentID
            - data
            - updatedAt
            - status
      required:
        - data
    # Response envelope for a file upload: a required `data` object whose four
    # members (documentID, data, updatedAt, status) are all required.
    DocumentCreateOneFilePublicResponse:
      type: object
      properties:
        data:
          type: object
          properties:
            documentID:
              type: string
            # One of six file-type payloads (docx, pdf, text, md, csv, xlsx),
            # each identified by its single-value `type` enum.
            # NOTE(review): `nullable` sits next to `oneOf` with no `type` in
            # the same schema; OpenAPI 3.0.3 says nullable only takes effect
            # when `type` is set alongside it - confirm null is really allowed.
            data:
              nullable: true
              oneOf:
                - $ref: '#/components/schemas/KBDocumentDocxData'
                - $ref: '#/components/schemas/KBDocumentPDFData'
                - $ref: '#/components/schemas/KBDocumentTextData'
                - $ref: '#/components/schemas/KBDocumentMarkdownData'
                - $ref: '#/components/schemas/KBDocumentCSVData'
                - $ref: '#/components/schemas/KBDocumentXLSXData'
            # Timestamp string. On top of `format: date-time`, the pattern
            # requires a trailing `Z`, checks the day against the month
            # (Feb 29 only in leap years), and makes the seconds optional.
            updatedAt:
              type: string
              format: date-time
              pattern: >-
                ^(?:(?:\d\d[2468][048]|\d\d[13579][26]|\d\d0[48]|[02468][048]00|[13579][26]00)-02-29|\d{4}-(?:(?:0[13578]|1[02])-(?:0[1-9]|[12]\d|3[01])|(?:0[469]|11)-(?:0[1-9]|[12]\d|30)|(?:02)-(?:0[1-9]|1\d|2[0-8])))T(?:(?:[01]\d|2[0-3]):[0-5]\d(?::[0-5]\d(?:\.\d+)?)?(?:Z))$
            status:
              $ref: '#/components/schemas/KBDocumentStatus'
          required:
            - documentID
            - data
            - updatedAt
            - status
      required:
        - data
    # Payload for a URL document; `type` is pinned to the single value `url`.
    KBDocumentUrlData:
      type: object
      required: [type, name, url]
      properties:
        type:
          type: string
          enum: [url]
          x-enumNames: [URL]
        name:
          type: string
        url:
          type: string
        refreshRate:
          allOf:
            - $ref: '#/components/schemas/KnowledgeBaseDocumentRefreshRate'
        lastSuccessUpdate:
          type: string
        accessTokenID:
          type: number
        integrationExternalID:
          type: string
        source:
          allOf:
            - $ref: '#/components/schemas/KnowledgeBaseDocumentIntegrationType'
    # Status object: a required status `type` plus an optional `data` member.
    KBDocumentStatus:
      type: object
      required: [type]
      properties:
        type:
          $ref: '#/components/schemas/KnowledgeBaseDocumentStatus'
        # Empty schema: any value is accepted.
        data: {}
    # Payload for a DOCX document; `type` is pinned to the single value `docx`.
    KBDocumentDocxData:
      type: object
      required: [type, name]
      properties:
        type:
          type: string
          enum: [docx]
          x-enumNames: [DOCX]
        name:
          type: string
        url:
          type: string
          nullable: true
    # Payload for a PDF document; `type` is pinned to the single value `pdf`.
    KBDocumentPDFData:
      type: object
      required: [type, name]
      properties:
        type:
          type: string
          enum: [pdf]
          x-enumNames: [PDF]
        name:
          type: string
        url:
          type: string
          nullable: true
    # Payload for a text document; `type` is pinned to the single value
    # `text`. This is the only file payload carrying a `canEdit` flag.
    KBDocumentTextData:
      type: object
      required: [type, name]
      properties:
        type:
          type: string
          enum: [text]
          x-enumNames: [TEXT]
        name:
          type: string
        canEdit:
          type: boolean
        url:
          type: string
          nullable: true
    # Payload for a markdown document; `type` is pinned to the single value
    # `md`.
    KBDocumentMarkdownData:
      type: object
      required: [type, name]
      properties:
        type:
          type: string
          enum: [md]
          x-enumNames: [MD]
        name:
          type: string
        url:
          type: string
          nullable: true
    # Payload for a CSV document; `type` is pinned to the single value `csv`.
    # Adds a numeric `rowsCount` to the common name/url fields.
    KBDocumentCSVData:
      type: object
      required: [type, name]
      properties:
        type:
          type: string
          enum: [csv]
          x-enumNames: [CSV]
        name:
          type: string
        rowsCount:
          type: number
        url:
          type: string
          nullable: true
    # Payload for an XLSX document; `type` is pinned to the single value
    # `xlsx`. Adds a numeric `rowsCount` to the common name/url fields.
    KBDocumentXLSXData:
      type: object
      required: [type, name]
      properties:
        type:
          type: string
          enum: [xlsx]
          x-enumNames: [XLSX]
        name:
          type: string
        rowsCount:
          type: number
        url:
          type: string
          nullable: true
    # Allowed integration sources. x-enumNames lists a display name for each
    # enum value, position by position.
    KnowledgeBaseDocumentIntegrationType:
      type: string
      enum: [zendesk, shopify]
      x-enumNames: [ZENDESK, SHOPIFY]
    # Allowed status values; here the display names in x-enumNames are
    # identical to the enum values themselves.
    KnowledgeBaseDocumentStatus:
      type: string
      enum: [ERROR, PENDING, SUCCESS, INITIALIZED]
      x-enumNames: [ERROR, PENDING, SUCCESS, INITIALIZED]
  securitySchemes:
    # API key carried in the `authorization` request header.
    auth:
      type: apiKey
      name: authorization
      in: header
      description: Voiceflow API key

````