> ## Documentation Index
> Fetch the complete documentation index at: https://doc.lucidworks.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Standalone query rewriter

> The standalone query rewriter use case rewrites the text in reference to the context based on the memoryUuid.


## OpenAPI

````yaml /api-reference/saas/machine-learning-platform-predict.json post /ai/prediction/standalone-query-rewriter/{MODEL_ID}
openapi: 3.0.1
info:
  title: Lucidworks AI Prediction API
  version: v0
  description: >-
    The Lucidworks AI Prediction API is used to send synchronous API calls that
    run predictions from pre-trained models or custom models.


    The Use Case API returns a list of all supported models.


    The `prediction` endpoints require an authentication token with scope
    `machinelearning.predict`.
  contact:
    name: Lucidworks
    url: https://lucidworks.com/
    email: support@lucidworks.com
  termsOfService: https://lucidworks.com/legal/developer-license-agreement/
  license:
    name: Lucidworks
    url: https://lucidworks.com/legal/developer-license-agreement/
servers:
  - url: https://APPLICATION_ID.applications.lucidworks.com
    description: Production
security: []
tags:
  - name: Get predictions
    description: Submit prediction tasks to Lucidworks AI.
paths:
  /ai/prediction/standalone-query-rewriter/{MODEL_ID}:
    parameters:
      - schema:
          type: string
        name: MODEL_ID
        in: path
        required: true
        description: Unique identifier for the model.
        example: llama-3v2-3b-instruct
    post:
      tags:
        - Get predictions
      summary: Standalone query rewriter
      description: >-
        The standalone query rewriter use case rewrites the text in reference to
        the context based on the memoryUuid.
      operationId: post-ai-prediction-standalone-query-rewriter-modelId
      parameters:
        - in: header
          name: Authorization
          schema:
            type: string
          required: true
          description: >-
            Bearer token used for authentication. Format: `Authorization: Bearer
            ACCESS_TOKEN`.
          example: Bearer abc123def456
        - schema:
            type: string
            example: application/json
          in: header
          name: Content-Type
          description: application/json
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StandaloneQueryRewriterRequest'
            example:
              batch:
                - text: Is it a framework?
              useCaseConfig:
                memoryUuid: 27a887fe-3d7c-4ef0-9597-e2dfc054c20e
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/StandaloneResponse'
              example:
                predictions:
                  - response: Is RAG a framework?
                    tokensUsed:
                      promptTokens: 245
                      completionTokens: 6
                      totalTokens: 251
components:
  schemas:
    StandaloneQueryRewriterRequest:
      title: StandaloneQueryRewriterRequest
      type: object
      x-examples: {}
      properties:
        batch:
          type: array
          description: >-
            The batch of key:value pairs used as inputs in the prediction. Up to
            32 inputs per request are allowed.
          maxItems: 32
          items:
            type: object
            properties:
              text:
                type: string
                description: The content the model analyzes.
                example: Is it a framework?
        useCaseConfig:
          $ref: '#/components/schemas/UseCaseConfigStandalone'
        modelConfig:
          $ref: '#/components/schemas/ModelConfig'
    StandaloneResponse:
      type: object
      x-examples: {}
      properties:
        predictions:
          type: array
          items:
            $ref: '#/components/schemas/StandaloneResponseTokens'
    UseCaseConfigStandalone:
      title: UseCaseConfigStandalone
      type: object
      properties:
        memoryUuid:
          type: string
          description: >-
            The universal unique identifier (UUID) stored in the trained set of
            data in the model that is used in the model request.


            This parameter is optional, and is used when previous chat history
            reference information is available.
          example: 27a887fe-3d7c-4ef0-9597-e2dfc054c20e
    ModelConfig:
      title: ModelConfig
      type: object
      description: >-
        Provides fields and values that specify ranges for tokens. Fields used
        for specific use cases and models are specified. The default values are
        used if other values are not specified.
      properties:
        temperature:
          type: number
          format: float
          example: 0.8
          minimum: 0
          maximum: 2
          description: >-
            A sampling temperature between 0 and 2. A higher sampling
            temperature such as 0.8, results in more random (creative) output. A
            lower value such as 0.2 results in more focused (conservative)
            output. A lower value does not guarantee the model returns the same
            response for the same input. We recommend staying at or below a
            temperature of 1.0. Values above 1.0 might return nonsense unless
            the topP value is lowered to be more deterministic.
        topP:
          type: number
          format: float
          example: 1
          minimum: 0
          maximum: 1
          description: >-
            A floating-point number between 0 and 1 that controls the cumulative
            probability of the top tokens to consider, known as the randomness
            of the LLM's response. This parameter is also referred to as top
            probability. Set `topP` to 1 to consider all tokens. A higher value
            specifies a higher probability threshold and selects tokens whose
            cumulative probability is greater than the threshold. The higher the
            value, the more diverse the output.
        topK:
          type: integer
          example: -1
          description: >-
            An integer that controls the number of top tokens to consider. Set
            topK to -1 to consider all tokens.
        presencePenalty:
          type: number
          format: float
          minimum: -2
          maximum: 2
          description: >-
            A floating-point number between -2.0 and 2.0 that penalizes new
            tokens based on whether they have already appeared in the text. This
            increases the model's use of diverse tokens. A value greater than
            zero (0) encourages the model to use new tokens. A value less than
            zero (0) encourages the model to repeat existing tokens. This is
            applicable for all OpenAI and Llama models.
          example: 2
        frequencyPenalty:
          type: number
          format: float
          minimum: -2
          maximum: 2
          example: 1
          description: >-
            A floating-point number between -2.0 and 2.0 that penalizes new
            tokens based on their frequency in the generated text. A value
            greater than zero (0) encourages the model to use new tokens. A
            value less than zero (0) encourages the model to repeat existing
            tokens. This is applicable for all OpenAI and Llama models.
        maxTokens:
          type: integer
          format: int32
          example: 1
          description: >-
            The maximum number of tokens to generate per output sequence. The
            value is different for each model. Review individual model
            specifications when the value exceeds 2048.
        apiKey:
          type: string
          description: >-
            This optional parameter is only required when using the model for
            prediction. You can find this value in your model's settings:


            * **OpenAI**: Copy and paste the API key found in your
            organization's settings. For more information, see <a
            href="https://platform.openai.com/docs/api-reference/authentication">OpenAI
            Authentication API keys</a>.


            * **Azure OpenAI**: Copy and paste the API key found in your Azure
            portal. See <a
            href="https://learn.microsoft.com/en-us/azure/api-management/api-management-authenticate-authorize-azure-openai#authenticate-with-api-key">Authenticate
            with API key</a>.


            * **Anthropic**: Copy and paste the API key found in your <a
            href="https://console.anthropic.com/settings/keys">Anthropic
            console</a> or by using the <a
            href="https://docs.anthropic.com/en/api/admin-api/apikeys/get-api-key">Anthropic
            API</a>.


            * **Google Vertex AI**: Copy and paste the base64-encoded service
            account key JSON found in your <a
            href="https://cloud.google.com/iam/docs/keys-list-get#list-keys">Google
            Cloud console</a>. This service account key must have the <a
            href="https://cloud.google.com/iam/docs/understanding-roles#aiplatform.user">Vertex
            AI user</a> role enabled. For more information, see <a
            href="https://cloud.google.com/iam/docs/keys-create-delete#creating">generate
            service account key</a>.
          example: API key specific to use case and model
        azureDeployment:
          type: string
          example: DEPLOYMENT_NAME
          description: >-
            This optional parameter is the name of the deployed Azure OpenAI
            model and is only required when a deployed Azure OpenAI model is
            used for prediction.
        azureEndpoint:
          type: string
          description: "\t\nThis optional parameter is the URL endpoint of the deployed Azure OpenAI model and is only required when a deployed Azure OpenAI model is used for prediction."
          example: https://azure.endpoint.com
        googleProjectId:
          type: string
          example: '[GOOGLE_PROJECT_ID]'
          description: >-
            This parameter is optional, and is only required when a Google
            Vertex AI model is used for prediction.  
        googleRegion:
          type: string
          description: >-
            This parameter is optional, and is only required when a Google
            Vertex AI model is used for prediction. A value of `global` routes
            the query to any available region. Other possible region values are:


            * us-central1

            * us-west4

            * northamerica-northeast1

            * us-east4

            * us-west1

            * asia-northeast3

            * asia-southeast1

            * asia-northeast
          example: '[GOOGLE_PROJECT_REGION_OF_MODEL_ACCESS]'
    StandaloneResponseTokens:
      type: object
      properties:
        response:
          type: string
          description: The results returned from the request.
          example: Is RAG a framework?
        tokensUsed:
          $ref: '#/components/schemas/Token'
    Token:
      type: object
      properties:
        promptTokens:
          type: integer
          format: int32
          description: >-
            The number of tokens generated to prompt the model to continue
            generating results.
          example: 148
        completionTokens:
          type: integer
          format: int32
          description: The number of tokens used until the model completes.
          example: 27
        totalTokens:
          type: integer
          format: int32
          description: The sum of the prompt and completion tokens used in the model.
          example: 175

````