"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""

from .basesdk import BaseSDK
from mistralai import models, utils
from mistralai._hooks import HookContext
from mistralai.models import (
    agentscompletionrequest as models_agentscompletionrequest,
    agentscompletionstreamrequest as models_agentscompletionstreamrequest,
    mistralpromptmode as models_mistralpromptmode,
    prediction as models_prediction,
    responseformat as models_responseformat,
    tool as models_tool,
)
from mistralai.types import OptionalNullable, UNSET
from mistralai.utils import eventstreaming, get_security_from_env
from mistralai.utils.unmarshal_json_response import unmarshal_json_response
from typing import Any, Dict, List, Mapping, Optional, Union


class Agents(BaseSDK):
    r"""Agents API."""

    def complete(
        self,
        *,
        messages: Union[
            List[models_agentscompletionrequest.AgentsCompletionRequestMessages],
            List[
                models_agentscompletionrequest.AgentsCompletionRequestMessagesTypedDict
            ],
        ],
        agent_id: str,
        max_tokens: OptionalNullable[int] = UNSET,
        stream: Optional[bool] = False,
        stop: Optional[
            Union[
                models_agentscompletionrequest.AgentsCompletionRequestStop,
                models_agentscompletionrequest.AgentsCompletionRequestStopTypedDict,
            ]
        ] = None,
        random_seed: OptionalNullable[int] = UNSET,
        metadata: OptionalNullable[Dict[str, Any]] = UNSET,
        response_format: Optional[
            Union[
                models_responseformat.ResponseFormat,
                models_responseformat.ResponseFormatTypedDict,
            ]
        ] = None,
        tools: OptionalNullable[
            Union[List[models_tool.Tool], List[models_tool.ToolTypedDict]]
        ] = UNSET,
        tool_choice: Optional[
            Union[
                models_agentscompletionrequest.AgentsCompletionRequestToolChoice,
                models_agentscompletionrequest.AgentsCompletionRequestToolChoiceTypedDict,
            ]
        ] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        n: OptionalNullable[int] = UNSET,
        prediction: Optional[
            Union[models_prediction.Prediction, models_prediction.PredictionTypedDict]
        ] = None,
        parallel_tool_calls: Optional[bool] = None,
        prompt_mode: OptionalNullable[
            models_mistralpromptmode.MistralPromptMode
        ] = UNSET,
        retries: OptionalNullable[utils.RetryConfig] = UNSET,
        server_url: Optional[str] = None,
        timeout_ms: Optional[int] = None,
        http_headers: Optional[Mapping[str, str]] = None,
    ) -> models.ChatCompletionResponse:
        r"""Agents Completion

        :param messages: The prompt(s) to generate completions for, encoded as a list of dicts, each with a role and content.
        :param agent_id: The ID of the agent to use for this completion.
        :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
        :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
        :param stop: Stop generation if this token is detected, or if one of these tokens is detected when an array is provided.
        :param random_seed: The seed to use for random sampling. If set, calls with the same seed will produce deterministic results.
        :param metadata: Optional key-value metadata to attach to the request.
        :param response_format: Specify the format that the model must output. By default it will use `{ "type": "text" }`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message. Setting to `{ "type": "json_schema" }` enables JSON schema mode, which guarantees the message the model generates is valid JSON and follows the schema you provide.
        :param tools: A list of tools the model may call.
        :param tool_choice: Controls which, if any, of the provided tools the model may call.
        :param presence_penalty: The `presence_penalty` determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
        :param frequency_penalty: The `frequency_penalty` penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
        :param n: Number of completions to return for each request; input tokens are only billed once.
        :param prediction: Enable users to specify an expected completion, optimizing response times by leveraging known or predictable content.
        :param parallel_tool_calls: Whether the model may call multiple tools in parallel.
        :param prompt_mode: Toggles between reasoning mode and no system prompt. When set to `reasoning`, the system prompt for reasoning models is used.
        :param retries: Override the default retry configuration for this method
        :param server_url: Override the default server URL for this method
        :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
        :param http_headers: Additional headers to set or replace on requests.
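
        Example (illustrative sketch, not generated code; the agent ID and
        prompt below are placeholders to adapt)::

            from mistralai import Mistral

            with Mistral(api_key="YOUR_API_KEY") as client:
                res = client.agents.complete(
                    agent_id="ag-your-agent-id",  # hypothetical agent ID
                    messages=[{"role": "user", "content": "Summarize SSE in one line."}],
                    max_tokens=256,
                )
                # res is a models.ChatCompletionResponse
                print(res.choices[0].message.content)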
        """
        base_url = None
        url_variables = None
        if timeout_ms is None:
            timeout_ms = self.sdk_configuration.timeout_ms

        if server_url is not None:
            base_url = server_url
        else:
            base_url = self._get_url(base_url, url_variables)

        request = models.AgentsCompletionRequest(
            max_tokens=max_tokens,
            stream=stream,
            stop=stop,
            random_seed=random_seed,
            metadata=metadata,
            messages=utils.get_pydantic_model(
                messages, List[models.AgentsCompletionRequestMessages]
            ),
            response_format=utils.get_pydantic_model(
                response_format, Optional[models.ResponseFormat]
            ),
            tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
            tool_choice=utils.get_pydantic_model(
                tool_choice, Optional[models.AgentsCompletionRequestToolChoice]
            ),
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            n=n,
            prediction=utils.get_pydantic_model(
                prediction, Optional[models.Prediction]
            ),
            parallel_tool_calls=parallel_tool_calls,
            prompt_mode=prompt_mode,
            agent_id=agent_id,
        )

        req = self._build_request(
            method="POST",
            path="/v1/agents/completions",
            base_url=base_url,
            url_variables=url_variables,
            request=request,
            request_body_required=True,
            request_has_path_params=False,
            request_has_query_params=True,
            user_agent_header="user-agent",
            accept_header_value="application/json",
            http_headers=http_headers,
            security=self.sdk_configuration.security,
            get_serialized_body=lambda: utils.serialize_request_body(
                request, False, False, "json", models.AgentsCompletionRequest
            ),
            allow_empty_value=None,
            timeout_ms=timeout_ms,
        )

        if retries == UNSET:
            if self.sdk_configuration.retry_config is not UNSET:
                retries = self.sdk_configuration.retry_config

        retry_config = None
        if isinstance(retries, utils.RetryConfig):
            retry_config = (retries, ["429", "500", "502", "503", "504"])

        http_res = self.do_request(
            hook_ctx=HookContext(
                config=self.sdk_configuration,
                base_url=base_url or "",
                operation_id="agents_completion_v1_agents_completions_post",
                oauth2_scopes=None,
                security_source=get_security_from_env(
                    self.sdk_configuration.security, models.Security
                ),
            ),
            request=req,
            error_status_codes=["422", "4XX", "5XX"],
            retry_config=retry_config,
        )

        response_data: Any = None
        if utils.match_response(http_res, "200", "application/json"):
            return unmarshal_json_response(models.ChatCompletionResponse, http_res)
        if utils.match_response(http_res, "422", "application/json"):
            response_data = unmarshal_json_response(
                models.HTTPValidationErrorData, http_res
            )
            raise models.HTTPValidationError(response_data, http_res)
        if utils.match_response(http_res, "4XX", "*"):
            http_res_text = utils.stream_to_text(http_res)
            raise models.SDKError("API error occurred", http_res, http_res_text)
        if utils.match_response(http_res, "5XX", "*"):
            http_res_text = utils.stream_to_text(http_res)
            raise models.SDKError("API error occurred", http_res, http_res_text)

        raise models.SDKError("Unexpected response received", http_res)

    async def complete_async(
        self,
        *,
        messages: Union[
            List[models_agentscompletionrequest.AgentsCompletionRequestMessages],
            List[
                models_agentscompletionrequest.AgentsCompletionRequestMessagesTypedDict
            ],
        ],
        agent_id: str,
        max_tokens: OptionalNullable[int] = UNSET,
        stream: Optional[bool] = False,
        stop: Optional[
            Union[
                models_agentscompletionrequest.AgentsCompletionRequestStop,
                models_agentscompletionrequest.AgentsCompletionRequestStopTypedDict,
            ]
        ] = None,
        random_seed: OptionalNullable[int] = UNSET,
        metadata: OptionalNullable[Dict[str, Any]] = UNSET,
        response_format: Optional[
            Union[
                models_responseformat.ResponseFormat,
                models_responseformat.ResponseFormatTypedDict,
            ]
        ] = None,
        tools: OptionalNullable[
            Union[List[models_tool.Tool], List[models_tool.ToolTypedDict]]
        ] = UNSET,
        tool_choice: Optional[
            Union[
                models_agentscompletionrequest.AgentsCompletionRequestToolChoice,
                models_agentscompletionrequest.AgentsCompletionRequestToolChoiceTypedDict,
            ]
        ] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        n: OptionalNullable[int] = UNSET,
        prediction: Optional[
            Union[models_prediction.Prediction, models_prediction.PredictionTypedDict]
        ] = None,
        parallel_tool_calls: Optional[bool] = None,
        prompt_mode: OptionalNullable[
            models_mistralpromptmode.MistralPromptMode
        ] = UNSET,
        retries: OptionalNullable[utils.RetryConfig] = UNSET,
        server_url: Optional[str] = None,
        timeout_ms: Optional[int] = None,
        http_headers: Optional[Mapping[str, str]] = None,
    ) -> models.ChatCompletionResponse:
        r"""Agents Completion

        :param messages: The prompt(s) to generate completions for, encoded as a list of dicts, each with a role and content.
        :param agent_id: The ID of the agent to use for this completion.
        :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
        :param stream: Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.
        :param stop: Stop generation if this token is detected, or if one of these tokens is detected when an array is provided.
        :param random_seed: The seed to use for random sampling. If set, calls with the same seed will produce deterministic results.
        :param metadata: Optional key-value metadata to attach to the request.
        :param response_format: Specify the format that the model must output. By default it will use `{ "type": "text" }`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message. Setting to `{ "type": "json_schema" }` enables JSON schema mode, which guarantees the message the model generates is valid JSON and follows the schema you provide.
        :param tools: A list of tools the model may call.
        :param tool_choice: Controls which, if any, of the provided tools the model may call.
        :param presence_penalty: The `presence_penalty` determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
        :param frequency_penalty: The `frequency_penalty` penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
        :param n: Number of completions to return for each request; input tokens are only billed once.
        :param prediction: Enable users to specify an expected completion, optimizing response times by leveraging known or predictable content.
        :param parallel_tool_calls: Whether the model may call multiple tools in parallel.
        :param prompt_mode: Toggles between reasoning mode and no system prompt. When set to `reasoning`, the system prompt for reasoning models is used.
        :param retries: Override the default retry configuration for this method
        :param server_url: Override the default server URL for this method
        :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
        :param http_headers: Additional headers to set or replace on requests.
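
        Example (illustrative sketch, not generated code; assumes the client
        supports async context management, and the agent ID is a placeholder)::

            import asyncio

            from mistralai import Mistral

            async def main():
                async with Mistral(api_key="YOUR_API_KEY") as client:
                    res = await client.agents.complete_async(
                        agent_id="ag-your-agent-id",  # hypothetical agent ID
                        messages=[{"role": "user", "content": "Hello!"}],
                    )
                    print(res.choices[0].message.content)

            asyncio.run(main())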
        """
        base_url = None
        url_variables = None
        if timeout_ms is None:
            timeout_ms = self.sdk_configuration.timeout_ms

        if server_url is not None:
            base_url = server_url
        else:
            base_url = self._get_url(base_url, url_variables)

        request = models.AgentsCompletionRequest(
            max_tokens=max_tokens,
            stream=stream,
            stop=stop,
            random_seed=random_seed,
            metadata=metadata,
            messages=utils.get_pydantic_model(
                messages, List[models.AgentsCompletionRequestMessages]
            ),
            response_format=utils.get_pydantic_model(
                response_format, Optional[models.ResponseFormat]
            ),
            tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
            tool_choice=utils.get_pydantic_model(
                tool_choice, Optional[models.AgentsCompletionRequestToolChoice]
            ),
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            n=n,
            prediction=utils.get_pydantic_model(
                prediction, Optional[models.Prediction]
            ),
            parallel_tool_calls=parallel_tool_calls,
            prompt_mode=prompt_mode,
            agent_id=agent_id,
        )

        req = self._build_request_async(
            method="POST",
            path="/v1/agents/completions",
            base_url=base_url,
            url_variables=url_variables,
            request=request,
            request_body_required=True,
            request_has_path_params=False,
            request_has_query_params=True,
            user_agent_header="user-agent",
            accept_header_value="application/json",
            http_headers=http_headers,
            security=self.sdk_configuration.security,
            get_serialized_body=lambda: utils.serialize_request_body(
                request, False, False, "json", models.AgentsCompletionRequest
            ),
            allow_empty_value=None,
            timeout_ms=timeout_ms,
        )

        if retries == UNSET:
            if self.sdk_configuration.retry_config is not UNSET:
                retries = self.sdk_configuration.retry_config

        retry_config = None
        if isinstance(retries, utils.RetryConfig):
            retry_config = (retries, ["429", "500", "502", "503", "504"])

        http_res = await self.do_request_async(
            hook_ctx=HookContext(
                config=self.sdk_configuration,
                base_url=base_url or "",
                operation_id="agents_completion_v1_agents_completions_post",
                oauth2_scopes=None,
                security_source=get_security_from_env(
                    self.sdk_configuration.security, models.Security
                ),
            ),
            request=req,
            error_status_codes=["422", "4XX", "5XX"],
            retry_config=retry_config,
        )

        response_data: Any = None
        if utils.match_response(http_res, "200", "application/json"):
            return unmarshal_json_response(models.ChatCompletionResponse, http_res)
        if utils.match_response(http_res, "422", "application/json"):
            response_data = unmarshal_json_response(
                models.HTTPValidationErrorData, http_res
            )
            raise models.HTTPValidationError(response_data, http_res)
        if utils.match_response(http_res, "4XX", "*"):
            http_res_text = await utils.stream_to_text_async(http_res)
            raise models.SDKError("API error occurred", http_res, http_res_text)
        if utils.match_response(http_res, "5XX", "*"):
            http_res_text = await utils.stream_to_text_async(http_res)
            raise models.SDKError("API error occurred", http_res, http_res_text)

        raise models.SDKError("Unexpected response received", http_res)

    def stream(
        self,
        *,
        messages: Union[
            List[
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestMessages
            ],
            List[
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestMessagesTypedDict
            ],
        ],
        agent_id: str,
        max_tokens: OptionalNullable[int] = UNSET,
        stream: Optional[bool] = True,
        stop: Optional[
            Union[
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestStop,
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestStopTypedDict,
            ]
        ] = None,
        random_seed: OptionalNullable[int] = UNSET,
        metadata: OptionalNullable[Dict[str, Any]] = UNSET,
        response_format: Optional[
            Union[
                models_responseformat.ResponseFormat,
                models_responseformat.ResponseFormatTypedDict,
            ]
        ] = None,
        tools: OptionalNullable[
            Union[List[models_tool.Tool], List[models_tool.ToolTypedDict]]
        ] = UNSET,
        tool_choice: Optional[
            Union[
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestToolChoice,
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestToolChoiceTypedDict,
            ]
        ] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        n: OptionalNullable[int] = UNSET,
        prediction: Optional[
            Union[models_prediction.Prediction, models_prediction.PredictionTypedDict]
        ] = None,
        parallel_tool_calls: Optional[bool] = None,
        prompt_mode: OptionalNullable[
            models_mistralpromptmode.MistralPromptMode
        ] = UNSET,
        retries: OptionalNullable[utils.RetryConfig] = UNSET,
        server_url: Optional[str] = None,
        timeout_ms: Optional[int] = None,
        http_headers: Optional[Mapping[str, str]] = None,
    ) -> eventstreaming.EventStream[models.CompletionEvent]:
        r"""Stream Agents completion

        Mistral AI can stream responses back to the client, allowing partial results for certain requests. Tokens are sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.

        :param messages: The prompt(s) to generate completions for, encoded as a list of dicts, each with a role and content.
        :param agent_id: The ID of the agent to use for this completion.
        :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
        :param stream: Whether to stream back partial progress; defaults to `True` for this method.
        :param stop: Stop generation if this token is detected, or if one of these tokens is detected when an array is provided.
        :param random_seed: The seed to use for random sampling. If set, calls with the same seed will produce deterministic results.
        :param metadata: Optional key-value metadata to attach to the request.
        :param response_format: Specify the format that the model must output. By default it will use `{ "type": "text" }`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message. Setting to `{ "type": "json_schema" }` enables JSON schema mode, which guarantees the message the model generates is valid JSON and follows the schema you provide.
        :param tools: A list of tools the model may call.
        :param tool_choice: Controls which, if any, of the provided tools the model may call.
        :param presence_penalty: The `presence_penalty` determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
        :param frequency_penalty: The `frequency_penalty` penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
        :param n: Number of completions to return for each request; input tokens are only billed once.
        :param prediction: Enable users to specify an expected completion, optimizing response times by leveraging known or predictable content.
        :param parallel_tool_calls: Whether the model may call multiple tools in parallel.
        :param prompt_mode: Toggles between reasoning mode and no system prompt. When set to `reasoning`, the system prompt for reasoning models is used.
        :param retries: Override the default retry configuration for this method
        :param server_url: Override the default server URL for this method
        :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
        :param http_headers: Additional headers to set or replace on requests.
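
        Example (illustrative sketch, not generated code; the agent ID is a
        placeholder, and the `delta.content` access assumes plain-text chunks)::

            from mistralai import Mistral

            with Mistral(api_key="YOUR_API_KEY") as client:
                res = client.agents.stream(
                    agent_id="ag-your-agent-id",  # hypothetical agent ID
                    messages=[{"role": "user", "content": "Tell me a story."}],
                )
                with res as event_stream:
                    for event in event_stream:
                        # Each event wraps a completion chunk; content may be
                        # None on the final chunk, hence the `or ""` fallback.
                        print(event.data.choices[0].delta.content or "", end="")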
        """
        base_url = None
        url_variables = None
        if timeout_ms is None:
            timeout_ms = self.sdk_configuration.timeout_ms

        if server_url is not None:
            base_url = server_url
        else:
            base_url = self._get_url(base_url, url_variables)

        request = models.AgentsCompletionStreamRequest(
            max_tokens=max_tokens,
            stream=stream,
            stop=stop,
            random_seed=random_seed,
            metadata=metadata,
            messages=utils.get_pydantic_model(
                messages, List[models.AgentsCompletionStreamRequestMessages]
            ),
            response_format=utils.get_pydantic_model(
                response_format, Optional[models.ResponseFormat]
            ),
            tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
            tool_choice=utils.get_pydantic_model(
                tool_choice, Optional[models.AgentsCompletionStreamRequestToolChoice]
            ),
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            n=n,
            prediction=utils.get_pydantic_model(
                prediction, Optional[models.Prediction]
            ),
            parallel_tool_calls=parallel_tool_calls,
            prompt_mode=prompt_mode,
            agent_id=agent_id,
        )

        req = self._build_request(
            method="POST",
            path="/v1/agents/completions#stream",
            base_url=base_url,
            url_variables=url_variables,
            request=request,
            request_body_required=True,
            request_has_path_params=False,
            request_has_query_params=True,
            user_agent_header="user-agent",
            accept_header_value="text/event-stream",
            http_headers=http_headers,
            security=self.sdk_configuration.security,
            get_serialized_body=lambda: utils.serialize_request_body(
                request, False, False, "json", models.AgentsCompletionStreamRequest
            ),
            allow_empty_value=None,
            timeout_ms=timeout_ms,
        )

        if retries == UNSET:
            if self.sdk_configuration.retry_config is not UNSET:
                retries = self.sdk_configuration.retry_config

        retry_config = None
        if isinstance(retries, utils.RetryConfig):
            retry_config = (retries, ["429", "500", "502", "503", "504"])

        http_res = self.do_request(
            hook_ctx=HookContext(
                config=self.sdk_configuration,
                base_url=base_url or "",
                operation_id="stream_agents",
                oauth2_scopes=None,
                security_source=get_security_from_env(
                    self.sdk_configuration.security, models.Security
                ),
            ),
            request=req,
            error_status_codes=["422", "4XX", "5XX"],
            stream=True,
            retry_config=retry_config,
        )

        response_data: Any = None
        if utils.match_response(http_res, "200", "text/event-stream"):
            return eventstreaming.EventStream(
                http_res,
                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
                sentinel="[DONE]",
                client_ref=self,
            )
        if utils.match_response(http_res, "422", "application/json"):
            http_res_text = utils.stream_to_text(http_res)
            response_data = unmarshal_json_response(
                models.HTTPValidationErrorData, http_res, http_res_text
            )
            raise models.HTTPValidationError(response_data, http_res, http_res_text)
        if utils.match_response(http_res, "4XX", "*"):
            http_res_text = utils.stream_to_text(http_res)
            raise models.SDKError("API error occurred", http_res, http_res_text)
        if utils.match_response(http_res, "5XX", "*"):
            http_res_text = utils.stream_to_text(http_res)
            raise models.SDKError("API error occurred", http_res, http_res_text)

        http_res_text = utils.stream_to_text(http_res)
        raise models.SDKError("Unexpected response received", http_res, http_res_text)

    async def stream_async(
        self,
        *,
        messages: Union[
            List[
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestMessages
            ],
            List[
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestMessagesTypedDict
            ],
        ],
        agent_id: str,
        max_tokens: OptionalNullable[int] = UNSET,
        stream: Optional[bool] = True,
        stop: Optional[
            Union[
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestStop,
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestStopTypedDict,
            ]
        ] = None,
        random_seed: OptionalNullable[int] = UNSET,
        metadata: OptionalNullable[Dict[str, Any]] = UNSET,
        response_format: Optional[
            Union[
                models_responseformat.ResponseFormat,
                models_responseformat.ResponseFormatTypedDict,
            ]
        ] = None,
        tools: OptionalNullable[
            Union[List[models_tool.Tool], List[models_tool.ToolTypedDict]]
        ] = UNSET,
        tool_choice: Optional[
            Union[
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestToolChoice,
                models_agentscompletionstreamrequest.AgentsCompletionStreamRequestToolChoiceTypedDict,
            ]
        ] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        n: OptionalNullable[int] = UNSET,
        prediction: Optional[
            Union[models_prediction.Prediction, models_prediction.PredictionTypedDict]
        ] = None,
        parallel_tool_calls: Optional[bool] = None,
        prompt_mode: OptionalNullable[
            models_mistralpromptmode.MistralPromptMode
        ] = UNSET,
        retries: OptionalNullable[utils.RetryConfig] = UNSET,
        server_url: Optional[str] = None,
        timeout_ms: Optional[int] = None,
        http_headers: Optional[Mapping[str, str]] = None,
    ) -> eventstreaming.EventStreamAsync[models.CompletionEvent]:
        r"""Stream Agents completion

        Mistral AI can stream responses back to the client, allowing partial results for certain requests. Tokens are sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message. Otherwise, the server will hold the request open until the timeout or until completion, with the response containing the full result as JSON.

        :param messages: The prompt(s) to generate completions for, encoded as a list of dicts, each with a role and content.
        :param agent_id: The ID of the agent to use for this completion.
        :param max_tokens: The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
        :param stream: Whether to stream back partial progress; defaults to `True` for this method.
        :param stop: Stop generation if this token is detected, or if one of these tokens is detected when an array is provided.
        :param random_seed: The seed to use for random sampling. If set, calls with the same seed will produce deterministic results.
        :param metadata: Optional key-value metadata to attach to the request.
        :param response_format: Specify the format that the model must output. By default it will use `{ "type": "text" }`. Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. When using JSON mode you MUST also instruct the model to produce JSON yourself with a system or a user message. Setting to `{ "type": "json_schema" }` enables JSON schema mode, which guarantees the message the model generates is valid JSON and follows the schema you provide.
        :param tools: A list of tools the model may call.
        :param tool_choice: Controls which, if any, of the provided tools the model may call.
        :param presence_penalty: The `presence_penalty` determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative.
        :param frequency_penalty: The `frequency_penalty` penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition.
        :param n: Number of completions to return for each request; input tokens are only billed once.
        :param prediction: Enable users to specify an expected completion, optimizing response times by leveraging known or predictable content.
        :param parallel_tool_calls: Whether the model may call multiple tools in parallel.
        :param prompt_mode: Toggles between reasoning mode and no system prompt. When set to `reasoning`, the system prompt for reasoning models is used.
        :param retries: Override the default retry configuration for this method
        :param server_url: Override the default server URL for this method
        :param timeout_ms: Override the default request timeout configuration for this method in milliseconds
        :param http_headers: Additional headers to set or replace on requests.
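
        Example (illustrative sketch, not generated code; assumes the returned
        stream supports async context management, and the agent ID is a
        placeholder)::

            import asyncio

            from mistralai import Mistral

            async def main():
                async with Mistral(api_key="YOUR_API_KEY") as client:
                    res = await client.agents.stream_async(
                        agent_id="ag-your-agent-id",  # hypothetical agent ID
                        messages=[{"role": "user", "content": "Tell me a story."}],
                    )
                    async with res as event_stream:
                        async for event in event_stream:
                            # content may be None on the final chunk
                            print(event.data.choices[0].delta.content or "", end="")

            asyncio.run(main())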
        """
        base_url = None
        url_variables = None
        if timeout_ms is None:
            timeout_ms = self.sdk_configuration.timeout_ms

        if server_url is not None:
            base_url = server_url
        else:
            base_url = self._get_url(base_url, url_variables)

        request = models.AgentsCompletionStreamRequest(
            max_tokens=max_tokens,
            stream=stream,
            stop=stop,
            random_seed=random_seed,
            metadata=metadata,
            messages=utils.get_pydantic_model(
                messages, List[models.AgentsCompletionStreamRequestMessages]
            ),
            response_format=utils.get_pydantic_model(
                response_format, Optional[models.ResponseFormat]
            ),
            tools=utils.get_pydantic_model(tools, OptionalNullable[List[models.Tool]]),
            tool_choice=utils.get_pydantic_model(
                tool_choice, Optional[models.AgentsCompletionStreamRequestToolChoice]
            ),
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            n=n,
            prediction=utils.get_pydantic_model(
                prediction, Optional[models.Prediction]
            ),
            parallel_tool_calls=parallel_tool_calls,
            prompt_mode=prompt_mode,
            agent_id=agent_id,
        )

        req = self._build_request_async(
            method="POST",
            path="/v1/agents/completions#stream",
            base_url=base_url,
            url_variables=url_variables,
            request=request,
            request_body_required=True,
            request_has_path_params=False,
            request_has_query_params=True,
            user_agent_header="user-agent",
            accept_header_value="text/event-stream",
            http_headers=http_headers,
            security=self.sdk_configuration.security,
            get_serialized_body=lambda: utils.serialize_request_body(
                request, False, False, "json", models.AgentsCompletionStreamRequest
            ),
            allow_empty_value=None,
            timeout_ms=timeout_ms,
        )

        if retries == UNSET:
            if self.sdk_configuration.retry_config is not UNSET:
                retries = self.sdk_configuration.retry_config

        retry_config = None
        if isinstance(retries, utils.RetryConfig):
            retry_config = (retries, ["429", "500", "502", "503", "504"])

        http_res = await self.do_request_async(
            hook_ctx=HookContext(
                config=self.sdk_configuration,
                base_url=base_url or "",
                operation_id="stream_agents",
                oauth2_scopes=None,
                security_source=get_security_from_env(
                    self.sdk_configuration.security, models.Security
                ),
            ),
            request=req,
            error_status_codes=["422", "4XX", "5XX"],
            stream=True,
            retry_config=retry_config,
        )

        response_data: Any = None
        if utils.match_response(http_res, "200", "text/event-stream"):
            return eventstreaming.EventStreamAsync(
                http_res,
                lambda raw: utils.unmarshal_json(raw, models.CompletionEvent),
                sentinel="[DONE]",
                client_ref=self,
            )
        if utils.match_response(http_res, "422", "application/json"):
            http_res_text = await utils.stream_to_text_async(http_res)
            response_data = unmarshal_json_response(
                models.HTTPValidationErrorData, http_res, http_res_text
            )
            raise models.HTTPValidationError(response_data, http_res, http_res_text)
        if utils.match_response(http_res, "4XX", "*"):
            http_res_text = await utils.stream_to_text_async(http_res)
            raise models.SDKError("API error occurred", http_res, http_res_text)
        if utils.match_response(http_res, "5XX", "*"):
            http_res_text = await utils.stream_to_text_async(http_res)
            raise models.SDKError("API error occurred", http_res, http_res_text)

        http_res_text = await utils.stream_to_text_async(http_res)
        raise models.SDKError("Unexpected response received", http_res, http_res_text)
