diff --git a/src/uipath/platform/attachments/attachments.py b/src/uipath/platform/attachments/attachments.py index e91c7eff4..8379a7955 100644 --- a/src/uipath/platform/attachments/attachments.py +++ b/src/uipath/platform/attachments/attachments.py @@ -16,7 +16,7 @@ class AttachmentMode(str, Enum): class Attachment(BaseModel): - """Model representing an attachment. Id 'None' is used for uploads.""" + """Model representing an attachment.""" id: uuid.UUID = Field(..., alias="ID") full_name: str = Field(..., alias="FullName") diff --git a/src/uipath/platform/documents/_documents_service.py b/src/uipath/platform/documents/_documents_service.py index cca831fcd..1f1c7521e 100644 --- a/src/uipath/platform/documents/_documents_service.py +++ b/src/uipath/platform/documents/_documents_service.py @@ -8,6 +8,7 @@ from ..._utils import Endpoint, resource_override from ...tracing import traced +from ..attachments import Attachment from ..common import BaseService, FolderContext, UiPathApiConfig, UiPathExecutionContext from ..errors import OperationFailedException, OperationNotCompleteException from .documents import ( @@ -207,6 +208,7 @@ def _get_document_id( project_id=project_id, file=file, file_path=file_path, + attachment=None, ) self._wait_for_digitization( project_id=project_id, @@ -229,6 +231,7 @@ async def _get_document_id_async( project_id=project_id, file=file, file_path=file_path, + attachment=None, ) await self._wait_for_digitization_async( project_id=project_id, @@ -289,11 +292,61 @@ async def _get_project_id_and_tag_async( return project_id, tag - def _start_digitization( + def _start_digitization_from_attachment( self, project_id: str, - file: Optional[FileContent] = None, - file_path: Optional[str] = None, + attachment: Attachment, + ) -> str: + return self.request( + "POST", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + json={ + 
"attachmentId": str(attachment.id), + "fileName": attachment.full_name, + "mimeType": attachment.mime_type, + "folderId": str( + UUID( + int=0 + ) # temporary workaround until backend supports null folderId + ), + }, + ).json()["documentId"] + + async def _start_digitization_from_attachment_async( + self, + project_id: str, + attachment: Attachment, + ) -> str: + return ( + await self.request_async( + "POST", + url=Endpoint( + f"/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment" + ), + params={"api-version": 1.1}, + headers=self._get_common_headers(), + json={ + "attachmentId": str(attachment.id), + "fileName": attachment.full_name, + "mimeType": attachment.mime_type, + "folderId": str( + UUID( + int=0 + ) # temporary workaround until backend supports null folderId + ), + }, + ) + ).json()["documentId"] + + def _start_digitization_from_file( + self, + project_id: str, + file: Optional[FileContent], + file_path: Optional[str], ) -> str: with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle: return self.request( @@ -306,11 +359,11 @@ def _start_digitization( files={"File": handle}, ).json()["documentId"] - async def _start_digitization_async( + async def _start_digitization_from_file_async( self, project_id: str, - file: Optional[FileContent] = None, - file_path: Optional[str] = None, + file: Optional[FileContent], + file_path: Optional[str], ) -> str: with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle: return ( @@ -325,6 +378,44 @@ async def _start_digitization_async( ) ).json()["documentId"] + def _start_digitization( + self, + project_id: str, + file: Optional[FileContent], + file_path: Optional[str], + attachment: Optional[Attachment], + ) -> str: + if attachment is not None: + return self._start_digitization_from_attachment( + project_id=project_id, + attachment=attachment, + ) + else: + return self._start_digitization_from_file( + project_id=project_id, + file=file, + 
file_path=file_path, + ) + + async def _start_digitization_async( + self, + project_id: str, + file: Optional[FileContent], + file_path: Optional[str], + attachment: Optional[Attachment], + ) -> str: + if attachment is not None: + return await self._start_digitization_from_attachment_async( + project_id=project_id, + attachment=attachment, + ) + else: + return await self._start_digitization_from_file_async( + project_id=project_id, + file=file, + file_path=file_path, + ) + def _wait_for_digitization(self, project_id: str, document_id: str) -> None: def result_getter() -> Tuple[str, Optional[str], Optional[str]]: result = self.request( @@ -917,6 +1008,7 @@ def start_ixp_extraction( tag: str, file: Optional[FileContent] = None, file_path: Optional[str] = None, + attachment: Optional[Attachment] = None, ) -> StartExtractionResponse: """Start an IXP extraction process without waiting for results (non-blocking). @@ -929,9 +1021,10 @@ def start_ixp_extraction( tag (str): Tag of the published project version (e.g., "staging"). file (FileContent, optional): The document file to be processed. file_path (str, optional): Path to the document file to be processed. + attachment (Attachment, optional): An existing attachment to use for digitization. Note: - Either `file` or `file_path` must be provided, but not both. + Exactly one of `file`, `file_path`, or `attachment` must be provided. 
Returns: ExtractionStartResponse: Contains the operation_id, document_id, project_id, and tag @@ -946,14 +1039,14 @@ def start_ixp_extraction( # start_response.operation_id can be used to poll for results later ``` """ - _exactly_one_must_be_provided(file=file, file_path=file_path) + _exactly_one_must_be_provided( + file=file, file_path=file_path, attachment=attachment + ) project_id = self._get_project_id_by_name(project_name, ProjectType.IXP) document_id = self._start_digitization( - project_id=project_id, - file=file, - file_path=file_path, + project_id=project_id, file=file, file_path=file_path, attachment=attachment ) return self._start_extraction( @@ -971,9 +1064,12 @@ async def start_ixp_extraction_async( tag: str, file: Optional[FileContent] = None, file_path: Optional[str] = None, + attachment: Optional[Attachment] = None, ) -> StartExtractionResponse: """Asynchronous version of the [`start_ixp_extraction`][uipath.platform.documents._documents_service.DocumentsService.start_ixp_extraction] method.""" - _exactly_one_must_be_provided(file=file, file_path=file_path) + _exactly_one_must_be_provided( + file=file, file_path=file_path, attachment=attachment + ) project_id = await self._get_project_id_by_name_async( project_name, ProjectType.IXP @@ -983,6 +1079,7 @@ async def start_ixp_extraction_async( project_id=project_id, file=file, file_path=file_path, + attachment=attachment, ) return await self._start_extraction_async( diff --git a/tests/sdk/services/test_documents_service.py b/tests/sdk/services/test_documents_service.py index 7f033b221..baa875d73 100644 --- a/tests/sdk/services/test_documents_service.py +++ b/tests/sdk/services/test_documents_service.py @@ -8,6 +8,7 @@ from pytest_httpx import HTTPXMock from uipath.platform import UiPathApiConfig, UiPathExecutionContext +from uipath.platform.attachments import Attachment from uipath.platform.documents import ( ActionPriority, ClassificationResult, @@ -2078,6 +2079,78 @@ async def 
test_start_ixp_extraction( assert response.project_id == project_id assert response.tag == "staging" + @pytest.mark.parametrize("mode", ["sync", "async"]) + @pytest.mark.asyncio + async def test_start_ixp_extraction_using_attachment( + self, + httpx_mock: HTTPXMock, + service: DocumentsService, + base_url: str, + org: str, + tenant: str, + mode: str, + ): + # ARRANGE + project_id = str(uuid4()) + document_id = str(uuid4()) + operation_id = str(uuid4()) + attachment = Attachment( + ID=uuid4(), # type: ignore + FullName="alex.pdf", + MimeType="application/pdf", + ) + + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects?api-version=1.1&type=IXP", + status_code=200, + match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + json={ + "projects": [ + {"id": project_id, "name": "TestProjectIXP"}, + ] + }, + ) + httpx_mock.add_response( + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment?api-version=1.1", + status_code=200, + match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + match_json={ + "attachmentId": str(attachment.id), + "fileName": attachment.full_name, + "mimeType": attachment.mime_type, + "folderId": str(UUID(int=0)), + }, + json={"documentId": document_id}, + ) + httpx_mock.add_response( + method="POST", + url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/staging/document-types/{UUID(int=0)}/extraction/start?api-version=1.1", + status_code=200, + match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}, + match_json={"documentId": document_id}, + json={"operationId": operation_id}, + ) + + # ACT + if mode == "async": + response = await service.start_ixp_extraction_async( + project_name="TestProjectIXP", + tag="staging", + attachment=attachment, + ) + else: + response = service.start_ixp_extraction( + project_name="TestProjectIXP", + tag="staging", + attachment=attachment, + ) + + # ASSERT + 
assert response.operation_id == operation_id + assert response.document_id == document_id + assert response.project_id == project_id + assert response.tag == "staging" + @pytest.mark.parametrize("mode", ["sync", "async"]) @pytest.mark.asyncio async def test_start_ixp_extraction_invalid_parameters( @@ -2088,7 +2161,7 @@ async def test_start_ixp_extraction_invalid_parameters( # ACT & ASSERT with pytest.raises( ValueError, - match="Exactly one of `file, file_path` must be provided", + match="Exactly one of `file, file_path, attachment` must be provided", ): if mode == "async": await service.start_ixp_extraction_async(