Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/uipath/platform/attachments/attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class AttachmentMode(str, Enum):


class Attachment(BaseModel):
"""Model representing an attachment. Id 'None' is used for uploads."""
"""Model representing an attachment."""

id: uuid.UUID = Field(..., alias="ID")
full_name: str = Field(..., alias="FullName")
Expand Down
121 changes: 109 additions & 12 deletions src/uipath/platform/documents/_documents_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from ..._utils import Endpoint, resource_override
from ...tracing import traced
from ..attachments import Attachment
from ..common import BaseService, FolderContext, UiPathApiConfig, UiPathExecutionContext
from ..errors import OperationFailedException, OperationNotCompleteException
from .documents import (
Expand Down Expand Up @@ -207,6 +208,7 @@ def _get_document_id(
project_id=project_id,
file=file,
file_path=file_path,
attachment=None,
)
self._wait_for_digitization(
project_id=project_id,
Expand All @@ -229,6 +231,7 @@ async def _get_document_id_async(
project_id=project_id,
file=file,
file_path=file_path,
attachment=None,
)
await self._wait_for_digitization_async(
project_id=project_id,
Expand Down Expand Up @@ -289,11 +292,61 @@ async def _get_project_id_and_tag_async(

return project_id, tag

def _start_digitization(
def _start_digitization_from_attachment(
self,
project_id: str,
file: Optional[FileContent] = None,
file_path: Optional[str] = None,
attachment: Attachment,
) -> str:
return self.request(
"POST",
url=Endpoint(
f"/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment"
),
params={"api-version": 1.1},
headers=self._get_common_headers(),
json={
"attachmentId": str(attachment.id),
"fileName": attachment.full_name,
"mimeType": attachment.mime_type,
"folderId": str(
UUID(
int=0
) # temporary workaround until backend supports null folderId
),
},
).json()["documentId"]

async def _start_digitization_from_attachment_async(
    self,
    project_id: str,
    attachment: Attachment,
) -> str:
    """Asynchronously start digitization of an existing job attachment.

    Sends a POST to the project's ``startFromJobAttachment`` digitization
    endpoint, describing the attachment by its id, full name and MIME type.

    Args:
        project_id: Project identifier interpolated into the endpoint path.
        attachment: Attachment whose ``id``, ``full_name`` and ``mime_type``
            are sent in the request body.

    Returns:
        The ``documentId`` field of the JSON response.
    """
    endpoint = Endpoint(
        f"/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment"
    )
    common_headers = self._get_common_headers()

    # temporary workaround until backend supports null folderId
    placeholder_folder_id = str(UUID(int=0))

    payload = {
        "attachmentId": str(attachment.id),
        "fileName": attachment.full_name,
        "mimeType": attachment.mime_type,
        "folderId": placeholder_folder_id,
    }

    response = await self.request_async(
        "POST",
        url=endpoint,
        params={"api-version": 1.1},
        headers=common_headers,
        json=payload,
    )
    return response.json()["documentId"]

def _start_digitization_from_file(
self,
project_id: str,
file: Optional[FileContent],
file_path: Optional[str],
) -> str:
with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle:
return self.request(
Expand All @@ -306,11 +359,11 @@ def _start_digitization(
files={"File": handle},
).json()["documentId"]

async def _start_digitization_async(
async def _start_digitization_from_file_async(
self,
project_id: str,
file: Optional[FileContent] = None,
file_path: Optional[str] = None,
file: Optional[FileContent],
file_path: Optional[str],
) -> str:
with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle:
return (
Expand All @@ -325,6 +378,44 @@ async def _start_digitization_async(
)
).json()["documentId"]

def _start_digitization(
    self,
    project_id: str,
    file: Optional[FileContent],
    file_path: Optional[str],
    attachment: Optional[Attachment],
) -> str:
    """Start digitization from either an attachment or a file source.

    Args:
        project_id: Project identifier forwarded to the selected helper.
        file: File content, forwarded to the file-based helper.
        file_path: Path to a file, forwarded to the file-based helper.
        attachment: When not ``None``, digitization is started from this
            attachment and ``file`` / ``file_path`` are not used.

    Returns:
        The string returned by the selected helper.
    """
    # Guard clause: with no attachment, use the file-based upload path.
    if attachment is None:
        return self._start_digitization_from_file(
            project_id=project_id,
            file=file,
            file_path=file_path,
        )

    return self._start_digitization_from_attachment(
        project_id=project_id,
        attachment=attachment,
    )

async def _start_digitization_async(
    self,
    project_id: str,
    file: Optional[FileContent],
    file_path: Optional[str],
    attachment: Optional[Attachment],
) -> str:
    """Asynchronously start digitization from an attachment or a file source.

    Args:
        project_id: Project identifier forwarded to the selected helper.
        file: File content, forwarded to the file-based helper.
        file_path: Path to a file, forwarded to the file-based helper.
        attachment: When not ``None``, digitization is started from this
            attachment and ``file`` / ``file_path`` are not used.

    Returns:
        The string returned by the selected async helper.
    """
    # Guard clause: with no attachment, use the file-based upload path.
    if attachment is None:
        return await self._start_digitization_from_file_async(
            project_id=project_id,
            file=file,
            file_path=file_path,
        )

    return await self._start_digitization_from_attachment_async(
        project_id=project_id,
        attachment=attachment,
    )

def _wait_for_digitization(self, project_id: str, document_id: str) -> None:
def result_getter() -> Tuple[str, Optional[str], Optional[str]]:
result = self.request(
Expand Down Expand Up @@ -917,6 +1008,7 @@ def start_ixp_extraction(
tag: str,
file: Optional[FileContent] = None,
file_path: Optional[str] = None,
attachment: Optional[Attachment] = None,
) -> StartExtractionResponse:
"""Start an IXP extraction process without waiting for results (non-blocking).

Expand All @@ -929,9 +1021,10 @@ def start_ixp_extraction(
tag (str): Tag of the published project version (e.g., "staging").
file (FileContent, optional): The document file to be processed.
file_path (str, optional): Path to the document file to be processed.
attachment (Attachment, optional): An existing attachment to use for digitization.

Note:
Either `file` or `file_path` must be provided, but not both.
Either `file`, `file_path` or `attachment` must be provided, but not more than one.

Returns:
StartExtractionResponse: Contains the operation_id, document_id, project_id, and tag
Expand All @@ -946,14 +1039,14 @@ def start_ixp_extraction(
# start_response.operation_id can be used to poll for results later
```
"""
_exactly_one_must_be_provided(file=file, file_path=file_path)
_exactly_one_must_be_provided(
file=file, file_path=file_path, attachment=attachment
)

project_id = self._get_project_id_by_name(project_name, ProjectType.IXP)

document_id = self._start_digitization(
project_id=project_id,
file=file,
file_path=file_path,
project_id=project_id, file=file, file_path=file_path, attachment=attachment
)

return self._start_extraction(
Expand All @@ -971,9 +1064,12 @@ async def start_ixp_extraction_async(
tag: str,
file: Optional[FileContent] = None,
file_path: Optional[str] = None,
attachment: Optional[Attachment] = None,
) -> StartExtractionResponse:
"""Asynchronous version of the [`start_ixp_extraction`][uipath.platform.documents._documents_service.DocumentsService.start_ixp_extraction] method."""
_exactly_one_must_be_provided(file=file, file_path=file_path)
_exactly_one_must_be_provided(
file=file, file_path=file_path, attachment=attachment
)

project_id = await self._get_project_id_by_name_async(
project_name, ProjectType.IXP
Expand All @@ -983,6 +1079,7 @@ async def start_ixp_extraction_async(
project_id=project_id,
file=file,
file_path=file_path,
attachment=attachment,
)

return await self._start_extraction_async(
Expand Down
75 changes: 74 additions & 1 deletion tests/sdk/services/test_documents_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pytest_httpx import HTTPXMock

from uipath.platform import UiPathApiConfig, UiPathExecutionContext
from uipath.platform.attachments import Attachment
from uipath.platform.documents import (
ActionPriority,
ClassificationResult,
Expand Down Expand Up @@ -2078,6 +2079,78 @@ async def test_start_ixp_extraction(
assert response.project_id == project_id
assert response.tag == "staging"

@pytest.mark.parametrize("mode", ["sync", "async"])
@pytest.mark.asyncio
async def test_start_ixp_extraction_using_attachment(
    self,
    httpx_mock: HTTPXMock,
    service: DocumentsService,
    base_url: str,
    org: str,
    tenant: str,
    mode: str,
):
    """Start an IXP extraction from an attachment in sync and async modes.

    Mocks three endpoints in turn: the IXP project listing, the
    ``startFromJobAttachment`` digitization call, and the extraction start.
    It then checks that the response carries the ids the mocks returned.
    """
    # ARRANGE
    project_id = str(uuid4())
    document_id = str(uuid4())
    operation_id = str(uuid4())
    attachment = Attachment(
        ID=uuid4(),  # type: ignore
        FullName="alex.pdf",
        MimeType="application/pdf",
    )

    # Every mocked request must carry the same consumption-source header.
    expected_headers = {"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"}
    # The all-zero UUID is the folder id in the digitization body and the
    # document-type id in the extraction URL.
    zero_uuid = UUID(int=0)

    # 1) Project lookup by name.
    httpx_mock.add_response(
        url=f"{base_url}{org}{tenant}/du_/api/framework/projects?api-version=1.1&type=IXP",
        status_code=200,
        match_headers=expected_headers,
        json={
            "projects": [
                {"id": project_id, "name": "TestProjectIXP"},
            ]
        },
    )

    # 2) Digitization started from the attachment.
    expected_digitization_body = {
        "attachmentId": str(attachment.id),
        "fileName": attachment.full_name,
        "mimeType": attachment.mime_type,
        "folderId": str(zero_uuid),
    }
    httpx_mock.add_response(
        url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment?api-version=1.1",
        status_code=200,
        match_headers=expected_headers,
        match_json=expected_digitization_body,
        json={"documentId": document_id},
    )

    # 3) Extraction start for the digitized document.
    httpx_mock.add_response(
        method="POST",
        url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/staging/document-types/{zero_uuid}/extraction/start?api-version=1.1",
        status_code=200,
        match_headers=expected_headers,
        match_json={"documentId": document_id},
        json={"operationId": operation_id},
    )

    # ACT
    if mode != "async":
        response = service.start_ixp_extraction(
            project_name="TestProjectIXP",
            tag="staging",
            attachment=attachment,
        )
    else:
        response = await service.start_ixp_extraction_async(
            project_name="TestProjectIXP",
            tag="staging",
            attachment=attachment,
        )

    # ASSERT
    assert response.operation_id == operation_id
    assert response.document_id == document_id
    assert response.project_id == project_id
    assert response.tag == "staging"

@pytest.mark.parametrize("mode", ["sync", "async"])
@pytest.mark.asyncio
async def test_start_ixp_extraction_invalid_parameters(
Expand All @@ -2088,7 +2161,7 @@ async def test_start_ixp_extraction_invalid_parameters(
# ACT & ASSERT
with pytest.raises(
ValueError,
match="Exactly one of `file, file_path` must be provided",
match="Exactly one of `file, file_path, attachment` must be provided",
):
if mode == "async":
await service.start_ixp_extraction_async(
Expand Down
Loading