-
Notifications
You must be signed in to change notification settings - Fork 1.7k
feat: enable mypy session for documentai-toolbox #16690
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
cccd359
6abd528
cbbacda
20d9ae8
67eb298
3ca1496
c2c4e4e
1dea669
5c1fe11
94a5f3e
1a972f0
7126d34
f8312eb
bddf806
49a17b1
410c1ad
ff8695d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,12 +17,12 @@ | |
| import dataclasses | ||
| import json | ||
| from types import SimpleNamespace | ||
| from typing import List, Optional, Type | ||
| from typing import Any, List, Optional, Type, cast | ||
|
|
||
| from google.cloud import documentai | ||
|
|
||
|
|
||
| def _get_target_object(json_data: any, target_object: str) -> Optional[SimpleNamespace]: | ||
| def _get_target_object(json_data: Any, target_object: str) -> Any: | ||
| r"""Returns SimpleNamespace of target_object. | ||
|
|
||
| Args: | ||
|
|
@@ -72,47 +72,21 @@ class Block: | |
| page_number: | ||
| Optional. | ||
| """ | ||
| type_: SimpleNamespace = dataclasses.field(init=True, repr=False) | ||
| text: SimpleNamespace = dataclasses.field(init=True, repr=False) | ||
| bounding_box: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=True, repr=False, default=None | ||
| ) | ||
| block_references: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=True, repr=False, default=None | ||
| ) | ||
| block_id: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| confidence: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| page_number: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| page_width: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| page_height: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| bounding_width: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| bounding_height: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| bounding_type: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| bounding_unit: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| bounding_x: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| bounding_y: Optional[SimpleNamespace] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
| type_: Any = dataclasses.field(init=True, repr=False) | ||
| text: Any = dataclasses.field(init=True, repr=False) | ||
| bounding_box: Any = dataclasses.field(init=True, repr=False, default=None) | ||
| block_references: Any = dataclasses.field(init=True, repr=False, default=None) | ||
| block_id: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| confidence: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| page_number: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| page_width: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| page_height: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| bounding_width: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| bounding_height: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| bounding_type: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| bounding_unit: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| bounding_x: Any = dataclasses.field(init=False, repr=False, default=None) | ||
| bounding_y: Any = dataclasses.field(init=False, repr=False, default=None) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Many fields in the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment is OBE. Much like the above comment: Locally Gemini and I have gone round and round today trying to find the ideal collection of types so that mypy passes and so that unittests pass. What we have now is not necessarily ideal, but it is likely better than what we had and further churn may not be cost effective. |
||
| docproto_width: Optional[float] = dataclasses.field( | ||
| init=False, repr=False, default=None | ||
| ) | ||
|
|
@@ -180,7 +154,7 @@ def load_blocks_from_schema( | |
|
|
||
| blocks: List[Block] = [] | ||
| ens = _get_target_object(objects, entities) | ||
| for i in ens: | ||
| for i in cast(Any, ens): | ||
|
chalmerlowe marked this conversation as resolved.
Outdated
|
||
| entity = i | ||
|
|
||
| block_text = "" | ||
|
|
@@ -203,11 +177,13 @@ def load_blocks_from_schema( | |
| b = Block( | ||
| type_=block_type, | ||
| text=block_text, | ||
| bounding_box=_get_target_object(entity, normalized_vertices), | ||
| bounding_box=_get_target_object(entity, normalized_vertices) | ||
| if normalized_vertices is not None | ||
| else None, | ||
| ) | ||
|
|
||
| if id_: | ||
| b.id_ = _get_target_object(entity, id_) | ||
| b.block_id = _get_target_object(entity, id_) | ||
| if confidence: | ||
| b.confidence = _get_target_object(entity, confidence) | ||
| if page_number and page_number in entity: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,7 +20,11 @@ | |
|
|
||
| from google.api_core.gapic_v1 import client_info | ||
|
|
||
| from google.cloud import documentai, documentai_toolbox, storage | ||
| from google.cloud import ( # type: ignore[attr-defined] | ||
| documentai, | ||
| documentai_toolbox, | ||
| storage, | ||
| ) | ||
| from google.cloud.documentai_toolbox import constants | ||
|
|
||
|
|
||
|
|
@@ -91,6 +95,7 @@ def get_blobs( | |
| if gcs_uri: | ||
| gcs_bucket_name, gcs_prefix = split_gcs_uri(gcs_uri) | ||
|
|
||
| assert gcs_prefix is not None | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should avoid using
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @parthea fixed. |
||
| if re.match(constants.FILE_CHECK_REGEX, gcs_prefix): | ||
| raise ValueError("gcs_prefix cannot contain file types") | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The return type of
_get_target_objecthas been changed toAny, which significantly reduces type safety. Given that the docstring explicitly states it returns aSimpleNamespace, it would be better to useOptional[SimpleNamespace]or a more specific type if possible. If this is a temporary measure to resolve mypy errors, please consider adding a TODO to refine this type later.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Locally Gemini and I have gone round and round today trying to find the ideal collection of types so that mypy passes and so that unittests pass.
What we have now is not necessarily ideal, but it is likely better than what we had and further churn may not be cost effective.