Skip to content

Commit a1417ae

Browse files
authored
test: enable mypy session for documentai-toolbox (#16690)
This PR enables the mypy session in noxfile.py for documentai-toolbox and aligns it with the GAPIC generator template.
1 parent: 192ccc5 · commit: a1417ae

10 files changed

Lines changed: 94 additions & 56 deletions

File tree

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,14 @@
2525
from .utilities import docai_utilities, gcs_utilities
2626
from .wrappers import document, entity, page
2727

28-
__all__ = (document, page, entity, converter, docai_utilities, gcs_utilities)
28+
__all__ = (
29+
"document",
30+
"page",
31+
"entity",
32+
"converter",
33+
"docai_utilities",
34+
"gcs_utilities",
35+
)
2936

3037

3138
class Python37DeprecationWarning(DeprecationWarning): # pragma: NO COVER

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515
#
1616

17+
1718
from typing import Callable, List, Optional
1819

1920
from intervaltree import intervaltree
@@ -190,16 +191,21 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly:
190191
y_multiplier = 1.0
191192
normalized_vertices: List[documentai.NormalizedVertex] = []
192193

193-
if block.page_width and block.page_height:
194+
if (
195+
block.page_width
196+
and block.page_height
197+
and block.docproto_width is not None
198+
and block.docproto_height is not None
199+
):
194200
x_multiplier = _get_multiplier(
195201
docproto_coordinate=block.docproto_width,
196202
external_coordinate=block.page_width,
197-
input_bbox_units=block.bounding_unit,
203+
input_bbox_units=block.bounding_unit or "normalized",
198204
)
199205
y_multiplier = _get_multiplier(
200206
docproto_coordinate=block.docproto_height,
201207
external_coordinate=block.page_height,
202-
input_bbox_units=block.bounding_unit,
208+
input_bbox_units=block.bounding_unit or "normalized",
203209
)
204210

205211
if block.bounding_type == "1":
@@ -208,13 +214,13 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly:
208214
for coordinate in block.bounding_box:
209215
x = _convert_bbox_units(
210216
coordinate[f"{block.bounding_x}"],
211-
input_bbox_units=block.bounding_unit,
217+
input_bbox_units=block.bounding_unit or "normalized",
212218
width=block.docproto_width,
213219
multiplier=x_multiplier,
214220
)
215221
y = _convert_bbox_units(
216222
coordinate[f"{block.bounding_y}"],
217-
input_bbox_units=block.bounding_unit,
223+
input_bbox_units=block.bounding_unit or "normalized",
218224
height=block.docproto_height,
219225
multiplier=y_multiplier,
220226
)
@@ -224,18 +230,24 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly:
224230
elif block.bounding_type == "2":
225231
# Type 2 : bounding box has 1 (x,y) coordinates for the top left corner
226232
# and (width, height)
233+
if not isinstance(block.bounding_box, dict):
234+
raise TypeError("Expected dict for bounding_box in Type 2")
227235
x_min = _convert_bbox_units(
228236
block.bounding_box[f"{block.bounding_x}"],
229-
input_bbox_units=block.bounding_unit,
237+
input_bbox_units=block.bounding_unit or "normalized",
230238
width=block.page_width,
231239
multiplier=x_multiplier,
232240
)
233241
y_min = _convert_bbox_units(
234242
block.bounding_box[f"{block.bounding_y}"],
235-
input_bbox_units=block.bounding_unit,
243+
input_bbox_units=block.bounding_unit or "normalized",
236244
width=block.page_height,
237245
multiplier=y_multiplier,
238246
)
247+
if block.bounding_width is None or block.bounding_height is None:
248+
raise ValueError(
249+
"bounding_width and bounding_height must be set for Type 2"
250+
)
239251
x_max = x_min + block.bounding_width
240252
y_max = y_min + block.bounding_height
241253
normalized_vertices.extend(
@@ -249,16 +261,18 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly:
249261

250262
elif block.bounding_type == "3":
251263
# Type 3 : bounding_box: [x1, y1, x2, y2, x3, y3, x4, y4]
264+
if not isinstance(block.bounding_box, list):
265+
raise TypeError("Expected list for bounding_box in Type 3")
252266
for idx in range(0, len(block.bounding_box), 2):
253267
x = _convert_bbox_units(
254268
block.bounding_box[idx],
255-
input_bbox_units=block.bounding_unit,
269+
input_bbox_units=block.bounding_unit or "normalized",
256270
width=block.docproto_width,
257271
multiplier=x_multiplier,
258272
)
259273
y = _convert_bbox_units(
260274
block.bounding_box[idx + 1],
261-
input_bbox_units=block.bounding_unit,
275+
input_bbox_units=block.bounding_unit or "normalized",
262276
width=block.docproto_height,
263277
multiplier=y_multiplier,
264278
)

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py

Lines changed: 23 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,12 @@
1717
import dataclasses
1818
import json
1919
from types import SimpleNamespace
20-
from typing import List, Optional, Type
20+
from typing import Any, List, Optional, Type, Union
2121

2222
from google.cloud import documentai
2323

2424

25-
def _get_target_object(json_data: any, target_object: str) -> Optional[SimpleNamespace]:
25+
def _get_target_object(json_data: Any, target_object: str) -> Any:
2626
r"""Returns SimpleNamespace of target_object.
2727
2828
Args:
@@ -72,45 +72,39 @@ class Block:
7272
page_number:
7373
Optional.
7474
"""
75-
type_: SimpleNamespace = dataclasses.field(init=True, repr=False)
76-
text: SimpleNamespace = dataclasses.field(init=True, repr=False)
77-
bounding_box: Optional[SimpleNamespace] = dataclasses.field(
75+
type_: Any = dataclasses.field(init=True, repr=False)
76+
text: str = dataclasses.field(init=True, repr=False)
77+
bounding_box: Optional[Union[SimpleNamespace, List[Any]]] = dataclasses.field(
7878
init=True, repr=False, default=None
7979
)
80-
block_references: Optional[SimpleNamespace] = dataclasses.field(
81-
init=True, repr=False, default=None
82-
)
83-
block_id: Optional[SimpleNamespace] = dataclasses.field(
84-
init=False, repr=False, default=None
85-
)
86-
confidence: Optional[SimpleNamespace] = dataclasses.field(
87-
init=False, repr=False, default=None
88-
)
89-
page_number: Optional[SimpleNamespace] = dataclasses.field(
80+
block_references: Any = dataclasses.field(init=True, repr=False, default=None)
81+
block_id: Optional[str] = dataclasses.field(init=False, repr=False, default=None)
82+
confidence: Optional[float] = dataclasses.field(
9083
init=False, repr=False, default=None
9184
)
92-
page_width: Optional[SimpleNamespace] = dataclasses.field(
85+
page_number: Optional[int] = dataclasses.field(init=False, repr=False, default=None)
86+
page_width: Optional[float] = dataclasses.field(
9387
init=False, repr=False, default=None
9488
)
95-
page_height: Optional[SimpleNamespace] = dataclasses.field(
89+
page_height: Optional[float] = dataclasses.field(
9690
init=False, repr=False, default=None
9791
)
98-
bounding_width: Optional[SimpleNamespace] = dataclasses.field(
92+
bounding_width: Optional[float] = dataclasses.field(
9993
init=False, repr=False, default=None
10094
)
101-
bounding_height: Optional[SimpleNamespace] = dataclasses.field(
95+
bounding_height: Optional[float] = dataclasses.field(
10296
init=False, repr=False, default=None
10397
)
104-
bounding_type: Optional[SimpleNamespace] = dataclasses.field(
98+
bounding_type: Optional[str] = dataclasses.field(
10599
init=False, repr=False, default=None
106100
)
107-
bounding_unit: Optional[SimpleNamespace] = dataclasses.field(
101+
bounding_unit: Optional[str] = dataclasses.field(
108102
init=False, repr=False, default=None
109103
)
110-
bounding_x: Optional[SimpleNamespace] = dataclasses.field(
104+
bounding_x: Optional[float] = dataclasses.field(
111105
init=False, repr=False, default=None
112106
)
113-
bounding_y: Optional[SimpleNamespace] = dataclasses.field(
107+
bounding_y: Optional[float] = dataclasses.field(
114108
init=False, repr=False, default=None
115109
)
116110
docproto_width: Optional[float] = dataclasses.field(
@@ -180,6 +174,8 @@ def load_blocks_from_schema(
180174

181175
blocks: List[Block] = []
182176
ens = _get_target_object(objects, entities)
177+
if not isinstance(ens, (list, dict)):
178+
raise TypeError("Expected list or dict for entities")
183179
for i in ens:
184180
entity = i
185181

@@ -203,11 +199,13 @@ def load_blocks_from_schema(
203199
b = Block(
204200
type_=block_type,
205201
text=block_text,
206-
bounding_box=_get_target_object(entity, normalized_vertices),
202+
bounding_box=_get_target_object(entity, normalized_vertices)
203+
if normalized_vertices is not None
204+
else None,
207205
)
208206

209207
if id_:
210-
b.id_ = _get_target_object(entity, id_)
208+
b.block_id = _get_target_object(entity, id_)
211209
if confidence:
212210
b.confidence = _get_target_object(entity, confidence)
213211
if page_number and page_number in entity:

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/converter.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -424,7 +424,7 @@ def convert_from_config(
424424

425425
print("-------- Converting Started --------")
426426
files, labels, did_not_convert = _get_docproto_files(
427-
futures_list, project_id, location, processor_id
427+
list(futures_list), project_id, location, processor_id
428428
)
429429

430430
print("-------- Finished Converting --------")

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/vision_helpers.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -243,17 +243,17 @@ def _generate_entity_annotations(
243243
"""
244244
entity_annotations: List[EntityAnnotation] = []
245245
for token in page_info.page.tokens:
246-
v: vision.Vertex = []
246+
v: list[vision.Vertex] = []
247247
if token.layout.bounding_poly.vertices:
248248
for vertex in token.layout.bounding_poly.vertices:
249-
v.append({"x": int(vertex.x), "y": int(vertex.y)})
249+
v.append(vision.Vertex(x=int(vertex.x), y=int(vertex.y)))
250250
else:
251251
for normalized_vertex in token.layout.bounding_poly.normalized_vertices:
252252
v.append(
253-
{
254-
"x": int(normalized_vertex.x * page_info.page.dimension.width),
255-
"y": int(normalized_vertex.y * page_info.page.dimension.height),
256-
}
253+
vision.Vertex(
254+
x=int(normalized_vertex.x * page_info.page.dimension.width),
255+
y=int(normalized_vertex.y * page_info.page.dimension.height),
256+
)
257257
)
258258

259259
text_start_index = token.layout.text_anchor.text_segments[0].start_index

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,9 @@
2020

2121
from google.api_core.gapic_v1 import client_info
2222

23-
from google.cloud import documentai, documentai_toolbox, storage
23+
from google.cloud import documentai # type: ignore[attr-defined]
24+
from google.cloud import documentai_toolbox
25+
from google.cloud import storage # type: ignore[attr-defined]
2426
from google.cloud.documentai_toolbox import constants
2527

2628

@@ -91,6 +93,8 @@ def get_blobs(
9193
if gcs_uri:
9294
gcs_bucket_name, gcs_prefix = split_gcs_uri(gcs_uri)
9395

96+
if gcs_prefix is None:
97+
raise TypeError("gcs_prefix cannot be None")
9498
if re.match(constants.FILE_CHECK_REGEX, gcs_prefix):
9599
raise ValueError("gcs_prefix cannot contain file types")
96100

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/document.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
import glob
2323
import os
2424
import re
25-
from typing import Dict, Iterator, List, Optional, Type, Union
25+
from typing import Any, Dict, Iterable, Iterator, List, Optional, Type, Union
2626

2727
from google.api_core.client_options import ClientOptions
2828
from google.api_core.operation import from_gapic as operation_from_gapic
@@ -51,7 +51,7 @@ def _document_layout_blocks_from_shards(
5151
shards: List[documentai.Document],
5252
) -> Iterator[documentai.Document.DocumentLayout.DocumentLayoutBlock]:
5353
def extract_blocks(
54-
blocks: List[documentai.Document.DocumentLayout.DocumentLayoutBlock],
54+
blocks: Iterable[documentai.Document.DocumentLayout.DocumentLayoutBlock],
5555
) -> Iterator[documentai.Document.DocumentLayout.DocumentLayoutBlock]:
5656
queue = collections.deque(blocks)
5757

@@ -325,8 +325,9 @@ def _dict_to_bigquery(
325325
bq_client = bigquery.Client(
326326
project=project_id, client_info=gcs_utilities._get_client_info()
327327
)
328+
resolved_project_id = project_id or bq_client.project
328329
table_ref = bigquery.DatasetReference(
329-
project=project_id, dataset_id=dataset_name
330+
project=resolved_project_id, dataset_id=dataset_name
330331
).table(table_name)
331332

332333
job_config = bigquery.LoadJobConfig(
@@ -345,7 +346,7 @@ def _dict_to_bigquery(
345346

346347

347348
def _apply_text_offset(
348-
documentai_object: Union[Dict[str, Dict], List], text_offset: int
349+
documentai_object: Union[Dict[str, Any], List[Any]], text_offset: int
349350
) -> None:
350351
r"""Applies a text offset to all text_segments in `documentai_object`.
351352

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,10 @@ class Entity:
6363

6464
_image: Optional[Image.Image] = dataclasses.field(init=False, default=None)
6565

66-
def __post_init__(self, page_offset: int) -> None:
66+
def __post_init__(self, page_offset: Optional[int]) -> None:
67+
if page_offset is None:
68+
page_offset = 0
69+
6770
self.type_ = self.documentai_object.type_
6871

6972
if self.documentai_object.mention_text:

packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,16 @@
1818
from abc import ABC
1919
import dataclasses
2020
from functools import cached_property
21-
from typing import Iterable, List, Optional, Type
21+
from typing import Iterable, List, Optional, Type, TypeVar
2222

2323
import pandas as pd
2424

2525
from google.cloud import documentai
2626
from google.cloud.documentai_toolbox.constants import ElementWithLayout
2727
from google.cloud.documentai_toolbox.utilities import docai_utilities
2828

29+
T = TypeVar("T", bound="_BasePageElement")
30+
2931

3032
@dataclasses.dataclass
3133
class Table:
@@ -180,9 +182,7 @@ def _text_segment(self) -> documentai.Document.TextAnchor.TextSegment:
180182
"""
181183
return self.documentai_object.layout.text_anchor.text_segments[0]
182184

183-
def _get_children_of_element(
184-
self, potential_children: List["_BasePageElement"]
185-
) -> List["_BasePageElement"]:
185+
def _get_children_of_element(self, potential_children: List[T]) -> List[T]:
186186
"""
187187
Filters potential child elements to identify only those fully contained within this element.
188188

packages/google-cloud-documentai-toolbox/noxfile.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -480,10 +480,21 @@ def prerelease_deps(session, protobuf_implementation):
480480
@nox.session(python=DEFAULT_PYTHON_VERSION)
481481
def mypy(session):
482482
"""Run the type checker."""
483-
484-
# TODO(https://github.com/googleapis/google-cloud-python/issues/16014):
485-
# Enable mypy once this bug is fixed.
486-
session.skip("Temporarily skip mypy. See issue 16014")
483+
session.install(
484+
"mypy<1.16.0",
485+
"types-requests",
486+
"types-protobuf",
487+
"pandas-stubs",
488+
)
489+
session.install("-e", ".")
490+
session.run(
491+
"mypy",
492+
"-p",
493+
"google.cloud.documentai_toolbox",
494+
"--check-untyped-defs",
495+
"--ignore-missing-imports",
496+
*session.posargs,
497+
)
487498

488499

489500
@nox.session(python=DEFAULT_PYTHON_VERSION)

0 commit comments

Comments
 (0)