Skip to content

Commit 36cab9d

Browse files
authored
Merge pull request #26 from full-stack-deep-learning/lab9
Lab 9
2 parents 7c5ef42 + 45326bb commit 36cab9d

File tree

88 files changed

+18702
-104
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

88 files changed

+18702
-104
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,3 +45,4 @@ logs
4545
.mypy_cache
4646
notebooks/lightning_logs
4747
lightning_logs/
48+
lab9/requirements.txt

lab1/text_recognizer/util.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
11
"""Utility functions for text_recognizer module."""
2+
from io import BytesIO
23
from pathlib import Path
34
from typing import Union
45
from urllib.request import urlretrieve
5-
6-
# import base64
6+
import base64
77
import hashlib
88

99
from PIL import Image
1010
from tqdm import tqdm
1111
import numpy as np
12+
import smart_open
1213

1314

1415
def to_categorical(y, num_classes):
@@ -17,7 +18,12 @@ def to_categorical(y, num_classes):
1718

1819

1920
def read_image_pil(image_uri: Union[Path, str], grayscale=False) -> Image:
20-
with Image.open(image_uri) as image:
21+
with smart_open.open(image_uri, "rb") as image_file:
22+
return read_image_pil_file(image_file, grayscale)
23+
24+
25+
def read_image_pil_file(image_file, grayscale=False) -> Image:
26+
with Image.open(image_file) as image:
2127
if grayscale:
2228
image = image.convert(mode="L")
2329
else:

lab2/text_recognizer/util.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
11
"""Utility functions for text_recognizer module."""
2+
from io import BytesIO
23
from pathlib import Path
34
from typing import Union
45
from urllib.request import urlretrieve
5-
6-
# import base64
6+
import base64
77
import hashlib
88

99
from PIL import Image
1010
from tqdm import tqdm
1111
import numpy as np
12+
import smart_open
1213

1314

1415
def to_categorical(y, num_classes):
@@ -17,7 +18,12 @@ def to_categorical(y, num_classes):
1718

1819

1920
def read_image_pil(image_uri: Union[Path, str], grayscale=False) -> Image:
20-
with Image.open(image_uri) as image:
21+
with smart_open.open(image_uri, "rb") as image_file:
22+
return read_image_pil_file(image_file, grayscale)
23+
24+
25+
def read_image_pil_file(image_file, grayscale=False) -> Image:
26+
with Image.open(image_file) as image:
2127
if grayscale:
2228
image = image.convert(mode="L")
2329
else:

lab3/text_recognizer/util.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
11
"""Utility functions for text_recognizer module."""
2+
from io import BytesIO
23
from pathlib import Path
34
from typing import Union
45
from urllib.request import urlretrieve
5-
6-
# import base64
6+
import base64
77
import hashlib
88

99
from PIL import Image
1010
from tqdm import tqdm
1111
import numpy as np
12+
import smart_open
1213

1314

1415
def to_categorical(y, num_classes):
@@ -17,7 +18,12 @@ def to_categorical(y, num_classes):
1718

1819

1920
def read_image_pil(image_uri: Union[Path, str], grayscale=False) -> Image:
20-
with Image.open(image_uri) as image:
21+
with smart_open.open(image_uri, "rb") as image_file:
22+
return read_image_pil_file(image_file, grayscale)
23+
24+
25+
def read_image_pil_file(image_file, grayscale=False) -> Image:
26+
with Image.open(image_file) as image:
2127
if grayscale:
2228
image = image.convert(mode="L")
2329
else:

lab4/text_recognizer/lit_models/transformer.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,9 @@
11
import torch.nn as nn
2-
import wandb
2+
try:
3+
import wandb
4+
except ModuleNotFoundError:
5+
pass
6+
37

48
from .metrics import CharacterErrorRate
59
from .base import BaseLitModel

lab4/text_recognizer/util.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
11
"""Utility functions for text_recognizer module."""
2+
from io import BytesIO
23
from pathlib import Path
34
from typing import Union
45
from urllib.request import urlretrieve
5-
6-
# import base64
6+
import base64
77
import hashlib
88

99
from PIL import Image
1010
from tqdm import tqdm
1111
import numpy as np
12+
import smart_open
1213

1314

1415
def to_categorical(y, num_classes):
@@ -17,7 +18,12 @@ def to_categorical(y, num_classes):
1718

1819

1920
def read_image_pil(image_uri: Union[Path, str], grayscale=False) -> Image:
20-
with Image.open(image_uri) as image:
21+
with smart_open.open(image_uri, "rb") as image_file:
22+
return read_image_pil_file(image_file, grayscale)
23+
24+
25+
def read_image_pil_file(image_file, grayscale=False) -> Image:
26+
with Image.open(image_file) as image:
2127
if grayscale:
2228
image = image.convert(mode="L")
2329
else:

lab5/text_recognizer/lit_models/transformer.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,9 @@
11
import torch.nn as nn
2-
import wandb
2+
try:
3+
import wandb
4+
except ModuleNotFoundError:
5+
pass
6+
37

48
from .metrics import CharacterErrorRate
59
from .base import BaseLitModel

lab5/text_recognizer/util.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
11
"""Utility functions for text_recognizer module."""
2+
from io import BytesIO
23
from pathlib import Path
34
from typing import Union
45
from urllib.request import urlretrieve
5-
6-
# import base64
6+
import base64
77
import hashlib
88

99
from PIL import Image
1010
from tqdm import tqdm
1111
import numpy as np
12+
import smart_open
1213

1314

1415
def to_categorical(y, num_classes):
@@ -17,7 +18,12 @@ def to_categorical(y, num_classes):
1718

1819

1920
def read_image_pil(image_uri: Union[Path, str], grayscale=False) -> Image:
20-
with Image.open(image_uri) as image:
21+
with smart_open.open(image_uri, "rb") as image_file:
22+
return read_image_pil_file(image_file, grayscale)
23+
24+
25+
def read_image_pil_file(image_file, grayscale=False) -> Image:
26+
with Image.open(image_file) as image:
2127
if grayscale:
2228
image = image.convert(mode="L")
2329
else:

lab7/text_recognizer/lit_models/transformer.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,9 @@
11
import torch.nn as nn
2-
import wandb
2+
try:
3+
import wandb
4+
except ModuleNotFoundError:
5+
pass
6+
37

48
from .metrics import CharacterErrorRate
59
from .base import BaseLitModel

lab7/text_recognizer/models/resnet_transformer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ def predict(self, x: torch.Tensor) -> torch.Tensor:
173173
y = output_tokens[:, :Sy] # (B, Sy)
174174
output = self.decode(x, y) # (Sy, B, C)
175175
output = torch.argmax(output, dim=-1) # (Sy, B)
176-
output_tokens[:, Sy] = output[-1:] # Set the last output token
176+
output_tokens[:, Sy : Sy + 1] = output[-1:] # Set the last output token
177177

178178
# Early stopping of prediction loop to speed up prediction
179179
if ((output_tokens[:, Sy] == self.end_token) | (output_tokens[:, Sy] == self.padding_token)).all():

0 commit comments

Comments
 (0)