Skip to content

Commit e2ebb99

Browse files
committed
Merge branch 'main' into nightly
2 parents 5ed4a46 + 0f2fc19 commit e2ebb99

File tree

5 files changed

+13
-13
lines changed

5 files changed

+13
-13
lines changed

unsloth/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def get_device_count():
117117
cutlass = Path(xformers_location) / "ops" / "fmha" / "cutlass.py"
118118

119119
if cutlass.exists():
120-
with open(cutlass, "r+") as f:
120+
with open(cutlass, "r+", encoding = "utf-8") as f:
121121
text = f.read()
122122
# See https://github.com/facebookresearch/xformers/issues/1176#issuecomment-2545829591
123123
if "num_splits_key=-1," in text:

unsloth/dataprep/synthetic.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def chunk_data(self, filename = None):
230230
if not hasattr(self, "overlap") or not hasattr(self, "max_generation_tokens"):
231231
raise RuntimeError("Please use prepare_qa_generation first!")
232232

233-
with open(filename, "r") as f: text = f.read()
233+
with open(filename, "r", encoding = "utf-8") as f: text = f.read()
234234

235235
max_tokens = self.max_seq_length - self.max_generation_tokens*2 - 128 # -128 to reduce errors
236236
if max_tokens <= 5:
@@ -253,7 +253,7 @@ def chunk_data(self, filename = None):
253253
chunked_text = self.tokenizer.decode(input_ids[left : right])
254254
new_filename = f"{filename}_{i}{extension}"
255255
all_filenames.append(new_filename)
256-
with open(new_filename, "w") as f: f.write(chunked_text)
256+
with open(new_filename, "w", encoding = "utf-8") as f: f.write(chunked_text)
257257
pass
258258
return all_filenames
259259
pass
@@ -295,7 +295,7 @@ def prepare_qa_generation(
295295
.replace("{cleanup_batch_size}", str(cleanup_batch_size))\
296296
.replace("{cleanup_temperature}", str(cleanup_temperature))
297297

298-
with open("synthetic_data_kit_config.yaml", "w") as f: f.write(config)
298+
with open("synthetic_data_kit_config.yaml", "w", encoding = "utf-8") as f: f.write(config)
299299

300300
self.overlap = overlap
301301
pass

unsloth/models/llama.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1921,7 +1921,7 @@ def from_pretrained(
19211921

19221922
has_rope_scaling = False
19231923
try:
1924-
with open(inspect.getfile(model_function), "r") as file:
1924+
with open(inspect.getfile(model_function), "r", encoding = "utf-8") as file:
19251925
has_rope_scaling = "self.config.rope_scaling" in file.read()
19261926
except: pass
19271927
has_rope_scaling = True

unsloth/save.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,7 +1530,7 @@ def upload_to_huggingface(
15301530
# We also upload a config.json file
15311531
if create_config:
15321532
import json
1533-
with open("_temporary_unsloth_config.json", "w") as file:
1533+
with open("_temporary_unsloth_config.json", "w", encoding = "utf-8") as file:
15341534
json.dump({"model_type" : model.config.model_type}, file, indent = 4)
15351535
pass
15361536
hf_api.upload_file(
@@ -1708,7 +1708,7 @@ def push_to_ollama(
17081708
gguf_location=gguf_location
17091709
)
17101710

1711-
with open(f"Modelfile_{model_name}", "w") as f:
1711+
with open(f"Modelfile_{model_name}", "w", encoding = "utf-8") as f:
17121712
f.write(model_file)
17131713
f.close()
17141714

@@ -1872,7 +1872,7 @@ def unsloth_save_pretrained_gguf(
18721872
modelfile_location = None
18731873
if modelfile is not None:
18741874
modelfile_location = os.path.join(new_save_directory, "Modelfile")
1875-
with open(modelfile_location, "w") as file:
1875+
with open(modelfile_location, "w", encoding = "utf-8") as file:
18761876
file.write(modelfile)
18771877
pass
18781878
print(f"Unsloth: Saved Ollama Modelfile to {modelfile_location}")
@@ -2050,7 +2050,7 @@ def unsloth_push_to_hub_gguf(
20502050
modelfile_location = None
20512051
if modelfile is not None:
20522052
modelfile_location = os.path.join(new_save_directory, "Modelfile")
2053-
with open(modelfile_location, "w") as file:
2053+
with open(modelfile_location, "w", encoding = "utf-8") as file:
20542054
file.write(modelfile)
20552055
pass
20562056
print(f"Unsloth: Saved Ollama Modelfile to {modelfile_location}")

unsloth/tokenizer_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ def fix_sentencepiece_tokenizer(
360360
old_tokenizer.save_pretrained(temporary_location)
361361

362362
tokenizer_file = sentencepiece_model_pb2.ModelProto()
363-
tokenizer_file.ParseFromString(open(f"{temporary_location}/tokenizer.model", "rb").read())
363+
tokenizer_file.ParseFromString(open(f"{temporary_location}/tokenizer.model", "rb").read())
364364

365365
# Now save the new tokenizer
366366
new_tokenizer.save_pretrained(temporary_location)
@@ -385,7 +385,7 @@ def fix_sentencepiece_tokenizer(
385385
pass
386386

387387
# And now write it
388-
with open(f"{temporary_location}/tokenizer.model", "wb") as file:
388+
with open(f"{temporary_location}/tokenizer.model", "wb") as file:
389389
file.write(tokenizer_file.SerializeToString())
390390
pass
391391

@@ -423,7 +423,7 @@ class SentencePieceTokenTypes(IntEnum):
423423
# Load tokenizer.model
424424
tokenizer_file = sentencepiece_model_pb2.ModelProto()
425425
if not os.path.isfile(f"{saved_location}/tokenizer.model"): return
426-
tokenizer_file.ParseFromString(open(f"{saved_location}/tokenizer.model", "rb").read())
426+
tokenizer_file.ParseFromString(open(f"{saved_location}/tokenizer.model", "rb").read())
427427
sentence_piece_size = len(tokenizer_file.pieces)
428428

429429
# Load added_tokens_json
@@ -457,7 +457,7 @@ class SentencePieceTokenTypes(IntEnum):
457457

458458
tokenizer_file.pieces.extend(new_tokens)
459459

460-
with open(f"{saved_location}/tokenizer.model", "wb") as file:
460+
with open(f"{saved_location}/tokenizer.model", "wb") as file:
461461
file.write(tokenizer_file.SerializeToString())
462462
pass
463463

0 commit comments

Comments
 (0)