-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathtranslator.py
More file actions
67 lines (55 loc) · 2.38 KB
/
translator.py
File metadata and controls
67 lines (55 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
import time
import requests
import streamlit as st
@st.cache_data(show_spinner=False)
def hcmus_translate(text):
    """Transliterate ``text`` through the HCMUS CLC Sino-Nom web API.

    The call is wrapped in ``st.cache_data``, so the returned value
    (including the fallback message) is memoised per distinct ``text``.

    Args:
        text: String sent as the ``text`` field of the JSON request body.

    Returns:
        The first element of ``data.result_text_transcription`` from the
        JSON response with surrounding whitespace stripped, or the literal
        ``'Cannot translate this text.'`` when the response body is not
        JSON or does not have that shape.

    Raises:
        requests.RequestException: If the HTTP request itself fails or
            exceeds the timeout.
    """
    url = 'https://tools.clc.hcmus.edu.vn/api/web/clc-sinonom/sinonom-transliteration'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0',
        'content-type': 'application/json',
    }
    # Without a timeout, requests waits forever on a stalled server and the
    # whole app run would hang with it.
    response = requests.request(
        'POST',
        url,
        headers=headers,
        data=json.dumps({'text': text}),
        timeout=30,
    )
    # Short pause after every request (presumably to go easy on the service).
    time.sleep(0.1)
    try:
        result = json.loads(response.text)['data']
        return result['result_text_transcription'][0].strip()
    except (ValueError, LookupError, TypeError, AttributeError):
        # ValueError: body is not JSON; LookupError: missing key / empty list;
        # TypeError / AttributeError: a field has an unexpected type.
        print(f'[ERR] "{text}": {response.text}')
        return 'Cannot translate this text.'
@st.cache_data(show_spinner=False)
def hvdic_translate(text):
    """Fetch per-token phonetics for ``text`` from the hvdic transcript API.

    Phonetics are requested for Hán Việt (``lang=1``) first. If that result
    contains an entry with ``t == 3`` and an empty ``o`` list, the text is
    treated as Nôm and the request is repeated with ``lang=3``.

    Args:
        text: String sent as the ``input`` form field.

    Returns:
        The ``result`` value of the last JSON response received (iterated
        by this module as a sequence of dicts with ``t`` and ``o`` keys), or
        an empty dict when that response could not be parsed.

    Raises:
        requests.RequestException: If an HTTP request fails or exceeds the
            timeout.
    """
    def is_nom_text(result):
        # True when any entry of type 3 came back without readings.
        return any(
            entry['t'] == 3 and len(entry['o']) <= 0
            for entry in result
        )

    url = 'https://hvdic.thivien.net/transcript-query.json.php'
    headers = { 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8' }

    # Request phonetics for Hán Việt (lang=1) first, if the response result is not
    # Hán Việt (contains blank lists) => Request phonetics for Nôm (lang=3)
    for lang in (1, 3):
        # Passing a dict lets requests percent-encode each field, so '&', '=',
        # '+' or '%' inside the text cannot corrupt the form body.
        form = {'mode': 'trans', 'lang': lang, 'input': text}
        response = requests.request(
            'POST', url, headers=headers, data=form, timeout=30,
        )
        # Short pause after every request (presumably to go easy on the service).
        time.sleep(0.1)

        try:
            result = json.loads(response.text)['result']
        except (ValueError, KeyError, TypeError):
            # Body is not JSON, or is JSON without a usable 'result' field.
            print(f'[ERR] {text}: {response.text}')
            result = {}

        if not is_nom_text(result):
            break
    return result
@st.cache_data(show_spinner=False)
def hvdic_render(text):
    """Render the hvdic phonetics of ``text`` as an HTML fragment.

    Each entry returned by ``hvdic_translate(text)`` becomes one piece:

    * type 3 with exactly one reading: the reading followed by a space;
    * type 3 with several readings: a ``<select>`` listing every reading,
      named after the first one;
    * anything else (other type, or no readings): ``'[UNK] '``.

    Readings come from a remote service, so they are HTML-escaped before
    being placed into the markup.

    Args:
        text: Source text forwarded to ``hvdic_translate``.

    Returns:
        The concatenated pieces with surrounding whitespace stripped, or a
        hint message when ``hvdic_translate`` yielded no entries.
    """
    # Local import: the module's import block is left untouched.
    from html import escape

    pieces = []
    for entry in hvdic_translate(text):
        # 'o' is only read for type-3 entries, as other types may not carry it.
        readings = entry['o'] if entry['t'] == 3 else []
        if not readings:
            pieces.append('[UNK] ')
        elif len(readings) == 1:
            pieces.append(escape(str(readings[0])) + ' ')
        else:
            options = ''.join(
                f'<option><p>{escape(str(reading))}</p></option>'
                for reading in readings
            )
            pieces.append(
                f'<select name="{escape(str(readings[0]))}">{options}</select>'
            )

    rendered = ''.join(pieces)
    if rendered:
        return rendered.strip()
    return 'No response from hvdic => You can clear caches (Press C) and reload.'