Skip to content

Commit d9bd0ae

Browse files
committed
gt: Fix UTF-8 console output cursor position calculation
The fix changes the `hb_cdpUTF8CharWidth` function in `cdpapi.c` to check the GT driver's `fWideCharWidth` flag. When the flag is enabled, the function uses `mk_wcwidth` from the public-domain implementation for accurate Unicode TR11 width calculation. When it is disabled, the function returns the default width of 1 for backward compatibility.
- Add HB_GTI_WIDECHARWIDTH switch, disabled by default for zero overhead
- Only activate width calculation when user calls hb_gtInfo(HB_GTI_WIDECHARWIDTH,.T.)
- Based on public-domain mk_wcwidth; supports narrow(1)/wide(2)/zero(0)
- 100% backward compatible, no binary bloat
- Fixes cursor mis-alignment in UTF-8 terminals with CJK/Emoji
Usage:
hb_cdpSelect("UTF8EX")
hb_gtInfo(HB_GTI_WIDECHARWIDTH,.T.) && enable
1 parent 8a87153 commit d9bd0ae

File tree

8 files changed

+65
-103
lines changed

8 files changed

+65
-103
lines changed

include/hbgtcore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ typedef struct _HB_GT_BASE
297297
HB_BOOL fBlinking;
298298
HB_BOOL fStdOutCon;
299299
HB_BOOL fStdErrCon;
300+
HB_BOOL fWideCharWidth; /* Enable Unicode wide character width calculation for UTF-8 */
300301
int iCursorShape;
301302
int iDispCount;
302303
int iExtCount;

include/hbgtinfo.ch

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@
154154
#define HB_GTI_SYSMENUADD 77 /* Add item to window system menu with keycode to generate when selected (supported by: GTWVT) */
155155
#define HB_GTI_MSGBOX 78 /* Display native MessageBox (supported by: GTQTC) */
156156
#define HB_GTI_SOUND 79 /* play sound file (supported by: GTQTC) */
157+
#define HB_GTI_WIDECHARWIDTH 80 /* Enable/Disable Unicode wide character width calculation (for UTF-8) */
157158

158159
/* Font weights */
159160
#define HB_GTI_FONTW_THIN 1

src/codepage/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ C_SOURCES := \
55
cp_big5.c \
66
cp_gbk.c \
77
cp_utf8.c \
8+
mk_wcwidth.c \
89
cp_u16le.c \
910
cpbg866.c \
1011
cpbgiso.c \

src/codepage/cp_utf8.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848

4949
#include "hbapi.h"
5050
#include "hbapicdp.h"
51+
#include "mk_wcwidth.h"
5152

5253
#include "uc16def.c"
5354

@@ -87,7 +88,6 @@ static HB_CDP_LEN_FUNC( UTF8_len )
8788
{
8889
HB_SYMBOL_UNUSED( cdp );
8990

90-
/* Return byte length (1-6 bytes) for UTF-8 encoding */
9191
return hb_cdpUTF8CharSize( wc );
9292
}
9393

src/codepage/mk_wcwidth.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
/*
22
* mk_wcwidth.c
33
*
4-
* Copyright (C) 2020 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>>
4+
* Copyright (C) 2001 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
55
*
66
* This software is placed in the public domain.
77
*
8-
* This file is part of the mk_wcwidth() Unicode width calculation function.
8+
* Historical reference: https://www.postgresql.org/message-id/attachment/8417/pg_mb_utf8.c
99
*
10-
* Original source: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
10+
* Original source: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
1111
*
1212
* Adapted for Harbour by Dongming Wang <wangdongming / at / gmail.com>
13-
*
13+
*
14+
* ANSI C89 compatible version for Harbour
15+
*
1416
* This is a simplified implementation of Unicode TR11 (East Asian Width).
1517
* It covers the most common Unicode character ranges.
1618
*
@@ -122,6 +124,12 @@ static const struct interval wide[] =
122124
* 0: Control characters, non-printing characters, combining characters
123125
* 1: Most characters (Latin, Cyrillic, Greek, Arabic, etc.)
124126
* 2: East Asian full-width characters (Chinese, Japanese, Korean)
127+
*
128+
* Notes:
129+
* - This function implements Unicode TR11 (East Asian Width)
130+
* - Ambiguous characters are treated as narrow (width 1)
131+
* - Private use area characters are treated as narrow (width 1)
132+
* - Unassigned characters are treated as narrow (width 1)
125133
*/
126134
int mk_wcwidth( wchar_t ucs )
127135
{
@@ -156,6 +164,10 @@ int mk_wcwidth( wchar_t ucs )
156164
* Returns:
157165
* Total width of the string in screen columns
158166
* -1 if the string contains a non-printable character
167+
*
168+
* Notes:
169+
* - This function processes the entire string until null terminator
170+
* - Returns -1 if any character has width 0 (non-printable)
159171
*/
160172
int mk_wcswidth( const wchar_t *pwcs )
161173
{
@@ -187,6 +199,10 @@ int mk_wcswidth( const wchar_t *pwcs )
187199
* Returns:
188200
* Total width of the substring in screen columns
189201
* -1 if the string contains a non-printable character
202+
*
203+
* Notes:
204+
* - This function processes at most n characters
205+
* - Returns -1 if any character has width 0 (non-printable)
190206
*/
191207
int mk_wcswidth_cjk( const wchar_t *pwcs, size_t n )
192208
{

src/codepage/mk_wcwidth.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,28 @@
11
/*
22
* mk_wcwidth.h
33
*
4-
* Copyright (C) 2020 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>>
4+
* Copyright (C) 2001 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
55
*
66
* This software is placed in the public domain.
77
*
8-
* This file is part of the mk_wcwidth() Unicode width calculation function.
8+
* Historical reference: https://www.postgresql.org/message-id/attachment/8417/pg_mb_utf8.c
99
*
10-
* Original source: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
10+
* Original source: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
1111
*
1212
* Adapted for Harbour by Dongming Wang <wangdongming / at / gmail.com>
13-
*
13+
*
14+
* ANSI C89 compatible version for Harbour
1415
*/
1516

1617
#ifndef MK_WCWIDTH_H
1718
#define MK_WCWIDTH_H
1819

19-
#include <wchar.h>
20+
/* Include Harbour type definitions */
21+
#include "hbdefs.h"
22+
23+
/* Type aliases: preserve original API */
24+
#define wchar_t HB_WCHAR
25+
#define size_t HB_SIZE
2026

2127
/* Combining character width */
2228
#define COMBINING_WIDTH 0

src/rtl/cdpapi.c

Lines changed: 22 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848
#include "hbapi.h"
4949
#include "hbapierr.h"
5050
#include "hbapicdp.h"
51+
#include "hbgtcore.h"
52+
#include "../codepage/mk_wcwidth.h"
5153
#include "hbthread.h"
5254

5355

@@ -3558,103 +3560,30 @@ const char ** hb_cdpList( void )
35583560

35593561
/* Calculate Unicode character display width (East Asian Width)
35603562
* Returns: 1 for narrow characters, 2 for wide characters
3563+
*
3564+
* This function checks if wide character width calculation is enabled
3565+
* via the fWideCharWidth flag in the GT driver. If enabled, it uses the
3566+
* mk_wcwidth() function from the public domain implementation for
3567+
* accurate Unicode TR11 width calculation. Otherwise, it returns 1
3568+
* for backward compatibility.
35613569
*/
35623570
int hb_cdpUTF8CharWidth( HB_WCHAR wc )
35633571
{
3564-
/* Narrow characters (width 1) */
3565-
if( wc < 0x1100 )
3566-
return 1;
3567-
3568-
/* Hangul Jamo (width 2) */
3569-
if( wc >= 0x1100 && wc <= 0x115F )
3570-
return 2;
3571-
3572-
/* Hangul Compatibility Jamo (width 2) */
3573-
if( wc >= 0x3130 && wc <= 0x318F )
3574-
return 2;
3575-
3576-
/* CJK Radicals Supplement (width 2) */
3577-
if( wc >= 0x2E80 && wc <= 0x2EFF )
3578-
return 2;
3579-
3580-
/* Kangxi Radicals (width 2) */
3581-
if( wc >= 0x2F00 && wc <= 0x2FDF )
3582-
return 2;
3583-
3584-
/* CJK Strokes (width 2) */
3585-
if( wc >= 0x31C0 && wc <= 0x31EF )
3586-
return 2;
3587-
3588-
/* CJK Symbols and Punctuation (width 2) */
3589-
if( wc >= 0x3000 && wc <= 0x303F )
3590-
return 2;
3591-
3592-
/* Hiragana (width 2) */
3593-
if( wc >= 0x3040 && wc <= 0x309F )
3594-
return 2;
3595-
3596-
/* Katakana (width 2) */
3597-
if( wc >= 0x30A0 && wc <= 0x30FF )
3598-
return 2;
3599-
3600-
/* Bopomofo (width 2) */
3601-
if( wc >= 0x3100 && wc <= 0x312F )
3602-
return 2;
3603-
3604-
/* Bopomofo Extended (width 2) */
3605-
if( wc >= 0x31A0 && wc <= 0x31BF )
3606-
return 2;
3607-
3608-
/* Enclosed CJK Letters and Months (width 2) */
3609-
if( wc >= 0x3200 && wc <= 0x32FF )
3610-
return 2;
3611-
3612-
/* CJK Compatibility (width 2) */
3613-
if( wc >= 0x3300 && wc <= 0x33FF )
3614-
return 2;
3615-
3616-
/* CJK Unified Ideographs Extension A (width 2) */
3617-
if( wc >= 0x3400 && wc <= 0x4DBF )
3618-
return 2;
3619-
3620-
/* CJK Unified Ideographs (width 2) */
3621-
if( wc >= 0x4E00 && wc <= 0x9FFF )
3622-
return 2;
3623-
3624-
/* Yi Syllables (width 2) */
3625-
if( wc >= 0xA000 && wc <= 0xA48F )
3626-
return 2;
3627-
3628-
/* Yi Radicals (width 2) */
3629-
if( wc >= 0xA490 && wc <= 0xA4CF )
3630-
return 2;
3631-
3632-
/* Hangul Syllables (width 2) */
3633-
if( wc >= 0xAC00 && wc <= 0xD7AF )
3634-
return 2;
3635-
3636-
/* CJK Compatibility Ideographs (width 2) */
3637-
if( wc >= 0xF900 && wc <= 0xFAFF )
3638-
return 2;
3639-
3640-
/* Halfwidth and Fullwidth Forms (width 1 for halfwidth, 2 for fullwidth) */
3641-
if( wc >= 0xFF00 && wc <= 0xFFEF )
3572+
PHB_GT pGT;
3573+
3574+
/* Check if wide character width calculation is enabled */
3575+
pGT = hb_gt_Base();
3576+
if( pGT )
36423577
{
3643-
/* Halfwidth characters (width 1) */
3644-
if( wc >= 0xFF61 && wc <= 0xFF9F )
3645-
return 1;
3646-
/* Fullwidth characters (width 2) */
3647-
return 2;
3578+
if( pGT->fWideCharWidth )
3579+
{
3580+
hb_gt_BaseFree( pGT );
3581+
/* Use mk_wcwidth for accurate width calculation */
3582+
return mk_wcwidth( (wchar_t)wc );
3583+
}
3584+
hb_gt_BaseFree( pGT );
36483585
}
3649-
3650-
/* Miscellaneous Symbols (width 2) */
3651-
if( wc >= 0x2600 && wc <= 0x26FF )
3652-
return 2;
3653-
3654-
/* Dingbats (width 2) */
3655-
if( wc >= 0x2700 && wc <= 0x27BF )
3656-
return 2;
3657-
3658-
/* Default: narrow character */
3586+
3587+
/* Default: narrow character (width 1) */
36593588
return 1;
36603589
}

src/rtl/hbgtcore.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ static void hb_gt_def_BaseInit( PHB_GT_BASE pGT )
107107
pGT->fBlinking = HB_TRUE;
108108
pGT->fStdOutCon = HB_FALSE;
109109
pGT->fStdErrCon = HB_FALSE;
110+
pGT->fWideCharWidth = HB_FALSE; /* Default: disable Unicode wide char width calc */
110111
pGT->iCursorShape = SC_NORMAL;
111112
pGT->iDispCount = 0;
112113
pGT->iExtCount = 0;
@@ -2017,6 +2018,13 @@ static HB_BOOL hb_gt_def_Info( PHB_GT pGT, int iType, PHB_GT_INFO pInfo )
20172018
HB_GTSELF_VERSION( pGT, hb_itemGetNI( pInfo->pNewVal ) ) );
20182019
break;
20192020

2021+
case HB_GTI_WIDECHARWIDTH:
2022+
/* Enable/Disable Unicode wide character width calculation */
2023+
pInfo->pResult = hb_itemPutL( pInfo->pResult, pGT->fWideCharWidth );
2024+
if( hb_itemType( pInfo->pNewVal ) & HB_IT_LOGICAL )
2025+
pGT->fWideCharWidth = hb_itemGetL( pInfo->pNewVal );
2026+
break;
2027+
20202028
default:
20212029
return HB_FALSE;
20222030
}

0 commit comments

Comments
 (0)