Skip to content

Commit 8331ff0

Browse files
committed
gt: Fix UTF-8 console output cursor position calculation
This fix removes the hard-coded `hb_cdpUTF8CharWidth` function from `cdpapi.c` and adds a static helper, `hb_gt_charDispWidth`, in `hbgtcore.c` that checks the GT driver's `fWideCharWidth` flag. When the flag is enabled, the helper uses the `mk_wcwidth` function from the public-domain implementation for accurate Unicode TR11 width calculation. When it is disabled, the helper returns the default width of 1 for backward compatibility.
- Add the HB_GTI_WIDECHARWIDTH switch, disabled by default for zero overhead
- Only activate width calculation when the user calls hb_gtInfo(HB_GTI_WIDECHARWIDTH,.T.)
- Based on public-domain mk_wcwidth; supports narrow(1)/wide(2)/zero(0) widths
- 100% backward compatible, no binary bloat
- Fixes cursor misalignment in UTF-8 terminals with CJK/Emoji
Usage:
hb_cdpSelect("UTF8EX")
hb_gtInfo(HB_GTI_WIDECHARWIDTH,.T.) && enable
1 parent 8a87153 commit 8331ff0

File tree

8 files changed

+67
-117
lines changed

8 files changed

+67
-117
lines changed

include/hbgtcore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ typedef struct _HB_GT_BASE
297297
HB_BOOL fBlinking;
298298
HB_BOOL fStdOutCon;
299299
HB_BOOL fStdErrCon;
300+
HB_BOOL fWideCharWidth; /* Enable Unicode wide character width calculation for UTF-8 */
300301
int iCursorShape;
301302
int iDispCount;
302303
int iExtCount;

include/hbgtinfo.ch

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@
154154
#define HB_GTI_SYSMENUADD 77 /* Add item to window system menu with keycode to generate when selected (supported by: GTWVT) */
155155
#define HB_GTI_MSGBOX 78 /* Display native MessageBox (supported by: GTQTC) */
156156
#define HB_GTI_SOUND 79 /* play sound file (supported by: GTQTC) */
157+
#define HB_GTI_WIDECHARWIDTH 80 /* Enable/Disable Unicode wide character width calculation (for UTF-8) */
157158

158159
/* Font weights */
159160
#define HB_GTI_FONTW_THIN 1

src/codepage/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ C_SOURCES := \
55
cp_big5.c \
66
cp_gbk.c \
77
cp_utf8.c \
8+
mk_wcwidth.c \
89
cp_u16le.c \
910
cpbg866.c \
1011
cpbgiso.c \

src/codepage/cp_utf8.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848

4949
#include "hbapi.h"
5050
#include "hbapicdp.h"
51+
#include "mk_wcwidth.h"
5152

5253
#include "uc16def.c"
5354

@@ -87,7 +88,6 @@ static HB_CDP_LEN_FUNC( UTF8_len )
8788
{
8889
HB_SYMBOL_UNUSED( cdp );
8990

90-
/* Return byte length (1-6 bytes) for UTF-8 encoding */
9191
return hb_cdpUTF8CharSize( wc );
9292
}
9393

src/codepage/mk_wcwidth.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
/*
22
* mk_wcwidth.c
33
*
4-
* Copyright (C) 2020 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>>
4+
* Copyright (C) 2001 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
55
*
66
* This software is placed in the public domain.
77
*
8-
* This file is part of the mk_wcwidth() Unicode width calculation function.
8+
* Historical reference: https://www.postgresql.org/message-id/attachment/8417/pg_mb_utf8.c
99
*
10-
* Original source: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
10+
* Original source: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
1111
*
1212
* Adapted for Harbour by Dongming Wang <wangdongming / at / gmail.com>
13-
*
13+
*
14+
* ANSI C89 compatible version for Harbour
15+
*
1416
* This is a simplified implementation of Unicode TR11 (East Asian Width).
1517
* It covers the most common Unicode character ranges.
1618
*
@@ -122,6 +124,12 @@ static const struct interval wide[] =
122124
* 0: Control characters, non-printing characters, combining characters
123125
* 1: Most characters (Latin, Cyrillic, Greek, Arabic, etc.)
124126
* 2: East Asian full-width characters (Chinese, Japanese, Korean)
127+
*
128+
* Notes:
129+
* - This function implements Unicode TR11 (East Asian Width)
130+
* - Ambiguous characters are treated as narrow (width 1)
131+
* - Private use area characters are treated as narrow (width 1)
132+
* - Unassigned characters are treated as narrow (width 1)
125133
*/
126134
int mk_wcwidth( wchar_t ucs )
127135
{
@@ -156,6 +164,10 @@ int mk_wcwidth( wchar_t ucs )
156164
* Returns:
157165
* Total width of the string in screen columns
158166
* -1 if the string contains a non-printable character
167+
*
168+
* Notes:
169+
* - This function processes the entire string until null terminator
170+
* - Returns -1 if any character has width 0 (non-printable)
159171
*/
160172
int mk_wcswidth( const wchar_t *pwcs )
161173
{
@@ -187,6 +199,10 @@ int mk_wcswidth( const wchar_t *pwcs )
187199
* Returns:
188200
* Total width of the substring in screen columns
189201
* -1 if the string contains a non-printable character
202+
*
203+
* Notes:
204+
* - This function processes at most n characters
205+
* - Returns -1 if any character has width 0 (non-printable)
190206
*/
191207
int mk_wcswidth_cjk( const wchar_t *pwcs, size_t n )
192208
{

src/codepage/mk_wcwidth.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,28 @@
11
/*
22
* mk_wcwidth.h
33
*
4-
* Copyright (C) 2020 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>>
4+
* Copyright (C) 2001 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
55
*
66
* This software is placed in the public domain.
77
*
8-
* This file is part of the mk_wcwidth() Unicode width calculation function.
8+
* Historical reference: https://www.postgresql.org/message-id/attachment/8417/pg_mb_utf8.c
99
*
10-
* Original source: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
10+
* Original source: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
1111
*
1212
* Adapted for Harbour by Dongming Wang <wangdongming / at / gmail.com>
13-
*
13+
*
14+
* ANSI C89 compatible version for Harbour
1415
*/
1516

1617
#ifndef MK_WCWIDTH_H
1718
#define MK_WCWIDTH_H
1819

19-
#include <wchar.h>
20+
/* Include Harbour type definitions */
21+
#include "hbdefs.h"
22+
23+
/* Type aliases: preserve original API */
24+
#define wchar_t HB_WCHAR
25+
#define size_t HB_SIZE
2026

2127
/* Combining character width */
2228
#define COMBINING_WIDTH 0

src/rtl/cdpapi.c

Lines changed: 0 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -3555,106 +3555,3 @@ const char ** hb_cdpList( void )
35553555

35563556
return list;
35573557
}
3558-
3559-
/* Calculate Unicode character display width (East Asian Width)
3560-
* Returns: 1 for narrow characters, 2 for wide characters
3561-
*/
3562-
int hb_cdpUTF8CharWidth( HB_WCHAR wc )
3563-
{
3564-
/* Narrow characters (width 1) */
3565-
if( wc < 0x1100 )
3566-
return 1;
3567-
3568-
/* Hangul Jamo (width 2) */
3569-
if( wc >= 0x1100 && wc <= 0x115F )
3570-
return 2;
3571-
3572-
/* Hangul Compatibility Jamo (width 2) */
3573-
if( wc >= 0x3130 && wc <= 0x318F )
3574-
return 2;
3575-
3576-
/* CJK Radicals Supplement (width 2) */
3577-
if( wc >= 0x2E80 && wc <= 0x2EFF )
3578-
return 2;
3579-
3580-
/* Kangxi Radicals (width 2) */
3581-
if( wc >= 0x2F00 && wc <= 0x2FDF )
3582-
return 2;
3583-
3584-
/* CJK Strokes (width 2) */
3585-
if( wc >= 0x31C0 && wc <= 0x31EF )
3586-
return 2;
3587-
3588-
/* CJK Symbols and Punctuation (width 2) */
3589-
if( wc >= 0x3000 && wc <= 0x303F )
3590-
return 2;
3591-
3592-
/* Hiragana (width 2) */
3593-
if( wc >= 0x3040 && wc <= 0x309F )
3594-
return 2;
3595-
3596-
/* Katakana (width 2) */
3597-
if( wc >= 0x30A0 && wc <= 0x30FF )
3598-
return 2;
3599-
3600-
/* Bopomofo (width 2) */
3601-
if( wc >= 0x3100 && wc <= 0x312F )
3602-
return 2;
3603-
3604-
/* Bopomofo Extended (width 2) */
3605-
if( wc >= 0x31A0 && wc <= 0x31BF )
3606-
return 2;
3607-
3608-
/* Enclosed CJK Letters and Months (width 2) */
3609-
if( wc >= 0x3200 && wc <= 0x32FF )
3610-
return 2;
3611-
3612-
/* CJK Compatibility (width 2) */
3613-
if( wc >= 0x3300 && wc <= 0x33FF )
3614-
return 2;
3615-
3616-
/* CJK Unified Ideographs Extension A (width 2) */
3617-
if( wc >= 0x3400 && wc <= 0x4DBF )
3618-
return 2;
3619-
3620-
/* CJK Unified Ideographs (width 2) */
3621-
if( wc >= 0x4E00 && wc <= 0x9FFF )
3622-
return 2;
3623-
3624-
/* Yi Syllables (width 2) */
3625-
if( wc >= 0xA000 && wc <= 0xA48F )
3626-
return 2;
3627-
3628-
/* Yi Radicals (width 2) */
3629-
if( wc >= 0xA490 && wc <= 0xA4CF )
3630-
return 2;
3631-
3632-
/* Hangul Syllables (width 2) */
3633-
if( wc >= 0xAC00 && wc <= 0xD7AF )
3634-
return 2;
3635-
3636-
/* CJK Compatibility Ideographs (width 2) */
3637-
if( wc >= 0xF900 && wc <= 0xFAFF )
3638-
return 2;
3639-
3640-
/* Halfwidth and Fullwidth Forms (width 1 for halfwidth, 2 for fullwidth) */
3641-
if( wc >= 0xFF00 && wc <= 0xFFEF )
3642-
{
3643-
/* Halfwidth characters (width 1) */
3644-
if( wc >= 0xFF61 && wc <= 0xFF9F )
3645-
return 1;
3646-
/* Fullwidth characters (width 2) */
3647-
return 2;
3648-
}
3649-
3650-
/* Miscellaneous Symbols (width 2) */
3651-
if( wc >= 0x2600 && wc <= 0x26FF )
3652-
return 2;
3653-
3654-
/* Dingbats (width 2) */
3655-
if( wc >= 0x2700 && wc <= 0x27BF )
3656-
return 2;
3657-
3658-
/* Default: narrow character */
3659-
return 1;
3660-
}

src/rtl/hbgtcore.c

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "hbapifs.h"
5959
#include "hbapierr.h"
6060
#include "hbapicdp.h"
61+
#include "../codepage/mk_wcwidth.h"
6162
#include "hbdate.h"
6263
#include "hbset.h"
6364
#include "hbvm.h"
@@ -87,6 +88,25 @@ void hb_gt_BaseUnlock( PHB_GT pGT )
8788
HB_GTSELF_UNLOCK( pGT );
8889
}
8990

91+
/* Calculate Unicode character display width based on GT driver setting
92+
* This helper function checks the fWideCharWidth flag and returns
93+
* the appropriate display width for the character.
94+
*
95+
* When fWideCharWidth is enabled, uses mk_wcwidth() for accurate
96+
* Unicode TR11 East Asian Width calculation. Otherwise returns 1
97+
* for backward compatibility.
98+
*/
99+
static int hb_gt_charDispWidth( PHB_GT pGT, HB_WCHAR wc )
100+
{
101+
if( pGT->fWideCharWidth )
102+
{
103+
/* Use accurate Unicode TR11 width calculation */
104+
return mk_wcwidth( (wchar_t)wc );
105+
}
106+
/* Default: narrow character (width 1) for backward compatibility */
107+
return 1;
108+
}
109+
90110
void hb_gt_BaseLock( PHB_GT pGT )
91111
{
92112
HB_GTSELF_LOCK( pGT );
@@ -107,6 +127,7 @@ static void hb_gt_def_BaseInit( PHB_GT_BASE pGT )
107127
pGT->fBlinking = HB_TRUE;
108128
pGT->fStdOutCon = HB_FALSE;
109129
pGT->fStdErrCon = HB_FALSE;
130+
pGT->fWideCharWidth = HB_FALSE; /* Default: disable Unicode wide char width calc */
110131
pGT->iCursorShape = SC_NORMAL;
111132
pGT->iDispCount = 0;
112133
pGT->iExtCount = 0;
@@ -863,11 +884,11 @@ static int hb_gt_def_PutText( PHB_GT pGT, int iRow, int iCol, int iColor, const
863884
if( ! HB_GTSELF_PUTCHAR( pGT, iRow, iCol, iColor, 0, wc ) )
864885
{
865886
while( HB_CDPCHAR_GET( cdp, szText, nLen, &nIndex, &wc ) )
866-
iDispCol += hb_cdpUTF8CharWidth( wc );
887+
iDispCol += hb_gt_charDispWidth( pGT, wc );
867888
break;
868889
}
869890
/* Add character display width to actual display position */
870-
iDispCol += hb_cdpUTF8CharWidth( wc );
891+
iDispCol += hb_gt_charDispWidth( pGT, wc );
871892
iCol++; /* Cell index only increments by 1 */
872893
}
873894
return iDispCol;
@@ -885,7 +906,7 @@ static int hb_gt_def_PutTextW( PHB_GT pGT, int iRow, int iCol, int iColor, const
885906
break;
886907

887908
/* Add character display width to actual display position */
888-
iDispCol += hb_cdpUTF8CharWidth( *szText );
909+
iDispCol += hb_gt_charDispWidth( pGT, *szText );
889910

890911
szText++;
891912
++iCol; /* Cell index only increments by 1 */
@@ -1038,7 +1059,7 @@ static void hb_gt_def_WriteCon( PHB_GT pGT, const char * szText, HB_SIZE nLength
10381059
break;
10391060

10401061
default:
1041-
iCol += hb_cdpUTF8CharWidth(wc);
1062+
iCol += hb_gt_charDispWidth( pGT, wc );
10421063
if( iCol > iMaxCol || iCol <= 0 )
10431064
{
10441065
/* If the cursor position started off the left edge,
@@ -2017,6 +2038,13 @@ static HB_BOOL hb_gt_def_Info( PHB_GT pGT, int iType, PHB_GT_INFO pInfo )
20172038
HB_GTSELF_VERSION( pGT, hb_itemGetNI( pInfo->pNewVal ) ) );
20182039
break;
20192040

2041+
case HB_GTI_WIDECHARWIDTH:
2042+
/* Enable/Disable Unicode wide character width calculation */
2043+
pInfo->pResult = hb_itemPutL( pInfo->pResult, pGT->fWideCharWidth );
2044+
if( hb_itemType( pInfo->pNewVal ) & HB_IT_LOGICAL )
2045+
pGT->fWideCharWidth = hb_itemGetL( pInfo->pNewVal );
2046+
break;
2047+
20202048
default:
20212049
return HB_FALSE;
20222050
}

0 commit comments

Comments
 (0)