Skip to content

Commit 36eb8f2

Browse files
committed
Summary of Changes
New Files
- include/hbgtwide.h – public header for wide-character width calculation

Renamed / Refactored
- src/codepage/mk_wcwidth.c → src/codepage/hb_wcwidth.c (re-implemented with Harbour types, no macros)

Updated Makefiles & References
- src/codepage/Makefile – source list changed from mk_wcwidth.c to hb_wcwidth.c
- src/rtl/hbgtcore.c – include path switched to hbgtwide.h
- src/rtl/Makefile – adjusted -I path (../codepage → ../include)

Review Comments Addressed
- Source-level types instead of macros: uses Harbour types HB_WCHAR32, HB_SIZE, HB_WCHAR.
- Header placed under include/ and renamed to hbgtwide.h: public include now matches project convention.
- HB_ namespace for all exported symbols: mk_wcwidth() → hb_wcwidth(). Helper tables/functions already prefixed (hb_wcswidth, hb_wcswidth_cjk).
- All widths returned as literal 0 / 1 / 2; no #define used.
- Character tables remain inside CODEPAGE modules: combining[] and wide[] static tables live in src/codepage/hb_wcwidth.c.

Deferred for Future Work
- Emoji support: static Unicode TR-11 rules are used; emoji widths can vary by font, so dynamic hooks can be added later.
- Dynamic width tables: maintainer noted potential need for runtime-replaceable tables; current code keeps static arrays but leaves the function-pointer path open.
- Full glyph knowledge: as the reviewer stated, now is not the “perfect timing” for complete glyph handling; this patch provides the minimal, working foundation.

Merge branch 'fix-utf8-cursor-col' of github.com:woodhead2019/harbour into fix-utf8-cursor-col

Signed-off-by: woodhead2019 <woodhead2019@users.noreply.github.com>
2 parents 59b579e + 8620e82 commit 36eb8f2

File tree

4 files changed

+31
-73
lines changed

4 files changed

+31
-73
lines changed
Lines changed: 15 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,31 @@
11
/*
2-
* mk_wcwidth.h
2+
* hbgtwide.h - Graphic Terminal Wide Character Width Support
33
*
44
* Copyright (C) 2001 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
55
*
66
* This software is placed in the public domain.
77
*
8-
* Historical reference: https://www.postgresql.org/message-id/attachment/8417/pg_mb_utf8.c
9-
*
108
* Original source: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
119
*
1210
* Adapted for Harbour by Dongming Wang <wangdongming / at / gmail.com>
1311
*
1412
* ANSI C89 compatible version for Harbour
13+
*
14+
* Unicode TR11 (East Asian Width) implementation
1515
*/
1616

17-
#ifndef MK_WCWIDTH_H
18-
#define MK_WCWIDTH_H
17+
#ifndef HBGTWIDE_H_
18+
#define HBGTWIDE_H_
1919

20-
/* Include Harbour type definitions */
2120
#include "hbdefs.h"
2221

23-
/* Type aliases: preserve original API */
24-
#define wchar_t HB_WCHAR
25-
#define size_t HB_SIZE
26-
27-
/* Combining character width */
28-
#define COMBINING_WIDTH 0
29-
30-
/* Zero width character */
31-
#define ZERO_WIDTH 0
32-
33-
/* Full width character */
34-
#define FULL_WIDTH 2
35-
36-
/* Half width character */
37-
#define HALF_WIDTH 1
38-
39-
/* Wide character width */
40-
#define WIDE_WIDTH 2
41-
42-
/* Narrow character width */
43-
#define NARROW_WIDTH 1
44-
45-
/* Neutral character width */
46-
#define NEUTRAL_WIDTH 1
47-
48-
/* Ambiguous character width */
49-
#define AMBIGUOUS_WIDTH 1
22+
HB_EXTERN_BEGIN
5023

5124
/*
52-
* Function: mk_wcwidth
53-
* --------------------
5425
* Returns the width in screen columns of a Unicode code point.
5526
*
5627
* Parameters:
57-
* ucs: Unicode code point (wchar_t)
28+
* ucs: Unicode code point (HB_WCHAR32)
5829
*
5930
* Returns:
6031
* 0: Control characters, non-printing characters, combining characters
@@ -67,15 +38,13 @@
6738
* - Private use area characters are treated as narrow (width 1)
6839
* - Unassigned characters are treated as narrow (width 1)
6940
*/
70-
int mk_wcwidth( wchar_t ucs );
41+
HB_EXTERN int hb_wcwidth( HB_WCHAR32 ucs );
7142

7243
/*
73-
* Function: mk_wcswidth
74-
* ---------------------
7544
* Returns the width in screen columns of a null-terminated Unicode string.
7645
*
7746
* Parameters:
78-
* pwcs: Pointer to wide character string
47+
* pwcs: Pointer to wide character string (HB_WCHAR*)
7948
*
8049
* Returns:
8150
* Total width of the string in screen columns
@@ -85,15 +54,13 @@ int mk_wcwidth( wchar_t ucs );
8554
* - This function processes the entire string until null terminator
8655
* - Returns -1 if hb_wcwidth() yields a negative width for any character (the check is w < 0, not w == 0)
8756
*/
88-
int mk_wcswidth( const wchar_t *pwcs );
57+
HB_EXTERN int hb_wcswidth( const HB_WCHAR *pwcs );
8958

9059
/*
91-
* Function: mk_wcswidth_cjk
92-
* -------------------------
9360
* Returns the width in screen columns of a substring of a Unicode string.
9461
*
9562
* Parameters:
96-
* pwcs: Pointer to wide character string
63+
* pwcs: Pointer to wide character string (HB_WCHAR*)
9764
* n: Maximum number of characters to process
9865
*
9966
* Returns:
@@ -104,6 +71,8 @@ int mk_wcswidth( const wchar_t *pwcs );
10471
* - This function processes at most n characters
10572
* - Returns -1 if hb_wcwidth() yields a negative width for any character (the check is w < 0, not w == 0)
10673
*/
107-
int mk_wcswidth_cjk( const wchar_t *pwcs, size_t n );
74+
HB_EXTERN int hb_wcswidth_cjk( const HB_WCHAR *pwcs, HB_SIZE n );
75+
76+
HB_EXTERN_END
10877

109-
#endif /* MK_WCWIDTH_H */
78+
#endif /* HBGTWIDE_H_ */

src/codepage/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ C_SOURCES := \
55
cp_big5.c \
66
cp_gbk.c \
77
cp_utf8.c \
8-
mk_wcwidth.c \
8+
hb_wcwidth.c \
99
cp_u16le.c \
1010
cpbg866.c \
1111
cpbgiso.c \
Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
/*
2-
* mk_wcwidth.c
2+
* hb_wcwidth.c - Graphic Terminal Wide Character Width Implementation
33
*
44
* Copyright (C) 2001 Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
55
*
66
* This software is placed in the public domain.
77
*
8-
* Historical reference: https://www.postgresql.org/message-id/attachment/8417/pg_mb_utf8.c
9-
*
108
* Original source: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
119
*
1210
* Adapted for Harbour by Dongming Wang <wangdongming / at / gmail.com>
@@ -15,20 +13,19 @@
1513
*
1614
* This is a simplified implementation of Unicode TR11 (East Asian Width).
1715
* It covers the most common Unicode character ranges.
18-
*
1916
*/
2017

21-
#include "mk_wcwidth.h"
22-
#include <stddef.h>
18+
#include "hbgtwide.h"
2319

20+
/* Interval structure for binary search in range tables */
2421
struct interval
2522
{
2623
unsigned int first;
2724
unsigned int last;
2825
};
2926

3027
/* Binary search in range table */
31-
static int bisearch( wchar_t ucs, const struct interval *table, int max )
28+
static int bisearch( HB_WCHAR32 ucs, const struct interval *table, int max )
3229
{
3330
int min = 0;
3431
int mid;
@@ -113,12 +110,10 @@ static const struct interval wide[] =
113110
};
114111

115112
/*
116-
* Function: mk_wcwidth
117-
* --------------------
118113
* Returns the width in screen columns of a Unicode code point.
119114
*
120115
* Parameters:
121-
* ucs: Unicode code point (wchar_t)
116+
* ucs: Unicode code point (HB_WCHAR32)
122117
*
123118
* Returns:
124119
* 0: Control characters, non-printing characters, combining characters
@@ -131,7 +126,7 @@ static const struct interval wide[] =
131126
* - Private use area characters are treated as narrow (width 1)
132127
* - Unassigned characters are treated as narrow (width 1)
133128
*/
134-
int mk_wcwidth( wchar_t ucs )
129+
int hb_wcwidth( HB_WCHAR32 ucs )
135130
{
136131
/* Test for 8-bit control characters */
137132
if( ucs == 0 )
@@ -144,8 +139,6 @@ int mk_wcwidth( wchar_t ucs )
144139
if( bisearch( ucs, combining, sizeof( combining ) / sizeof( struct interval ) - 1 ) )
145140
return 0;
146141

147-
/* If we arrive here, ucs is not a combining or C0/C1 control character */
148-
149142
/* Binary search in table of wide characters */
150143
if( bisearch( ucs, wide, sizeof( wide ) / sizeof( struct interval ) - 1 ) )
151144
return 2;
@@ -154,12 +147,10 @@ int mk_wcwidth( wchar_t ucs )
154147
}
155148

156149
/*
157-
* Function: mk_wcswidth
158-
* ---------------------
159150
* Returns the width in screen columns of a null-terminated Unicode string.
160151
*
161152
* Parameters:
162-
* pwcs: Pointer to wide character string
153+
* pwcs: Pointer to wide character string (HB_WCHAR*)
163154
*
164155
* Returns:
165156
* Total width of the string in screen columns
@@ -169,13 +160,13 @@ int mk_wcwidth( wchar_t ucs )
169160
* - This function processes the entire string until null terminator
170161
* - Returns -1 if hb_wcwidth() yields a negative width for any character (the check is w < 0, not w == 0)
171162
*/
172-
int mk_wcswidth( const wchar_t *pwcs )
163+
int hb_wcswidth( const HB_WCHAR *pwcs )
173164
{
174165
int width = 0;
175166

176167
while( *pwcs != L'\0' )
177168
{
178-
int w = mk_wcwidth( *pwcs );
169+
int w = hb_wcwidth( (HB_WCHAR32)*pwcs );
179170

180171
if( w < 0 )
181172
return -1;
@@ -188,12 +179,10 @@ int mk_wcswidth( const wchar_t *pwcs )
188179
}
189180

190181
/*
191-
* Function: mk_wcswidth_cjk
192-
* -------------------------
193182
* Returns the width in screen columns of a substring of a Unicode string.
194183
*
195184
* Parameters:
196-
* pwcs: Pointer to wide character string
185+
* pwcs: Pointer to wide character string (HB_WCHAR*)
197186
* n: Maximum number of characters to process
198187
*
199188
* Returns:
@@ -204,13 +193,13 @@ int mk_wcswidth( const wchar_t *pwcs )
204193
* - This function processes at most n characters
205194
* - Returns -1 if hb_wcwidth() yields a negative width for any character (the check is w < 0, not w == 0)
206195
*/
207-
int mk_wcswidth_cjk( const wchar_t *pwcs, size_t n )
196+
int hb_wcswidth_cjk( const HB_WCHAR *pwcs, HB_SIZE n )
208197
{
209198
int width = 0;
210199

211200
while( n-- > 0 && *pwcs != L'\0' )
212201
{
213-
int w = mk_wcwidth( *pwcs );
202+
int w = hb_wcwidth( (HB_WCHAR32)*pwcs );
214203

215204
if( w < 0 )
216205
return -1;

src/rtl/hbgtcore.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
#include "hbapifs.h"
5959
#include "hbapierr.h"
6060
#include "hbapicdp.h"
61-
#include "../codepage/mk_wcwidth.h"
61+
#include "hbgtwide.h"
6262
#include "hbdate.h"
6363
#include "hbset.h"
6464
#include "hbvm.h"
@@ -92,7 +92,7 @@ void hb_gt_BaseUnlock( PHB_GT pGT )
9292
* This helper function checks the fWideCharWidth flag and returns
9393
* the appropriate display width for the character.
9494
*
95-
* When fWideCharWidth is enabled, uses mk_wcwidth() for accurate
95+
* When fWideCharWidth is enabled, uses hb_wcwidth() for accurate
9696
* Unicode TR11 East Asian Width calculation. Otherwise returns 1
9797
* for backward compatibility.
9898
*/
@@ -101,7 +101,7 @@ static int hb_gt_charDispWidth( PHB_GT pGT, HB_WCHAR wc )
101101
if( pGT->fWideCharWidth )
102102
{
103103
/* Use accurate Unicode TR11 width calculation */
104-
return mk_wcwidth( (wchar_t)wc );
104+
return hb_wcwidth( (HB_WCHAR32)wc );
105105
}
106106
/* Default: narrow character (width 1) for backward compatibility */
107107
return 1;

0 commit comments

Comments
 (0)