Skip to content

Commit d9b49e3

Browse files
committed
* regex.c (mbc_startpos): become macro.
* regex.c (euc_startpos): added for improvement. * regex.c (sjis_startpos): ditto. * regex.c (utf8_startpos): ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@2040 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1 parent dad91ce commit d9b49e3

File tree

2 files changed

+84
-78
lines changed

2 files changed

+84
-78
lines changed

ChangeLog

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
Fri Feb 1 17:46:39 2002 Nobuyoshi Nakada <nobu.nakada@nifty.ne.jp>
2+
3+
* regex.c (mbc_startpos): become macro.
4+
5+
* regex.c (euc_startpos): added for improvement.
6+
7+
* regex.c (sjis_startpos): ditto.
8+
9+
* regex.c (utf8_startpos): ditto.
10+
111
Fri Feb 1 00:03:30 2002 Yukihiro Matsumoto <matz@ruby-lang.org>
212

313
* file.c (rb_stat_inspect): print dev, rdev in hexadecimal.

regex.c

Lines changed: 74 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,9 @@ re_set_syntax(syntax)
478478
/* Maps wide character c to a single byte value (presumably the first byte of
   its multibyte form): for non-UTF-8 settings this is c itself when
   c < 0x100 and bits 8..15 of c otherwise; for MBCTYPE_UTF8 the result comes
   from utf8_firstbyte().  The argument is evaluated more than once, so it
   must not have side effects. */
#define WC2MBC1ST(c) \
 ((current_mbctype != MBCTYPE_UTF8) ? (((c) < 0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c))
480480

481-
int mbc_startpos _((const char *start, int pos));
481+
typedef unsigned int (*mbc_startpos_func_t) _((const char *string, unsigned int pos));
482+
/* Forward declaration of the table indexed by current_mbctype in the
   mbc_startpos() macro; the table itself is defined later in this file. */
extern const mbc_startpos_func_t mbc_startpos_func[];
483+
#define mbc_startpos(start, pos) (*mbc_startpos_func[current_mbctype])((start), (pos))
482484

483485
static unsigned int
484486
utf8_firstbyte(c)
@@ -4384,7 +4386,6 @@ re_free_registers(regs)
43844386
Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
43854387
Last change: Jul. 9, 1993 by t^2 */
43864388
static const unsigned char mbctab_ascii[] = {
4387-
/* forward scan */
43884389
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43894390
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43904391
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4401,28 +4402,9 @@ static const unsigned char mbctab_ascii[] = {
44014402
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44024403
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44034404
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4404-
4405-
/* reverse scan */
4406-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4407-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4408-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4409-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4410-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4411-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4412-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4413-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4414-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4415-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4416-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4417-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4418-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4419-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4420-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4421-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
44224405
};
44234406

44244407
static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
4425-
/* forward scan */
44264408
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44274409
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44284410
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4439,28 +4421,9 @@ static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
44394421
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44404422
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44414423
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
4442-
4443-
/* reverse scan */
4444-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4445-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4446-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4447-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4448-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4449-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4450-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4451-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4452-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4453-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4454-
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4455-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4456-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4457-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4458-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4459-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
44604424
};
44614425

4462-
static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
4463-
/* forward scan */
4426+
static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFC */
44644427
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44654428
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44664429
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4476,9 +4439,10 @@ static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
44764439
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44774440
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44784441
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4479-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
4442+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
4443+
};
44804444

4481-
/* reverse scan */
4445+
static const unsigned char mbctab_sjis_trail[] = { /* 0x40-0x7E,0x80-0xFC */
44824446
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44834447
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44844448
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4498,7 +4462,6 @@ static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
44984462
};
44994463

45004464
static const unsigned char mbctab_utf8[] = {
4501-
/* forward scan */
45024465
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45034466
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45044467
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -4515,24 +4478,6 @@ static const unsigned char mbctab_utf8[] = {
45154478
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45164479
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
45174480
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
4518-
4519-
/* reverse scan */
4520-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4521-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4522-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4523-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4524-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4525-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4526-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4527-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4528-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4529-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4530-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4531-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4532-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4533-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4534-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4535-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
45364481
};
45374482

45384483
const unsigned char *re_mbctab = mbctab_ascii;
@@ -4561,36 +4506,87 @@ re_mbcinit(mbctype)
45614506
}
45624507
}
45634508

4564-
int
4565-
mbc_startpos(string, pos)
4509+
/* Entry of byte table t for character c; c is cast to unsigned char so that
   a negative plain char cannot index before the start of the table. */
#define mbc_isfirst(t, c) ((t)[(unsigned char)(c)])
4510+
#define mbc_len(t, c) ((t)[(unsigned char)(c)]+1)
4511+
4512+
static unsigned int asc_startpos _((const char *string, unsigned int pos));
4513+
static unsigned int
4514+
asc_startpos(string, pos)
45664515
const char *string;
4567-
int pos;
4516+
unsigned int pos;
45684517
{
4569-
int i = pos, w;
4518+
return pos;
4519+
}
4520+
4521+
/* Nonzero when byte c lies OUTSIDE the range 0xA1..0xFE.  Subtracting 0xA1
   and truncating to unsigned char folds both bounds into one comparison:
   bytes inside the range map to 0..0x5D, everything else maps above it.
   NOTE(review): despite the name, this is true for bytes outside the EUC
   multibyte range rather than for multibyte lead bytes -- confirm intent. */
#define euc_islead(c) ((unsigned char)((c) - 0xa1) > 0xfe - 0xa1)
4522+
#define euc_mbclen(c) mbc_len(mbctab_euc, (c))
4523+
static unsigned int euc_startpos _((const char *string, unsigned int pos));
4524+
static unsigned int
4525+
euc_startpos(string, pos)
4526+
const char *string;
4527+
unsigned int pos;
4528+
{
4529+
unsigned int i = pos, w;
45704530

4571-
while (i > 0 && re_mbctab[(unsigned char)string[i]+256]) {
4531+
while (i > 0 && !euc_islead(string[i])) {
45724532
--i;
45734533
}
4574-
if (i == pos || i + (w = mbclen(string[i])) > pos) return i;
4534+
if (i == pos || i + (w = euc_mbclen(string[i])) > pos) {
4535+
return i;
4536+
}
45754537
i += w;
4538+
return i + ((pos - i) & ~1);
4539+
}
45764540

4577-
switch (current_mbctype) {
4578-
case MBCTYPE_EUC:
4579-
return i + ((pos - i) & ~1);
4541+
#define sjis_isfirst(c) mbc_isfirst(mbctab_sjis, (c))
4542+
#define sjis_istrail(c) mbctab_sjis_trail[(unsigned char)(c)]
4543+
#define sjis_mbclen(c) mbc_len(mbctab_sjis, (c))
4544+
static unsigned int sjis_startpos _((const char *string, unsigned int pos));
4545+
static unsigned int
4546+
sjis_startpos(string, pos)
4547+
const char *string;
4548+
unsigned int pos;
4549+
{
4550+
unsigned int i = pos, w;
45804551

4581-
case MBCTYPE_SJIS:
4582-
while (i + (w = mbclen(string[i])) < pos) {
4583-
i += w;
4584-
}
4552+
if (i > 0 && sjis_istrail(string[i])) {
4553+
do {
4554+
if (!sjis_isfirst(string[--i])) {
4555+
++i;
4556+
break;
4557+
}
4558+
} while (i > 0);
4559+
}
4560+
if (i == pos || i + (w = sjis_mbclen(string[i])) > pos) {
45854561
return i;
4562+
}
4563+
i += w;
4564+
return i + ((pos - i) & ~1);
4565+
}
45864566

4587-
case MBCTYPE_UTF8:
4567+
/* Nonzero unless the top two bits of c are 10, i.e. unless c has the form
   10xxxxxx; bytes 0x80..0xBF yield 0, every other byte yields 1. */
#define utf8_islead(c) ((unsigned char)((c) & 0xc0) != 0x80)
4568+
#define utf8_mbclen(c) mbc_len(mbctab_utf8, (c))
4569+
static unsigned int utf8_startpos _((const char *string, unsigned int pos));
4570+
static unsigned int
4571+
utf8_startpos(string, pos)
4572+
const char *string;
4573+
unsigned int pos;
4574+
{
4575+
unsigned int i = pos, w;
4576+
4577+
while (i > 0 && !utf8_islead(string[i])) {
4578+
--i;
4579+
}
4580+
if (i == pos || i + (w = utf8_mbclen(string[i])) > pos) {
45884581
return i;
4589-
default:
4590-
return pos;
45914582
}
4583+
return i + w;
45924584
}
45934585

4586+
const mbc_startpos_func_t mbc_startpos_func[4] = {
4587+
asc_startpos, euc_startpos, sjis_startpos, utf8_startpos
4588+
};
4589+
45944590
/*
45954591
vi: sw=2 ts=8
45964592
Local variables:

0 commit comments

Comments
 (0)