Skip to content

Commit e193fd8

Browse files
author
matz
committed
* pack.c (utf8_to_uv): added checks for malformed or redundant
UTF-8 sequences.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3105 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1 parent 96986a7 commit e193fd8

File tree

3 files changed

+66
-15
lines changed

3 files changed

+66
-15
lines changed

ChangeLog

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ Sun Dec 1 22:43:29 2002 Nobuyoshi Nakada <nobu.nokada@softhome.net>
88
* win32/win32.c (rb_w32_stat): empty path is invalid, and return
99
ENOENT rather than EBADF in such case. [ruby-talk:57177]
1010

11+
Fri Nov 29 18:01:48 2002 Yukihiro Matsumoto <matz@ruby-lang.org>
12+
13+
* pack.c (utf8_to_uv): added checks for malformed or redundant
14+
UTF-8 sequences.
15+
1116
Thu Nov 28 12:08:30 2002 Akinori MUSHA <knu@iDaemons.org>
1217

1318
* lib/mkmf.rb: Avoid the use of "clean::" in favor of "clean:" in

ext/socket/socket.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2922,4 +2922,19 @@ Init_socket()
29222922
#ifdef NI_DGRAM
29232923
sock_define_const("NI_DGRAM", NI_DGRAM);
29242924
#endif
2925+
#ifdef SHUT_RD
2926+
sock_define_const("SHUT_RD", SHUT_RD);
2927+
#else
2928+
sock_define_const("SHUT_RD", 0);
2929+
#endif
2930+
#ifdef SHUT_WR
2931+
sock_define_const("SHUT_WR", SHUT_WR);
2932+
#else
2933+
sock_define_const("SHUT_WR", 1);
2934+
#endif
2935+
#ifdef SHUT_RDWR
2936+
sock_define_const("SHUT_RDWR", SHUT_RDWR);
2937+
#else
2938+
sock_define_const("SHUT_RDWR", 2);
2939+
#endif
29252940
}

pack.c

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1855,25 +1855,56 @@ utf8_to_uv(p, lenp)
18551855
char *p;
18561856
long *lenp;
18571857
{
1858-
int c = (*p++)&0xff;
1859-
unsigned long uv;
1860-
long n = 1;
1861-
1862-
if (c < 0xc0) n = 1;
1863-
else if (c < 0xe0) n = 2;
1864-
else if (c < 0xf0) n = 3;
1865-
else if (c < 0xf8) n = 4;
1866-
else if (c < 0xfc) n = 5;
1867-
else if (c < 0xfe) n = 6;
1868-
else if (c == 0xfe) n = 7;
1869-
if (n > *lenp) return 0;
1858+
int c = *p++ & 0xff;
1859+
unsigned long uv = c;
1860+
long n;
1861+
1862+
if (!(uv & 0x80)) {
1863+
*lenp = 1;
1864+
return uv;
1865+
}
1866+
if (!(uv & 0x40)) {
1867+
rb_warning("malformed UTF-8 character");
1868+
*lenp = 1;
1869+
return uv;
1870+
}
1871+
1872+
if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1873+
else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1874+
else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1875+
else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1876+
else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1877+
else if (!(uv & 0x01)) { n = 7; uv = 0; }
1878+
else { n = 13; uv = 0; }
1879+
if (n > *lenp) {
1880+
rb_warning("malformed UTF-8 character (expected %d bytes, given %d bytes)",
1881+
n, *lenp);
1882+
return 0xfffd;
1883+
}
18701884
*lenp = n--;
18711885

1872-
uv = c;
18731886
if (n != 0) {
1874-
uv &= (1<<(BYTEWIDTH-2-n)) - 1;
18751887
while (n--) {
1876-
uv = uv << 6 | (*p++ & ((1<<6)-1));
1888+
c = *p++ & 0xff;
1889+
if ((c & 0xc0) != 0x80) {
1890+
rb_warning("malformed UTF-8 character");
1891+
*lenp -= n + 1;
1892+
return 0xfffd;
1893+
}
1894+
else {
1895+
c &= 0x3f;
1896+
if (uv == 0 && c == 0) {
1897+
int i;
1898+
1899+
for (i=0; n-i>0 && (p[i] & 0x3f) == 0; i++)
1900+
;
1901+
rb_warning("redundant UTF-8 sequence (skip %d bytes)", i+1);
1902+
n -= i;
1903+
p += i;
1904+
continue;
1905+
}
1906+
uv = uv << 6 | c;
1907+
}
18771908
}
18781909
}
18791910
return uv;

0 commit comments

Comments
 (0)