… | |
… | |
101 | wchar_t * |
101 | wchar_t * |
102 | rxvt_utf8towcs (const char *str, int len) |
102 | rxvt_utf8towcs (const char *str, int len) |
103 | { |
103 | { |
104 | if (len < 0) len = strlen (str); |
104 | if (len < 0) len = strlen (str); |
105 | |
105 | |
106 | wchar_t *r = (wchar_t *)rxvt_malloc ((len + 1) * sizeof (wchar_t)); |
106 | wchar_t *r = (wchar_t *)rxvt_malloc ((len + 1) * sizeof (wchar_t)), |
107 | wchar_t *p = r; |
107 | *p = r; |
108 | |
108 | |
109 | unsigned char *s = (unsigned char *)str; |
109 | unsigned char *s = (unsigned char *)str, |
|
|
110 | *e = s + len; |
110 | |
111 | |
111 | while (len) |
112 | for (;;) |
112 | { |
113 | { |
|
|
114 | len = e - s; |
|
|
115 | |
|
|
116 | if (len == 0) |
|
|
117 | break; |
113 | if (s[0] < 0x80) |
118 | else if (s[0] < 0x80) |
114 | { |
|
|
115 | *p++ = *s++; len--; |
119 | *p++ = *s++; |
116 | } |
|
|
117 | else if (len > 0 |
120 | else if (len >= 2 |
118 | && s[0] >= 0xc2 && s[0] <= 0xdf |
121 | && s[0] >= 0xc2 && s[0] <= 0xdf |
119 | && (s[1] & 0xc0) == 0x80) |
122 | && (s[1] & 0xc0) == 0x80) |
120 | { |
123 | { |
121 | *p++ = ((s[0] & 0x1f) << 6) |
124 | *p++ = ((s[0] & 0x1f) << 6) |
122 | | (s[1] & 0x3f); |
125 | | (s[1] & 0x3f); |
123 | s += 2; len -= 2; |
126 | s += 2; |
124 | } |
127 | } |
125 | else if (len > 1 |
128 | else if (len >= 3 |
126 | && ( (s[0] == 0xe0 && s[1] >= 0xa0 && s[1] <= 0xbf) |
129 | && ( (s[0] == 0xe0 && s[1] >= 0xa0 && s[1] <= 0xbf) |
127 | || (s[0] >= 0xe1 && s[0] <= 0xec && s[1] >= 0x80 && s[1] <= 0xbf) |
130 | || (s[0] >= 0xe1 && s[0] <= 0xec && s[1] >= 0x80 && s[1] <= 0xbf) |
128 | || (s[0] == 0xed && s[1] >= 0x80 && s[1] <= 0x9f) |
131 | || (s[0] == 0xed && s[1] >= 0x80 && s[1] <= 0x9f) |
129 | || (s[0] >= 0xee && s[0] <= 0xef && s[1] >= 0x80 && s[1] <= 0xbf) |
132 | || (s[0] >= 0xee && s[0] <= 0xef && s[1] >= 0x80 && s[1] <= 0xbf) |
130 | ) |
133 | ) |
131 | && (s[2] & 0xc0) == 0x80) |
134 | && (s[2] & 0xc0) == 0x80) |
132 | { |
135 | { |
133 | *p++ = ((s[0] & 0x0f) << 12) |
136 | *p++ = ((s[0] & 0x0f) << 12) |
134 | | ((s[1] & 0x3f) << 6) |
137 | | ((s[1] & 0x3f) << 6) |
135 | | (s[2] & 0x3f); |
138 | | (s[2] & 0x3f); |
136 | s += 3; len -= 3; |
139 | s += 3; |
137 | } |
140 | } |
138 | else if (len > 2 |
141 | else if (len >= 4 |
139 | && ( (s[0] == 0xf0 && s[1] >= 0x90 && s[1] <= 0xbf) |
142 | && ( (s[0] == 0xf0 && s[1] >= 0x90 && s[1] <= 0xbf) |
140 | || (s[0] >= 0xf1 && s[0] <= 0xf3 && s[1] >= 0x80 && s[1] <= 0xbf) |
143 | || (s[0] >= 0xf1 && s[0] <= 0xf3 && s[1] >= 0x80 && s[1] <= 0xbf) |
141 | || (s[0] == 0xf4 && s[1] >= 0x80 && s[1] <= 0x8f) |
144 | || (s[0] == 0xf4 && s[1] >= 0x80 && s[1] <= 0x8f) |
142 | ) |
145 | ) |
143 | && (s[2] & 0xc0) == 0x80 |
146 | && (s[2] & 0xc0) == 0x80 |
… | |
… | |
145 | { |
148 | { |
146 | *p++ = ((s[0] & 0x07) << 18) |
149 | *p++ = ((s[0] & 0x07) << 18) |
147 | | ((s[1] & 0x3f) << 12) |
150 | | ((s[1] & 0x3f) << 12) |
148 | | ((s[2] & 0x3f) << 6) |
151 | | ((s[2] & 0x3f) << 6) |
149 | | (s[3] & 0x3f); |
152 | | (s[3] & 0x3f); |
150 | s += 4; len -= 4; |
153 | s += 4; |
151 | } |
154 | } |
152 | else |
155 | else |
153 | { |
156 | { |
154 | *p++ = 0xfffd; |
157 | *p++ = 0xfffd; |
155 | s++; len--; |
158 | s++; |
156 | } |
159 | } |
157 | } |
160 | } |
158 | |
161 | |
159 | *p = 0; |
162 | *p = 0; |
160 | |
163 | |