author | Julius Plenz <plenz@cis.fu-berlin.de> | 2011-03-10 16:03:23 (UTC) |
---|---|---|
committer | Lars Hjemli <hjemli@gmail.com> | 2011-03-26 14:21:07 (UTC) |
commit | 0a799424f682071da9f5b632d1394308e9255bb5 (patch) (unidiff) | |
tree | cbc64c3d05d27cffb6c997b7ace6924838a829ff /parsing.c | |
parent | 5db02854e64fa41aa459ea7d13fc827063deda41 (diff) | |
download | cgit-0a799424f682071da9f5b632d1394308e9255bb5.zip cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.gz cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.bz2 |
fix two encoding bugs
reencode() takes three arguments in the order (txt, from, to), as opposed to
reencode_string(), which, like iconv, expects the from and to arguments in
swapped order. Fix the call accordingly (this makes reencode() more intuitive).
If the src and dst encodings are equivalent, don't do any re-encoding.
If no special encoding parameter is found within the commit, assume
UTF-8 and explicitly convert to PAGE_ENCODING. The change to reencode()
mentioned above avoids re-encoding a UTF-8 string to UTF-8, for example.
Signed-off-by: Julius Plenz <plenz@cis.fu-berlin.de>
Signed-off-by: Lars Hjemli <hjemli@gmail.com>
-rw-r--r-- | parsing.c | 24 |
1 files changed, 15 insertions, 9 deletions
@@ -97,25 +97,29 @@ char *parse_user(char *t, char **name, char **email, unsigned long *date) | |||
97 | } | 97 | } |
98 | 98 | ||
99 | #ifdef NO_ICONV | 99 | #ifdef NO_ICONV |
100 | #define reencode(a, b, c) | 100 | #define reencode(a, b, c) |
101 | #else | 101 | #else |
102 | const char *reencode(char **txt, const char *src_enc, const char *dst_enc) | 102 | const char *reencode(char **txt, const char *src_enc, const char *dst_enc) |
103 | { | 103 | { |
104 | char *tmp; | 104 | char *tmp; |
105 | 105 | ||
106 | if (!txt || !*txt || !src_enc || !dst_enc) | 106 | if (!txt || !*txt || !src_enc || !dst_enc) |
107 | return *txt; | 107 | return *txt; |
108 | 108 | ||
109 | tmp = reencode_string(*txt, src_enc, dst_enc); | 109 | /* no encoding needed if src_enc equals dst_enc */ |
110 | if(!strcasecmp(src_enc, dst_enc)) | ||
111 | return *txt; | ||
112 | |||
113 | tmp = reencode_string(*txt, dst_enc, src_enc); | ||
110 | if (tmp) { | 114 | if (tmp) { |
111 | free(*txt); | 115 | free(*txt); |
112 | *txt = tmp; | 116 | *txt = tmp; |
113 | } | 117 | } |
114 | return *txt; | 118 | return *txt; |
115 | } | 119 | } |
116 | #endif | 120 | #endif |
117 | 121 | ||
118 | struct commitinfo *cgit_parse_commit(struct commit *commit) | 122 | struct commitinfo *cgit_parse_commit(struct commit *commit) |
119 | { | 123 | { |
120 | struct commitinfo *ret; | 124 | struct commitinfo *ret; |
121 | char *p = commit->buffer, *t = commit->buffer; | 125 | char *p = commit->buffer, *t = commit->buffer; |
@@ -151,24 +155,28 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) | |||
151 | &ret->committer_date); | 155 | &ret->committer_date); |
152 | } | 156 | } |
153 | 157 | ||
154 | if (p && !strncmp(p, "encoding ", 9)) { | 158 | if (p && !strncmp(p, "encoding ", 9)) { |
155 | p += 9; | 159 | p += 9; |
156 | t = strchr(p, '\n'); | 160 | t = strchr(p, '\n'); |
157 | if (t) { | 161 | if (t) { |
158 | ret->msg_encoding = substr(p, t + 1); | 162 | ret->msg_encoding = substr(p, t + 1); |
159 | p = t + 1; | 163 | p = t + 1; |
160 | } | 164 | } |
161 | } | 165 | } |
162 | 166 | ||
167 | /* if no special encoding is found, assume UTF-8 */ | ||
168 | if(!ret->msg_encoding) | ||
169 | ret->msg_encoding = xstrdup("UTF-8"); | ||
170 | |||
163 | // skip unknown header fields | 171 | // skip unknown header fields |
164 | while (p && *p && (*p != '\n')) { | 172 | while (p && *p && (*p != '\n')) { |
165 | p = strchr(p, '\n'); | 173 | p = strchr(p, '\n'); |
166 | if (p) | 174 | if (p) |
167 | p++; | 175 | p++; |
168 | } | 176 | } |
169 | 177 | ||
170 | // skip empty lines between headers and message | 178 | // skip empty lines between headers and message |
171 | while (p && *p == '\n') | 179 | while (p && *p == '\n') |
172 | p++; | 180 | p++; |
173 | 181 | ||
174 | if (!p) | 182 | if (!p) |
@@ -180,32 +188,30 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) | |||
180 | p = t + 1; | 188 | p = t + 1; |
181 | 189 | ||
182 | while (p && *p == '\n') { | 190 | while (p && *p == '\n') { |
183 | p = strchr(p, '\n'); | 191 | p = strchr(p, '\n'); |
184 | if (p) | 192 | if (p) |
185 | p++; | 193 | p++; |
186 | } | 194 | } |
187 | if (p) | 195 | if (p) |
188 | ret->msg = xstrdup(p); | 196 | ret->msg = xstrdup(p); |
189 | } else | 197 | } else |
190 | ret->subject = xstrdup(p); | 198 | ret->subject = xstrdup(p); |
191 | 199 | ||
192 | if (ret->msg_encoding) { | 200 | reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING); |
193 | reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding); | 201 | reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING); |
194 | reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding); | 202 | reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING); |
195 | reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding); | 203 | reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING); |
196 | reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding); | 204 | reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING); |
197 | reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding); | 205 | reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING); |
198 | reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding); | ||
199 | } | ||
200 | 206 | ||
201 | return ret; | 207 | return ret; |
202 | } | 208 | } |
203 | 209 | ||
204 | 210 | ||
205 | struct taginfo *cgit_parse_tag(struct tag *tag) | 211 | struct taginfo *cgit_parse_tag(struct tag *tag) |
206 | { | 212 | { |
207 | void *data; | 213 | void *data; |
208 | enum object_type type; | 214 | enum object_type type; |
209 | unsigned long size; | 215 | unsigned long size; |
210 | char *p; | 216 | char *p; |
211 | struct taginfo *ret; | 217 | struct taginfo *ret; |