summaryrefslogtreecommitdiffabout
path: root/parsing.c
author Julius Plenz <plenz@cis.fu-berlin.de> 2011-03-10 16:03:23 (UTC)
committer Lars Hjemli <hjemli@gmail.com>2011-03-26 14:21:07 (UTC)
commit0a799424f682071da9f5b632d1394308e9255bb5 (patch) (unidiff)
treecbc64c3d05d27cffb6c997b7ace6924838a829ff /parsing.c
parent5db02854e64fa41aa459ea7d13fc827063deda41 (diff)
downloadcgit-0a799424f682071da9f5b632d1394308e9255bb5.zip
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.gz
cgit-0a799424f682071da9f5b632d1394308e9255bb5.tar.bz2
fix two encoding bugs
reencode() takes three arguments in the order (txt, from, to), as opposed to reencode_string(), which, like iconv, takes the from and to arguments in the opposite order. Fix the call accordingly (this makes reencode() more intuitive).
If the source and destination encodings are equivalent, don't do any re-encoding.
If no explicit encoding header is found within the commit, assume UTF-8 and explicitly convert to PAGE_ENCODING. The change to reencode() mentioned above avoids re-encoding a UTF-8 string to UTF-8, for example.
Signed-off-by: Julius Plenz <plenz@cis.fu-berlin.de>
Signed-off-by: Lars Hjemli <hjemli@gmail.com>
Diffstat (limited to 'parsing.c') (more/less context) (ignore whitespace changes)
-rw-r--r--parsing.c24
1 files changed, 15 insertions, 9 deletions
diff --git a/parsing.c b/parsing.c
index f37c49d..c9e4350 100644
--- a/parsing.c
+++ b/parsing.c
@@ -61,187 +61,193 @@ char *substr(const char *head, const char *tail)
61 buf[tail - head] = '\0'; 61 buf[tail - head] = '\0';
62 return buf; 62 return buf;
63} 63}
64 64
/*
 * Parse one identity line of the form "Name <email> timestamp ...\n".
 *
 * t     - start of the text to scan (the caller has already skipped the
 *         header keyword such as "author ").
 * name  - receives substr() of the text in front of '<', without the
 *         single separator character that precedes the '<'.
 * email - receives substr() of the "<...>" part, brackets included.
 * date  - receives the first decimal number found after the '>'.
 *
 * An output is left untouched when the corresponding part is not found
 * before the end of the line.
 *
 * Returns a pointer just past the terminating newline, or to the NUL
 * terminator when the text ends without one.  A NULL t is returned as is.
 */
char *parse_user(char *t, char **name, char **email, unsigned long *date)
{
	char *p = t;
	/* 1: scanning the name, 2: scanning the email, 3: looking for the
	 * date, 4: date consumed, just skip to the end of the line */
	int mode = 1;

	while (p && *p) {
		if (mode == 1 && *p == '<') {
			/* Drop the separator in front of '<', but never hand
			 * substr() a tail that lies before the head. */
			*name = substr(t, p > t ? p - 1 : p);
			t = p;
			mode++;
		} else if (mode == 1 && *p == '\n') {
			/* line ended before any '<': everything is the name */
			*name = substr(t, p);
			p++;
			break;
		} else if (mode == 2 && *p == '>') {
			/* t still points at '<', so the brackets are kept */
			*email = substr(t, p + 1);
			t = p;
			mode++;
		} else if (mode == 2 && *p == '\n') {
			/* unterminated "<...": keep what we have */
			*email = substr(t, p);
			p++;
			break;
		} else if (mode == 3 && isdigit((unsigned char)*p)) {
			/* strtoul matches the unsigned long target and has
			 * defined behaviour on overflow, unlike atol */
			*date = strtoul(p, NULL, 10);
			mode++;
		} else if (*p == '\n') {
			p++;
			break;
		}
		p++;
	}
	return p;
}
98 98
#ifdef NO_ICONV
/* Built without iconv: every reencode() call expands to nothing. */
#define reencode(a, b, c)
#else
/*
 * Convert the string *txt from src_enc to dst_enc, replacing it in place.
 *
 * txt     - address of the string to convert; on success the old string
 *           is free()d and *txt is pointed at the converted copy.
 * src_enc - name of the encoding *txt is currently in.
 * dst_enc - name of the encoding to convert to.
 *
 * Nothing is done when *txt or either encoding name is NULL, when both
 * names compare equal ignoring case, or when reencode_string() fails; in
 * all of those cases *txt is left unchanged.
 *
 * Returns *txt (converted or not), or NULL when txt itself is NULL.
 */
const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
{
	char *tmp;

	/* txt must be checked on its own before it is dereferenced */
	if (!txt)
		return NULL;
	if (!*txt || !src_enc || !dst_enc)
		return *txt;

	/* no encoding needed if src_enc equals dst_enc */
	if (!strcasecmp(src_enc, dst_enc))
		return *txt;

	/* reencode_string() expects (text, to, from), i.e. iconv order */
	tmp = reencode_string(*txt, dst_enc, src_enc);
	if (tmp) {
		free(*txt);
		*txt = tmp;
	}
	return *txt;
}
#endif
117 121
118struct commitinfo *cgit_parse_commit(struct commit *commit) 122struct commitinfo *cgit_parse_commit(struct commit *commit)
119{ 123{
120 struct commitinfo *ret; 124 struct commitinfo *ret;
121 char *p = commit->buffer, *t = commit->buffer; 125 char *p = commit->buffer, *t = commit->buffer;
122 126
123 ret = xmalloc(sizeof(*ret)); 127 ret = xmalloc(sizeof(*ret));
124 ret->commit = commit; 128 ret->commit = commit;
125 ret->author = NULL; 129 ret->author = NULL;
126 ret->author_email = NULL; 130 ret->author_email = NULL;
127 ret->committer = NULL; 131 ret->committer = NULL;
128 ret->committer_email = NULL; 132 ret->committer_email = NULL;
129 ret->subject = NULL; 133 ret->subject = NULL;
130 ret->msg = NULL; 134 ret->msg = NULL;
131 ret->msg_encoding = NULL; 135 ret->msg_encoding = NULL;
132 136
133 if (p == NULL) 137 if (p == NULL)
134 return ret; 138 return ret;
135 139
136 if (strncmp(p, "tree ", 5)) 140 if (strncmp(p, "tree ", 5))
137 die("Bad commit: %s", sha1_to_hex(commit->object.sha1)); 141 die("Bad commit: %s", sha1_to_hex(commit->object.sha1));
138 else 142 else
139 p += 46; // "tree " + hex[40] + "\n" 143 p += 46; // "tree " + hex[40] + "\n"
140 144
141 while (!strncmp(p, "parent ", 7)) 145 while (!strncmp(p, "parent ", 7))
142 p += 48; // "parent " + hex[40] + "\n" 146 p += 48; // "parent " + hex[40] + "\n"
143 147
144 if (p && !strncmp(p, "author ", 7)) { 148 if (p && !strncmp(p, "author ", 7)) {
145 p = parse_user(p + 7, &ret->author, &ret->author_email, 149 p = parse_user(p + 7, &ret->author, &ret->author_email,
146 &ret->author_date); 150 &ret->author_date);
147 } 151 }
148 152
149 if (p && !strncmp(p, "committer ", 9)) { 153 if (p && !strncmp(p, "committer ", 9)) {
150 p = parse_user(p + 9, &ret->committer, &ret->committer_email, 154 p = parse_user(p + 9, &ret->committer, &ret->committer_email,
151 &ret->committer_date); 155 &ret->committer_date);
152 } 156 }
153 157
154 if (p && !strncmp(p, "encoding ", 9)) { 158 if (p && !strncmp(p, "encoding ", 9)) {
155 p += 9; 159 p += 9;
156 t = strchr(p, '\n'); 160 t = strchr(p, '\n');
157 if (t) { 161 if (t) {
158 ret->msg_encoding = substr(p, t + 1); 162 ret->msg_encoding = substr(p, t + 1);
159 p = t + 1; 163 p = t + 1;
160 } 164 }
161 } 165 }
162 166
167 /* if no special encoding is found, assume UTF-8 */
168 if(!ret->msg_encoding)
169 ret->msg_encoding = xstrdup("UTF-8");
170
163 // skip unknown header fields 171 // skip unknown header fields
164 while (p && *p && (*p != '\n')) { 172 while (p && *p && (*p != '\n')) {
165 p = strchr(p, '\n'); 173 p = strchr(p, '\n');
166 if (p) 174 if (p)
167 p++; 175 p++;
168 } 176 }
169 177
170 // skip empty lines between headers and message 178 // skip empty lines between headers and message
171 while (p && *p == '\n') 179 while (p && *p == '\n')
172 p++; 180 p++;
173 181
174 if (!p) 182 if (!p)
175 return ret; 183 return ret;
176 184
177 t = strchr(p, '\n'); 185 t = strchr(p, '\n');
178 if (t) { 186 if (t) {
179 ret->subject = substr(p, t); 187 ret->subject = substr(p, t);
180 p = t + 1; 188 p = t + 1;
181 189
182 while (p && *p == '\n') { 190 while (p && *p == '\n') {
183 p = strchr(p, '\n'); 191 p = strchr(p, '\n');
184 if (p) 192 if (p)
185 p++; 193 p++;
186 } 194 }
187 if (p) 195 if (p)
188 ret->msg = xstrdup(p); 196 ret->msg = xstrdup(p);
189 } else 197 } else
190 ret->subject = xstrdup(p); 198 ret->subject = xstrdup(p);
191 199
192 if (ret->msg_encoding) { 200 reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
193 reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding); 201 reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
194 reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding); 202 reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING);
195 reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding); 203 reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING);
196 reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding); 204 reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING);
197 reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding); 205 reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING);
198 reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
199 }
200 206
201 return ret; 207 return ret;
202} 208}
203 209
204 210
205struct taginfo *cgit_parse_tag(struct tag *tag) 211struct taginfo *cgit_parse_tag(struct tag *tag)
206{ 212{
207 void *data; 213 void *data;
208 enum object_type type; 214 enum object_type type;
209 unsigned long size; 215 unsigned long size;
210 char *p; 216 char *p;
211 struct taginfo *ret; 217 struct taginfo *ret;
212 218
213 data = read_sha1_file(tag->object.sha1, &type, &size); 219 data = read_sha1_file(tag->object.sha1, &type, &size);
214 if (!data || type != OBJ_TAG) { 220 if (!data || type != OBJ_TAG) {
215 free(data); 221 free(data);
216 return 0; 222 return 0;
217 } 223 }
218 224
219 ret = xmalloc(sizeof(*ret)); 225 ret = xmalloc(sizeof(*ret));
220 ret->tagger = NULL; 226 ret->tagger = NULL;
221 ret->tagger_email = NULL; 227 ret->tagger_email = NULL;
222 ret->tagger_date = 0; 228 ret->tagger_date = 0;
223 ret->msg = NULL; 229 ret->msg = NULL;
224 230
225 p = data; 231 p = data;
226 232
227 while (p && *p) { 233 while (p && *p) {
228 if (*p == '\n') 234 if (*p == '\n')
229 break; 235 break;
230 236
231 if (!strncmp(p, "tagger ", 7)) { 237 if (!strncmp(p, "tagger ", 7)) {
232 p = parse_user(p + 7, &ret->tagger, &ret->tagger_email, 238 p = parse_user(p + 7, &ret->tagger, &ret->tagger_email,
233 &ret->tagger_date); 239 &ret->tagger_date);
234 } else { 240 } else {
235 p = strchr(p, '\n'); 241 p = strchr(p, '\n');
236 if (p) 242 if (p)
237 p++; 243 p++;
238 } 244 }
239 } 245 }
240 246
241 // skip empty lines between headers and message 247 // skip empty lines between headers and message
242 while (p && *p == '\n') 248 while (p && *p == '\n')
243 p++; 249 p++;
244 250
245 if (p && *p) 251 if (p && *p)
246 ret->msg = xstrdup(p); 252 ret->msg = xstrdup(p);
247 free(data); 253 free(data);