parsing.c: be prepared for unexpected content in commit/tag objects

When parsing commits and tags, cgit made too many assumptions about the formatting of said objects. This patch tries to make the code better prepared to handle 'malformed' objects.

Signed-off-by: Lars Hjemli <hjemli@gmail.com>
author: Lars Hjemli <hjemli@gmail.com> 2008-09-14 07:45:37 (UTC)
committer: Lars Hjemli <hjemli@gmail.com> 2008-09-15 20:33:11 (UTC)
commit: a8305a9543969206aa7cec03948c5a19950eedb9 (patch) (side-by-side diff)
tree: 8e2cf67ec72bc296b76a4fcb6db5ec8250f0a502
parent: b28765135dd6f52635977454eaf95d0e6c7e7271 (diff)
download: cgit-a8305a9543969206aa7cec03948c5a19950eedb9.zip
cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.gz
cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.bz2
2 files changed, 97 insertions, 64 deletions
diff --git a/cgit.h b/cgit.h
index 1615616..08fd95a 100644
--- a/cgit.h
+++ b/cgit.h
@@ -64,49 +64,49 @@ struct cgit_repo {
 };
 
 struct cgit_repolist {
 	int length;
 	int count;
 	struct cgit_repo *repos;
 };
 
 struct commitinfo {
 	struct commit *commit;
 	char *author;
 	char *author_email;
 	unsigned long author_date;
 	char *committer;
 	char *committer_email;
 	unsigned long committer_date;
 	char *subject;
 	char *msg;
 	char *msg_encoding;
 };
 
 struct taginfo {
 	char *tagger;
 	char *tagger_email;
-	int tagger_date;
+	unsigned long tagger_date;
 	char *msg;
 };
 
 struct refinfo {
 	const char *refname;
 	struct object *object;
 	union {
 		struct taginfo *tag;
 		struct commitinfo *commit;
 	};
 };
 
 struct reflist {
 	struct refinfo **refs;
 	int alloc;
 	int count;
 };
 
 struct cgit_query {
 	int has_symref;
 	int has_sha1;
 	char *raw;
 	char *repo;
 	char *page;
diff --git a/parsing.c b/parsing.c
index 66e8b3d..c8f3048 100644
--- a/parsing.c
+++ b/parsing.c
@@ -41,168 +41,201 @@ void cgit_parse_url(const char *url)
 
 		ctx.qry.repo = ctx.repo->url;
 		p = strchr(cmd + 1, '/');
 		if (p) {
 			p[0] = '\0';
 			if (p[1])
 				ctx.qry.path = trim_end(p + 1, '/');
 		}
 		if (cmd[1])
 			ctx.qry.page = xstrdup(cmd + 1);
 		return;
 	}
 }
 
 char *substr(const char *head, const char *tail)
 {
 	char *buf;
 
 	buf = xmalloc(tail - head + 1);
 	strncpy(buf, head, tail - head);
 	buf[tail - head] = '\0';
 	return buf;
 }
 
+char *parse_user(char *t, char **name, char **email, unsigned long *date)
+{
+	char *p = t;
+	int mode = 1;
+
+	while (p && *p) {
+		if (mode == 1 && *p == '<') {
+			*name = substr(t, p - 1);
+			t = p;
+			mode++;
+		} else if (mode == 1 && *p == '\n') {
+			*name = substr(t, p);
+			p++;
+			break;
+		} else if (mode == 2 && *p == '>') {
+			*email = substr(t, p + 1);
+			t = p;
+			mode++;
+		} else if (mode == 2 && *p == '\n') {
+			*email = substr(t, p);
+			p++;
+			break;
+		} else if (mode == 3 && isdigit(*p)) {
+			*date = atol(p);
+			mode++;
+		} else if (*p == '\n') {
+			p++;
+			break;
+		}
+		p++;
+	}
+	return p;
+}
+
+const char *reencode(char **txt, const char *src_enc, const char *dst_enc)
+{
+	char *tmp;
+
+	if (!txt || !*txt || !src_enc || !dst_enc)
+		return *txt;
+
+	tmp = reencode_string(*txt, src_enc, dst_enc);
+	if (tmp) {
+		free(*txt);
+		*txt = tmp;
+	}
+	return *txt;
+}
+
 struct commitinfo *cgit_parse_commit(struct commit *commit)
 {
 	struct commitinfo *ret;
 	char *p = commit->buffer, *t = commit->buffer;
 
 	ret = xmalloc(sizeof(*ret));
 	ret->commit = commit;
 	ret->author = NULL;
 	ret->author_email = NULL;
 	ret->committer = NULL;
 	ret->committer_email = NULL;
 	ret->subject = NULL;
 	ret->msg = NULL;
 	ret->msg_encoding = NULL;
 
 	if (p == NULL)
 		return ret;
 
 	if (strncmp(p, "tree ", 5))
 		die("Bad commit: %s", sha1_to_hex(commit->object.sha1));
 	else
 		p += 46; // "tree " + hex[40] + "\n"
 
 	while (!strncmp(p, "parent ", 7))
 		p += 48; // "parent " + hex[40] + "\n"
 
-	if (!strncmp(p, "author ", 7)) {
-		p += 7;
-		t = strchr(p, '<') - 1;
-		ret->author = substr(p, t);
-		p = t;
-		t = strchr(t, '>') + 1;
-		ret->author_email = substr(p, t);
-		ret->author_date = atol(t+1);
-		p = strchr(t, '\n') + 1;
+	if (p && !strncmp(p, "author ", 7)) {
+		p = parse_user(p + 7, &ret->author, &ret->author_email,
+			&ret->author_date);
 	}
 
-	if (!strncmp(p, "committer ", 9)) {
-		p += 9;
-		t = strchr(p, '<') - 1;
-		ret->committer = substr(p, t);
-		p = t;
-		t = strchr(t, '>') + 1;
-		ret->committer_email = substr(p, t);
-		ret->committer_date = atol(t+1);
-		p = strchr(t, '\n') + 1;
+	if (p && !strncmp(p, "committer ", 9)) {
+		p = parse_user(p + 9, &ret->committer, &ret->committer_email,
+			&ret->committer_date);
 	}
 
-	if (!strncmp(p, "encoding ", 9)) {
+	if (p && !strncmp(p, "encoding ", 9)) {
 		p += 9;
-		t = strchr(p, '\n') + 1;
-		ret->msg_encoding = substr(p, t);
-		p = t;
-	} else
-		ret->msg_encoding = xstrdup(PAGE_ENCODING);
+		t = strchr(p, '\n');
+		if (t) {
+			ret->msg_encoding = substr(p, t + 1);
+			p = t + 1;
+		}
+	}
 
-	while (*p && (*p != '\n'))
-		p = strchr(p, '\n') + 1; // skip unknown header fields
+	// skip unknown header fields
+	while (p && *p && (*p != '\n')) {
+		p = strchr(p, '\n');
+		if (p)
+			p++;
+	}
 
-	while (*p == '\n')
-		p = strchr(p, '\n') + 1;
+	// skip empty lines between headers and message
+	while (p && *p == '\n')
+		p++;
+
+	if (!p)
+		return ret;
 
 	t = strchr(p, '\n');
 	if (t) {
-		if (*t == '\0')
-			ret->subject = "** empty **";
-		else
-			ret->subject = substr(p, t);
+		ret->subject = substr(p, t);
 		p = t + 1;
 
-		while (*p == '\n')
-			p = strchr(p, '\n') + 1;
-		ret->msg = xstrdup(p);
-	} else
-		ret->subject = substr(p, p+strlen(p));
-
-	if(strcmp(ret->msg_encoding, PAGE_ENCODING)) {
-		t = reencode_string(ret->subject, PAGE_ENCODING,
-				    ret->msg_encoding);
-		if(t) {
-			free(ret->subject);
-			ret->subject = t;
+		while (p && *p == '\n') {
+			p = strchr(p, '\n');
+			if (p)
+				p++;
 		}
+		if (p)
+			ret->msg = xstrdup(p);
+	} else
+		ret->subject = xstrdup(p);
 
-		t = reencode_string(ret->msg, PAGE_ENCODING,
-				    ret->msg_encoding);
-		if(t) {
-			free(ret->msg);
-			ret->msg = t;
-		}
+	if (ret->msg_encoding) {
+		reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding);
+		reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
 	}
 
 	return ret;
 }
 
 
 struct taginfo *cgit_parse_tag(struct tag *tag)
 {
 	void *data;
 	enum object_type type;
 	unsigned long size;
-	char *p, *t;
+	char *p;
 	struct taginfo *ret;
 
 	data = read_sha1_file(tag->object.sha1, &type, &size);
 	if (!data || type != OBJ_TAG) {
 		free(data);
 		return 0;
 	}
 
 	ret = xmalloc(sizeof(*ret));
 	ret->tagger = NULL;
 	ret->tagger_email = NULL;
 	ret->tagger_date = 0;
 	ret->msg = NULL;
 
 	p = data;
 
 	while (p && *p) {
 		if (*p == '\n')
 			break;
 
 		if (!strncmp(p, "tagger ", 7)) {
-			p += 7;
-			t = strchr(p, '<') - 1;
-			ret->tagger = substr(p, t);
-			p = t;
-			t = strchr(t, '>') + 1;
-			ret->tagger_email = substr(p, t);
-			ret->tagger_date = atol(t+1);
+			p = parse_user(p + 7, &ret->tagger, &ret->tagger_email,
+				&ret->tagger_date);
+		} else {
+			p = strchr(p, '\n');
+			if (p)
+				p++;
 		}
-		p = strchr(p, '\n') + 1;
 	}
 
-	while (p && *p && (*p != '\n'))
-		p = strchr(p, '\n') + 1; // skip unknown tag fields
+	// skip empty lines between headers and message
+	while (p && *p == '\n')
+		p++;
 
-	while (p && (*p == '\n'))
-		p = strchr(p, '\n') + 1;
 	if (p && *p)
 		ret->msg = xstrdup(p);
 	free(data);
 	return ret;
 }
author	Lars Hjemli <hjemli@gmail.com>	2008-09-14 07:45:37 (UTC)
committer	Lars Hjemli <hjemli@gmail.com>	2008-09-15 20:33:11 (UTC)
commit	a8305a9543969206aa7cec03948c5a19950eedb9 (patch) (side-by-side diff)
tree	8e2cf67ec72bc296b76a4fcb6db5ec8250f0a502
parent	b28765135dd6f52635977454eaf95d0e6c7e7271 (diff)
download	cgit-a8305a9543969206aa7cec03948c5a19950eedb9.zip cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.gz cgit-a8305a9543969206aa7cec03948c5a19950eedb9.tar.bz2