aboutsummaryrefslogtreecommitdiffstats
path: root/t/helper/test-xml-encode.c
diff options
context:
space:
mode:
authorJohannes Schindelin <johannes.schindelin@gmx.de>2019-01-29 06:19:27 -0800
committerJunio C Hamano <gitster@pobox.com>2019-01-29 09:26:46 -0800
commit222319081515f9f6a4c699bd3a406f181a6ddc3b (patch)
treecda71fef70560debe6b0d6978796e78fd69eb4b0 /t/helper/test-xml-encode.c
parent4419de916493d8a4292e9b78be6c18aa3641d353 (diff)
downloadgit-222319081515f9f6a4c699bd3a406f181a6ddc3b.tar.gz
tests: optionally write results as JUnit-style .xml
This will come in handy when publishing the results of Git's test suite during an automated Azure DevOps run. Note: we need to make extra sure that invalid UTF-8 encoding is turned into valid UTF-8 (using the Replacement Character, \uFFFD) because t9902's trace contains such invalid byte sequences, and the task in the Azure Pipeline that uploads the test results would refuse to do anything if it was asked to parse an .xml file with invalid UTF-8 in it. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 't/helper/test-xml-encode.c')
-rw-r--r--t/helper/test-xml-encode.c80
1 files changed, 80 insertions, 0 deletions
diff --git a/t/helper/test-xml-encode.c b/t/helper/test-xml-encode.c
new file mode 100644
index 0000000000..a648bbd961
--- /dev/null
+++ b/t/helper/test-xml-encode.c
@@ -0,0 +1,80 @@
+#include "test-tool.h"
+
+static const char *utf8_replace_character = "&#xfffd;";
+
+/*
+ * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
+ * in an XML file.
+ */
+int cmd__xml_encode(int argc, const char **argv)
+{
+ unsigned char buf[1024], tmp[4], *tmp2 = NULL;
+ ssize_t cur = 0, len = 1, remaining = 0;
+ unsigned char ch;
+
+ for (;;) {
+ if (++cur == len) {
+ len = xread(0, buf, sizeof(buf));
+ if (!len)
+ return 0;
+ if (len < 0)
+ die_errno("Could not read <stdin>");
+ cur = 0;
+ }
+ ch = buf[cur];
+
+ if (tmp2) {
+ if ((ch & 0xc0) != 0x80) {
+ fputs(utf8_replace_character, stdout);
+ tmp2 = NULL;
+ cur--;
+ continue;
+ }
+ *tmp2 = ch;
+ tmp2++;
+ if (--remaining == 0) {
+ fwrite(tmp, tmp2 - tmp, 1, stdout);
+ tmp2 = NULL;
+ }
+ continue;
+ }
+
+ if (!(ch & 0x80)) {
+ /* 0xxxxxxx */
+ if (ch == '&')
+ fputs("&amp;", stdout);
+ else if (ch == '\'')
+ fputs("&apos;", stdout);
+ else if (ch == '"')
+ fputs("&quot;", stdout);
+ else if (ch == '<')
+ fputs("&lt;", stdout);
+ else if (ch == '>')
+ fputs("&gt;", stdout);
+ else if (ch >= 0x20)
+ fputc(ch, stdout);
+ else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
+ fprintf(stdout, "&#x%02x;", ch);
+ else
+ fputs(utf8_replace_character, stdout);
+ } else if ((ch & 0xe0) == 0xc0) {
+ /* 110XXXXx 10xxxxxx */
+ tmp[0] = ch;
+ remaining = 1;
+ tmp2 = tmp + 1;
+ } else if ((ch & 0xf0) == 0xe0) {
+ /* 1110XXXX 10Xxxxxx 10xxxxxx */
+ tmp[0] = ch;
+ remaining = 2;
+ tmp2 = tmp + 1;
+ } else if ((ch & 0xf8) == 0xf0) {
+ /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
+ tmp[0] = ch;
+ remaining = 3;
+ tmp2 = tmp + 1;
+ } else
+ fputs(utf8_replace_character, stdout);
+ }
+
+ return 0;
+}