aboutsummaryrefslogtreecommitdiffstats
path: root/src/wps/upnp_xml.c
blob: ca0925cb5833092aa70adf22ebbe1eb07abc5eb2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/*
 * UPnP XML helper routines
 * Copyright (c) 2000-2003 Intel Corporation
 * Copyright (c) 2006-2007 Sony Corporation
 * Copyright (c) 2008-2009 Atheros Communications
 * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
 *
 * See wps_upnp.c for more details on licensing and code history.
 */

#include "includes.h"

#include "common.h"
#include "base64.h"
#include "http.h"
#include "upnp_xml.h"


/*
 * XML parsing and formatting
 *
 * XML is a markup language based on unicode; usually (and in our case,
 * always!) based on utf-8. utf-8 uses a variable number of bytes per
 * character. utf-8 has the advantage that all non-ASCII unicode characters are
 * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
 * characters are single ascii bytes, thus we can use typical text processing.
 *
 * (One other interesting thing about utf-8 is that it is possible to look at
 * any random byte and determine whether it is the first byte of a character
 * or a continuation byte).
 *
 * The base syntax of XML uses a few ASCII punctuation characters; any
 * characters that would appear in the payload data are rewritten using
 * sequences, e.g., &amp; for ampersand(&) and &lt; for left angle bracket (<).
 * Five such escapes total (more can be defined but that does not apply to our
 * case). Thus we can safely parse for angle brackets etc.
 *
 * XML describes tree structures of tagged data, with each element beginning
 * with an opening tag <label> and ending with a closing tag </label> with
 * matching label. (There is also a self-closing tag <label/> which is supposed
 * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
 * to see it for our purpose).
 *
 * Actually the opening tags are a little more complicated because they can
 * contain "attributes" after the label (delimited by ascii space or tab chars)
 * of the form attribute_label="value" or attribute_label='value'; as it turns
 * out we do not have to read any of these attributes, just ignore them.
 *
 * Labels are any sequence of chars other than space, tab, right angle bracket
 * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
 * As it turns out, we can ignore the namespaces, in fact we can ignore the
 * entire tree hierarchy, because the plain labels we are looking for will be
 * unique (not in general, but for this application). We do however have to be
 * careful to skip over the namespaces.
 *
 * In generating XML we have to be more careful, but that is easy because
 * everything we do is pretty canned. The only real care to take is to escape
 * any special chars in our payload.
 */

/**
 * xml_next_tag - Advance to next tag
 * @in: Input (NUL-terminated text to scan)
 * @out: OUT: start of tag just after '<'
 * @out_tagname: OUT: start of name of tag, skipping namespace
 * @end: OUT: one after tag, i.e., just past the closing '>'
 * Returns: 0 on success, 1 on failure
 *
 * A tag has form:
 *     <left angle bracket><...><right angle bracket>
 * Within the angle brackets, there is an optional leading forward slash (which
 * makes the tag an ending tag), then an optional leading label (followed by
 * colon) and then the tag name itself.
 *
 * Note that angle brackets present in the original data must have been encoded
 * as &lt; and &gt; so they will not trouble us.
 *
 * Failure means either that no '<' was found before the end of the input (no
 * output is written in that case) or that a '<' was found without a matching
 * '>' (in that case @out and @out_tagname have already been written, but @end
 * has not).
 */
int xml_next_tag(const char *in, const char **out,
		 const char **out_tagname, const char **end)
{
	/* Skip payload text up to the next opening angle bracket */
	while (*in && *in != '<')
		in++;
	if (*in != '<')
		return 1;
	*out = ++in;

	/* An ending tag has a slash before the (optionally prefixed) name */
	if (*in == '/')
		in++;

	/*
	 * Assume there is no namespace prefix; this is corrected below if the
	 * leading label turns out to be followed by a colon.
	 */
	*out_tagname = in;

	/*
	 * The input may contain UTF-8 bytes with the high bit set. Such bytes
	 * are negative when char is signed, and passing a negative value
	 * (other than EOF) to a <ctype.h> function is undefined behavior, so
	 * convert to unsigned char first.
	 */
	while (isalnum((unsigned char) *in) || *in == '-')
		in++;
	if (*in == ':')
		*out_tagname = ++in;

	/* Ignore the rest of the tag (name remainder and any attributes) */
	while (*in && *in != '>')
		in++;
	if (*in != '>')
		return 1;
	*end = ++in;
	return 0;
}


/* xml_data_encode -- append payload data to buf, escaping XML special chars.
 *
 * buf:  output buffer that receives the encoded bytes
 * data: input bytes (treated as utf8 text; need not be NUL-terminated)
 * len:  number of bytes of data to process; nothing is written if len <= 0
 *
 * Both input and output are assumed to be utf8. In utf8, every byte of a
 * multi-byte (non-ASCII) character has its high bit set:
 *     0xxxxxxx(2) -- 1 byte ascii char
 *     11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
 *         110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
 *         1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
 *         11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
 *     10xxxxxx(2) -- extension byte (6 payload bits per byte)
 * so none of those bytes can be mistaken for an ASCII special character and
 * the data can be processed one byte at a time, same as for ascii text.
 *
 * The five characters that have special meaning in XML are replaced by their
 * predefined entities:
 *     <  becomes &lt;      >  becomes &gt;      &  becomes &amp;
 *     '  becomes &apos;    "  becomes &quot;
 * Every other byte is copied through unchanged. Control characters are not
 * rewritten as numeric character references.
 */
void xml_data_encode(struct wpabuf *buf, const char *data, int len)
{
	const u8 *bytes = (const u8 *) data;
	int pos = 0;

	while (pos < len) {
		const char *entity;
		u8 ch = bytes[pos++];

		/* Pick the replacement entity, if this byte needs one */
		switch (ch) {
		case '<':
			entity = "&lt;";
			break;
		case '>':
			entity = "&gt;";
			break;
		case '&':
			entity = "&amp;";
			break;
		case '\'':
			entity = "&apos;";
			break;
		case '"':
			entity = "&quot;";
			break;
		default:
			entity = NULL;
			break;
		}

		if (entity)
			wpabuf_put_str(buf, entity);
		else
			wpabuf_put_u8(buf, ch);
	}
}


/* xml_add_tagged_data -- append "<tag>data</tag>" plus a newline to buf.
 *
 * tag is written verbatim in both the opening and the closing tag, so it must
 * not contain any special chars.
 * data is a NUL-terminated string; any special chars in it are escaped by
 * xml_data_encode().
 */
void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
{
	int payload_len = os_strlen(data);

	wpabuf_printf(buf, "<%s>", tag);
	xml_data_encode(buf, data, payload_len);
	wpabuf_printf(buf, "</%s>\n", tag);
}


/* A POST body looks something like (per upnp spec):
 * <?xml version="1.0"?>
 * <s:Envelope
 *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 *   <s:Body>
 *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 *       <argumentName>in arg value</argumentName>
 *       other in args and their values go here, if any
 *     </u:actionName>
 *   </s:Body>
 * </s:Envelope>
 *
 * where :
 *      s: might be some other namespace name followed by colon
 *      u: might be some other namespace name followed by colon
 *      actionName will be replaced according to action requested
 *      schema following actionName will be WFA scheme instead
 *      argumentName will be actual argument name
 *      (in arg value) will be actual argument value
 */

/**
 * xml_get_first_item - Get the text following the first tag with a given name
 * @doc: NUL-terminated XML document to search
 * @item: Tag name to look for (without namespace prefix)
 * Returns: Newly allocated, NUL-terminated copy of the text between the end
 * of the first matching opening tag and the next '<' (or the end of @doc), or
 * %NULL if no matching tag was found or memory allocation failed. The caller
 * is responsible for freeing the returned buffer with os_free().
 *
 * The tag name is compared case-insensitively and any namespace prefix on the
 * tag is ignored. Ending tags (those starting with '/') are never matched. The
 * returned text is copied as-is; XML escape sequences in it are not decoded.
 */
char * xml_get_first_item(const char *doc, const char *item)
{
	size_t item_len = os_strlen(item);
	const char *tag, *tagname, *end;
	size_t value_len;
	char *value;

	/*
	 * This is crude: ignore any possible tag name conflicts and go right
	 * to the first tag of this name. This should be ok for the limited
	 * domain of UPnP messages.
	 */
	for (;;) {
		unsigned char next;

		if (xml_next_tag(doc, &tag, &tagname, &end))
			return NULL;
		doc = end;

		if (*tag == '/')
			continue; /* ending tag */
		if (os_strncasecmp(tagname, item, item_len) != 0)
			continue; /* different name */

		/*
		 * The name must end here, i.e., be followed by '>' or by a
		 * delimiter before attributes, so that a longer tag name that
		 * merely starts with item is not accepted. The byte is
		 * converted to unsigned char because passing a negative char
		 * value to a <ctype.h> function is undefined behavior.
		 */
		next = (unsigned char) tagname[item_len];
		if (next == '>' || !isgraph(next))
			break;
	}

	/* The value runs from just after the tag up to the next tag (or EOS) */
	end = doc;
	while (*end && *end != '<')
		end++;
	value_len = end - doc;

	/* Zeroed allocation with one extra byte provides the terminating NUL */
	value = os_zalloc(1 + value_len);
	if (value == NULL)
		return NULL;
	os_memcpy(value, doc, value_len);
	return value;
}


/**
 * xml_get_base64_item - Extract and base64-decode the value of an XML item
 * @data: NUL-terminated XML document to search
 * @name: Tag name of the item whose value is to be decoded
 * @ret: OUT: error code; written only when %NULL is returned
 * Returns: Buffer wrapping the decoded bytes, or %NULL on failure
 *
 * On failure, *@ret is set to UPNP_ARG_VALUE_INVALID if the item could not be
 * extracted with xml_get_first_item(), or to UPNP_OUT_OF_MEMORY if either
 * base64 decoding or allocation of the returned buffer failed. *@ret is left
 * untouched on success.
 *
 * NOTE(review): the decoded data is freed here only when
 * wpabuf_alloc_ext_data() fails, so the returned wpabuf presumably takes
 * ownership of it on success - confirm against wpabuf.c.
 */
struct wpabuf * xml_get_base64_item(const char *data, const char *name,
				    enum http_reply_code *ret)
{
	size_t raw_len;
	unsigned char *raw;
	struct wpabuf *wrapped;
	char *text = xml_get_first_item(data, name);

	if (!text) {
		*ret = UPNP_ARG_VALUE_INVALID;
		return NULL;
	}

	/* The extracted text is only needed as input for the decoder */
	raw = base64_decode(text, os_strlen(text), &raw_len);
	os_free(text);

	if (raw) {
		wrapped = wpabuf_alloc_ext_data(raw, raw_len);
		if (wrapped)
			return wrapped;
		os_free(raw);
	}

	/* Both decode failure and wrapper allocation failure end up here */
	*ret = UPNP_OUT_OF_MEMORY;
	return NULL;
}