upnp_xml.c

Go to the documentation of this file.
00001 
00012 #include "includes.h"
00013 
00014 #include "common.h"
00015 #include "base64.h"
00016 #include "http.h"
00017 #include "upnp_xml.h"
00018 
00019 
00020 /*
00021  * XML parsing and formatting
00022  *
00023  * XML is a markup language based on unicode; usually (and in our case,
00024  * always!) based on utf-8. utf-8 uses a variable number of bytes per
00025  * character. utf-8 has the advantage that all non-ASCII unicode characters are
00026  * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
00027  * characters are single ascii bytes, thus we can use typical text processing.
00028  *
00029  * (One other interesting thing about utf-8 is that it is possible to look at
00030  * any random byte and determine whether it is the first byte of a character
00031  * or a continuation byte).
00032  *
00033  * The base syntax of XML uses a few ASCII punctuation characters; any
00034  * characters that would appear in the payload data are rewritten using
00035  * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<).
00036  * Five such escapes total (more can be defined but that does not apply to our
00037  * case). Thus we can safely parse for angle brackets etc.
00038  *
00039  * XML describes tree structures of tagged data, with each element beginning
00040  * with an opening tag <label> and ending with a closing tag </label> with
00041  * matching label. (There is also a self-closing tag <label/> which is supposed
00042  * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
00043  * to see it for our purpose).
00044  *
00045  * Actually the opening tags are a little more complicated because they can
00046  * contain "attributes" after the label (delimited by ascii space or tab chars)
00047  * of the form attribute_label="value" or attribute_label='value'; as it turns
00048  * out we do not have to read any of these attributes, just ignore them.
00049  *
00050  * Labels are any sequence of chars other than space, tab, right angle bracket
00051  * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
00052  * As it turns out, we can ignore the namespaces, in fact we can ignore the
00053  * entire tree hierarchy, because the plain labels we are looking for will be
00054  * unique (not in general, but for this application). We do however have to be
00055  * careful to skip over the namespaces.
00056  *
00057  * In generating XML we have to be more careful, but that is easy because
00058  * everything we do is pretty canned. The only real care to take is to escape
00059  * any special chars in our payload.
00060  */
00061 
/**
 * xml_next_tag - Locate the next XML tag in a NUL-terminated string
 * @in: Text to scan
 * @out: OUT: first character after the opening '<' (a '/' for a closing tag)
 * @out_tagname: OUT: start of the tag name, after an optional leading '/' and
 *	an optional "namespace:" prefix
 * @end: OUT: first character after the closing '>'
 * Returns: 0 if a complete tag was found, 1 otherwise
 *
 * Only a prefix made of alphanumeric characters and '-' that is immediately
 * followed by ':' is treated as a namespace and skipped.
 *
 * On failure because no '<' was found, no output is written. On failure
 * because the closing '>' is missing, @out and @out_tagname have already been
 * written but @end has not; callers must not use the outputs unless 0 was
 * returned.
 */
static int xml_next_tag(const char *in, const char **out,
                        const char **out_tagname, const char **end)
{
        while (*in && *in != '<')
                in++;
        if (*in != '<')
                return 1;
        *out = ++in;
        if (*in == '/')
                in++;
        *out_tagname = in; /* maybe; replaced below if this is a namespace */
        /*
         * The input is UTF-8, so bytes with the high bit set may appear here.
         * <ctype.h> functions require a value representable as unsigned char
         * (or EOF); passing a negative plain char is undefined behavior.
         */
        while (isalnum((unsigned char) *in) || *in == '-')
                in++;
        if (*in == ':')
                *out_tagname = ++in;
        while (*in && *in != '>')
                in++;
        if (*in != '>')
                return 1;
        *end = ++in;
        return 0;
}
00102 
00103 
00104 /* xml_data_encode -- format data for xml file, escaping special characters.
00105  *
00106  * Note that we assume we are using utf8 both as input and as output!
00107  * In utf8, characters may be classed as follows:
00108  *     0xxxxxxx(2) -- 1 byte ascii char
00109  *     11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
00110  *         110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
00111  *         1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
00112  *         11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
00113  *      10xxxxxx(2) -- extension byte (6 payload bits per byte)
00114  *      Some values implied by the above are however illegal because they
00115  *      do not represent unicode chars or are not the shortest encoding.
00116  * Actually, we can almost entirely ignore the above and just do
00117  * text processing same as for ascii text.
00118  *
00119  * XML is written with arbitrary unicode characters, except that five
00120  * characters have special meaning and so must be escaped where they
00121  * appear in payload data... which we do here.
00122  */
00123 void xml_data_encode(struct wpabuf *buf, const char *data, int len)
00124 {
00125         int i;
00126         for (i = 0; i < len; i++) {
00127                 u8 c = ((u8 *) data)[i];
00128                 if (c == '<') {
00129                         wpabuf_put_str(buf, "&lt;");
00130                         continue;
00131                 }
00132                 if (c == '>') {
00133                         wpabuf_put_str(buf, "&gt;");
00134                         continue;
00135                 }
00136                 if (c == '&') {
00137                         wpabuf_put_str(buf, "&amp;");
00138                         continue;
00139                 }
00140                 if (c == '\'') {
00141                         wpabuf_put_str(buf, "&apos;");
00142                         continue;
00143                 }
00144                 if (c == '"') {
00145                         wpabuf_put_str(buf, "&quot;");
00146                         continue;
00147                 }
00148                 /*
00149                  * We could try to represent control characters using the
00150                  * sequence: &#x; where x is replaced by a hex numeral, but not
00151                  * clear why we would do this.
00152                  */
00153                 wpabuf_put_u8(buf, c);
00154         }
00155 }
00156 
00157 
/* xml_add_tagged_data -- append "<tag>data</tag>" plus a newline to buf.
 *
 * tag is written verbatim, so it must not contain any special chars.
 * data is a NUL-terminated string; special chars in it are escaped by
 * xml_data_encode().
 */
void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
{
        const int payload_len = os_strlen(data);

        wpabuf_printf(buf, "<%s>", tag);
        xml_data_encode(buf, data, payload_len);
        wpabuf_printf(buf, "</%s>\n", tag);
}
00169 
00170 
/* A POST body looks something like (per upnp spec):
 * <?xml version="1.0"?>
 * <s:Envelope
 *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 *   <s:Body>
 *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 *       <argumentName>in arg value</argumentName>
 *       other in args and their values go here, if any
 *     </u:actionName>
 *   </s:Body>
 * </s:Envelope>
 *
 * where :
 *      s: might be some other namespace name followed by colon
 *      u: might be some other namespace name followed by colon
 *      actionName will be replaced according to action requested
 *      schema following actionName will be WFA scheme instead
 *      argumentName will be actual argument name
 *      (in arg value) will be actual argument value
 */

/**
 * xml_get_first_item - Get the text following the first matching opening tag
 * @doc: NUL-terminated XML document
 * @item: Tag name to look for (without namespace); compared case-insensitively
 * Returns: Newly allocated, NUL-terminated copy of the text between the end of
 * the opening tag and the next '<' (or the end of @doc), or %NULL if no
 * matching opening tag exists or the allocation fails
 *
 * The returned text is copied verbatim; XML entities in it are not decoded.
 * The buffer comes from os_zalloc(), so the caller releases it with os_free().
 */
char * xml_get_first_item(const char *doc, const char *item)
{
        const char *match = item;
        size_t match_len = os_strlen(item);
        const char *tag, *tagname, *end;
        char *value;

        /*
         * This is crude: ignore any possible tag name conflicts and go right
         * to the first tag of this name. This should be ok for the limited
         * domain of UPnP messages.
         */
        for (;;) {
                if (xml_next_tag(doc, &tag, &tagname, &end))
                        return NULL;
                doc = end;
                /*
                 * Accept only opening tags whose name is exactly item, i.e.,
                 * the name is followed by '>' or by a non-printing character
                 * (whitespace before attributes). The cast is required:
                 * <ctype.h> functions have undefined behavior for negative
                 * values, and utf-8 bytes are negative where char is signed.
                 */
                if (!os_strncasecmp(tagname, match, match_len) &&
                    *tag != '/' &&
                    (tagname[match_len] == '>' ||
                     !isgraph((unsigned char) tagname[match_len]))) {
                        break;
                }
        }

        /* The payload runs from the end of the tag up to the next tag */
        end = doc;
        while (*end && *end != '<')
                end++;
        /* One extra zeroed byte provides the NUL terminator */
        value = os_zalloc(1 + (end - doc));
        if (value == NULL)
                return NULL;
        os_memcpy(value, doc, end - doc);
        return value;
}
00224 
00225 
00226 struct wpabuf * xml_get_base64_item(const char *data, const char *name,
00227                                     enum http_reply_code *ret)
00228 {
00229         char *msg;
00230         struct wpabuf *buf;
00231         unsigned char *decoded;
00232         size_t len;
00233 
00234         msg = xml_get_first_item(data, name);
00235         if (msg == NULL) {
00236                 *ret = UPNP_ARG_VALUE_INVALID;
00237                 return NULL;
00238         }
00239 
00240         decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len);
00241         os_free(msg);
00242         if (decoded == NULL) {
00243                 *ret = UPNP_OUT_OF_MEMORY;
00244                 return NULL;
00245         }
00246 
00247         buf = wpabuf_alloc_ext_data(decoded, len);
00248         if (buf == NULL) {
00249                 os_free(decoded);
00250                 *ret = UPNP_OUT_OF_MEMORY;
00251                 return NULL;
00252         }
00253         return buf;
00254 }
00255