00001 00012 #include "includes.h" 00013 00014 #include "common.h" 00015 #include "base64.h" 00016 #include "http.h" 00017 #include "upnp_xml.h" 00018 00019 00020 /* 00021 * XML parsing and formatting 00022 * 00023 * XML is a markup language based on unicode; usually (and in our case, 00024 * always!) based on utf-8. utf-8 uses a variable number of bytes per 00025 * character. utf-8 has the advantage that all non-ASCII unicode characters are 00026 * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII 00027 * characters are single ascii bytes, thus we can use typical text processing. 00028 * 00029 * (One other interesting thing about utf-8 is that it is possible to look at 00030 * any random byte and determine if it is the first byte of a character as 00031 * versus a continuation byte). 00032 * 00033 * The base syntax of XML uses a few ASCII punctionation characters; any 00034 * characters that would appear in the payload data are rewritten using 00035 * sequences, e.g., & for ampersand(&) and < for left angle bracket (<). 00036 * Five such escapes total (more can be defined but that does not apply to our 00037 * case). Thus we can safely parse for angle brackets etc. 00038 * 00039 * XML describes tree structures of tagged data, with each element beginning 00040 * with an opening tag <label> and ending with a closing tag </label> with 00041 * matching label. (There is also a self-closing tag <label/> which is supposed 00042 * to be equivalent to <label></label>, i.e., no payload, but we are unlikely 00043 * to see it for our purpose). 00044 * 00045 * Actually the opening tags are a little more complicated because they can 00046 * contain "attributes" after the label (delimited by ascii space or tab chars) 00047 * of the form attribute_label="value" or attribute_label='value'; as it turns 00048 * out we do not have to read any of these attributes, just ignore them. 00049 * 00050 * Labels are any sequence of chars other than space, tab, right angle bracket 00051 * (and ?), but may have an inner structure of <namespace><colon><plain_label>. 00052 * As it turns out, we can ignore the namespaces, in fact we can ignore the 00053 * entire tree hierarchy, because the plain labels we are looking for will be 00054 * unique (not in general, but for this application). We do however have to be 00055 * careful to skip over the namespaces. 00056 * 00057 * In generating XML we have to be more careful, but that is easy because 00058 * everything we do is pretty canned. The only real care to take is to escape 00059 * any special chars in our payload. 00060 */ 00061 00080 static int xml_next_tag(const char *in, const char **out, 00081 const char **out_tagname, const char **end) 00082 { 00083 while (*in && *in != '<') 00084 in++; 00085 if (*in != '<') 00086 return 1; 00087 *out = ++in; 00088 if (*in == '/') 00089 in++; 00090 *out_tagname = in; /* maybe */ 00091 while (isalnum(*in) || *in == '-') 00092 in++; 00093 if (*in == ':') 00094 *out_tagname = ++in; 00095 while (*in && *in != '>') 00096 in++; 00097 if (*in != '>') 00098 return 1; 00099 *end = ++in; 00100 return 0; 00101 } 00102 00103 00104 /* xml_data_encode -- format data for xml file, escaping special characters. 00105 * 00106 * Note that we assume we are using utf8 both as input and as output! 00107 * In utf8, characters may be classed as follows: 00108 * 0xxxxxxx(2) -- 1 byte ascii char 00109 * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80 00110 * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here) 00111 * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here) 00112 * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here) 00113 * 10xxxxxx(2) -- extension byte (6 payload bits per byte) 00114 * Some values implied by the above are however illegal because they 00115 * do not represent unicode chars or are not the shortest encoding. 00116 * Actually, we can almost entirely ignore the above and just do 00117 * text processing same as for ascii text. 00118 * 00119 * XML is written with arbitrary unicode characters, except that five 00120 * characters have special meaning and so must be escaped where they 00121 * appear in payload data... which we do here. 00122 */ 00123 void xml_data_encode(struct wpabuf *buf, const char *data, int len) 00124 { 00125 int i; 00126 for (i = 0; i < len; i++) { 00127 u8 c = ((u8 *) data)[i]; 00128 if (c == '<') { 00129 wpabuf_put_str(buf, "<"); 00130 continue; 00131 } 00132 if (c == '>') { 00133 wpabuf_put_str(buf, ">"); 00134 continue; 00135 } 00136 if (c == '&') { 00137 wpabuf_put_str(buf, "&"); 00138 continue; 00139 } 00140 if (c == '\'') { 00141 wpabuf_put_str(buf, "'"); 00142 continue; 00143 } 00144 if (c == '"') { 00145 wpabuf_put_str(buf, """); 00146 continue; 00147 } 00148 /* 00149 * We could try to represent control characters using the 00150 * sequence: &#x; where x is replaced by a hex numeral, but not 00151 * clear why we would do this. 00152 */ 00153 wpabuf_put_u8(buf, c); 00154 } 00155 } 00156 00157 00158 /* xml_add_tagged_data -- format tagged data as a new xml line. 00159 * 00160 * tag must not have any special chars. 00161 * data may have special chars, which are escaped. 00162 */ 00163 void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data) 00164 { 00165 wpabuf_printf(buf, "<%s>", tag); 00166 xml_data_encode(buf, data, os_strlen(data)); 00167 wpabuf_printf(buf, "</%s>\n", tag); 00168 } 00169 00170 00171 /* A POST body looks something like (per upnp spec): 00172 * <?xml version="1.0"?> 00173 * <s:Envelope 00174 * xmlns:s="http://schemas.xmlsoap.org/soap/envelope/" 00175 * s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/"> 00176 * <s:Body> 00177 * <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v"> 00178 * <argumentName>in arg value</argumentName> 00179 * other in args and their values go here, if any 00180 * </u:actionName> 00181 * </s:Body> 00182 * </s:Envelope> 00183 * 00184 * where : 00185 * s: might be some other namespace name followed by colon 00186 * u: might be some other namespace name followed by colon 00187 * actionName will be replaced according to action requested 00188 * schema following actionName will be WFA scheme instead 00189 * argumentName will be actual argument name 00190 * (in arg value) will be actual argument value 00191 */ 00192 char * xml_get_first_item(const char *doc, const char *item) 00193 { 00194 const char *match = item; 00195 int match_len = os_strlen(item); 00196 const char *tag, *tagname, *end; 00197 char *value; 00198 00199 /* 00200 * This is crude: ignore any possible tag name conflicts and go right 00201 * to the first tag of this name. This should be ok for the limited 00202 * domain of UPnP messages. 00203 */ 00204 for (;;) { 00205 if (xml_next_tag(doc, &tag, &tagname, &end)) 00206 return NULL; 00207 doc = end; 00208 if (!os_strncasecmp(tagname, match, match_len) && 00209 *tag != '/' && 00210 (tagname[match_len] == '>' || 00211 !isgraph(tagname[match_len]))) { 00212 break; 00213 } 00214 } 00215 end = doc; 00216 while (*end && *end != '<') 00217 end++; 00218 value = os_zalloc(1 + (end - doc)); 00219 if (value == NULL) 00220 return NULL; 00221 os_memcpy(value, doc, end - doc); 00222 return value; 00223 } 00224 00225 00226 struct wpabuf * xml_get_base64_item(const char *data, const char *name, 00227 enum http_reply_code *ret) 00228 { 00229 char *msg; 00230 struct wpabuf *buf; 00231 unsigned char *decoded; 00232 size_t len; 00233 00234 msg = xml_get_first_item(data, name); 00235 if (msg == NULL) { 00236 *ret = UPNP_ARG_VALUE_INVALID; 00237 return NULL; 00238 } 00239 00240 decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len); 00241 os_free(msg); 00242 if (decoded == NULL) { 00243 *ret = UPNP_OUT_OF_MEMORY; 00244 return NULL; 00245 } 00246 00247 buf = wpabuf_alloc_ext_data(decoded, len); 00248 if (buf == NULL) { 00249 os_free(decoded); 00250 *ret = UPNP_OUT_OF_MEMORY; 00251 return NULL; 00252 } 00253 return buf; 00254 } 00255