2009-11-08 07:06:01 -05:00
|
|
|
/*
|
|
|
|
* UPnP XML helper routines
|
|
|
|
* Copyright (c) 2000-2003 Intel Corporation
|
|
|
|
* Copyright (c) 2006-2007 Sony Corporation
|
|
|
|
* Copyright (c) 2008-2009 Atheros Communications
|
|
|
|
* Copyright (c) 2009, Jouni Malinen <j@w1.fi>
|
|
|
|
*
|
|
|
|
* See wps_upnp.c for more details on licensing and code history.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "includes.h"
|
|
|
|
|
|
|
|
#include "common.h"
|
|
|
|
#include "base64.h"
|
|
|
|
#include "http.h"
|
|
|
|
#include "upnp_xml.h"
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XML parsing and formatting
|
|
|
|
*
|
|
|
|
* XML is a markup language based on unicode; usually (and in our case,
|
|
|
|
* always!) based on utf-8. utf-8 uses a variable number of bytes per
|
|
|
|
* character. utf-8 has the advantage that all non-ASCII unicode characters are
|
|
|
|
* represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
|
|
|
|
* characters are single ascii bytes, thus we can use typical text processing.
|
|
|
|
*
|
|
|
|
* (One other interesting thing about utf-8 is that it is possible to look at
|
|
|
|
* any random byte and determine if it is the first byte of a character as
|
|
|
|
* versus a continuation byte).
|
|
|
|
*
|
|
|
|
* The base syntax of XML uses a few ASCII punctuation characters; any
|
|
|
|
* characters that would appear in the payload data are rewritten using
|
|
|
|
* sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<).
|
|
|
|
* Five such escapes total (more can be defined but that does not apply to our
|
|
|
|
* case). Thus we can safely parse for angle brackets etc.
|
|
|
|
*
|
|
|
|
* XML describes tree structures of tagged data, with each element beginning
|
|
|
|
* with an opening tag <label> and ending with a closing tag </label> with
|
|
|
|
* matching label. (There is also a self-closing tag <label/> which is supposed
|
|
|
|
* to be equivalent to <label></label>, i.e., no payload, but we are unlikely
|
|
|
|
* to see it for our purpose).
|
|
|
|
*
|
|
|
|
* Actually the opening tags are a little more complicated because they can
|
|
|
|
* contain "attributes" after the label (delimited by ascii space or tab chars)
|
|
|
|
* of the form attribute_label="value" or attribute_label='value'; as it turns
|
|
|
|
* out we do not have to read any of these attributes, just ignore them.
|
|
|
|
*
|
|
|
|
* Labels are any sequence of chars other than space, tab, right angle bracket
|
|
|
|
* (and ?), but may have an inner structure of <namespace><colon><plain_label>.
|
|
|
|
* As it turns out, we can ignore the namespaces, in fact we can ignore the
|
|
|
|
* entire tree hierarchy, because the plain labels we are looking for will be
|
|
|
|
* unique (not in general, but for this application). We do however have to be
|
|
|
|
* careful to skip over the namespaces.
|
|
|
|
*
|
|
|
|
* In generating XML we have to be more careful, but that is easy because
|
|
|
|
* everything we do is pretty canned. The only real care to take is to escape
|
|
|
|
* any special chars in our payload.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
 * xml_next_tag - Advance to next tag
 * @in: Input (NUL-terminated text to scan)
 * @out: OUT: start of tag just after '<'
 * @out_tagname: OUT: start of name of tag, skipping namespace
 * @end: OUT: one after tag (just past the closing '>')
 * Returns: 0 on success, 1 on failure
 *
 * A tag has form:
 *     <left angle bracket><...><right angle bracket>
 * Within the angle brackets, there is an optional leading forward slash (which
 * makes the tag an ending tag), then an optional leading label (followed by
 * colon) and then the tag name itself.
 *
 * Note that angle brackets present in the original data must have been encoded
 * as &lt; and &gt; so they will not trouble us.
 *
 * Failure means that either no '<' was found or the tag was not terminated
 * with '>' before the end of the string. In the latter case @out and
 * @out_tagname have already been written, but @end has not; callers must not
 * use any of the OUT values unless 0 was returned.
 */
int xml_next_tag(const char *in, const char **out,
		 const char **out_tagname, const char **end)
{
	/* Skip any payload text in front of the next tag */
	while (*in && *in != '<')
		in++;
	if (*in != '<')
		return 1;
	*out = ++in;

	/* An ending tag has a slash in front of the (optional) namespace */
	if (*in == '/')
		in++;
	*out_tagname = in; /* maybe; corrected below if a namespace is found */

	/*
	 * Scan over what might be a namespace label. The cast is required:
	 * passing a negative char value (any byte >= 0x80 where char is
	 * signed) to the <ctype.h> functions is undefined behavior.
	 */
	while (isalnum((unsigned char) *in) || *in == '-')
		in++;
	if (*in == ':')
		*out_tagname = ++in;

	/* Ignore the rest of the tag (name, attributes) up to the '>' */
	while (*in && *in != '>')
		in++;
	if (*in != '>')
		return 1;
	*end = ++in;
	return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* xml_data_encode -- format data for xml file, escaping special characters.
|
|
|
|
*
|
|
|
|
* Note that we assume we are using utf8 both as input and as output!
|
|
|
|
* In utf8, characters may be classed as follows:
|
|
|
|
* 0xxxxxxx(2) -- 1 byte ascii char
|
|
|
|
* 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
|
|
|
|
* 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
|
|
|
|
* 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
|
|
|
|
* 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
|
|
|
|
* 10xxxxxx(2) -- extension byte (6 payload bits per byte)
|
|
|
|
* Some values implied by the above are however illegal because they
|
|
|
|
* do not represent unicode chars or are not the shortest encoding.
|
|
|
|
* Actually, we can almost entirely ignore the above and just do
|
|
|
|
* text processing same as for ascii text.
|
|
|
|
*
|
|
|
|
* XML is written with arbitrary unicode characters, except that five
|
|
|
|
* characters have special meaning and so must be escaped where they
|
|
|
|
* appear in payload data... which we do here.
|
|
|
|
*/
|
|
|
|
void xml_data_encode(struct wpabuf *buf, const char *data, int len)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
u8 c = ((u8 *) data)[i];
|
|
|
|
if (c == '<') {
|
|
|
|
wpabuf_put_str(buf, "<");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (c == '>') {
|
|
|
|
wpabuf_put_str(buf, ">");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (c == '&') {
|
|
|
|
wpabuf_put_str(buf, "&");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (c == '\'') {
|
|
|
|
wpabuf_put_str(buf, "'");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (c == '"') {
|
|
|
|
wpabuf_put_str(buf, """);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* We could try to represent control characters using the
|
|
|
|
* sequence: &#x; where x is replaced by a hex numeral, but not
|
|
|
|
* clear why we would do this.
|
|
|
|
*/
|
|
|
|
wpabuf_put_u8(buf, c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* xml_add_tagged_data -- append one "<tag>data</tag>" line to an xml buffer.
 *
 * @buf: output buffer that the element is appended to
 * @tag: element name; written as-is, so it must not have any special chars
 * @data: NUL-terminated payload; special chars in it are escaped by
 *	xml_data_encode()
 *
 * The closing tag is followed by a newline.
 */
void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
{
	int payload_len;

	/* Opening tag */
	wpabuf_printf(buf, "<%s>", tag);

	/* Escaped payload */
	payload_len = os_strlen(data);
	xml_data_encode(buf, data, payload_len);

	/* Closing tag and end of line */
	wpabuf_printf(buf, "</%s>\n", tag);
}
|
|
|
|
|
|
|
|
|
|
|
|
/* A POST body looks something like (per upnp spec):
 * <?xml version="1.0"?>
 * <s:Envelope
 *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 *   <s:Body>
 *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 *       <argumentName>in arg value</argumentName>
 *       other in args and their values go here, if any
 *     </u:actionName>
 *   </s:Body>
 * </s:Envelope>
 *
 * where :
 *      s: might be some other namespace name followed by colon
 *      u: might be some other namespace name followed by colon
 *      actionName will be replaced according to action requested
 *      schema following actionName will be WFA scheme instead
 *      argumentName will be actual argument name
 *      (in arg value) will be actual argument value
 */

/**
 * xml_get_first_item - Get the text following the first tag of a given name
 * @doc: NUL-terminated XML text to search
 * @item: Tag name to look for (without namespace); compared ignoring case
 * Returns: Newly allocated, NUL-terminated copy of the text between the
 * matching opening tag and the next '<' (or end of input), or %NULL if no
 * matching opening tag was found or memory allocation failed. The caller
 * is responsible for freeing the returned string with os_free().
 *
 * The text is copied verbatim, i.e., escape sequences are not decoded. Only
 * opening tags are considered and the character after the name must be '>'
 * or a non-graphic character (e.g., whitespace before attributes), so a tag
 * that merely starts with @item does not match. Note that this also means
 * that the self-closing form <item/> is not matched.
 */
char * xml_get_first_item(const char *doc, const char *item)
{
	const char *match = item;
	size_t match_len = os_strlen(item);
	const char *tag, *tagname, *end;
	char *value;

	/*
	 * This is crude: ignore any possible tag name conflicts and go right
	 * to the first tag of this name. This should be ok for the limited
	 * domain of UPnP messages.
	 */
	for (;;) {
		if (xml_next_tag(doc, &tag, &tagname, &end))
			return NULL;
		doc = end;
		/*
		 * The cast is required: passing a negative char value (any
		 * byte >= 0x80 where char is signed) to isgraph() is
		 * undefined behavior.
		 */
		if (!os_strncasecmp(tagname, match, match_len) &&
		    *tag != '/' &&
		    (tagname[match_len] == '>' ||
		     !isgraph((unsigned char) tagname[match_len]))) {
			break;
		}
	}

	/* The value runs from just after the tag up to the next tag */
	end = doc;
	while (*end && *end != '<')
		end++;

	/* One extra zeroed byte provides the NUL terminator */
	value = os_zalloc(1 + (end - doc));
	if (value == NULL)
		return NULL;
	os_memcpy(value, doc, end - doc);
	return value;
}
|
|
|
|
|
|
|
|
|
|
|
|
struct wpabuf * xml_get_base64_item(const char *data, const char *name,
|
|
|
|
enum http_reply_code *ret)
|
|
|
|
{
|
|
|
|
char *msg;
|
|
|
|
struct wpabuf *buf;
|
|
|
|
unsigned char *decoded;
|
|
|
|
size_t len;
|
|
|
|
|
|
|
|
msg = xml_get_first_item(data, name);
|
|
|
|
if (msg == NULL) {
|
|
|
|
*ret = UPNP_ARG_VALUE_INVALID;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len);
|
|
|
|
os_free(msg);
|
|
|
|
if (decoded == NULL) {
|
|
|
|
*ret = UPNP_OUT_OF_MEMORY;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf = wpabuf_alloc_ext_data(decoded, len);
|
|
|
|
if (buf == NULL) {
|
|
|
|
os_free(decoded);
|
|
|
|
*ret = UPNP_OUT_OF_MEMORY;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return buf;
|
|
|
|
}
|