Skip to content

feat: improved XMLArgs processing #3358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
May 4, 2025
Merged
40 changes: 40 additions & 0 deletions apache2/apache2_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ void *create_directory_config(apr_pool_t *mp, char *path)

/* xml external entity */
dcfg->xml_external_entity = NOT_SET;
dcfg->parse_xml_into_args = NOT_SET;

return dcfg;
}
Expand Down Expand Up @@ -640,6 +641,8 @@ void *merge_directory_configs(apr_pool_t *mp, void *_parent, void *_child)
/* xml external entity */
merged->xml_external_entity = (child->xml_external_entity == NOT_SET
? parent->xml_external_entity : child->xml_external_entity);
merged->parse_xml_into_args = (child->parse_xml_into_args == NOT_SET
? parent->parse_xml_into_args : child->parse_xml_into_args);

return merged;
}
Expand Down Expand Up @@ -773,6 +776,7 @@ void init_directory_config(directory_config *dcfg)

/* xml external entity */
if (dcfg->xml_external_entity == NOT_SET) dcfg->xml_external_entity = 0;
if (dcfg->parse_xml_into_args == NOT_SET) dcfg->parse_xml_into_args = 0;

}

Expand Down Expand Up @@ -3698,6 +3702,34 @@ static const char *cmd_cache_transformations(cmd_parms *cmd, void *_dcfg,
return NULL;
}

/**
* \brief Add SecParseXmlIntoArgs configuration option
*
* \param cmd Pointer to configuration data
* \param _dcfg Pointer to directory configuration
* \param p1 Pointer to configuration option
*
* \retval NULL On Success
* \retval apr_psprintf On error
*/
static const char *cmd_parse_xml_into_args(cmd_parms *cmd, void *_dcfg, const char *p1)
{
assert(cmd != NULL);
assert(_dcfg != NULL);
assert(p1 != NULL);
// Normally useless code, left to be safe for the moment
if (_dcfg == NULL) {
ap_log_perror(APLOG_MARK, APLOG_EMERG, 0, cmd->pool, "cmd_parse_xml_into_args: _dcfg is NULL");
return NULL;
}
directory_config *dcfg = (directory_config *)_dcfg;
if (strcasecmp(p1, "on") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_ON; }
else if (strcasecmp(p1, "off") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_OFF; }
else if (strcasecmp(p1, "onlyargs") == 0) { dcfg->parse_xml_into_args = MSC_XML_ARGS_ONLYARGS; }
else return apr_psprintf(cmd->pool, "ModSecurity: Invalid value for SecParseXmlIntoArgs: %s", p1);

return NULL;
}

/* -- Configuration directives definitions -- */

Expand Down Expand Up @@ -4466,5 +4498,13 @@ const command_rec module_directives[] = {
"Set Hash parameter"
),

AP_INIT_TAKE1 (
"SecParseXmlIntoArgs",
cmd_parse_xml_into_args,
NULL,
CMD_SCOPE_ANY,
"On, Off or OnlyArgs"
),

{ NULL }
};
5 changes: 5 additions & 0 deletions apache2/modsecurity.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ extern DSOLOCAL int *unicode_map_table;
#define RULE_EXCEPTION_REMOVE_MSG 4
#define RULE_EXCEPTION_REMOVE_TAG 5

#define MSC_XML_ARGS_OFF 0
#define MSC_XML_ARGS_ON 1
#define MSC_XML_ARGS_ONLYARGS 2

#define NBSP 160

struct rule_exception {
Expand Down Expand Up @@ -643,6 +647,7 @@ struct directory_config {

/* xml */
int xml_external_entity;
int parse_xml_into_args;

/* This will be used whenever ModSecurity will be ready
* to ask the server for newer rules.
Expand Down
251 changes: 230 additions & 21 deletions apache2/msc_xml.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,124 @@

#include "msc_xml.h"

static void msc_xml_on_start_elementns(
void *ctx,
const xmlChar *localname,
const xmlChar *prefix,
const xmlChar *URI,
int nb_namespaces,
const xmlChar **namespaces,
int nb_attributes,
int nb_defaulted,
const xmlChar **attributes
) {

// get the length of XML tag (localname)
size_t taglen = strlen((const char *)localname);
modsec_rec * msr = (modsec_rec *)ctx;
msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state;

// pathlen contains the concatenated strings of tags with '.'
// eg xml.root.level1.leaf
xml_parser_state->pathlen += (taglen + 1);
char *newpath = apr_pstrcat(msr->mp, xml_parser_state->currpath, ".", (char *)localname, NULL);
xml_parser_state->currpath = newpath;

int *new_stack_item = (int *)apr_array_push(xml_parser_state->has_child_stack);
*new_stack_item = 0;
xml_parser_state->depth++;
// set the current value to null
// this is necessary because if there is any text between the tags (new line, etc)
// it will be added to the current value
xml_parser_state->currval = NULL;

// if there is an item before the current one we set that has a child
if (xml_parser_state->depth > 1) {
int *parent_stack_item = &((int *)xml_parser_state->has_child_stack->elts)[xml_parser_state->has_child_stack->nelts - 2];
*parent_stack_item = 1;
}

}

static void msc_xml_on_end_elementns(
void* ctx,
const xmlChar* localname,
const xmlChar* prefix,
const xmlChar* URI
) {

size_t taglen = strlen((const char *)localname);
modsec_rec * msr = (modsec_rec *)ctx;
msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state;

// if the node is a leaf we add it as argument
// get the top item from the stack which tells this info
int * top_stack_item = apr_array_pop(xml_parser_state->has_child_stack);
if (*top_stack_item == 0) {

if (apr_table_elts(msr->arguments)->nelts >= msr->txcfg->arguments_limit) {
if (msr->txcfg->debuglog_level >= 4) {
msr_log(msr, 4, "Skipping request argument, over limit (XML): name \"%s\", value \"%s\"",
log_escape_ex(msr->mp, xml_parser_state->currpath, strlen(xml_parser_state->currpath)),
log_escape_ex(msr->mp, xml_parser_state->currval, strlen(xml_parser_state->currval)));
}
msr->msc_reqbody_error = 1;
msr->xml->xml_error = apr_psprintf(msr->mp, "More than %ld ARGS (GET + XML)", msr->txcfg->arguments_limit);
xmlStopParser((xmlParserCtxtPtr)msr->xml->parsing_ctx_arg);
}
else {

msc_arg * arg = (msc_arg *) apr_pcalloc(msr->mp, sizeof(msc_arg));

arg->name = xml_parser_state->currpath;
arg->name_len = strlen(arg->name);
arg->value = xml_parser_state->currval;
arg->value_len = strlen(xml_parser_state->currval);
arg->value_origin_len = arg->value_len;
arg->origin = "XML";

if (msr->txcfg->debuglog_level >= 9) {
msr_log(msr, 9, "Adding XML argument '%s' with value '%s'",
xml_parser_state->currpath, xml_parser_state->currval);
}

apr_table_addn(msr->arguments,
log_escape_nq_ex(msr->mp, arg->name, arg->name_len), (void *) arg);
} // end else
} // end top_stack_item == 0

// decrease the length of current path length - +1 because of the '\0'
xml_parser_state->pathlen -= (taglen + 1);

// -1 is needed because we don't need the last '.'
char * newpath = apr_pstrndup(msr->mp, xml_parser_state->currpath, xml_parser_state->pathlen - 1);
xml_parser_state->currpath = newpath;

xml_parser_state->depth--;
xml_parser_state->currval = NULL;
}

static void msc_xml_on_characters(void *ctx, const xmlChar *ch, int len) {

modsec_rec * msr = (modsec_rec *)ctx;
msc_xml_parser_state * xml_parser_state = msr->xml->xml_parser_state;

// libxml2 SAX parser will call this function multiple times
// during the parsing of a single node, if the value has multibyte
// characters, so we need to concatenate the values
xml_parser_state->currval = apr_pstrcat(msr->mp,
((xml_parser_state->currval != NULL) ? xml_parser_state->currval : ""),
apr_pstrndup(msr->mp, (const char *)ch, len),
NULL);
// check if the memory allocation was successful
if (xml_parser_state->currval == NULL) {
msr->xml->xml_error = apr_psprintf(msr->mp, "Failed to allocate memory for XML value.");
xmlStopParser((xmlParserCtxtPtr)msr->xml->parsing_ctx_arg);
}

}


static xmlParserInputBufferPtr
xml_unload_external_entity(const char *URI, xmlCharEncoding enc) {
return NULL;
Expand All @@ -37,6 +155,33 @@ int xml_init(modsec_rec *msr, char **error_msg) {
entity = xmlParserInputBufferCreateFilenameDefault(xml_unload_external_entity);
}

if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) {

msr->xml->sax_handler = (xmlSAXHandler *)apr_pcalloc(msr->mp, sizeof(xmlSAXHandler));
memset(msr->xml->sax_handler, 0, sizeof(xmlSAXHandler));
if (msr->xml->sax_handler == NULL) {
*error_msg = apr_psprintf(msr->mp, "XML: Failed to create SAX handler.");
return -1;
}

msr->xml->sax_handler->initialized = XML_SAX2_MAGIC;
msr->xml->sax_handler->startElementNs = msc_xml_on_start_elementns;
msr->xml->sax_handler->endElementNs = msc_xml_on_end_elementns;
msr->xml->sax_handler->characters = msc_xml_on_characters;

// set the parser state struct
msr->xml->xml_parser_state = apr_pcalloc(msr->mp, sizeof(msc_xml_parser_state));
msr->xml->xml_parser_state->depth = 0;
msr->xml->xml_parser_state->pathlen = 4; // "xml\0"
msr->xml->xml_parser_state->currpath = apr_pstrdup(msr->mp, "xml");
msr->xml->xml_parser_state->currval = NULL;
msr->xml->xml_parser_state->currpathbufflen = 4;
// initialize the stack with item of 10
// this will store the information about nodes
// 10 is just an initial value, it can be automatically incremented
msr->xml->xml_parser_state->has_child_stack = apr_array_make(msr->mp, 10, sizeof(int));
}

return 1;
}

Expand Down Expand Up @@ -68,7 +213,7 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char
* enable us to pass it the first chunk of data so that
* it can attempt to auto-detect the encoding.
*/
if (msr->xml->parsing_ctx == NULL) {
if (msr->xml->parsing_ctx == NULL && msr->xml->parsing_ctx_arg == NULL) {

/* First invocation. */

Expand All @@ -86,18 +231,52 @@ int xml_process_chunk(modsec_rec *msr, const char *buf, unsigned int size, char

*/

msr->xml->parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL, buf, size, "body.xml");
if (msr->xml->parsing_ctx == NULL) {
*error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context.");
return -1;
if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) {
msr->xml->parsing_ctx = xmlCreatePushParserCtxt(NULL, NULL, buf, size, "body.xml");
if (msr->xml->parsing_ctx == NULL) {
*error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context.");
return -1;
}
}
if (msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) {
msr->xml->parsing_ctx_arg = xmlCreatePushParserCtxt(
msr->xml->sax_handler,
msr,
buf,
size,
NULL);
if (msr->xml->parsing_ctx_arg == NULL) {
*error_msg = apr_psprintf(msr->mp, "XML: Failed to create parsing context for ARGS.");
return -1;
}
}
} else {

/* Not a first invocation. */
msr_log(msr, 4, "XML: Continue parsing.");
if (msr->xml->parsing_ctx != NULL &&
msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) {
xmlParseChunk(msr->xml->parsing_ctx, buf, size, 0);
if (msr->xml->parsing_ctx->wellFormed != 1) {
*error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document.");
return -1;
}
}

xmlParseChunk(msr->xml->parsing_ctx, buf, size, 0);
if (msr->xml->parsing_ctx->wellFormed != 1) {
*error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document.");
if (msr->xml->parsing_ctx_arg != NULL &&
msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) {
if (xmlParseChunk(msr->xml->parsing_ctx_arg, buf, size, 0) != 0) {
if (msr->xml->xml_error) {
*error_msg = msr->xml->xml_error;
}
else {
*error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document for ARGS.");
}
return -1;
}
}
if (msr->xml->xml_error) {
*error_msg = msr->xml->xml_error;
return -1;
}
}
Expand All @@ -114,23 +293,44 @@ int xml_complete(modsec_rec *msr, char **error_msg) {
*error_msg = NULL;

/* Only if we have a context, meaning we've done some work. */
if (msr->xml->parsing_ctx != NULL) {
/* This is how we signalise the end of parsing to libxml. */
xmlParseChunk(msr->xml->parsing_ctx, NULL, 0, 1);
if (msr->xml->parsing_ctx != NULL || msr->xml->parsing_ctx_arg != NULL) {
if (msr->xml->parsing_ctx != NULL &&
msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_ONLYARGS) {
/* This is how we signal the end of parsing to libxml. */
xmlParseChunk(msr->xml->parsing_ctx, NULL, 0, 1);

/* Preserve the results for our reference. */
msr->xml->well_formed = msr->xml->parsing_ctx->wellFormed;
msr->xml->doc = msr->xml->parsing_ctx->myDoc;
/* Preserve the results for our reference. */
msr->xml->well_formed = msr->xml->parsing_ctx->wellFormed;
msr->xml->doc = msr->xml->parsing_ctx->myDoc;

/* Clean up everything else. */
xmlFreeParserCtxt(msr->xml->parsing_ctx);
msr->xml->parsing_ctx = NULL;
msr_log(msr, 4, "XML: Parsing complete (well_formed %u).", msr->xml->well_formed);
/* Clean up everything else. */
xmlFreeParserCtxt(msr->xml->parsing_ctx);
msr->xml->parsing_ctx = NULL;
msr_log(msr, 4, "XML: Parsing complete (well_formed %u).", msr->xml->well_formed);

if (msr->xml->well_formed != 1) {
*error_msg = apr_psprintf(msr->mp, "XML: Failed parsing document.");
return -1;
if (msr->xml->well_formed != 1) {
*error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document.");
return -1;
}
}

if (msr->xml->parsing_ctx_arg != NULL &&
msr->txcfg->parse_xml_into_args != MSC_XML_ARGS_OFF) {
if (xmlParseChunk(msr->xml->parsing_ctx_arg, NULL, 0, 1) != 0) {
if (msr->xml->xml_error) {
*error_msg = msr->xml->xml_error;
}
else {
*error_msg = apr_psprintf(msr->mp, "XML: Failed to parse document for ARGS.");
}
xmlFreeParserCtxt(msr->xml->parsing_ctx_arg);
msr->xml->parsing_ctx_arg = NULL;
return -1;
}
xmlFreeParserCtxt(msr->xml->parsing_ctx_arg);
msr->xml->parsing_ctx_arg = NULL;
}

}

return 1;
Expand All @@ -152,6 +352,15 @@ apr_status_t xml_cleanup(modsec_rec *msr) {
xmlFreeParserCtxt(msr->xml->parsing_ctx);
msr->xml->parsing_ctx = NULL;
}
if (msr->xml->parsing_ctx_arg != NULL) {

if (msr->xml->parsing_ctx_arg->myDoc) {
xmlFreeDoc(msr->xml->parsing_ctx_arg->myDoc);
}

xmlFreeParserCtxt(msr->xml->parsing_ctx_arg);
msr->xml->parsing_ctx_arg = NULL;
}
if (msr->xml->doc != NULL) {
xmlFreeDoc(msr->xml->doc);
msr->xml->doc = NULL;
Expand Down
Loading