132 lines
3.9 KiB
Diff
132 lines
3.9 KiB
Diff
From 4365a5e1153dd5ed7d269d00f8f14daee0fac5c8 Mon Sep 17 00:00:00 2001
|
|
From: Nick Wellnhofer <wellnhofer@aevum.de>
|
|
Date: Mon, 26 Feb 2024 15:14:28 +0100
|
|
Subject: [PATCH] xmlreader: Fix xmlTextReaderConstEncoding
|
|
|
|
Regression from commit f1c1f5c6.
|
|
|
|
Fixes #697.
|
|
---
|
|
SAX2.c | 12 +-----------
|
|
include/private/parser.h | 2 ++
|
|
parserInternals.c | 24 ++++++++++++++++++++++++
|
|
xmlreader.c | 22 ++++++++++------------
|
|
4 files changed, 37 insertions(+), 23 deletions(-)
|
|
|
|
diff --git a/SAX2.c b/SAX2.c
|
|
index ed21a559..bb72e160 100644
|
|
--- a/SAX2.c
|
|
+++ b/SAX2.c
|
|
@@ -955,17 +955,7 @@ xmlSAX2EndDocument(void *ctx)
|
|
|
|
doc = ctxt->myDoc;
|
|
if ((doc != NULL) && (doc->encoding == NULL)) {
|
|
- const xmlChar *encoding = NULL;
|
|
-
|
|
- if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
|
|
- (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
|
|
- /* Preserve encoding exactly */
|
|
- encoding = ctxt->encoding;
|
|
- } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
|
|
- encoding = BAD_CAST ctxt->input->buf->encoder->name;
|
|
- } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
|
|
- encoding = BAD_CAST "UTF-8";
|
|
- }
|
|
+ const xmlChar *encoding = xmlGetActualEncoding(ctxt);
|
|
|
|
if (encoding != NULL) {
|
|
doc->encoding = xmlStrdup(encoding);
|
|
diff --git a/include/private/parser.h b/include/private/parser.h
|
|
index 40d179fe..7f8f6912 100644
|
|
--- a/include/private/parser.h
|
|
+++ b/include/private/parser.h
|
|
@@ -48,6 +48,8 @@ XML_HIDDEN void
|
|
xmlDetectEncoding(xmlParserCtxtPtr ctxt);
|
|
XML_HIDDEN void
|
|
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding);
|
|
+XML_HIDDEN const xmlChar *
|
|
+xmlGetActualEncoding(xmlParserCtxtPtr ctxt);
|
|
|
|
XML_HIDDEN xmlParserNsData *
|
|
xmlParserNsCreate(void);
|
|
diff --git a/parserInternals.c b/parserInternals.c
|
|
index e6b4cb14..166397bd 100644
|
|
--- a/parserInternals.c
|
|
+++ b/parserInternals.c
|
|
@@ -1479,6 +1479,30 @@ xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
|
|
}
|
|
}
|
|
|
|
+/**
|
|
+ * xmlGetActualEncoding:
|
|
+ * @ctxt: the parser context
|
|
+ *
|
|
+ * Returns the actual encoding used to parse the document. This can differ from
|
|
+ * the declared encoding.
|
|
+ */
|
|
+const xmlChar *
|
|
+xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
|
|
+ const xmlChar *encoding = NULL;
|
|
+
|
|
+ if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
|
|
+ (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
|
|
+ /* Preserve encoding exactly */
|
|
+ encoding = ctxt->encoding;
|
|
+ } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
|
|
+ encoding = BAD_CAST ctxt->input->buf->encoder->name;
|
|
+ } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
|
|
+ encoding = BAD_CAST "UTF-8";
|
|
+ }
|
|
+
|
|
+ return(encoding);
|
|
+}
|
|
+
|
|
/************************************************************************
|
|
* *
|
|
* Commodity functions to handle entities processing *
|
|
diff --git a/xmlreader.c b/xmlreader.c
|
|
index 1f903306..5fdeb2b8 100644
|
|
--- a/xmlreader.c
|
|
+++ b/xmlreader.c
|
|
@@ -40,6 +40,7 @@
|
|
#endif
|
|
|
|
#include "private/buf.h"
|
|
+#include "private/parser.h"
|
|
#include "private/tree.h"
|
|
#ifdef LIBXML_XINCLUDE_ENABLED
|
|
#include "private/xinclude.h"
|
|
@@ -2795,20 +2796,17 @@ xmlTextReaderReadAttributeValue(xmlTextReaderPtr reader) {
|
|
*/
|
|
const xmlChar *
|
|
xmlTextReaderConstEncoding(xmlTextReaderPtr reader) {
|
|
- xmlDocPtr doc = NULL;
|
|
+ const xmlChar *encoding = NULL;
|
|
+
|
|
if (reader == NULL)
|
|
- return(NULL);
|
|
- if (reader->doc != NULL)
|
|
- doc = reader->doc;
|
|
- else if (reader->ctxt != NULL)
|
|
- doc = reader->ctxt->myDoc;
|
|
- if (doc == NULL)
|
|
- return(NULL);
|
|
+ return(NULL);
|
|
|
|
- if (doc->encoding == NULL)
|
|
- return(NULL);
|
|
- else
|
|
- return(CONSTSTR(doc->encoding));
|
|
+ if (reader->ctxt != NULL)
|
|
+ encoding = xmlGetActualEncoding(reader->ctxt);
|
|
+ else if (reader->doc != NULL)
|
|
+ encoding = reader->doc->encoding;
|
|
+
|
|
+ return(CONSTSTR(encoding));
|
|
}
|
|
|
|
|
|
--
|
|
GitLab
|
|
|