38 return XmlDocument (textToParse).getDocumentElement();
41 std::unique_ptr<XmlElement> parseXML (
const String& textToParse)
43 return XmlDocument (textToParse).getDocumentElement();
46 std::unique_ptr<XmlElement> parseXML (
const File& file)
51 std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const String& textToParse,
StringRef requiredTag)
53 return XmlDocument (textToParse).getDocumentElementIfTagMatches (requiredTag);
56 std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const File& file,
StringRef requiredTag)
58 return XmlDocument (file).getDocumentElementIfTagMatches (requiredTag);
63 inputSource.reset (newSource);
68 ignoreEmptyTextElements = shouldBeIgnored;
71 namespace XmlIdentifierChars
73 static bool isIdentifierCharSlow (juce_wchar c) noexcept
76 || c ==
'_' || c ==
'-' || c ==
':' || c ==
'.';
79 static bool isIdentifierChar (juce_wchar c) noexcept
81 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
83 return ((
int) c < (
int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (uint32) (1 << (c & 31))) != 0)
84 : isIdentifierCharSlow (c);
103 while (isIdentifierChar (*p))
112 if (originalText.isEmpty() && inputSource !=
nullptr)
114 std::unique_ptr<InputStream> in (inputSource->createInputStream());
121 #if JUCE_STRING_UTF_TYPE == 8 125 auto* text =
static_cast<const char*
> (data.
getData());
147 return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
153 if (xml->hasTagName (requiredTag))
164 void XmlDocument::setLastError (
const String& desc,
const bool carryOn)
167 errorOccurred = ! carryOn;
170 String XmlDocument::getFileContents (
const String& filename)
const 172 if (inputSource !=
nullptr)
174 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.
trim().
unquoted()));
177 return in->readEntireStreamAsString();
183 juce_wchar XmlDocument::readNextChar() noexcept
185 auto c = input.getAndAdvance();
197 bool onlyReadOuterDocumentElement)
200 errorOccurred =
false;
202 needToLoadDTD =
true;
206 lastError =
"not enough input";
208 else if (! parseHeader())
210 lastError =
"malformed header";
212 else if (! parseDTD())
214 lastError =
"malformed DTD";
219 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
228 bool XmlDocument::parseHeader()
230 skipNextWhiteSpace();
236 if (headerEnd.isEmpty())
240 auto encoding =
String (input, headerEnd)
254 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
257 input = headerEnd + 2;
258 skipNextWhiteSpace();
264 bool XmlDocument::parseDTD()
269 auto dtdStart = input;
271 for (
int n = 1; n > 0;)
273 auto c = readNextChar();
284 dtdText =
String (dtdStart, input - 1).
trim();
290 void XmlDocument::skipNextWhiteSpace()
294 input = input.findEndOfWhitespace();
311 if (closeComment < 0)
317 input += closeComment + 3;
326 if (closeBracket < 0)
332 input += closeBracket + 2;
341 void XmlDocument::readQuotedString (
String& result)
343 auto quote = readNextChar();
347 auto c = readNextChar();
364 auto character = *input;
366 if (character == quote)
373 if (character ==
'&')
381 setLastError (
"unmatched quotes",
false);
392 XmlElement* XmlDocument::readNextElement (
const bool alsoParseSubElements)
395 skipNextWhiteSpace();
403 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
405 if (endOfToken == input)
408 skipNextWhiteSpace();
409 endOfToken = XmlIdentifierChars::findEndOfToken (input);
411 if (endOfToken == input)
413 setLastError (
"tag name missing",
false);
425 skipNextWhiteSpace();
429 if (c ==
'/' && input[1] ==
'>')
440 if (alsoParseSubElements)
441 readChildElements (*node);
447 if (XmlIdentifierChars::isIdentifierChar (c))
449 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
451 if (attNameEnd != input)
453 auto attNameStart = input;
455 skipNextWhiteSpace();
457 if (readNextChar() ==
'=')
459 skipNextWhiteSpace();
460 auto nextChar = *input;
462 if (nextChar ==
'"' || nextChar ==
'\'')
464 auto* newAtt =
new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
465 readQuotedString (newAtt->value);
466 attributeAppender.
append (newAtt);
472 setLastError (
"expected '=' after attribute '" 473 +
String (attNameStart, attNameEnd) +
"'",
false);
481 setLastError (
"illegal character found in " + node->
getTagName() +
": '" + c +
"'",
false);
491 void XmlDocument::readChildElements (
XmlElement& parent)
497 auto preWhitespaceInput = input;
498 skipNextWhiteSpace();
502 setLastError (
"unmatched tags",
false);
513 auto closeTag = input.indexOf ((juce_wchar)
'>');
516 input += closeTag + 1;
524 auto inputStart = input;
532 setLastError (
"unterminated CDATA section",
false);
537 if (c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
550 if (
auto* n = readNextElement (
true))
558 input = preWhitespaceInput;
560 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
568 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
573 if (closeComment < 0)
575 setLastError (
"unterminated comment",
false);
580 input += closeComment + 3;
589 setLastError (
"unmatched tags",
false);
601 auto oldInput = input;
602 auto oldOutOfData = outOfData;
607 while (
auto* n = readNextElement (
true))
611 outOfData = oldOutOfData;
615 textElementContent << entity;
623 auto nextChar = *input;
625 if (nextChar ==
'\r')
629 if (input[1] ==
'\n')
633 if (nextChar ==
'<' || nextChar ==
'&')
638 setLastError (
"unmatched tags",
false);
649 if (contentShouldBeUsed)
655 void XmlDocument::readEntity (
String& result)
685 else if (*input ==
'#')
690 if (*input ==
'x' || *input ==
'X')
695 while (input[0] !=
';')
699 if (hexValue < 0 || ++numChars > 8)
701 setLastError (
"illegal escape sequence",
true);
705 charCode = (charCode << 4) | hexValue;
711 else if (input[0] >=
'0' && input[0] <=
'9')
715 while (input[0] !=
';')
719 setLastError (
"illegal escape sequence",
true);
723 charCode = charCode * 10 + ((int) input[0] -
'0');
731 setLastError (
"illegal escape sequence",
true);
736 result << (juce_wchar) charCode;
740 auto entityNameStart = input;
741 auto closingSemiColon = input.
indexOf ((juce_wchar)
';');
743 if (closingSemiColon < 0)
750 input += closingSemiColon + 1;
751 result += expandExternalEntity (
String (entityNameStart, (
size_t) closingSemiColon));
768 if (char1 ==
'x' || char1 ==
'X')
771 if (char1 >=
'0' && char1 <=
'9')
774 setLastError (
"illegal escape sequence",
false);
778 return expandExternalEntity (ent);
781 String XmlDocument::expandExternalEntity (
const String& entity)
785 if (dtdText.isNotEmpty())
788 tokenisedDTD.addTokens (dtdText,
true);
790 if (tokenisedDTD[tokenisedDTD.size() - 2].equalsIgnoreCase (
"system")
791 && tokenisedDTD[tokenisedDTD.size() - 1].isQuotedString())
793 auto fn = tokenisedDTD[tokenisedDTD.size() - 1];
795 tokenisedDTD.
clear();
796 tokenisedDTD.addTokens (getFileContents (fn),
true);
800 tokenisedDTD.clear();
801 auto openBracket = dtdText.indexOfChar (
'[');
805 auto closeBracket = dtdText.lastIndexOfChar (
']');
807 if (closeBracket > openBracket)
808 tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
809 closeBracket),
true);
813 for (
int i = tokenisedDTD.size(); --i >= 0;)
815 if (tokenisedDTD[i].startsWithChar (
'%')
816 && tokenisedDTD[i].endsWithChar (
';'))
818 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
822 tokenisedDTD.remove (i);
824 for (
int j = newToks.
size(); --j >= 0;)
825 tokenisedDTD.insert (i, newToks[j]);
830 needToLoadDTD =
false;
833 for (
int i = 0; i < tokenisedDTD.size(); ++i)
835 if (tokenisedDTD[i] == entity)
837 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
844 while (ampersand >= 0)
846 auto semiColon = ent.
indexOf (i + 1,
";");
850 setLastError (
"entity without terminating semi-colon",
false);
854 auto resolved = expandEntity (ent.
substring (i + 1, semiColon));
868 setLastError (
"unknown entity",
true);
872 String XmlDocument::getParameterEntity (
const String& entity)
874 for (
int i = 0; i < tokenisedDTD.size(); ++i)
876 if (tokenisedDTD[i] == entity
877 && tokenisedDTD [i - 1] ==
"%" 878 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
883 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
Wraps a pointer to a null-terminated ASCII character string, and provides various methods to operate on the data.
void setEmptyTextElementsIgnored(bool shouldBeIgnored) noexcept
Sets a flag to change the treatment of empty text elements.
String fromFirstOccurrenceOf(StringRef substringToStartFrom, bool includeSubStringInResult, bool ignoreCase) const
Returns a section of the string starting from a given substring.
virtual bool writeByte(char byte)
Writes a single byte to the stream.
static std::unique_ptr< XmlElement > parse(const File &file)
A handy static method that parses a file.
A simple class for holding temporary references to a string literal or String.
static XmlElement * createTextElement(const String &text)
Creates a text element that can be added to a parent element.
std::unique_ptr< XmlElement > getDocumentElement(bool onlyReadOuterDocumentElement=false)
Creates an XmlElement object to represent the main document node.
void setInputSource(InputSource *newSource) noexcept
Sets an input source object to use for parsing documents that reference external entities.
void append(ObjectType *const newItem) noexcept
Appends an item to the list.
int64 writeFromInputStream(InputStream &, int64 maxNumBytesToWrite) override
Reads data from an input stream and writes it to this stream.
String trimCharactersAtEnd(StringRef charactersToTrim) const
Returns a copy of this string, having removed a specified set of characters from its end...
Used to build a tree of elements representing an XML document.
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
bool isEmpty() const noexcept
Returns true if this pointer is pointing to a null character.
A special array for holding a list of strings.
~XmlDocument()
Destructor.
static bool isWhitespace(char character) noexcept
Checks whether a character is whitespace.
int addTokens(StringRef stringToTokenise, bool preserveQuotedStrings)
Breaks up a string into tokens and adds them to this array.
bool equalsIgnoreCase(const String &other) const noexcept
Case-insensitive comparison with another string.
const String & getLastParseError() const noexcept
Returns the parsing error that occurred the last time getDocumentElement was called.
String substring(int startIndex, int endIndex) const
Returns a subsection of the string.
static CharPointerType1 find(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
Returns a pointer to the first occurrence of a substring in a string.
String unquoted() const
Removes quotation marks from around the string (if there are any).
const void * getData() const noexcept
Returns a pointer to the data that has been written to the stream.
static int getHexDigitValue(juce_wchar digit) noexcept
Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit.
Represents a local file or directory.
int indexOf(StringRef textToLookFor) const noexcept
Searches for a substring within this string.
size_t getDataSize() const noexcept
Returns the number of bytes of data that have been written to the stream.
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM)...
String upToFirstOccurrenceOf(StringRef substringToEndWith, bool includeSubStringInResult, bool ignoreCase) const
Returns the start of this string, up to the first occurrence of a substring.
bool startsWithChar(juce_wchar character) const noexcept
Tests whether the string begins with a particular character.
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian)...
Allows efficient repeated insertions into a list.
void clear() noexcept
Resets this string to be empty.
int getIntValue() const noexcept
Reads the value of the string as a decimal number (up to 32 bits in size).
static String charToString(juce_wchar character)
Creates a string from a single character.
int size() const noexcept
Returns the number of strings in the array.
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian)...
bool appendUTF8Char(juce_wchar character)
Appends the utf-8 bytes for a unicode character.
String toUTF8() const
Returns a String created from the (UTF8) data that has been written to the stream.
bool containsNonWhitespaceChars() const noexcept
Returns true if this string contains any non-whitespace characters.
Writes data to an internal memory buffer, which grows as required.
int indexOfChar(juce_wchar characterToLookFor) const noexcept
Searches for a character inside this string.
String trim() const
Returns a copy of this string with any whitespace characters removed from the start and end...
static bool isLetterOrDigit(char character) noexcept
Checks whether a character is alphabetic or numeric.
String toString() const
Attempts to detect the encoding of the data and convert it to a string.
void appendCharPointer(CharPointerType startOfTextToAppend, CharPointerType endOfTextToAppend)
Appends a string to the end of this one.
XmlDocument(const String &documentText)
Creates an XmlDocument from the xml text.
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
Compares two null-terminated character strings, up to a given number of characters.
const String & getTagName() const noexcept
Returns this element's tag type name.
int getHexValue32() const noexcept
Parses the string as a hexadecimal number.
std::unique_ptr< XmlElement > getDocumentElementIfTagMatches(StringRef requiredTag)
Does an inexpensive check to see whether the outer element has the given tag name, and then does a full parse if it matches.
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate on the data.