Skip to content

Commit

Permalink
Merge pull request stleary#362 from johnjaylward/FixXMLUnescape
Browse files Browse the repository at this point in the history
Fixes XML Unescaping
  • Loading branch information
stleary authored Aug 27, 2017
2 parents 4cb1ae8 + de855c5 commit 2565abd
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 36 deletions.
2 changes: 1 addition & 1 deletion JSONML.java
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ private static Object parse(
if (!(token instanceof String)) {
throw x.syntaxError("Missing value");
}
newjo.accumulate(attribute, keepStrings ? XML.unescape((String)token) :XML.stringToValue((String)token));
newjo.accumulate(attribute, keepStrings ? ((String)token) :XML.stringToValue((String)token));
token = null;
} else {
newjo.accumulate(attribute, "");
Expand Down
38 changes: 5 additions & 33 deletions XML.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public static String escape(String string) {
if (mustEscape(cp)) {
sb.append("&#x");
sb.append(Integer.toHexString(cp));
sb.append(";");
sb.append(';');
} else {
sb.appendCodePoint(cp);
}
Expand Down Expand Up @@ -191,31 +191,7 @@ public static String unescape(String string) {
final int semic = string.indexOf(';', i);
if (semic > i) {
final String entity = string.substring(i + 1, semic);
if (entity.charAt(0) == '#') {
int cp;
if (entity.charAt(1) == 'x') {
// hex encoded unicode
cp = Integer.parseInt(entity.substring(2), 16);
} else {
// decimal encoded unicode
cp = Integer.parseInt(entity.substring(1));
}
sb.appendCodePoint(cp);
} else {
if ("quot".equalsIgnoreCase(entity)) {
sb.append('"');
} else if ("amp".equalsIgnoreCase(entity)) {
sb.append('&');
} else if ("apos".equalsIgnoreCase(entity)) {
sb.append('\'');
} else if ("lt".equalsIgnoreCase(entity)) {
sb.append('<');
} else if ("gt".equalsIgnoreCase(entity)) {
sb.append('>');
} else {// unsupported xml entity. leave encoded
sb.append('&').append(entity).append(';');
}
}
sb.append(XMLTokener.unescapeEntity(entity));
// skip past the entity we just parsed.
i += entity.length() + 1;
} else {
Expand Down Expand Up @@ -364,7 +340,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
throw x.syntaxError("Missing value");
}
jsonobject.accumulate(string,
keepStrings ? unescape((String)token) : stringToValue((String) token));
keepStrings ? ((String)token) : stringToValue((String) token));
token = null;
} else {
jsonobject.accumulate(string, "");
Expand Down Expand Up @@ -396,7 +372,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
string = (String) token;
if (string.length() > 0) {
jsonobject.accumulate("content",
keepStrings ? unescape(string) : stringToValue(string));
keepStrings ? string : stringToValue(string));
}

} else if (token == LT) {
Expand Down Expand Up @@ -430,11 +406,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
* @return JSON value of this string or the string
*/
public static Object stringToValue(String string) {
// Diff view without +/- markers: the next five lines are the body this commit
// removes. It ran unescape() over any String produced by the conversion.
Object ret = JSONObject.stringToValue(string);
if(ret instanceof String){
return unescape((String)ret);
}
return ret;
// Body this commit adds: return JSONObject.stringToValue's result unchanged,
// with no unescape pass here.
return JSONObject.stringToValue(string);
}

/**
Expand Down
33 changes: 31 additions & 2 deletions XMLTokener.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,37 @@ public Object nextEntity(char ampersand) throws JSONException {
}
}
String string = sb.toString();
Object object = entity.get(string);
return object != null ? object : ampersand + string + ";";
return unescapeEntity(string);
}

/**
 * Unescapes a single XML entity.
 *
 * <p>Numeric character references are decoded to the code point they name:
 * {@code #65} (decimal) and {@code #x41} (hexadecimal; an upper-case
 * {@code X} is accepted as well). Any other value is looked up in the
 * {@code entity} table.
 *
 * <p>Input that cannot be decoded is never an error: an unknown name, a
 * numeric reference with missing or non-numeric digits, or a number that is
 * not a valid Unicode code point is returned re-encoded as
 * {@code "&" + e + ";"}.
 *
 * @param e the entity value only, without the preceding {@code &} or the
 *          ending {@code ;}
 * @return the decoded text; the empty string when {@code e} is {@code null}
 *         or empty; otherwise the entity left in its encoded form
 */
static String unescapeEntity(String e) {
    // validate
    if (e == null || e.isEmpty()) {
        return "";
    }
    // if our entity is an encoded unicode point, parse it.
    if (e.charAt(0) == '#') {
        try {
            // Length check first: a bare "#" has no character at index 1.
            final boolean hex = e.length() > 1
                    && (e.charAt(1) == 'x' || e.charAt(1) == 'X');
            final int cp = hex
                    ? Integer.parseInt(e.substring(2), 16)
                    : Integer.parseInt(e.substring(1));
            // new String(int[], int, int) rejects invalid code points with an
            // IllegalArgumentException, so screen the value before building.
            if (Character.isValidCodePoint(cp)) {
                return new String(new int[] {cp}, 0, 1);
            }
        } catch (NumberFormatException ignored) {
            // Missing or non-numeric digits: fall through and keep it encoded.
        }
        return '&' + e + ';';
    }
    Character knownEntity = entity.get(e);
    if (knownEntity == null) {
        // we don't know the entity so keep it encoded
        return '&' + e + ';';
    }
    return knownEntity.toString();
}


Expand Down

0 comments on commit 2565abd

Please sign in to comment.