001/** 002 * Logback: the reliable, generic, fast and flexible logging framework. 003 * Copyright (C) 1999-2015, QOS.ch. All rights reserved. 004 * 005 * This program and the accompanying materials are dual-licensed under 006 * either the terms of the Eclipse Public License v1.0 as published by 007 * the Eclipse Foundation 008 * 009 * or (per the licensee's choosing) 010 * 011 * under the terms of the GNU Lesser General Public License version 2.1 012 * as published by the Free Software Foundation. 013 */ 014package ch.qos.logback.core.helpers; 015 016import java.util.regex.Pattern; 017 018/** 019 * Utility class for transforming strings. 020 * 021 * @author Ceki Gülcü 022 * @author Michael A. McAngus 023 */ 024public class Transform { 025 private static final String CDATA_START = "<![CDATA["; 026 private static final String CDATA_END = "]]>"; 027 private static final String CDATA_PSEUDO_END = "]]>"; 028 private static final String CDATA_EMBEDED_END = CDATA_END + CDATA_PSEUDO_END + CDATA_START; 029 private static final int CDATA_END_LEN = CDATA_END.length(); 030 private static final Pattern UNSAFE_XML_CHARS = Pattern.compile("[\u0000-\u0008\u000b\u000c\u000e-\u001f<>&'\"]"); 031 032 /** 033 * This method takes a string which may contain HTML tags (ie, <b>, 034 * <table>, etc) and replaces any '<','>' ... characters with 035 * respective predefined entity references. 036 * 037 * @param input The text to be converted. 038 */ 039 public static String escapeTags(final String input) { 040 if (input == null || input.length() == 0 || !UNSAFE_XML_CHARS.matcher(input).find()) { 041 return input; 042 } 043 StringBuffer buf = new StringBuffer(input); 044 return escapeTags(buf); 045 } 046 047 /** 048 * This method takes a StringBuilder which may contain HTML tags (ie, <b>, 049 * <table>, etc) and replaces any '<' and '>' characters with 050 * respective predefined entity references. 051 * 052 * @param buf StringBuffer to transform 053 * @return 054 */ 055 public static String escapeTags(final StringBuffer buf) { 056 for (int i = 0; i < buf.length(); i++) { 057 char ch = buf.charAt(i); 058 switch (ch) { 059 case '\t': 060 case '\n': 061 case '\r': 062 // These characters are below '\u0020' but are allowed: 063 break; 064 case '&': 065 buf.replace(i, i + 1, "&"); 066 break; 067 case '<': 068 buf.replace(i, i + 1, "<"); 069 break; 070 case '>': 071 buf.replace(i, i + 1, ">"); 072 break; 073 case '"': 074 buf.replace(i, i + 1, """); 075 break; 076 case '\'': 077 buf.replace(i, i + 1, "'"); 078 break; 079 default: 080 if (ch < '\u0020') { 081 // These characters are not allowed, 082 // replace them with "Object replacement character": 083 buf.replace(i, i + 1, "\uFFFD"); 084 } 085 break; 086 } 087 } 088 return buf.toString(); 089 } 090 091 /** 092 * Ensures that embedded CDEnd strings (]]>) are handled properly within 093 * message, NDC and throwable tag text. 094 * 095 * @param output Writer. The initial CDStart (<![CDATA[) and final CDEnd 096 * (]]>) of the CDATA section are the responsibility of the 097 * calling method. 098 * 099 * @param str The String that is inserted into an existing CDATA Section. 100 */ 101 public static void appendEscapingCDATA(StringBuilder output, String str) { 102 if (str == null) { 103 return; 104 } 105 106 int end = str.indexOf(CDATA_END); 107 108 if (end < 0) { 109 output.append(str); 110 111 return; 112 } 113 114 int start = 0; 115 116 while (end > -1) { 117 output.append(str.substring(start, end)); 118 output.append(CDATA_EMBEDED_END); 119 start = end + CDATA_END_LEN; 120 121 if (start < str.length()) { 122 end = str.indexOf(CDATA_END, start); 123 } else { 124 return; 125 } 126 } 127 128 output.append(str.substring(start)); 129 } 130}