diff --git a/build.gradle b/build.gradle index 399408b..77e2248 100644 --- a/build.gradle +++ b/build.gradle @@ -10,7 +10,7 @@ buildscript { } } -version "1.4.20" +version "1.4.21" group "au.org.ala.plugins.grails" apply plugin:"eclipse" diff --git a/grails-app/controllers/au/org/ala/bie/ExternalSiteController.groovy b/grails-app/controllers/au/org/ala/bie/ExternalSiteController.groovy index c2b33a6..a9f41c4 100755 --- a/grails-app/controllers/au/org/ala/bie/ExternalSiteController.groovy +++ b/grails-app/controllers/au/org/ala/bie/ExternalSiteController.groovy @@ -13,15 +13,18 @@ package au.org.ala.bie -import au.org.ala.citation.BHLAdaptor import com.google.common.util.concurrent.RateLimiter import grails.converters.JSON +import groovy.json.JsonOutput import groovy.json.JsonSlurper import org.jsoup.Jsoup import org.jsoup.nodes.Document import org.jsoup.select.Elements +import org.owasp.html.HtmlPolicyBuilder +import org.owasp.html.PolicyFactory import java.text.MessageFormat +import java.util.regex.Pattern /** * Controller that proxies external webservice calls to get around cross domain issues @@ -33,6 +36,9 @@ class ExternalSiteController { RateLimiter eolRateLimiter = RateLimiter.create(1.0) // rate max requests per second (Double) RateLimiter genbankRateLimiter = RateLimiter.create(3.0) // rate max requests per second (Double) + // by default do not sanitise EOL response + boolean sanitiseEol = grailsApplication.config.eol.sanitise ?: false + def index() {} def eol = { @@ -56,7 +62,8 @@ class ExternalSiteController { page = MessageFormat.format(page, pageId) log.debug("EOL page url = ${page}") def pageText = new URL(page).text ?: '{}' - jsonOutput = pageText + def updatedPageText = updateEolOutput(pageText) + jsonOutput = sanitiseEol ? sanitiseEolOutput(updatedPageText) : updatedPageText } } @@ -116,7 +123,6 @@ class ExternalSiteController { } def scholar = { - def searchStrings = params.list("s") def searchParams = "\"" + searchStrings.join("\" OR \"") + "\"" def scholarBase = grailsApplication.config.literature?.scholar?.url ?: "https://scholar.google.com" @@ -179,4 +185,84 @@ class ExternalSiteController { } } } + + /** + * Update EOL content before rendering. + * @param text EOL response + * @return updated EOL response + */ + String updateEolOutput(String text){ + String updateValues = grailsApplication.config.eol.response.update + if (updateValues){ + String[] values = updateValues.split(",") + values.each { pairs -> + String[] valuePairs = pairs.split(";") + String replacement = valuePairs.length == 1 ? "''" :valuePairs[1] + text = text.replace(valuePairs[0], replacement) + } + } + text + } + + /** + * Sanitise EOL response with defined policy. + * @param text EOL response + * @return processed EOL response + */ + String sanitiseEolOutput(String text) { + def json = new JsonSlurper().parseText(text) + + if(json.taxonConcept?.dataObjects){ + PolicyFactory policy = getPolicyFactory() + json.taxonConcept.dataObjects.each { dataObject -> + String desc = dataObject.description + String processedDesc = sanitiseBodyText(policy, desc) + dataObject.description = processedDesc + } + } + JsonOutput.toJson(json) + } + + /** + * Utility to sanitise HTML text and only allow links to be kept, removing any + * other HTML markup. + * @param policy PolicyFactory + * @param input HTML String + * @return output sanitized HTML String + */ + String sanitiseBodyText(PolicyFactory policy, String input) { + // Sanitize the HTML based on given policy + String sanitisedHtml = policy.sanitize(input) + sanitisedHtml + } + + private PolicyFactory getPolicyFactory(){ + HtmlPolicyBuilder builder = new HtmlPolicyBuilder() + .allowStandardUrlProtocols() + .requireRelNofollowOnLinks() + + String allowedElements = grailsApplication.config.eol.html.allowedElements + if (allowedElements){ + String[] elements = allowedElements.split(",") + elements.each { + builder.allowElements(it) + } + } + + String allowedAttributes = grailsApplication.config.eol.html.allowAttributes + if (allowedAttributes){ + String[] attributes = allowedAttributes.split(",") + attributes.each { attribute -> + String[] values = attribute.split (";") + if (values.length == 2){ + builder.allowAttributes(values[0]).onElements(values[1]) + } else { + builder.allowAttributes(values[0]).matching(Pattern.compile(values[2], Pattern.CASE_INSENSITIVE)).onElements(values[1]) + } + + } + } + + builder.toFactory() + } } diff --git a/src/test/groovy/au/org/ala/bie/ExternalSiteControllerSpec.groovy b/src/test/groovy/au/org/ala/bie/ExternalSiteControllerSpec.groovy new file mode 100644 index 0000000..56d67a1 --- /dev/null +++ b/src/test/groovy/au/org/ala/bie/ExternalSiteControllerSpec.groovy @@ -0,0 +1,77 @@ +package au.org.ala.bie + +import grails.test.mixin.TestFor +import org.owasp.html.HtmlPolicyBuilder +import org.owasp.html.PolicyFactory +import spock.lang.Specification + +import java.util.regex.Pattern + +/** + * Unit test for {@link au.org.ala.bie.ExternalSiteController} + */ +@TestFor(ExternalSiteController) +class ExternalSiteControllerSpec extends Specification { + + PolicyFactory policy + def setup() { + String allowedElements = "h2,div,a,br,i,b,span,ul,li,p,sup" + String allowedAttributes ="href;a;^(http|https|mailto|#).+,class;span,id;span,src;img;^(http|https).+" + + HtmlPolicyBuilder builder = new HtmlPolicyBuilder() + .allowStandardUrlProtocols() + .requireRelNofollowOnLinks() + + if (allowedElements) { + String[] elements = allowedElements.split(",") + elements.each { + builder.allowElements(it) + } + } + + if (allowedAttributes){ + String[] attributes = allowedAttributes.split(",") + attributes.each { attribute -> + String[] values = attribute.split (";") + if (values.length == 2){ + builder.allowAttributes(values[0]).onElements(values[1]) + } else { + builder.allowAttributes(values[0]).matching(Pattern.compile(values[2], Pattern.CASE_INSENSITIVE)).onElements(values[1]) + } + + } + } + + policy = builder.toFactory() + } + + def cleanup() { + } + + void "test suspect attributes"() { + given: + def text = 'Some invalid image attribute ' + when: + def html = controller.sanitiseBodyText(policy, text) + then: + html == 'Some invalid image attribute ' + } + + void "test disallowed elements"() { + given: + def text = 'Some invalid element