Skip to content

Commit

Permalink
Merge pull request #201 from AtlasOfLivingAustralia/develop
Browse files Browse the repository at this point in the history
Fixes for #195 and #200
  • Loading branch information
Rita-C authored Jan 20, 2020
2 parents 9f8d89d + 2f6e094 commit 678029f
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 4 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ buildscript {
}
}

version "1.4.20"
version "1.4.21"
group "au.org.ala.plugins.grails"

apply plugin:"eclipse"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,18 @@

package au.org.ala.bie

import au.org.ala.citation.BHLAdaptor
import com.google.common.util.concurrent.RateLimiter
import grails.converters.JSON
import groovy.json.JsonOutput
import groovy.json.JsonSlurper
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.select.Elements
import org.owasp.html.HtmlPolicyBuilder
import org.owasp.html.PolicyFactory

import java.text.MessageFormat
import java.util.regex.Pattern

/**
* Controller that proxies external webservice calls to get around cross domain issues
Expand All @@ -33,6 +36,9 @@ class ExternalSiteController {
// Rate limiter created with a rate of 1.0 permits per second.
// NOTE(review): presumably throttles outgoing EOL requests - its usage is not visible in this hunk, confirm.
RateLimiter eolRateLimiter = RateLimiter.create(1.0) // rate max requests per second (Double)
// Rate limiter created with a rate of 3.0 permits per second.
// NOTE(review): presumably throttles outgoing GenBank requests - its usage is not visible in this hunk, confirm.
RateLimiter genbankRateLimiter = RateLimiter.create(3.0) // rate max requests per second (Double)

// Whether the EOL response is passed through sanitiseEolOutput before being returned.
// Read from the eol.sanitise config value; falls back to false when that value is unset or falsy,
// so by default the EOL response is not sanitised.
// NOTE(review): the Elvis operator applies Groovy truth, so a non-empty String such as "false"
// would evaluate to true here - confirm the config supplies a real boolean.
boolean sanitiseEol = grailsApplication.config.eol.sanitise ?: false

// Index action with an empty body: it performs no work of its own.
def index() {}

def eol = {
Expand All @@ -56,7 +62,8 @@ class ExternalSiteController {
page = MessageFormat.format(page, pageId)
log.debug("EOL page url = ${page}")
def pageText = new URL(page).text ?: '{}'
jsonOutput = pageText
def updatedPageText = updateEolOutput(pageText)
jsonOutput = sanitiseEol ? sanitiseEolOutput(updatedPageText) : updatedPageText
}
}

Expand Down Expand Up @@ -116,7 +123,6 @@ class ExternalSiteController {
}

def scholar = {

def searchStrings = params.list("s")
def searchParams = "\"" + searchStrings.join("\" OR \"") + "\""
def scholarBase = grailsApplication.config.literature?.scholar?.url ?: "https://scholar.google.com"
Expand Down Expand Up @@ -179,4 +185,84 @@ class ExternalSiteController {
}
}
}

/**
 * Update EOL content before rendering.
 *
 * Applies the literal (non-regex) substitutions listed in the
 * <code>eol.response.update</code> config value to the response text. The value is a
 * comma separated list of <code>search;replacement</code> entries; an entry without a
 * replacement part has its search text replaced by the two-character string
 * <code>''</code>. When the config value is unset or empty the text is returned as is.
 *
 * Entries whose search text is empty (for example the gap in <code>a;b,,c;d</code>, or
 * an entry starting with <code>;</code>) are skipped: an empty search string matches
 * between every character, so replacing it would corrupt the whole response.
 *
 * @param text EOL response
 * @return updated EOL response
 */
String updateEolOutput(String text){
    String updateValues = grailsApplication.config.eol.response.update
    if (updateValues){
        updateValues.split(",").each { String pair ->
            String[] valuePairs = pair.split(";")
            // ";" alone splits to an empty array and "" splits to [""]:
            // neither names any text to search for, so there is nothing to replace
            if (valuePairs.length == 0 || valuePairs[0].isEmpty()) {
                return // skip this entry only; each() carries on with the next one
            }
            String replacement = valuePairs.length == 1 ? "''" : valuePairs[1]
            text = text.replace(valuePairs[0], replacement)
        }
    }
    text
}

/**
 * Sanitise EOL response with defined policy.
 *
 * The response is parsed as JSON. When it contains taxonConcept.dataObjects, the
 * description of each data object is replaced by the result of
 * {@link #sanitiseBodyText} using the policy from getPolicyFactory(). The parsed
 * structure is then serialised back to a JSON string, whether or not anything was
 * sanitised.
 *
 * @param text EOL response
 * @return processed EOL response
 */
String sanitiseEolOutput(String text) {
    def parsed = new JsonSlurper().parseText(text)
    def dataObjects = parsed.taxonConcept?.dataObjects

    // Nothing to sanitise: hand back the re-serialised JSON straight away
    if (!dataObjects) {
        return JsonOutput.toJson(parsed)
    }

    PolicyFactory htmlPolicy = getPolicyFactory()
    for (dataObject in dataObjects) {
        String rawDescription = dataObject.description
        dataObject.description = sanitiseBodyText(htmlPolicy, rawDescription)
    }
    return JsonOutput.toJson(parsed)
}

/**
 * Utility to sanitise HTML text with the supplied policy. Which markup is kept and
 * which is removed is decided entirely by that policy.
 *
 * @param policy PolicyFactory applied to the input
 * @param input HTML String
 * @return output sanitized HTML String
 */
String sanitiseBodyText(PolicyFactory policy, String input) {
    // Delegate straight to the policy; no additional processing happens here
    return policy.sanitize(input)
}

/**
 * Build the HTML sanitiser policy from configuration.
 *
 * The policy always allows the standard URL protocols and requires rel="nofollow" on
 * links. On top of that:
 * <ul>
 * <li><code>eol.html.allowedElements</code> - comma separated element names to allow.</li>
 * <li><code>eol.html.allowAttributes</code> - comma separated entries of the form
 * <code>attribute;element</code> or <code>attribute;element;regex</code>. With a regex,
 * the attribute value must match it (compiled case-insensitively).</li>
 * </ul>
 * Whitespace around element and attribute names is trimmed and blank entries are
 * ignored. An attribute entry that lacks an attribute or element name is skipped with a
 * warning instead of failing with an array index error.
 * Because entries are comma separated, a regex in this setting cannot contain a comma.
 *
 * @return the configured PolicyFactory
 */
private PolicyFactory getPolicyFactory(){
    HtmlPolicyBuilder builder = new HtmlPolicyBuilder()
            .allowStandardUrlProtocols()
            .requireRelNofollowOnLinks()

    String allowedElements = grailsApplication.config.eol.html.allowedElements
    if (allowedElements){
        allowedElements.split(",").each { String element ->
            String elementName = element.trim()
            if (elementName) {
                builder.allowElements(elementName)
            }
        }
    }

    String allowedAttributes = grailsApplication.config.eol.html.allowAttributes
    if (allowedAttributes){
        allowedAttributes.split(",").each { String attribute ->
            if (!attribute.trim()) {
                return // blank entry, e.g. from a doubled comma
            }
            String[] values = attribute.split(";")
            String attributeName = values.length > 0 ? values[0].trim() : ""
            String elementName = values.length > 1 ? values[1].trim() : ""
            if (!attributeName || !elementName) {
                // Without both names there is nothing safe to allow; do not widen the policy by guessing
                log.warn("Ignoring malformed eol.html.allowAttributes entry '${attribute}'")
                return
            }
            if (values.length == 2){
                builder.allowAttributes(attributeName).onElements(elementName)
            } else {
                // Third part is the regex the attribute value has to match; it is used as configured
                builder.allowAttributes(attributeName).matching(Pattern.compile(values[2], Pattern.CASE_INSENSITIVE)).onElements(elementName)
            }
        }
    }

    builder.toFactory()
}
}
77 changes: 77 additions & 0 deletions src/test/groovy/au/org/ala/bie/ExternalSiteControllerSpec.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package au.org.ala.bie

import grails.test.mixin.TestFor
import org.owasp.html.HtmlPolicyBuilder
import org.owasp.html.PolicyFactory
import spock.lang.Specification

import java.util.regex.Pattern

/**
 * Unit test for {@link au.org.ala.bie.ExternalSiteController}
 *
 * Each feature runs sanitiseBodyText against a policy assembled in setup() from fixed
 * element and attribute lists, and compares the sanitised HTML with an exact string.
 */
@TestFor(ExternalSiteController)
class ExternalSiteControllerSpec extends Specification {

    // Policy shared by the features; rebuilt before each one in setup()
    PolicyFactory policy

    def setup() {
        policy = buildPolicy(
                "h2,div,a,br,i,b,span,ul,li,p,sup",
                "href;a;^(http|https|mailto|#).+,class;span,id;span,src;img;^(http|https).+")
    }

    def cleanup() {
        // nothing to do
    }

    /**
     * Assemble a sanitiser policy from comma separated specifications.
     *
     * @param elementSpec comma separated element names to allow
     * @param attributeSpec comma separated entries of the form attribute;element or
     *        attribute;element;regex (regex compiled case-insensitively)
     * @return the resulting policy
     */
    private static PolicyFactory buildPolicy(String elementSpec, String attributeSpec) {
        HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder()
                .allowStandardUrlProtocols()
                .requireRelNofollowOnLinks()

        if (elementSpec) {
            for (String elementName in elementSpec.split(",")) {
                policyBuilder.allowElements(elementName)
            }
        }

        if (attributeSpec) {
            for (String entry in attributeSpec.split(",")) {
                String[] parts = entry.split(";")
                def attributeBuilder = policyBuilder.allowAttributes(parts[0])
                // Entries that are not exactly attribute;element carry a value regex in the third part
                if (parts.length != 2) {
                    attributeBuilder = attributeBuilder.matching(Pattern.compile(parts[2], Pattern.CASE_INSENSITIVE))
                }
                attributeBuilder.onElements(parts[1])
            }
        }

        return policyBuilder.toFactory()
    }

    void "test suspect attributes"() {
        given: "a link wrapping an img tag (img is not in the allowed element list)"
        def input = 'Some invalid image attribute <a href="http://en.wikipedia.org/wiki/File:1_Acacia_oswaldii_foliage.jpg"><img alt=" src=" width="220" height="230"></a>'

        when:
        def sanitised = controller.sanitiseBodyText(policy, input)

        then: "the link is kept with rel=nofollow and the img tag is gone"
        sanitised == 'Some invalid image attribute <a href="http://en.wikipedia.org/wiki/File:1_Acacia_oswaldii_foliage.jpg" rel="nofollow"></a>'
    }

    void "test disallowed elements"() {
        given: "text containing an h1, which is not in the allowed element list"
        def input = 'Some invalid element <h1>h1 content</h1>'

        when:
        def sanitised = controller.sanitiseBodyText(policy, input)

        then: "the h1 tags are removed and their text content remains"
        sanitised == 'Some invalid element h1 content'
    }

    void "test valid html text"() {
        given:
        def input = '<h2>Contents</h2> <span></span></div> <ul> <li><a href=\"#Description\"><span>1</span> <span>Description</span></a></li> <li><a href=\"#Distribution\"><span>2</span> <span>Distribution</span></a></li> <li><a href=\"#Classification\"><span>3</span> <span>Classification</span></a></li> <li><a href=\"#See_also\"><span>4</span> <span>See also</span></a></li> <li><a href=\"#References\"><span>5</span> <span>References</span></a></li> </ul> </div> '

        when:
        def sanitised = controller.sanitiseBodyText(policy, input)

        then:
        sanitised == '<h2>Contents</h2> <ul><li><a href="#Description" rel="nofollow">1 Description</a></li><li><a href="#Distribution" rel="nofollow">2 Distribution</a></li><li><a href="#Classification" rel="nofollow">3 Classification</a></li><li><a href="#See_also" rel="nofollow">4 See also</a></li><li><a href="#References" rel="nofollow">5 References</a></li></ul> '
    }
}

0 comments on commit 678029f

Please sign in to comment.