From 5d56dd740648886b6317d430d66d81c56c9e0bdc Mon Sep 17 00:00:00 2001 From: Owen Reynolds Date: Wed, 11 Dec 2024 14:53:39 +0000 Subject: [PATCH 1/7] First version of support for language preferences (fixes #13) --- .../rdf/dt/RDFModelConfigurationDialog.java | 65 ++++- .../org/eclipse/epsilon/emc/rdf/RDFModel.java | 21 ++ .../epsilon/emc/rdf/RDFQualifiedName.java | 4 + .../eclipse/epsilon/emc/rdf/RDFResource.java | 74 ++++- .../example.eol | 4 +- .../spiderman.ttl | 2 +- .../resources/spiderman-multiLang.ttl | 17 ++ .../emc/rdf/RDFModelLanguageTagsTest.java | 265 ++++++++++++++++++ .../eclipse/epsilon/emc/rdf/RDFModelTest.java | 8 +- 9 files changed, 443 insertions(+), 17 deletions(-) create mode 100644 tests/org.eclipse.epsilon.emc.rdf.tests/resources/spiderman-multiLang.ttl create mode 100644 tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelLanguageTagsTest.java diff --git a/bundles/org.eclipse.epsilon.emc.rdf.dt/src/org/eclipse/epsilon/emc/rdf/dt/RDFModelConfigurationDialog.java b/bundles/org.eclipse.epsilon.emc.rdf.dt/src/org/eclipse/epsilon/emc/rdf/dt/RDFModelConfigurationDialog.java index 8d4dfab..0c50455 100644 --- a/bundles/org.eclipse.epsilon.emc.rdf.dt/src/org/eclipse/epsilon/emc/rdf/dt/RDFModelConfigurationDialog.java +++ b/bundles/org.eclipse.epsilon.emc.rdf.dt/src/org/eclipse/epsilon/emc/rdf/dt/RDFModelConfigurationDialog.java @@ -15,8 +15,10 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import org.eclipse.core.resources.IFile; @@ -33,6 +35,8 @@ import org.eclipse.jface.viewers.TableViewerColumn; import org.eclipse.jface.viewers.TextCellEditor; import org.eclipse.swt.SWT; +import org.eclipse.swt.events.ModifyEvent; +import org.eclipse.swt.events.ModifyListener; import org.eclipse.swt.events.SelectionAdapter; import 
org.eclipse.swt.events.SelectionEvent; import org.eclipse.swt.layout.FillLayout; @@ -40,6 +44,8 @@ import org.eclipse.swt.widgets.Button; import org.eclipse.swt.widgets.Composite; import org.eclipse.swt.widgets.FileDialog; +import org.eclipse.swt.widgets.Label; +import org.eclipse.swt.widgets.Text; public class RDFModelConfigurationDialog extends AbstractModelConfigurationDialog { @@ -74,7 +80,7 @@ protected Object getValue(Object element) { protected void setValue(Object element, Object value) { ((NamespaceMappingTableEntry)element).prefix = String.valueOf(value); viewer.update(element, null); - validateURLs(); + validateForm(); } } @@ -107,7 +113,7 @@ protected Object getValue(Object element) { protected void setValue(Object element, Object value) { ((NamespaceMappingTableEntry)element).url = String.valueOf(value); viewer.update(element, null); - validateURLs(); + validateForm(); } } @@ -140,7 +146,7 @@ protected Object getValue(Object element) { protected void setValue(Object element, Object value) { ((URLTableEntry)element).url = String.valueOf(value); viewer.update(element, null); - validateURLs(); + validateForm(); } } @@ -177,6 +183,7 @@ protected void createGroups(Composite control) { createNameAliasGroup(control); createRDFUrlsGroup(control); createNamespaceMappingGroup(control); + createLanguagePreferenceGroup(control); } private Composite createNamespaceMappingGroup(Composite parent) { @@ -346,6 +353,7 @@ public void widgetSelected(SelectionEvent e) { urls.remove(it.next()); } urlList.refresh(); + validateForm(); } } }); @@ -357,6 +365,7 @@ public void widgetSelected(SelectionEvent e) { public void widgetSelected(SelectionEvent e) { urls.clear(); urlList.refresh(); + validateForm(); } }); @@ -365,6 +374,30 @@ public void widgetSelected(SelectionEvent e) { return groupContent; } + + protected Label languagePreferenceLabel; + protected Text languagePreferenceText; + + private Composite createLanguagePreferenceGroup(Composite parent) { + final Composite 
groupContent = DialogUtil.createGroupContainer(parent, "Language tag preference", 1); + + languagePreferenceLabel = new Label(groupContent, SWT.NONE); + languagePreferenceLabel.setText("Comma-separated preferred language tags, in descending priority:"); + + languagePreferenceText = new Text(groupContent, SWT.BORDER); + languagePreferenceText.setLayoutData(new GridData(GridData.FILL_HORIZONTAL)); + languagePreferenceText.addModifyListener(new ModifyListener() { + @Override + public void modifyText(ModifyEvent event) { + validateForm(); + } + }); + + groupContent.layout(); + groupContent.pack(); + return groupContent; + } + @Override protected void loadProperties(){ super.loadProperties(); @@ -389,12 +422,14 @@ protected void loadProperties(){ } } } + + languagePreferenceText.setText(properties.getProperty(RDFModel.PROPERTY_LANGUAGE_PREFERENCE)); this.urlList.refresh(); this.nsMappingTable.refresh(); - validateURLs(); + validateForm(); } - + @Override protected void storeProperties(){ super.storeProperties(); @@ -408,9 +443,27 @@ protected void storeProperties(){ String.join(",", nsMappingEntries.stream() .map(e -> e.prefix + "=" + e.url) .collect(Collectors.toList()))); + + properties.put(RDFModel.PROPERTY_LANGUAGE_PREFERENCE, + languagePreferenceText.getText().replaceAll("\\s", "")); } - protected void validateURLs() { + protected void validateForm() { + String text = languagePreferenceText.getText().strip(); + if (text.length() > 0) { + Set invalidTags = new HashSet<>(); + for (String tag : text.split(",")) { + if (!RDFModel.isValidLanguageTag(tag)) { + invalidTags.add(tag); + } + } + if (!invalidTags.isEmpty()) { + setErrorMessage(String.format( + "Invalid tags: %s", String.join(" ", invalidTags))); + return; + } + } + for (URLTableEntry entry : this.urls) { String errorMessage = validateURL(entry.url); if (errorMessage != null) { diff --git a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java 
b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java index df0de72..270821e 100644 --- a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java +++ b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java @@ -17,6 +17,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Map.Entry; @@ -38,6 +39,8 @@ public class RDFModel extends CachedModel { + public static final String PROPERTY_LANGUAGE_PREFERENCE = "languagePreference"; + public static final String PROPERTY_URIS = "uris"; /** @@ -50,6 +53,7 @@ public class RDFModel extends CachedModel { */ public static final String PROPERTY_PREFIXES = "prefixes"; + protected final List languagePreference = new ArrayList<>(); protected final Map customPrefixesMap = new HashMap<>(); protected final List uris = new ArrayList<>(); protected Model model; @@ -154,6 +158,13 @@ public void load(StringProperties properties, IRelativePathResolver resolver) th customPrefixesMap.put(sPrefix, sURI); } } + + this.languagePreference.clear(); + if (!properties.getProperty(PROPERTY_LANGUAGE_PREFERENCE).isEmpty()) { + for (String tag : properties.getProperty(PROPERTY_LANGUAGE_PREFERENCE).split(",")) { + this.languagePreference.add(tag.strip()); + } + } load(); } @@ -300,6 +311,10 @@ public void setUri(String uri) { public Map getCustomPrefixesMap() { return this.customPrefixesMap; } + + public List getLanguagePreference() { + return languagePreference; + } /** *

@@ -338,4 +353,10 @@ public String getPrefix(String namespaceURI) { } return model.getNsURIPrefix(namespaceURI); } + + // Using Java's Locale class to check that tags conform to bcp47 structure + public static boolean isValidLanguageTag (String bcp47tag) { + boolean isValidBCP47 = !("und".equals(Locale.forLanguageTag(bcp47tag).toLanguageTag())); + return isValidBCP47; + } } diff --git a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFQualifiedName.java b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFQualifiedName.java index ef022db..84fb326 100644 --- a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFQualifiedName.java +++ b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFQualifiedName.java @@ -73,6 +73,10 @@ public static RDFQualifiedName from(String prefix, String nsURI, String localNam public RDFQualifiedName withLocalName(String newLocalName) { return new RDFQualifiedName(prefix, namespaceURI, newLocalName, languageTag); } + + public RDFQualifiedName withLanguageTag(String newLanguageTag) { + return new RDFQualifiedName(prefix, namespaceURI, localName, newLanguageTag); + } @Override public String toString() { diff --git a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java index 2c65b38..854093a 100644 --- a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java +++ b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java @@ -16,6 +16,7 @@ import java.util.Collection; import java.util.List; import java.util.Set; +import java.util.stream.Collectors; import org.apache.jena.rdf.model.Literal; import org.apache.jena.rdf.model.Property; @@ -28,11 +29,12 @@ import org.apache.jena.vocabulary.RDF; import org.eclipse.epsilon.eol.execute.context.IEolContext; +import 
com.google.common.collect.HashMultimap; import com.google.common.collect.ListMultimap; +import com.google.common.collect.Multimap; import com.google.common.collect.MultimapBuilder; public class RDFResource extends RDFModelElement { - protected static final String LITERAL_SUFFIX = "_literal"; enum LiteralMode { @@ -53,16 +55,80 @@ public Resource getResource() { public Collection getProperty(String property, IEolContext context) { final RDFQualifiedName pName = RDFQualifiedName.from(property, this.owningModel::getNamespaceURI); - Collection value = getProperty(pName, context, LiteralMode.VALUES_ONLY); + Collection value = getProperty(pName, context, LiteralMode.RAW); + if (!value.isEmpty() && !value.stream().anyMatch(p -> p instanceof RDFResource)) { + value = filterByPreferredLanguage(value, LiteralMode.VALUES_ONLY); + if (!value.isEmpty()) { + return value; + } + } + if (value.isEmpty() && pName.localName.endsWith(LITERAL_SUFFIX)) { - final String localNameWithoutSuffix = pName.localName.substring(0, pName.localName.length() - LITERAL_SUFFIX.length()); + final String localNameWithoutSuffix = pName.localName.substring(0, + pName.localName.length() - LITERAL_SUFFIX.length()); RDFQualifiedName withoutLiteral = pName.withLocalName(localNameWithoutSuffix); - return getProperty(withoutLiteral, context, LiteralMode.RAW); + + value = getProperty(withoutLiteral, context, LiteralMode.RAW); + if (!value.isEmpty() && !value.stream().anyMatch(p -> p instanceof RDFResource)) { + value = filterByPreferredLanguage(value, LiteralMode.RAW); + } } return value; } + private Collection filterByPreferredLanguage(Collection value, LiteralMode literalMode) { + // If no preferred languages are specified, don't do any filtering + if (super.getModel().getLanguagePreference().isEmpty()) { + switch (literalMode) { + case RAW: + return value; + case VALUES_ONLY: + return value.stream().map(e -> e instanceof RDFLiteral + ? 
((RDFLiteral) e).getValue() : e).collect(Collectors.toList()); + default: + throw new IllegalArgumentException("Unknown literal mode " + literalMode); + } + } + + // Otherwise, group literals by language tag + Multimap literalsByTag = HashMultimap.create(); + for (Object element : value) { + if (element instanceof RDFLiteral) { + RDFLiteral literal = (RDFLiteral) element; + literalsByTag.put(literal.getLanguage() == null ? "" : literal.getLanguage(), literal); + } else { + // TODO #19 see if we run into this scenario (perhaps with integers instead of strings?), print some warning, return value as is as fallback + throw new IllegalArgumentException("Expected RDFLiteral while filtering based on preferred languages, but got " + element); + } + } + + for (String tag : super.getModel().getLanguagePreference()) { + if (literalsByTag.containsKey(tag)) { + switch (literalMode) { + case RAW: + return new ArrayList<>(literalsByTag.get(tag)); + case VALUES_ONLY: + return literalsByTag.get(tag).stream().map(l -> + l.getValue()).collect(Collectors.toList()); + } + } + } + + // If we don't find any matches in the preferred languages, + // fall back to the untagged literals (if any). 
+ Collection rawFromUntagged = literalsByTag.get(""); + switch (literalMode) { + case RAW: + return new ArrayList<>(rawFromUntagged); + case VALUES_ONLY: + return rawFromUntagged.stream().map(l -> l.getValue()) + .collect(Collectors.toList()); + default: + throw new IllegalArgumentException("Unknown literal mode " + literalMode); + } + } + public Collection getProperty(RDFQualifiedName pName, IEolContext context, LiteralMode literalMode) { // Filter statements by prefix and local name ExtendedIterator itStatements = null; diff --git a/examples/org.eclipse.epsilon.examples.emc.rdf.turtles/example.eol b/examples/org.eclipse.epsilon.examples.emc.rdf.turtles/example.eol index c325cb2..b83a58f 100644 --- a/examples/org.eclipse.epsilon.examples.emc.rdf.turtles/example.eol +++ b/examples/org.eclipse.epsilon.examples.emc.rdf.turtles/example.eol @@ -8,8 +8,8 @@ goblin.enemyOf.println('Enemies of Green Goblin (without prefix): '); // // Q. Do we want some setting of "preferred language" for the RDF driver, instead of showing values for all languages? 
-goblin.enemyOf.name.flatten.println('All names of the enemies of the Green Goblin: '); -goblin.enemyOf.`name@`.flatten.println('All names in the default language of the enemies of the Green Goblin: '); +goblin.enemyOf.name.flatten.println('All names of the enemies of the Green Goblin (using language preferences, if any): '); +goblin.enemyOf.`name@`.flatten.println('All names without a language tag of the enemies of the Green Goblin: '); goblin.enemyOf.`name@ru`.flatten.println('All Russian names of the enemies of the Green Goblin: '); goblin.enemyOf.name_literal.flatten.println('All name literals of the enemies of the Green Goblin: '); goblin.enemyOf.`name_literal@`.flatten.println('All name literals in the default language of the enemies of the Green Goblin: '); diff --git a/examples/org.eclipse.epsilon.examples.emc.rdf.turtles/spiderman.ttl b/examples/org.eclipse.epsilon.examples.emc.rdf.turtles/spiderman.ttl index 3ea3e38..e7bca8f 100644 --- a/examples/org.eclipse.epsilon.examples.emc.rdf.turtles/spiderman.ttl +++ b/examples/org.eclipse.epsilon.examples.emc.rdf.turtles/spiderman.ttl @@ -14,4 +14,4 @@ <#spiderman> rel:enemyOf <#green-goblin> ; a foaf:Person ; - foaf:name "Spiderman", "Человек-паук"@ru . \ No newline at end of file + foaf:name "Spiderman", "Человек-паук"@ru ,"スパイダーマン"@ja . \ No newline at end of file diff --git a/tests/org.eclipse.epsilon.emc.rdf.tests/resources/spiderman-multiLang.ttl b/tests/org.eclipse.epsilon.emc.rdf.tests/resources/spiderman-multiLang.ttl new file mode 100644 index 0000000..e7bca8f --- /dev/null +++ b/tests/org.eclipse.epsilon.emc.rdf.tests/resources/spiderman-multiLang.ttl @@ -0,0 +1,17 @@ +# Sample Turtle document from the W3C: https://www.w3.org/TR/turtle/#sec-intro + +@base . +@prefix rdf: . +@prefix rdfs: . +@prefix foaf: . +@prefix rel: . + +<#green-goblin> + rel:enemyOf <#spiderman> ; + a foaf:Person ; # in the context of the Marvel universe + foaf:name "Green Goblin" . 
+ +<#spiderman> + rel:enemyOf <#green-goblin> ; + a foaf:Person ; + foaf:name "Spiderman", "Человек-паук"@ru ,"スパイダーマン"@ja . \ No newline at end of file diff --git a/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelLanguageTagsTest.java b/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelLanguageTagsTest.java new file mode 100644 index 0000000..de559e7 --- /dev/null +++ b/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelLanguageTagsTest.java @@ -0,0 +1,265 @@ +/******************************************************************************** + * Copyright (c) 2024 University of York + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Antonio Garcia-Dominguez - initial API and implementation + ********************************************************************************/ +package org.eclipse.epsilon.emc.rdf; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.eclipse.epsilon.common.util.StringProperties; +import org.eclipse.epsilon.eol.exceptions.models.EolModelLoadingException; +import org.eclipse.epsilon.eol.execute.context.EolContext; +import org.eclipse.epsilon.eol.execute.introspection.IPropertyGetter; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class RDFModelLanguageTagsTest { + + private RDFModel model; + private IPropertyGetter pGetter; + private EolContext context; + + private static final String SPIDERMAN_MULTILANG_TTL = "resources/spiderman-multiLang.ttl"; + + 
private static final String LANGUAGE_PREFERENCE_INVALID_STRING = "e n,en-us,123,ja,ru"; + private static final String LANGUAGE_PREFERENCE_JA_STRING = "e n,en-us,123,ja,ru"; + private static final String LANGUAGE_PREFERENCE_EN_STRING = "en"; + + private static final String SPIDERMAN_URI = "http://example.org/#spiderman"; + private static final String SPIDERMAN_NAME = "Spiderman"; + private static final String SPIDERMAN_NAME_RU = "Человек-паук"; + private static final String SPIDERMAN_NAME_JA = "スパイダーマン"; + + private static final Set SPIDERMAN_NAMES = new HashSet<>(Arrays.asList(SPIDERMAN_NAME, SPIDERMAN_NAME_RU, SPIDERMAN_NAME_JA)); + + private static final String GREEN_GOBLIN_NAME = "Green Goblin"; + + private static final Set ALL_NAMES = new HashSet<>(); + private static final Set ALL_NAMES_UNTAGGED = new HashSet<>(Arrays.asList(GREEN_GOBLIN_NAME, SPIDERMAN_NAME)); + static { + ALL_NAMES.add(GREEN_GOBLIN_NAME); + ALL_NAMES.addAll(SPIDERMAN_NAMES); + } + + public void setupModel (String languagePreference) throws EolModelLoadingException { + this.model = new RDFModel(); + //model.setUri("resources/spiderman.ttl"); + StringProperties props = new StringProperties(); + props.put(RDFModel.PROPERTY_URIS, SPIDERMAN_MULTILANG_TTL); + props.put(RDFModel.PROPERTY_LANGUAGE_PREFERENCE, languagePreference); + model.load(props); + + this.pGetter = model.getPropertyGetter(); + this.context = new EolContext(); + } + + @After + public void teardown() { + if (model != null) { + model.dispose(); + } + } + + @Test + public void modelLanguageTagPropertyLoad() throws Exception { + setupModel(LANGUAGE_PREFERENCE_INVALID_STRING); + String answer = "[" + LANGUAGE_PREFERENCE_INVALID_STRING.replaceAll("\\s", "") + "]"; + List langList = model.getLanguagePreference(); + String langListString = model.getLanguagePreference().toString().replaceAll("\\s", ""); + assertEquals(answer, langListString); + } + + @Test + public void modelLanguageTagPropertyLoadEMPTY() throws Exception { + 
setupModel(null); + assertTrue(model.getLanguagePreference().isEmpty()); + } + + + @Test + public void modelLanguageTagValidator() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + // Test for different tag patterns + assertTrue(RDFModel.isValidLanguageTag("en")); + assertFalse(RDFModel.isValidLanguageTag("e n")); + + assertTrue(RDFModel.isValidLanguageTag("en-us")); + assertFalse(RDFModel.isValidLanguageTag("e n-u s")); + + assertFalse(RDFModel.isValidLanguageTag("123")); + } + + @Test + public void getAllContentsNamesWithoutPrefixNoPreferredLanguageTags() throws Exception { + setupModel(null); + Set names = new HashSet<>(); + for (RDFModelElement o : model.allContents()) { + names.addAll((Collection) pGetter.invoke(o, "name", context)); + } + assertEquals("With no language preference and no tag, all values are returned", ALL_NAMES, names);; + } + + @Test + public void getNamesWithoutPrefixNoPreferredLanguageTag() throws Exception { + setupModel(null); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "name", context)); + assertEquals("With no language preference and no tag, all values are returned", SPIDERMAN_NAMES, names); + } + + // EN preferred but not available + + @Test + public void getNamesWithoutPrefixUsingPreferredLanguageTagEN() throws Exception { + setupModel(LANGUAGE_PREFERENCE_EN_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "name", context)); + assertEquals("Should return untagged when language preference cant be matched",Collections.singleton(SPIDERMAN_NAME), names); + } + + @Test + public void getNamesWithPrefixUsingPreferredLanguageTagEN() throws Exception { + setupModel(LANGUAGE_PREFERENCE_EN_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "foaf:name", context)); + 
assertEquals("Should return untagged when language preference cant be matched",Collections.singleton(SPIDERMAN_NAME), names); + } + + @Test + public void getNameLiteralWithPrefixUsingPreferredLanguageTagEN() throws Exception { + setupModel(LANGUAGE_PREFERENCE_EN_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>(); + for (RDFLiteral l : (Collection) pGetter.invoke(res, "foaf:name_literal", context)) { + names.add((String) l.getValue()); + } + assertEquals("Should return untagged when language preference cant be matched",Collections.singleton(SPIDERMAN_NAME), names); + } + + @Test + public void getNameLiteralWithoutPrefixUsingPreferredLanguageTagEN() throws Exception { + setupModel(LANGUAGE_PREFERENCE_EN_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>(); + for (RDFLiteral l : (Collection) pGetter.invoke(res, "name_literal", context)) { + names.add((String) l.getValue()); + } + assertEquals("Should return untagged when language preference cant be matched",Collections.singleton(SPIDERMAN_NAME), names); + } + + // JA preferred and available + + @Test + public void getNamesWithoutPrefixUsingPreferredLanguageTagJA() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "name", context)); + assertEquals(Collections.singleton(SPIDERMAN_NAME_JA), names); + } + + @Test + public void getNamesWithPrefixUsingPreferredLanguageTagJA() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "foaf:name", context)); + assertEquals(Collections.singleton(SPIDERMAN_NAME_JA), names); + } + + @Test + public void getNameLiteralWithPrefixUsingPreferredLanguageTagJA() throws Exception { + 
setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>(); + for (RDFLiteral l : (Collection) pGetter.invoke(res, "foaf:name_literal", context)) { + names.add((String) l.getValue()); + } + assertEquals(Collections.singleton(SPIDERMAN_NAME_JA), names); + } + + @Test + public void getNameLiteralWithoutPrefixUsingPreferredLanguageTagJA() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>(); + for (RDFLiteral l : (Collection) pGetter.invoke(res, "name_literal", context)) { + names.add((String) l.getValue()); + } + assertEquals(Collections.singleton(SPIDERMAN_NAME_JA), names); + } + + // Empty Tag -- untagged + + @Test + public void getNamesWithoutPrefixAndNoTag() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + Set names = new HashSet<>(); + for (RDFModelElement o : model.allContents()) { + names.addAll((Collection) pGetter.invoke(o, "name@", context)); + } + assertEquals(ALL_NAMES_UNTAGGED, names); + } + + @Test + public void getNamesWithPrefixAndNoTag() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "foaf:name@", context)); + assertEquals(Collections.singleton(SPIDERMAN_NAME), names); + } + + @Test + public void getNamesWithDoubleColonPrefixAndNoTag() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "foaf::name@", context)); + assertEquals(Collections.singleton(SPIDERMAN_NAME), names); + } + + @Test + public void getNameLiteralsWithPrefixAndNoTag() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) 
model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>(); + for (RDFLiteral l : (Collection) pGetter.invoke(res, "foaf:name_literal@", context)) { + names.add((String) l.getValue()); + } + assertEquals(Collections.singleton(SPIDERMAN_NAME), names); + } + + // RU tag requested and available + + @Test + public void getNamesWithoutPrefixWithRULanguageTag() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "name@ru", context)); + assertEquals(Collections.singleton(SPIDERMAN_NAME_RU), names); + } + + @Test + public void getNamesWithPrefixAndRULanguageTag() throws Exception { + setupModel(LANGUAGE_PREFERENCE_JA_STRING); + RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); + Set names = new HashSet<>((Collection) pGetter.invoke(res, "foaf:name@ru", context)); + assertEquals(Collections.singleton(SPIDERMAN_NAME_RU), names); + } + +} \ No newline at end of file diff --git a/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelTest.java b/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelTest.java index f3a92b1..cd729e7 100644 --- a/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelTest.java +++ b/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelTest.java @@ -78,21 +78,21 @@ public void getNamesWithoutPrefix() throws Exception { for (RDFModelElement o : model.allContents()) { names.addAll((Collection) pGetter.invoke(o, "name", context)); } - assertEquals(ALL_NAMES, names); + assertEquals("With no language preference and no tag, all values are returned", ALL_NAMES, names); } @Test public void getNamesWithPrefix() throws Exception { RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); Set names = new HashSet<>((Collection) pGetter.invoke(res, "foaf:name", context)); - 
assertEquals(SPIDERMAN_NAMES, names); + assertEquals("With no language preference and no tag, all values are returned", SPIDERMAN_NAMES, names); } @Test public void getNamesWithDoubleColonPrefix() throws Exception { RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); Set names = new HashSet<>((Collection) pGetter.invoke(res, "foaf::name", context)); - assertEquals(SPIDERMAN_NAMES, names); + assertEquals("With no language preference and no tag, all values are returned", SPIDERMAN_NAMES, names); } @Test @@ -102,7 +102,7 @@ public void getNameLiteralsWithPrefix() throws Exception { for (RDFLiteral l : (Collection) pGetter.invoke(res, "foaf:name_literal", context)) { names.add((String) l.getValue()); } - assertEquals(SPIDERMAN_NAMES, names); + assertEquals("With no language preference and no tag, all values are returned", SPIDERMAN_NAMES, names); } @Test From d37948f9438ec474bafa05a7112867d1d5b68428 Mon Sep 17 00:00:00 2001 From: Antonio Garcia-Dominguez Date: Fri, 20 Dec 2024 10:32:24 +0100 Subject: [PATCH 2/7] RDFModel: fail-fast when invalid language tags are used --- .../org/eclipse/epsilon/emc/rdf/RDFModel.java | 15 +++++++-- ...va => RDFModelPreferredLanguagesTest.java} | 32 +++++++------------ 2 files changed, 24 insertions(+), 23 deletions(-) rename tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/{RDFModelLanguageTagsTest.java => RDFModelPreferredLanguagesTest.java} (91%) diff --git a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java index 270821e..9c3b307 100644 --- a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java +++ b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java @@ -160,9 +160,18 @@ public void load(StringProperties properties, IRelativePathResolver resolver) th } this.languagePreference.clear(); - if 
(!properties.getProperty(PROPERTY_LANGUAGE_PREFERENCE).isEmpty()) { + String sLanguagePreference = properties.getProperty(PROPERTY_LANGUAGE_PREFERENCE, ""); + if (!sLanguagePreference.isBlank()) { for (String tag : properties.getProperty(PROPERTY_LANGUAGE_PREFERENCE).split(",")) { - this.languagePreference.add(tag.strip()); + tag = tag.strip(); + if (isValidLanguageTag(tag)) { + this.languagePreference.add(tag); + } else { + throw new EolModelLoadingException( + new IllegalArgumentException( + String.format("'%s' is not a valid BCP 47 tag", tag) + ), this); + } } } @@ -353,7 +362,7 @@ public String getPrefix(String namespaceURI) { } return model.getNsURIPrefix(namespaceURI); } - + // Using Java's Locale class to check that tags conform to bcp47 structure public static boolean isValidLanguageTag (String bcp47tag) { boolean isValidBCP47 = !("und".equals(Locale.forLanguageTag(bcp47tag).toLanguageTag())); diff --git a/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelLanguageTagsTest.java b/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelPreferredLanguagesTest.java similarity index 91% rename from tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelLanguageTagsTest.java rename to tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelPreferredLanguagesTest.java index de559e7..5b8601f 100644 --- a/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelLanguageTagsTest.java +++ b/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelPreferredLanguagesTest.java @@ -14,13 +14,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; -import java.util.List; import java.util.Set; 
import org.eclipse.epsilon.common.util.StringProperties; @@ -28,10 +28,9 @@ import org.eclipse.epsilon.eol.execute.context.EolContext; import org.eclipse.epsilon.eol.execute.introspection.IPropertyGetter; import org.junit.After; -import org.junit.Before; import org.junit.Test; -public class RDFModelLanguageTagsTest { +public class RDFModelPreferredLanguagesTest { private RDFModel model; private IPropertyGetter pGetter; @@ -40,7 +39,7 @@ public class RDFModelLanguageTagsTest { private static final String SPIDERMAN_MULTILANG_TTL = "resources/spiderman-multiLang.ttl"; private static final String LANGUAGE_PREFERENCE_INVALID_STRING = "e n,en-us,123,ja,ru"; - private static final String LANGUAGE_PREFERENCE_JA_STRING = "e n,en-us,123,ja,ru"; + private static final String LANGUAGE_PREFERENCE_JA_STRING = "en,en-us,ja,ru"; private static final String LANGUAGE_PREFERENCE_EN_STRING = "en"; private static final String SPIDERMAN_URI = "http://example.org/#spiderman"; @@ -61,7 +60,7 @@ public class RDFModelLanguageTagsTest { public void setupModel (String languagePreference) throws EolModelLoadingException { this.model = new RDFModel(); - //model.setUri("resources/spiderman.ttl"); + StringProperties props = new StringProperties(); props.put(RDFModel.PROPERTY_URIS, SPIDERMAN_MULTILANG_TTL); props.put(RDFModel.PROPERTY_LANGUAGE_PREFERENCE, languagePreference); @@ -79,12 +78,9 @@ public void teardown() { } @Test - public void modelLanguageTagPropertyLoad() throws Exception { - setupModel(LANGUAGE_PREFERENCE_INVALID_STRING); - String answer = "[" + LANGUAGE_PREFERENCE_INVALID_STRING.replaceAll("\\s", "") + "]"; - List langList = model.getLanguagePreference(); - String langListString = model.getLanguagePreference().toString().replaceAll("\\s", ""); - assertEquals(answer, langListString); + public void invalidLanguageTagThrowsException() throws Exception { + assertThrows(EolModelLoadingException.class, + () -> setupModel(LANGUAGE_PREFERENCE_INVALID_STRING)); } @Test @@ -96,15 +92,11 @@ 
public void modelLanguageTagPropertyLoadEMPTY() throws Exception { @Test public void modelLanguageTagValidator() throws Exception { - setupModel(LANGUAGE_PREFERENCE_JA_STRING); - // Test for different tag patterns - assertTrue(RDFModel.isValidLanguageTag("en")); - assertFalse(RDFModel.isValidLanguageTag("e n")); - - assertTrue(RDFModel.isValidLanguageTag("en-us")); - assertFalse(RDFModel.isValidLanguageTag("e n-u s")); - - assertFalse(RDFModel.isValidLanguageTag("123")); + assertTrue("English tag is accepted", RDFModel.isValidLanguageTag("en")); + assertFalse("English tag with space in the middle is rejected", RDFModel.isValidLanguageTag("e n")); + assertTrue("American English is accepted", RDFModel.isValidLanguageTag("en-us")); + assertFalse("American English with space in the middle is rejected", RDFModel.isValidLanguageTag("e n-u s")); + assertFalse("A number is not a valid language tag", RDFModel.isValidLanguageTag("123")); } @Test From 4f517944181660c3e88cb198e7e2f554a039d60a Mon Sep 17 00:00:00 2001 From: Antonio Garcia-Dominguez Date: Fri, 20 Dec 2024 10:35:29 +0100 Subject: [PATCH 3/7] RDFModel: unify how we process properties --- .../src/org/eclipse/epsilon/emc/rdf/RDFModel.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java index 9c3b307..a427da5 100644 --- a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java +++ b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFModel.java @@ -140,13 +140,16 @@ public void load(StringProperties properties, IRelativePathResolver resolver) th * EMC drivers (e.g. the EmfModel class). 
*/ this.uris.clear(); - for (String uri : properties.getProperty(PROPERTY_URIS).split(",")) { - this.uris.add(uri.strip()); + String sUris = properties.getProperty(PROPERTY_URIS, "").strip(); + if (!sUris.isEmpty()) { + for (String uri : sUris.split(",")) { + this.uris.add(uri.strip()); + } } this.customPrefixesMap.clear(); String sPrefixes = properties.getProperty(PROPERTY_PREFIXES, "").strip(); - if (sPrefixes.length() > 0) { + if (!sPrefixes.isEmpty()) { for (String sItem : sPrefixes.split(",")) { int idxEquals = sItem.indexOf('='); if (idxEquals <= 0 || idxEquals == sItem.length() - 1) { @@ -160,9 +163,9 @@ public void load(StringProperties properties, IRelativePathResolver resolver) th } this.languagePreference.clear(); - String sLanguagePreference = properties.getProperty(PROPERTY_LANGUAGE_PREFERENCE, ""); - if (!sLanguagePreference.isBlank()) { - for (String tag : properties.getProperty(PROPERTY_LANGUAGE_PREFERENCE).split(",")) { + String sLanguagePreference = properties.getProperty(PROPERTY_LANGUAGE_PREFERENCE, "").strip(); + if (!sLanguagePreference.isEmpty()) { + for (String tag : sLanguagePreference.split(",")) { tag = tag.strip(); if (isValidLanguageTag(tag)) { this.languagePreference.add(tag); From e6eb49ed1d420fbf80b1c08551597053e1da554d Mon Sep 17 00:00:00 2001 From: Antonio Garcia-Dominguez Date: Fri, 20 Dec 2024 10:59:39 +0100 Subject: [PATCH 4/7] Should honor @ru and ignore preferences --- .../eclipse/epsilon/emc/rdf/RDFResource.java | 20 ++++++++++++------- .../rdf/RDFModelPreferredLanguagesTest.java | 17 ++++++++-------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java index 854093a..267ee02 100644 --- a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java +++ 
b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java @@ -56,20 +56,20 @@ public Collection getProperty(String property, IEolContext context) { final RDFQualifiedName pName = RDFQualifiedName.from(property, this.owningModel::getNamespaceURI); Collection value = getProperty(pName, context, LiteralMode.RAW); - if (!value.isEmpty() && !value.stream().anyMatch(p -> p instanceof RDFResource)) { - value = filterByPreferredLanguage(value, LiteralMode.VALUES_ONLY); - if (!value.isEmpty()) { - return value; - } + if (pName.languageTag == null && !value.stream().anyMatch(p -> p instanceof RDFResource)) { + value = filterByPreferredLanguage(value, LiteralMode.RAW); + } + if (!value.isEmpty()) { + return convertLiteralsToValues(value); } - if (value.isEmpty() && pName.localName.endsWith(LITERAL_SUFFIX)) { + if (pName.localName.endsWith(LITERAL_SUFFIX)) { final String localNameWithoutSuffix = pName.localName.substring(0, pName.localName.length() - LITERAL_SUFFIX.length()); RDFQualifiedName withoutLiteral = pName.withLocalName(localNameWithoutSuffix); value = getProperty(withoutLiteral, context, LiteralMode.RAW); - if (!value.isEmpty() && !value.stream().anyMatch(p -> p instanceof RDFResource)) { + if (pName.languageTag == null && !value.stream().anyMatch(p -> p instanceof RDFResource)) { value = filterByPreferredLanguage(value, LiteralMode.RAW); } } @@ -77,6 +77,12 @@ public Collection getProperty(String property, IEolContext context) { return value; } + private Collection convertLiteralsToValues(Collection value) { + return value.stream() + .map(e -> e instanceof RDFLiteral ? 
((RDFLiteral)e).getValue() : e) + .collect(Collectors.toList()); + } + private Collection filterByPreferredLanguage(Collection value, LiteralMode literalMode) { // If no preferred languages are specified, don't do any filtering if (super.getModel().getLanguagePreference().isEmpty()) { diff --git a/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelPreferredLanguagesTest.java b/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelPreferredLanguagesTest.java index 5b8601f..66ede90 100644 --- a/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelPreferredLanguagesTest.java +++ b/tests/org.eclipse.epsilon.emc.rdf.tests/src/org/eclipse/epsilon/emc/rdf/RDFModelPreferredLanguagesTest.java @@ -30,6 +30,7 @@ import org.junit.After; import org.junit.Test; +@SuppressWarnings("unchecked") public class RDFModelPreferredLanguagesTest { private RDFModel model; @@ -38,8 +39,8 @@ public class RDFModelPreferredLanguagesTest { private static final String SPIDERMAN_MULTILANG_TTL = "resources/spiderman-multiLang.ttl"; - private static final String LANGUAGE_PREFERENCE_INVALID_STRING = "e n,en-us,123,ja,ru"; - private static final String LANGUAGE_PREFERENCE_JA_STRING = "en,en-us,ja,ru"; + private static final String LANGUAGE_PREFERENCE_INVALID_STRING = "e n,en-us,123,ja"; + private static final String LANGUAGE_PREFERENCE_JA_STRING = "en,en-us,ja"; private static final String LANGUAGE_PREFERENCE_EN_STRING = "en"; private static final String SPIDERMAN_URI = "http://example.org/#spiderman"; @@ -124,7 +125,7 @@ public void getNamesWithoutPrefixUsingPreferredLanguageTagEN() throws Exception setupModel(LANGUAGE_PREFERENCE_EN_STRING); RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); Set names = new HashSet<>((Collection) pGetter.invoke(res, "name", context)); - assertEquals("Should return untagged when language preference cant be matched",Collections.singleton(SPIDERMAN_NAME), names); + 
assertEquals("Should return untagged when language preference can't be matched",Collections.singleton(SPIDERMAN_NAME), names); } @Test @@ -132,7 +133,7 @@ public void getNamesWithPrefixUsingPreferredLanguageTagEN() throws Exception { setupModel(LANGUAGE_PREFERENCE_EN_STRING); RDFResource res = (RDFResource) model.getElementById(SPIDERMAN_URI); Set names = new HashSet<>((Collection) pGetter.invoke(res, "foaf:name", context)); - assertEquals("Should return untagged when language preference cant be matched",Collections.singleton(SPIDERMAN_NAME), names); + assertEquals("Should return untagged when language preference can't be matched",Collections.singleton(SPIDERMAN_NAME), names); } @Test @@ -143,7 +144,7 @@ public void getNameLiteralWithPrefixUsingPreferredLanguageTagEN() throws Excepti for (RDFLiteral l : (Collection) pGetter.invoke(res, "foaf:name_literal", context)) { names.add((String) l.getValue()); } - assertEquals("Should return untagged when language preference cant be matched",Collections.singleton(SPIDERMAN_NAME), names); + assertEquals("Should return untagged when language preference can't be matched", Collections.singleton(SPIDERMAN_NAME), names); } @Test @@ -154,7 +155,7 @@ public void getNameLiteralWithoutPrefixUsingPreferredLanguageTagEN() throws Exce for (RDFLiteral l : (Collection) pGetter.invoke(res, "name_literal", context)) { names.add((String) l.getValue()); } - assertEquals("Should return untagged when language preference cant be matched",Collections.singleton(SPIDERMAN_NAME), names); + assertEquals("Should return untagged when language preference can't be matched",Collections.singleton(SPIDERMAN_NAME), names); } // JA preferred and available @@ -197,7 +198,7 @@ public void getNameLiteralWithoutPrefixUsingPreferredLanguageTagJA() throws Exce assertEquals(Collections.singleton(SPIDERMAN_NAME_JA), names); } - // Empty Tag -- untagged + // Empty Tag - ignore language preference and use untagged value @Test public void getNamesWithoutPrefixAndNoTag() 
throws Exception { @@ -236,7 +237,7 @@ public void getNameLiteralsWithPrefixAndNoTag() throws Exception { assertEquals(Collections.singleton(SPIDERMAN_NAME), names); } - // RU tag requested and available + // RU tag requested - ignore language preferences @Test public void getNamesWithoutPrefixWithRULanguageTag() throws Exception { From 4abb9d5aa736dd9e6920a173abb36ed5ed678c89 Mon Sep 17 00:00:00 2001 From: Antonio Garcia-Dominguez Date: Fri, 20 Dec 2024 11:13:08 +0100 Subject: [PATCH 5/7] Move filtering by preference to 3-arg getProperty This avoids some of the code duplication we had, as the logic really should have been in the 3-argument getProperty and not the 2-argument one (which is only for convenience and for the _literal suffix). --- .../eclipse/epsilon/emc/rdf/RDFResource.java | 84 +++++++------------ 1 file changed, 31 insertions(+), 53 deletions(-) diff --git a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java index 267ee02..57a52a6 100644 --- a/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java +++ b/bundles/org.eclipse.epsilon.emc.rdf/src/org/eclipse/epsilon/emc/rdf/RDFResource.java @@ -36,8 +36,8 @@ public class RDFResource extends RDFModelElement { protected static final String LITERAL_SUFFIX = "_literal"; - - enum LiteralMode { + + public enum LiteralMode { RAW, VALUES_ONLY } @@ -54,47 +54,28 @@ public Resource getResource() { public Collection getProperty(String property, IEolContext context) { final RDFQualifiedName pName = RDFQualifiedName.from(property, this.owningModel::getNamespaceURI); + Collection value = getProperty(pName, context, LiteralMode.VALUES_ONLY); - Collection value = getProperty(pName, context, LiteralMode.RAW); - if (pName.languageTag == null && !value.stream().anyMatch(p -> p instanceof RDFResource)) { - value = filterByPreferredLanguage(value, LiteralMode.RAW); - } - if 
(!value.isEmpty()) { - return convertLiteralsToValues(value); - } - - if (pName.localName.endsWith(LITERAL_SUFFIX)) { + if (value.isEmpty() && pName.localName.endsWith(LITERAL_SUFFIX)) { final String localNameWithoutSuffix = pName.localName.substring(0, pName.localName.length() - LITERAL_SUFFIX.length()); RDFQualifiedName withoutLiteral = pName.withLocalName(localNameWithoutSuffix); - value = getProperty(withoutLiteral, context, LiteralMode.RAW); - if (pName.languageTag == null && !value.stream().anyMatch(p -> p instanceof RDFResource)) { - value = filterByPreferredLanguage(value, LiteralMode.RAW); - } } return value; } - private Collection convertLiteralsToValues(Collection value) { + protected Collection convertLiteralsToValues(Collection value) { return value.stream() .map(e -> e instanceof RDFLiteral ? ((RDFLiteral)e).getValue() : e) .collect(Collectors.toList()); } - private Collection filterByPreferredLanguage(Collection value, LiteralMode literalMode) { + protected Collection filterByPreferredLanguage(Collection value) { // If no preferred languages are specified, don't do any filtering if (super.getModel().getLanguagePreference().isEmpty()) { - switch (literalMode) { - case RAW: - return value; - case VALUES_ONLY: - return value.stream().map(e -> e instanceof RDFLiteral - ? 
((RDFLiteral) e).getValue() : e).collect(Collectors.toList()); - default: - throw new IllegalArgumentException("Unknown literal mode " + literalMode); - } + return value; } // Otherwise, group literals by language tag @@ -111,28 +92,14 @@ private Collection filterByPreferredLanguage(Collection value, L for (String tag : super.getModel().getLanguagePreference()) { if (literalsByTag.containsKey(tag)) { - switch (literalMode) { - case RAW: - return new ArrayList<>(literalsByTag.get(tag)); - case VALUES_ONLY: - return literalsByTag.get(tag).stream().map(l -> - l.getValue()).collect(Collectors.toList()); - } + return new ArrayList<>(literalsByTag.get(tag)); } } // If we don't find any matches in the preferred languages, // fall back to the untagged literals (if any). Collection rawFromUntagged = literalsByTag.get(""); - switch (literalMode) { - case RAW: - return new ArrayList<>(rawFromUntagged); - case VALUES_ONLY: - return rawFromUntagged.stream().map(l -> l.getValue()) - .collect(Collectors.toList()); - default: - throw new IllegalArgumentException("Unknown literal mode " + literalMode); - } + return new ArrayList<>(rawFromUntagged); } public Collection getProperty(RDFQualifiedName pName, IEolContext context, LiteralMode literalMode) { @@ -158,13 +125,14 @@ public Collection getProperty(RDFQualifiedName pName, IEolContext contex }); } + Collection rawValues; if (pName.prefix == null) { // If no prefix was specified, watch out for ambiguity and issue warning in that case ListMultimap values = MultimapBuilder.hashKeys().arrayListValues().build(); while (itStatements.hasNext()) { Statement stmt = itStatements.next(); values.put(stmt.getPredicate().getURI(), - convertToModelObject(stmt.getObject(), literalMode)); + convertToModelObject(stmt.getObject())); } final Set distinctKeys = values.keySet(); @@ -176,15 +144,30 @@ public Collection getProperty(RDFQualifiedName pName, IEolContext contex )); } - return values.values(); + rawValues = values.values(); } else { // 
Prefix was specified: we don't have to worry about ambiguity final List values = new ArrayList<>(); while (itStatements.hasNext()) { Statement stmt = itStatements.next(); - values.add(convertToModelObject(stmt.getObject(), literalMode)); + values.add(convertToModelObject(stmt.getObject())); } - return values; + rawValues = values; + } + + // Filter by preferred languages if any are set + if (pName.languageTag == null && !rawValues.stream().anyMatch(p -> p instanceof RDFResource)) { + rawValues = filterByPreferredLanguage(rawValues); + } + + // Convert literals to values depending on mode + switch (literalMode) { + case VALUES_ONLY: + return convertLiteralsToValues(rawValues); + case RAW: + return rawValues; + default: + throw new IllegalArgumentException("Unknown literal mode " + literalMode); } } @@ -201,14 +184,9 @@ public String getUri() { return resource.getURI(); } - protected Object convertToModelObject(RDFNode node, LiteralMode lMode) { + protected Object convertToModelObject(RDFNode node) { if (node instanceof Literal) { - switch (lMode) { - case RAW: - return new RDFLiteral((Literal) node, this.owningModel); - case VALUES_ONLY: - return ((Literal) node).getValue(); - } + return new RDFLiteral((Literal) node, this.owningModel); } else if (node instanceof Resource) { return new RDFResource((Resource) node, this.owningModel); } From f06e4fcc686c7798e332c68cceb42bf3e2d66f32 Mon Sep 17 00:00:00 2001 From: Antonio Garcia-Dominguez Date: Fri, 20 Dec 2024 11:18:48 +0100 Subject: [PATCH 6/7] Set project encodings --- .../.settings/org.eclipse.core.resources.prefs | 2 ++ .../.settings/org.eclipse.core.resources.prefs | 2 ++ .../.settings/org.eclipse.core.resources.prefs | 2 ++ .../.settings/org.eclipse.core.resources.prefs | 2 ++ .../.settings/org.eclipse.core.resources.prefs | 2 ++ .../.settings/org.eclipse.core.resources.prefs | 2 ++ .../.settings/org.eclipse.core.resources.prefs | 2 ++ .../.settings/org.eclipse.core.resources.prefs | 2 ++ 8 files changed, 16 
insertions(+) create mode 100644 bundles/org.eclipse.epsilon.emc.rdf.dt/.settings/org.eclipse.core.resources.prefs create mode 100644 bundles/org.eclipse.epsilon.emc.rdf/.settings/org.eclipse.core.resources.prefs create mode 100644 features/org.eclipse.epsilon.emc.rdf.dt.feature/.settings/org.eclipse.core.resources.prefs create mode 100644 features/org.eclipse.epsilon.emc.rdf.feature/.settings/org.eclipse.core.resources.prefs create mode 100644 releng/org.eclipse.epsilon.emc.rdf.target.epsilon21/.settings/org.eclipse.core.resources.prefs create mode 100644 releng/org.eclipse.epsilon.emc.rdf.target/.settings/org.eclipse.core.resources.prefs create mode 100644 releng/org.eclipse.epsilon.emc.rdf.updatesite/.settings/org.eclipse.core.resources.prefs create mode 100644 tests/org.eclipse.epsilon.emc.rdf.tests/.settings/org.eclipse.core.resources.prefs diff --git a/bundles/org.eclipse.epsilon.emc.rdf.dt/.settings/org.eclipse.core.resources.prefs b/bundles/org.eclipse.epsilon.emc.rdf.dt/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..99f26c0 --- /dev/null +++ b/bundles/org.eclipse.epsilon.emc.rdf.dt/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding/=UTF-8 diff --git a/bundles/org.eclipse.epsilon.emc.rdf/.settings/org.eclipse.core.resources.prefs b/bundles/org.eclipse.epsilon.emc.rdf/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..99f26c0 --- /dev/null +++ b/bundles/org.eclipse.epsilon.emc.rdf/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding/=UTF-8 diff --git a/features/org.eclipse.epsilon.emc.rdf.dt.feature/.settings/org.eclipse.core.resources.prefs b/features/org.eclipse.epsilon.emc.rdf.dt.feature/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..99f26c0 --- /dev/null +++ b/features/org.eclipse.epsilon.emc.rdf.dt.feature/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ 
+eclipse.preferences.version=1 +encoding/=UTF-8 diff --git a/features/org.eclipse.epsilon.emc.rdf.feature/.settings/org.eclipse.core.resources.prefs b/features/org.eclipse.epsilon.emc.rdf.feature/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..99f26c0 --- /dev/null +++ b/features/org.eclipse.epsilon.emc.rdf.feature/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding/=UTF-8 diff --git a/releng/org.eclipse.epsilon.emc.rdf.target.epsilon21/.settings/org.eclipse.core.resources.prefs b/releng/org.eclipse.epsilon.emc.rdf.target.epsilon21/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..99f26c0 --- /dev/null +++ b/releng/org.eclipse.epsilon.emc.rdf.target.epsilon21/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding/=UTF-8 diff --git a/releng/org.eclipse.epsilon.emc.rdf.target/.settings/org.eclipse.core.resources.prefs b/releng/org.eclipse.epsilon.emc.rdf.target/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..99f26c0 --- /dev/null +++ b/releng/org.eclipse.epsilon.emc.rdf.target/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding/=UTF-8 diff --git a/releng/org.eclipse.epsilon.emc.rdf.updatesite/.settings/org.eclipse.core.resources.prefs b/releng/org.eclipse.epsilon.emc.rdf.updatesite/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..99f26c0 --- /dev/null +++ b/releng/org.eclipse.epsilon.emc.rdf.updatesite/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding/=UTF-8 diff --git a/tests/org.eclipse.epsilon.emc.rdf.tests/.settings/org.eclipse.core.resources.prefs b/tests/org.eclipse.epsilon.emc.rdf.tests/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..99f26c0 --- /dev/null +++ 
b/tests/org.eclipse.epsilon.emc.rdf.tests/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding/=UTF-8 From e9828714e4f02af0274ca4bec43a8ced4bbae7b0 Mon Sep 17 00:00:00 2001 From: Antonio Garcia-Dominguez Date: Fri, 20 Dec 2024 11:34:39 +0100 Subject: [PATCH 7/7] README: document language preferences --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 79b01bc..c03f3b6 100644 --- a/README.md +++ b/README.md @@ -138,3 +138,16 @@ spider.`name_literal@ru`.println('Name literal of Spiderman in Russian: '); * `value`: the raw value of the literal (usually a String, but it can be different for typed literals - see [Apache Jena typed literals](https://jena.apache.org/documentation/notes/typed-literals.html)). * `language`: the language tag for the literal (if any). * `datatypeURI`: the datatype URI for the literal. + +### Limiting returned literals to preferred languages + +The "Language tag preference" section of the RDF model configuration dialog allows for specifying a comma-separated list of [BCP 47](https://www.ietf.org/rfc/bcp/bcp47.txt) language tags. +If these preferences are set, `x.property` will filter literals, returning only the values for the first preferred tag that has matches, or falling back to the untagged values if none of the listed tags has any matches. + +For instance, if we set the language preferences to `en-gb,en`, filtering `x.property` will work as follows: + +* If any `en-gb` literals exist, return only those. +* Otherwise, if any `en` literals exist, return only those. +* Otherwise, return the untagged literals (if any). + +Language preferences do not apply if an explicit language tag is used: `x.property@en` will always get the `en`-tagged literals, and `x.property@` will always get the untagged literals.