diff --git a/src/main/java/org/zumult/backend/BackendInterface.java b/src/main/java/org/zumult/backend/BackendInterface.java
index 3c819bd..86e95ed 100644
--- a/src/main/java/org/zumult/backend/BackendInterface.java
+++ b/src/main/java/org/zumult/backend/BackendInterface.java
@@ -57,6 +57,17 @@ public interface BackendInterface {
     public Media getMedia(String mediaID, Media.MEDIA_FORMAT format) throws IOException;
 
     public Transcript getTranscript(String transcriptID) throws IOException;
+
+    /**
+     * Retrieves a transcript in a specific serialization format (#223).
+     *
+     * @param transcriptID the ID of the transcript
+     * @param transcriptFormat the format in which the transcript is requested
+     * @return the transcript; a backend that does not evaluate the format
+     *         may answer with the result of {@link #getTranscript(String)}
+     * @throws IOException if the transcript cannot be retrieved
+     */
+    public Transcript getTranscript(String transcriptID, Transcript.TranscriptFormats transcriptFormat) throws IOException;
 
     public Protocol getProtocol(String protocolID) throws IOException;
 
diff --git a/src/main/java/org/zumult/backend/implementations/AbstractBackend.java b/src/main/java/org/zumult/backend/implementations/AbstractBackend.java
index c3a5947..4442e89 100644
--- a/src/main/java/org/zumult/backend/implementations/AbstractBackend.java
+++ b/src/main/java/org/zumult/backend/implementations/AbstractBackend.java
@@ -414,5 +414,15 @@ public SearchResultBigrams searchBigrams(String queryString, String queryLanguag
                 bigramType, annotationLayerIDs4BigramGroups, elementsInBetweenToBeIgnored, scope, minFreq, maxFreq);
     }
 
+    /**
+     * Fallback for format-specific transcript retrieval (#223).
+     * The requested format is not evaluated here; the call is forwarded
+     * unchanged to {@link #getTranscript(String)}.
+     */
+    @Override
+    public Transcript getTranscript(String transcriptID, Transcript.TranscriptFormats transcriptFormat) throws IOException {
+        return this.getTranscript(transcriptID);
+    }
+
 }
 
diff --git a/src/main/java/org/zumult/backend/implementations/COMAFileSystem.java b/src/main/java/org/zumult/backend/implementations/COMAFileSystem.java
index 6dc62d3..239de4d 100644
--- a/src/main/java/org/zumult/backend/implementations/COMAFileSystem.java
+++ b/src/main/java/org/zumult/backend/implementations/COMAFileSystem.java
@@ -52,6 +52,7 @@
 import org.zumult.objects.implementations.COMAMedia;
 import org.zumult.objects.implementations.COMASpeaker;
 import org.zumult.objects.implementations.COMATranscript;
+import
org.zumult.objects.implementations.EXBTranscript;
 import org.zumult.query.SearchServiceException;
 import org.zumult.query.SearchResultPlus;
 import org.zumult.query.KWIC;
@@ -285,8 +286,82 @@ public Transcript getTranscript(String transcriptID) throws IOException {
         }
         return null;
 
     }
+
+    /**
+     * Retrieves a transcript in the requested serialization format (#223).
+     * <p>
+     * {@code ISOTEI} is answered by {@link #getTranscript(String)}, {@code EXB} by the
+     * .exb file located next to the file the COMA document links to. All other
+     * formats (currently {@code EAF}) are passed on to the superclass implementation.
+     *
+     * @param transcriptID the ID of the transcript as used in the COMA document
+     * @param transcriptFormat the requested format, must not be {@code null}
+     * @return the transcript in the requested format
+     * @throws IOException if the corpus, the COMA entry or the transcript file cannot be found or read
+     * @throws IllegalArgumentException if {@code transcriptFormat} is {@code null}
+     */
+    @Override
+    public Transcript getTranscript(String transcriptID, Transcript.TranscriptFormats transcriptFormat) throws IOException {
+        if (transcriptFormat == null) {
+            throw new IllegalArgumentException("transcriptFormat must not be null");
+        }
+        switch (transcriptFormat) {
+            case ISOTEI:
+                return getTranscript(transcriptID);
+            case EXB:
+                return getEXBTranscript(transcriptID);
+            default:
+                // EAF is not implemented for this backend yet, leave it to the generic fallback
+                return super.getTranscript(transcriptID, transcriptFormat);
+        }
+    }
+
+    /**
+     * Reads the .exb file belonging to a transcript.
+     * The file is looked up via the NSLink of the matching Transcription element
+     * in the COMA document, with the file extension replaced by ".exb".
+     *
+     * @param transcriptID the ID of the transcript as used in the COMA document
+     * @return the transcript wrapped as {@link EXBTranscript}
+     * @throws IOException if the corpus, the COMA entry, its NSLink or the .exb file is missing,
+     *                     or if the XPath lookup fails
+     */
+    private Transcript getEXBTranscript(String transcriptID) throws IOException {
+        String corpusID = findCorpusID(transcriptID);
+        if (corpusID == null) {
+            throw new IOException("Error: No corpus found for: " + transcriptID);
+        }
+        Document corpusDocument = getCorpus(corpusID).getDocument();
+        String xp = "//Transcription[@Id='" + transcriptID + "']";
+        Element transcriptionElement;
+        try {
+            transcriptionElement = (Element) (Node) xPath.evaluate(xp, corpusDocument.getDocumentElement(), XPathConstants.NODE);
+        } catch (XPathExpressionException ex) {
+            Logger.getLogger(COMAFileSystem.class.getName()).log(Level.SEVERE, null, ex);
+            throw new IOException(ex);
+        }
+        // XPath yields null when no Transcription carries this ID
+        if (transcriptionElement == null) {
+            throw new IOException("Error: No transcript found for: " + transcriptID);
+        }
+        Node nsLinkNode = transcriptionElement.getElementsByTagName("NSLink").item(0);
+        if (nsLinkNode == null) {
+            throw new IOException("Error: No NSLink found for: " + transcriptID);
+        }
+        String nsLink = nsLinkNode.getTextContent();
+        // Only a dot inside the file name is an extension separator; a link without
+        // extension simply gets ".exb" appended instead of failing in substring()
+        int dotIndex = nsLink.lastIndexOf('.');
+        int separatorIndex = Math.max(nsLink.lastIndexOf('/'), nsLink.lastIndexOf('\\'));
+        String exbLink = (dotIndex > separatorIndex ? nsLink.substring(0, dotIndex) : nsLink) + ".exb";
+        File exbFile = new File(topFolder, corpusID).toPath().resolve(exbLink).toFile();
+        if (!exbFile.exists()) {
+            throw new IOException("Error: No transcript found for: " + transcriptID);
+        }
+        return new EXBTranscript(IOHelper.readUTF8(exbFile));
+    }
 
 
     @Override
     public IDList getCorpora() throws IOException {
diff --git a/src/main/java/org/zumult/backend/implementations/TestCOMABackend.java
b/src/main/java/org/zumult/backend/implementations/TestCOMABackend.java
index 9e0e545..5797a3d 100644
--- a/src/main/java/org/zumult/backend/implementations/TestCOMABackend.java
+++ b/src/main/java/org/zumult/backend/implementations/TestCOMABackend.java
@@ -78,13 +78,16 @@ private void doit() {
             }
 
 
-            String transcriptID = "IDE57E5B6C-E67B-B454-E462-4E4868C79333";
+            String transcriptID = "ISO_manv_2018_e_triage";
             IDList videos4Transcript = bi.getVideos4Transcript(transcriptID);
             System.out.println("Videos : " + String.join(" / ", videos4Transcript) );
 
             String tokenID = "w120";
             Transcript transcript = bi.getTranscript(transcriptID);
+            // #223: additionally request the same transcript as EXB and dump it
+            Transcript transcript2 = bi.getTranscript(transcriptID, Transcript.TranscriptFormats.EXB);
+            System.out.println(transcript2.toXML());
 
             String audioID = transcript.getMetadataValue(bi.findMetadataKeyByID("Transcript_Recording ID"));
             String url = bi.getMedia(audioID).getURL();
             System.out.println("--- Media URL for transcript " + transcriptID + " : " + url);
diff --git a/src/main/java/org/zumult/objects/Transcript.java b/src/main/java/org/zumult/objects/Transcript.java
index 2dfdbf9..66af4d6 100644
--- a/src/main/java/org/zumult/objects/Transcript.java
+++ b/src/main/java/org/zumult/objects/Transcript.java
@@ -15,6 +15,18 @@
  */
 public interface Transcript extends XMLSerializable, Identifiable, Metadatable {
 
+    /**
+     * Formats in which a transcript can be requested from a backend (#223).
+     */
+    public enum TranscriptFormats {
+        /** Served by the single-argument {@code getTranscript} of the backend. */
+        ISOTEI,
+        /** Served from an .exb file. */
+        EXB,
+        /** Reserved, not produced by any backend in this change. */
+        EAF
+    }
+
     public int getNumberOfTokens();
 
     public int getNumberOfTypes();
diff --git a/src/main/java/org/zumult/objects/implementations/EXBTranscript.java b/src/main/java/org/zumult/objects/implementations/EXBTranscript.java
new file mode 100644
index 0000000..5b18944
--- /dev/null
+++ b/src/main/java/org/zumult/objects/implementations/EXBTranscript.java
@@ -0,0 +1,151 @@
+package org.zumult.objects.implementations;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.zumult.objects.MetadataKey;
+import org.zumult.objects.TokenFilter;
+import org.zumult.objects.TokenList;
+import org.zumult.objects.Transcript;
+
+/**
+ * Transcript backed by an EXB document (new for #223).
+ * <p>
+ * For now this class only hands the XML given to its constructors on to
+ * the superclass; every method overridden in this class is a placeholder
+ * that throws {@link UnsupportedOperationException}.
+ *
+ * @author bernd
+ */
+public class EXBTranscript extends AbstractXMLObject implements Transcript {
+
+    public EXBTranscript(Document xmlDocument) {
+        super(xmlDocument);
+    }
+
+    public EXBTranscript(String xmlString) {
+        super(xmlString);
+    }
+
+    @Override
+    public int getNumberOfTokens() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public int getNumberOfTypes() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public double getStartTime() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public double getEndTime() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public double getTimeForID(String id) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public Transcript getPart(String id1, String id2, boolean expandToFullAnnotationBlock) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public Transcript getPart(double time1, double time2, boolean expandToFullAnnotationBlock) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public TokenList getTokenList(String type) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public TokenList getTokenList(String type, TokenFilter filter) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public Element getElementById(String id) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public NodeList getTokensByPOS(String pos) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public NodeList getAllTokens() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public Document getXmlDocument() {
+        // NOTE(review): the superclass is given the document in the constructor; presumably it could be returned here - confirm against AbstractXMLObject
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public NodeList getAnchorsByAttribute(String attribute) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public NodeList getAnnotationBlocksBySpeaker(String speaker) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public String getSpeakerInitialsBySpeakerID(String speakerID) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public String getSpeakerIDBySpeakerInitials(String speakerInitials) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public String getAnnotationBlockID(String annotationBlockID, int distance) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public String getFirstAnnotationBlockIDForTime(double time) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public void removeAnnotations() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public String getLanguage() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public void setTimelineToZero() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public String getID() {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+    @Override
+    public String getMetadataValue(MetadataKey key) {
+        throw new UnsupportedOperationException("Not supported yet.");
+    }
+
+}