From 9620e064cc8eced4c4b1d435bf92a763b9ce5ba6 Mon Sep 17 00:00:00 2001 From: LeonardoGonzales Date: Tue, 1 Aug 2023 09:28:32 +0100 Subject: [PATCH 1/5] Changes in XML parser to allow partial content --- .../uniprot/core/xml/uniprot/FlagUpdater.java | 30 +-- .../core/xml/uniprot/OrganismConverter.java | 17 +- .../core/xml/uniprot/SequenceConverter.java | 5 +- .../xml/uniprot/UniProtEntryConverter.java | 71 ++++-- .../core/xml/ConverterXMLToFFTest.java | 221 ++++++++++++++++++ 5 files changed, 302 insertions(+), 42 deletions(-) create mode 100644 xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/FlagUpdater.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/FlagUpdater.java index 7bdbb1094..37730c3cc 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/FlagUpdater.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/FlagUpdater.java @@ -16,22 +16,26 @@ public class FlagUpdater implements Updater { @Override public ProteinDescription fromXml(ProteinDescription modelObject, SequenceType xmlObject) { - FlagType fType = Optional.ofNullable(modelObject.getFlag()).map(Flag::getType).orElse(null); - String frag = xmlObject.getFragment(); - if (xmlObject.isPrecursor() != null && xmlObject.isPrecursor()) { - if (SINGLE.equals(frag)) { - fType = FlagType.FRAGMENT_PRECURSOR; + ProteinDescriptionBuilder result = ProteinDescriptionBuilder.from(modelObject); + if(xmlObject != null) { + FlagType fType = Optional.ofNullable(modelObject.getFlag()).map(Flag::getType).orElse(null); + String frag = xmlObject.getFragment(); + if (xmlObject.isPrecursor() != null && xmlObject.isPrecursor()) { + if (SINGLE.equals(frag)) { + fType = FlagType.FRAGMENT_PRECURSOR; + } else if (MULTIPLE.equals(frag)) { + fType = FlagType.FRAGMENTS_PRECURSOR; + } else { + fType = FlagType.PRECURSOR; + } + } else if (SINGLE.equals(frag)) { + fType = FlagType.FRAGMENT; } else if (MULTIPLE.equals(frag)) { - fType = FlagType.FRAGMENTS_PRECURSOR; - } else { - fType = FlagType.PRECURSOR; + fType = FlagType.FRAGMENTS; } - } else if (SINGLE.equals(frag)) { - fType = FlagType.FRAGMENT; - } else if (MULTIPLE.equals(frag)) { - fType = FlagType.FRAGMENTS; + result.flag(fType); } - return ProteinDescriptionBuilder.from(modelObject).flag(fType).build(); + return result.build(); } @Override diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/OrganismConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/OrganismConverter.java index bbf0285bd..ffa3e24fd 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/OrganismConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/OrganismConverter.java @@ -4,6 +4,7 @@ import org.uniprot.core.uniprotkb.taxonomy.Organism; import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder; +import org.uniprot.core.util.Utils; import org.uniprot.core.xml.Converter; import org.uniprot.core.xml.jaxb.uniprot.ObjectFactory; import org.uniprot.core.xml.jaxb.uniprot.OrganismType; @@ -24,10 +25,18 @@ public OrganismConverter(EvidenceIndexMapper evRefMapper, ObjectFactory xmlUnipr @Override public Organism fromXml(OrganismType xmlObj) { OrganismBuilder builder = new OrganismBuilder(); - builder.taxonId(Long.parseLong(xmlObj.getDbReference().get(0).getId())); - OrganismConverterUtil.updateOrganismNameFromXml(xmlObj.getName(), builder); - builder.lineagesSet(xmlObj.getLineage().getTaxon()); - builder.evidencesSet(evRefMapper.parseEvidenceIds(xmlObj.getEvidence())); + if(xmlObj != null) { + if (Utils.notNullNotEmpty(xmlObj.getDbReference())) { + builder.taxonId(Long.parseLong(xmlObj.getDbReference().get(0).getId())); + } + if (Utils.notNullNotEmpty(xmlObj.getName())) { + OrganismConverterUtil.updateOrganismNameFromXml(xmlObj.getName(), builder); + } + if (xmlObj.getLineage() != null) { + builder.lineagesSet(xmlObj.getLineage().getTaxon()); + } + builder.evidencesSet(evRefMapper.parseEvidenceIds(xmlObj.getEvidence())); + } return builder.build(); } diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/SequenceConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/SequenceConverter.java index 9eff14dbc..759163e20 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/SequenceConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/SequenceConverter.java @@ -19,7 +19,10 @@ public SequenceConverter(ObjectFactory xmlUniprotFactory) { @Override public Sequence fromXml(SequenceType xmlObj) { - String sequence = xmlObj.getValue(); + String sequence = ""; + if(xmlObj != null){ + sequence = xmlObj.getValue(); + } // sequence = sequence.replaceAll(" ", ""); return new SequenceBuilder(sequence).build(); } diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/UniProtEntryConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/UniProtEntryConverter.java index 28f844d7c..703a9c71e 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/UniProtEntryConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/UniProtEntryConverter.java @@ -16,6 +16,7 @@ import org.uniprot.core.uniprotkb.impl.EntryAuditBuilder; import org.uniprot.core.uniprotkb.impl.UniProtKBAccessionBuilder; import org.uniprot.core.uniprotkb.impl.UniProtKBEntryBuilder; +import org.uniprot.core.util.Utils; import org.uniprot.core.xml.Converter; import org.uniprot.core.xml.jaxb.uniprot.*; import org.uniprot.core.xml.uniprot.citation.ReferenceConverter; @@ -166,30 +167,32 @@ private SequenceType toXmlForSequence(UniProtKBEntry entry) { // ..with multiple interactions. private List fromXmlForComments(Entry xmlEntry) { List uniComments = new ArrayList<>(); - List comments = xmlEntry.getComment(); - List interactionComment = - comments.stream() - .filter(val -> val.getType().equals(INTERACTION)) - .collect(Collectors.toList()); + if(Utils.notNullNotEmpty(xmlEntry.getComment())) { + List comments = xmlEntry.getComment(); + List interactionComment = + comments.stream() + .filter(val -> val.getType().equals(INTERACTION)) + .collect(Collectors.toList()); - boolean interactionsFirst = true; - for (org.uniprot.core.xml.jaxb.uniprot.CommentType commentType : comments) { - if (commentType.getType().equals(INTERACTION)) { - if (interactionsFirst) { - interactionsFirst = false; + boolean interactionsFirst = true; + for (org.uniprot.core.xml.jaxb.uniprot.CommentType commentType : comments) { + if (commentType.getType().equals(INTERACTION)) { + if (interactionsFirst) { + interactionsFirst = false; + uniComments.add( + CommentConverterFactory.INSTANCE + .createInteractionCommentConverter(this.xmlUniprotFactory) + .fromXml(interactionComment)); + } + } else { + org.uniprot.core.uniprotkb.comment.CommentType type = + org.uniprot.core.uniprotkb.comment.CommentType.typeOf( + commentType.getType()); uniComments.add( CommentConverterFactory.INSTANCE - .createInteractionCommentConverter(this.xmlUniprotFactory) - .fromXml(interactionComment)); + .createCommentConverter(type, evRefMapper, xmlUniprotFactory) + .fromXml(commentType)); } - } else { - org.uniprot.core.uniprotkb.comment.CommentType type = - org.uniprot.core.uniprotkb.comment.CommentType.typeOf( - commentType.getType()); - uniComments.add( - CommentConverterFactory.INSTANCE - .createCommentConverter(type, evRefMapper, xmlUniprotFactory) - .fromXml(commentType)); } } return uniComments; @@ -233,9 +236,9 @@ private UniProtKBEntryBuilder createUniprotEntryBuilderFromXml(Entry xmlEntry) { List accessions = xmlEntry.getAccession(); return new UniProtKBEntryBuilder( accessions.get(0), - xmlEntry.getName().get(0), + getUniProtId(xmlEntry), UniProtKBEntryType.typeOf(xmlEntry.getDataset())) - .proteinExistence(ProteinExistence.typeOf(xmlEntry.getProteinExistence().getType())) + .proteinExistence(getProteinExistence(xmlEntry)) .secondaryAccessionsSet( accessions.subList(1, accessions.size()).stream() .map(sec -> new UniProtKBAccessionBuilder(sec).build()) @@ -243,12 +246,32 @@ private UniProtKBEntryBuilder createUniprotEntryBuilderFromXml(Entry xmlEntry) { .entryAudit(entryAuditFromXml(xmlEntry)); } + private ProteinExistence getProteinExistence(Entry xmlEntry) { + ProteinExistence proteinExistence = ProteinExistence.UNKNOWN; + if(xmlEntry.getProteinExistence() != null){ + proteinExistence = ProteinExistence.typeOf(xmlEntry.getProteinExistence().getType()); + } + return proteinExistence; + } + + private String getUniProtId(Entry xmlEntry) { + String uniProtId = ""; + if(Utils.notNullNotEmpty(xmlEntry.getName())){ + uniProtId = xmlEntry.getName().get(0); + } + return uniProtId; + } + private EntryAudit entryAuditFromXml(Entry xmlEntry) { int version = xmlEntry.getVersion(); LocalDate firstPublic = XmlConverterHelper.dateFromXml(xmlEntry.getCreated()); LocalDate lastUpdated = XmlConverterHelper.dateFromXml(xmlEntry.getModified()); - int seqVersion = xmlEntry.getSequence().getVersion(); - LocalDate seqDate = XmlConverterHelper.dateFromXml(xmlEntry.getSequence().getModified()); + int seqVersion = 0; + LocalDate seqDate = null; + if(xmlEntry.getSequence() != null) { + seqVersion = xmlEntry.getSequence().getVersion(); + seqDate = XmlConverterHelper.dateFromXml(xmlEntry.getSequence().getModified()); + } return new EntryAuditBuilder() .firstPublic(firstPublic) .lastAnnotationUpdate(lastUpdated) diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java new file mode 100644 index 000000000..4709672d2 --- /dev/null +++ b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java @@ -0,0 +1,221 @@ +package org.uniprot.core.xml; + +import org.junit.jupiter.api.Test; +import org.uniprot.core.flatfile.writer.impl.UniProtFlatfileWriter; +import org.uniprot.core.uniprotkb.UniProtKBEntry; +import org.uniprot.core.uniprotkb.impl.EntryAuditBuilder; +import org.uniprot.core.uniprotkb.impl.UniProtKBEntryBuilder; +import org.uniprot.core.xml.jaxb.uniprot.Uniprot; +import org.uniprot.core.xml.uniprot.UniProtEntryConverter; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.Marshaller; +import javax.xml.bind.Unmarshaller; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.time.LocalDate; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +class ConverterXMLToFFTest { + + @Test + void testLeo() throws Exception { + + String xmlInput = "\n" + + "\n" + + "\n" + + "A0A2E0WTX0\n" + + "TEMPLATE_VALUE\n" + + "\n" + + "\n" + + "NAD(P)H-dependent oxidoreductase\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "similarity\n" + + "Belongs to the nitroreductase family.\n" + + "\n" + + "A\n" + + "" + + " Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License " + + ""; + + String xmlInput2 = + "\n" + + "\n" + + "\n" + + "A0A2E0WTX0\n" + + "A0A2E0WTX0_9FLAO\n" + + "\n" + + "\n" + + "NAD(P)H-dependent oxidoreductase\n" + + "\n" + + "\n" + + "\n" + + "CL830_02265\n" + + "\n" + + "\n" + + "Crocinitomicaceae bacterium\n" + + "\n" + + "\n" + + "Bacteria\n" + + "Bacteroidota\n" + + "Flavobacteriia\n" + + "Flavobacteriales\n" + + "Crocinitomicaceae\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "The Reconstruction of 2,631 Draft Metagenome-Assembled Genomes from the Global Oceans.\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]\n" + + "\n" + + "\n" + + "Belongs to the nitroreductase family.\n" + + "\n" + + "\n" + + "The sequence shown here is derived from an EMBL/GenBank/DDBJ whole genome shotgun (WGS) entry which is preliminary data.\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "MDIIDYYKWRYATKKFNPNKKIPISDIEIIKESIRLAPTSYGLQLFKVIIIENQLKKEALRKFSYNQSQVSDASHLFIFCNSTKVFEKDIDSYIENKSLSQEIPIEKNKGYGDFLKKTLLNKSSEEISEWTKNQLYIALTHLMTACASLKIDSCPIEGFDTSKYNDFLDIDKKSLSAGVVAAIGYRSETDNSQYDKKVRKATKDIFEVD\n" + + "\n" + + " Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License \n" + + ""; + + + + + String inputAddHeader = "\n" + + "\n" + + "\n" + + "A0A2E0WTX0\n" + + "TEMPLATE_VALUE\n" + + "\n" + + "\n" + + "NAD(P)H-dependent oxidoreductase\n" + + "\n" + + "\n" + + "\n" + + "Belongs to the nitroreductase family.\n" + + "\n" + + "" + + ""; + + InputStream targetStream = new ByteArrayInputStream(inputAddHeader.getBytes()); + JAXBContext jaxbContext = JAXBContext.newInstance("org.uniprot.core.xml.jaxb.uniprot"); + + Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller(); + Uniprot xmlEntry = (Uniprot) jaxbUnmarshaller.unmarshal(targetStream); + assertNotNull(xmlEntry); + + UniProtEntryConverter converter = new UniProtEntryConverter(); + UniProtKBEntry uniprotEntry = xmlEntry.getEntry().stream().map(converter::fromXml).findFirst().orElse(null); + assertNotNull(uniprotEntry); + + UniProtKBEntry auditedEntry = UniProtKBEntryBuilder.from(uniprotEntry) + .entryAudit(new EntryAuditBuilder() + .firstPublic(LocalDate.now()) + .lastAnnotationUpdate(LocalDate.now()) + .lastSequenceUpdate(LocalDate.now()) + .build()) + .build(); + + //"XML FILE" --> "FF PARTIAL" (Curator can use to validate) + + String ffResult = UniProtFlatfileWriter.write(auditedEntry); + assertNotNull(ffResult); + System.out.println(ffResult); + } + + protected Marshaller createMarshaller(JAXBContext jaxbContext) { + try { + Marshaller contextMarshaller = jaxbContext.createMarshaller(); + contextMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE); + contextMarshaller.setProperty(Marshaller.JAXB_FRAGMENT, Boolean.TRUE); + return contextMarshaller; + } catch (Exception e) { + throw new RuntimeException("JAXB marshaller creation failed", e); + } + } + +} From 8c4295c2e8aac683c490883861128c906712af67 Mon Sep 17 00:00:00 2001 From: automated changes Date: Tue, 1 Aug 2023 08:31:26 +0000 Subject: [PATCH 2/5] code format with spotless automatic --- .../uniprot/core/xml/uniprot/FlagUpdater.java | 5 +- .../core/xml/uniprot/OrganismConverter.java | 2 +- .../core/xml/uniprot/SequenceConverter.java | 2 +- .../xml/uniprot/UniProtEntryConverter.java | 8 +- .../core/xml/ConverterXMLToFFTest.java | 114 +++++++++--------- 5 files changed, 67 insertions(+), 64 deletions(-) diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/FlagUpdater.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/FlagUpdater.java index 37730c3cc..7f628474a 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/FlagUpdater.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/FlagUpdater.java @@ -17,8 +17,9 @@ public class FlagUpdater implements Updater { @Override public ProteinDescription fromXml(ProteinDescription modelObject, SequenceType xmlObject) { ProteinDescriptionBuilder result = ProteinDescriptionBuilder.from(modelObject); - if(xmlObject != null) { - FlagType fType = Optional.ofNullable(modelObject.getFlag()).map(Flag::getType).orElse(null); + if (xmlObject != null) { + FlagType fType = + Optional.ofNullable(modelObject.getFlag()).map(Flag::getType).orElse(null); String frag = xmlObject.getFragment(); if (xmlObject.isPrecursor() != null && xmlObject.isPrecursor()) { if (SINGLE.equals(frag)) { diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/OrganismConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/OrganismConverter.java index ffa3e24fd..849981777 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/OrganismConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/OrganismConverter.java @@ -25,7 +25,7 @@ public OrganismConverter(EvidenceIndexMapper evRefMapper, ObjectFactory xmlUnipr @Override public Organism fromXml(OrganismType xmlObj) { OrganismBuilder builder = new OrganismBuilder(); - if(xmlObj != null) { + if (xmlObj != null) { if (Utils.notNullNotEmpty(xmlObj.getDbReference())) { builder.taxonId(Long.parseLong(xmlObj.getDbReference().get(0).getId())); } diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/SequenceConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/SequenceConverter.java index 759163e20..01eb914f7 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/SequenceConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/SequenceConverter.java @@ -20,7 +20,7 @@ public SequenceConverter(ObjectFactory xmlUniprotFactory) { @Override public Sequence fromXml(SequenceType xmlObj) { String sequence = ""; - if(xmlObj != null){ + if (xmlObj != null) { sequence = xmlObj.getValue(); } // sequence = sequence.replaceAll(" ", ""); diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/UniProtEntryConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/UniProtEntryConverter.java index 703a9c71e..62f1075d4 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/UniProtEntryConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniprot/UniProtEntryConverter.java @@ -167,7 +167,7 @@ private SequenceType toXmlForSequence(UniProtKBEntry entry) { // ..with multiple interactions. private List fromXmlForComments(Entry xmlEntry) { List uniComments = new ArrayList<>(); - if(Utils.notNullNotEmpty(xmlEntry.getComment())) { + if (Utils.notNullNotEmpty(xmlEntry.getComment())) { List comments = xmlEntry.getComment(); List interactionComment = comments.stream() @@ -248,7 +248,7 @@ private UniProtKBEntryBuilder createUniprotEntryBuilderFromXml(Entry xmlEntry) { private ProteinExistence getProteinExistence(Entry xmlEntry) { ProteinExistence proteinExistence = ProteinExistence.UNKNOWN; - if(xmlEntry.getProteinExistence() != null){ + if (xmlEntry.getProteinExistence() != null) { proteinExistence = ProteinExistence.typeOf(xmlEntry.getProteinExistence().getType()); } return proteinExistence; @@ -256,7 +256,7 @@ private ProteinExistence getProteinExistence(Entry xmlEntry) { private String getUniProtId(Entry xmlEntry) { String uniProtId = ""; - if(Utils.notNullNotEmpty(xmlEntry.getName())){ + if (Utils.notNullNotEmpty(xmlEntry.getName())) { uniProtId = xmlEntry.getName().get(0); } return uniProtId; @@ -268,7 +268,7 @@ private EntryAudit entryAuditFromXml(Entry xmlEntry) { LocalDate lastUpdated = XmlConverterHelper.dateFromXml(xmlEntry.getModified()); int seqVersion = 0; LocalDate seqDate = null; - if(xmlEntry.getSequence() != null) { + if (xmlEntry.getSequence() != null) { seqVersion = xmlEntry.getSequence().getVersion(); seqDate = XmlConverterHelper.dateFromXml(xmlEntry.getSequence().getModified()); } diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java index 4709672d2..13e986d78 100644 --- a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java +++ b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java @@ -1,5 +1,15 @@ package org.uniprot.core.xml; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.time.LocalDate; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.Marshaller; +import javax.xml.bind.Unmarshaller; + import org.junit.jupiter.api.Test; import org.uniprot.core.flatfile.writer.impl.UniProtFlatfileWriter; import org.uniprot.core.uniprotkb.UniProtKBEntry; @@ -8,39 +18,31 @@ import org.uniprot.core.xml.jaxb.uniprot.Uniprot; import org.uniprot.core.xml.uniprot.UniProtEntryConverter; -import javax.xml.bind.JAXBContext; -import javax.xml.bind.Marshaller; -import javax.xml.bind.Unmarshaller; -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.time.LocalDate; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - class ConverterXMLToFFTest { @Test void testLeo() throws Exception { - String xmlInput = "\n" + - "\n" + - "\n" + - "A0A2E0WTX0\n" + - "TEMPLATE_VALUE\n" + - "\n" + - "\n" + - "NAD(P)H-dependent oxidoreductase\n" + - "\n" + - "\n" + - "\n" + - "\n" + - "similarity\n" + - "Belongs to the nitroreductase family.\n" + - "\n" + - "A\n" + - "" + - " Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License " + - ""; + String xmlInput = + "\n" + + "\n" + + "\n" + + "A0A2E0WTX0\n" + + "TEMPLATE_VALUE\n" + + "\n" + + "\n" + + "NAD(P)H-dependent oxidoreductase\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "similarity\n" + + "Belongs to the nitroreductase family.\n" + + "\n" + + "A\n" + + "" + + " Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License " + + ""; String xmlInput2 = "\n" @@ -162,24 +164,22 @@ void testLeo() throws Exception { + " Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License \n" + ""; - - - - String inputAddHeader = "\n" + - "\n" + - "\n" + - "A0A2E0WTX0\n" + - "TEMPLATE_VALUE\n" + - "\n" + - "\n" + - "NAD(P)H-dependent oxidoreductase\n" + - "\n" + - "\n" + - "\n" + - "Belongs to the nitroreductase family.\n" + - "\n" + - "" + - ""; + String inputAddHeader = + "\n" + + "\n" + + "\n" + + "A0A2E0WTX0\n" + + "TEMPLATE_VALUE\n" + + "\n" + + "\n" + + "NAD(P)H-dependent oxidoreductase\n" + + "\n" + + "\n" + + "\n" + + "Belongs to the nitroreductase family.\n" + + "\n" + + "" + + ""; InputStream targetStream = new ByteArrayInputStream(inputAddHeader.getBytes()); JAXBContext jaxbContext = JAXBContext.newInstance("org.uniprot.core.xml.jaxb.uniprot"); @@ -189,18 +189,21 @@ void testLeo() throws Exception { assertNotNull(xmlEntry); UniProtEntryConverter converter = new UniProtEntryConverter(); - UniProtKBEntry uniprotEntry = xmlEntry.getEntry().stream().map(converter::fromXml).findFirst().orElse(null); + UniProtKBEntry uniprotEntry = + xmlEntry.getEntry().stream().map(converter::fromXml).findFirst().orElse(null); assertNotNull(uniprotEntry); - UniProtKBEntry auditedEntry = UniProtKBEntryBuilder.from(uniprotEntry) - .entryAudit(new EntryAuditBuilder() - .firstPublic(LocalDate.now()) - .lastAnnotationUpdate(LocalDate.now()) - .lastSequenceUpdate(LocalDate.now()) - .build()) - .build(); + UniProtKBEntry auditedEntry = + UniProtKBEntryBuilder.from(uniprotEntry) + .entryAudit( + new EntryAuditBuilder() + .firstPublic(LocalDate.now()) + .lastAnnotationUpdate(LocalDate.now()) + .lastSequenceUpdate(LocalDate.now()) + .build()) + .build(); - //"XML FILE" --> "FF PARTIAL" (Curator can use to validate) + // "XML FILE" --> "FF PARTIAL" (Curator can use to validate) String ffResult = UniProtFlatfileWriter.write(auditedEntry); assertNotNull(ffResult); @@ -217,5 +220,4 @@ protected Marshaller createMarshaller(JAXBContext jaxbContext) { throw new RuntimeException("JAXB marshaller creation failed", e); } } - } From 5c143e7be5a237f8aabd71792c0c34cd61513c50 Mon Sep 17 00:00:00 2001 From: Shadab Ahmad Date: Mon, 7 Aug 2023 11:34:01 +0100 Subject: [PATCH 3/5] read the input from xml file --- .../core/xml/ConverterXMLToFFTest.java | 165 +----------------- .../src/test/resources/google/entry_v4.xml | 13 ++ .../resources/google/entry_v4_modified.xml | 15 ++ 3 files changed, 31 insertions(+), 162 deletions(-) create mode 100644 xml-parser/src/test/resources/google/entry_v4.xml create mode 100644 xml-parser/src/test/resources/google/entry_v4_modified.xml diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java index 13e986d78..7ddf5c91f 100644 --- a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java +++ b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java @@ -2,7 +2,6 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; -import java.io.ByteArrayInputStream; import java.io.InputStream; import java.time.LocalDate; @@ -21,167 +20,9 @@ class ConverterXMLToFFTest { @Test - void testLeo() throws Exception { - - String xmlInput = - "\n" - + "\n" - + "\n" - + "A0A2E0WTX0\n" - + "TEMPLATE_VALUE\n" - + "\n" - + "\n" - + "NAD(P)H-dependent oxidoreductase\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "similarity\n" - + "Belongs to the nitroreductase family.\n" - + "\n" - + "A\n" - + "" - + " Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License " - + ""; - - String xmlInput2 = - "\n" - + "\n" - + "\n" - + "A0A2E0WTX0\n" - + "A0A2E0WTX0_9FLAO\n" - + "\n" - + "\n" - + "NAD(P)H-dependent oxidoreductase\n" - + "\n" - + "\n" - + "\n" - + "CL830_02265\n" - + "\n" - + "\n" - + "Crocinitomicaceae bacterium\n" - + "\n" - + "\n" - + "Bacteria\n" - + "Bacteroidota\n" - + "Flavobacteriia\n" - + "Flavobacteriales\n" - + "Crocinitomicaceae\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "The Reconstruction of 2,631 Draft Metagenome-Assembled Genomes from the Global Oceans.\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]\n" - + "\n" - + "\n" - + "Belongs to the nitroreductase family.\n" - + "\n" - + "\n" - + "The sequence shown here is derived from an EMBL/GenBank/DDBJ whole genome shotgun (WGS) entry which is preliminary data.\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "\n" - + "MDIIDYYKWRYATKKFNPNKKIPISDIEIIKESIRLAPTSYGLQLFKVIIIENQLKKEALRKFSYNQSQVSDASHLFIFCNSTKVFEKDIDSYIENKSLSQEIPIEKNKGYGDFLKKTLLNKSSEEISEWTKNQLYIALTHLMTACASLKIDSCPIEGFDTSKYNDFLDIDKKSLSAGVVAAIGYRSETDNSQYDKKVRKATKDIFEVD\n" - + "\n" - + " Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms Distributed under the Creative Commons Attribution (CC BY 4.0) License \n" - + ""; - - String inputAddHeader = - "\n" - + "\n" - + "\n" - + "A0A2E0WTX0\n" - + "TEMPLATE_VALUE\n" - + "\n" - + "\n" - + "NAD(P)H-dependent oxidoreductase\n" - + "\n" - + "\n" - + "\n" - + "Belongs to the nitroreductase family.\n" - + "\n" - + "" - + ""; - - InputStream targetStream = new ByteArrayInputStream(inputAddHeader.getBytes()); + void testXMLToFF() throws Exception { + String file = "/google/entry_v4_modified.xml"; + InputStream targetStream = ConverterXMLToFFTest.class.getResourceAsStream(file); JAXBContext jaxbContext = JAXBContext.newInstance("org.uniprot.core.xml.jaxb.uniprot"); Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller(); diff --git a/xml-parser/src/test/resources/google/entry_v4.xml b/xml-parser/src/test/resources/google/entry_v4.xml new file mode 100644 index 000000000..ecee62730 --- /dev/null +++ b/xml-parser/src/test/resources/google/entry_v4.xml @@ -0,0 +1,13 @@ + + + A0A2E0WTX0 + + + NAD(P)H-dependent oxidoreductase + + + + similarity + Belongs to the nitroreductase family. + + \ No newline at end of file diff --git a/xml-parser/src/test/resources/google/entry_v4_modified.xml b/xml-parser/src/test/resources/google/entry_v4_modified.xml new file mode 100644 index 000000000..747fe03c4 --- /dev/null +++ b/xml-parser/src/test/resources/google/entry_v4_modified.xml @@ -0,0 +1,15 @@ + + + + A0A2E0WTX0 + Dummy Value + + + NAD(P)H-dependent oxidoreductase + + + + Belongs to the nitroreductase family. + + + \ No newline at end of file From 94f0184af28aeba00b7690e7a0a7142f2d1fbbfe Mon Sep 17 00:00:00 2001 From: Shadab Ahmad Date: Mon, 7 Aug 2023 11:36:17 +0100 Subject: [PATCH 4/5] add xsd in test class --- .../test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java index 7ddf5c91f..b87f610dd 100644 --- a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java +++ b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java @@ -18,15 +18,16 @@ import org.uniprot.core.xml.uniprot.UniProtEntryConverter; class ConverterXMLToFFTest { + //XSD --> https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot.xsd @Test void testXMLToFF() throws Exception { String file = "/google/entry_v4_modified.xml"; - InputStream targetStream = ConverterXMLToFFTest.class.getResourceAsStream(file); + InputStream inputStream = ConverterXMLToFFTest.class.getResourceAsStream(file); JAXBContext jaxbContext = JAXBContext.newInstance("org.uniprot.core.xml.jaxb.uniprot"); Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller(); - Uniprot xmlEntry = (Uniprot) jaxbUnmarshaller.unmarshal(targetStream); + Uniprot xmlEntry = (Uniprot) jaxbUnmarshaller.unmarshal(inputStream); assertNotNull(xmlEntry); UniProtEntryConverter converter = new UniProtEntryConverter(); From a80c50f25f877a3a87b95ef9f4dbe2a068c73f1e Mon Sep 17 00:00:00 2001 From: automated changes Date: Mon, 7 Aug 2023 10:37:28 +0000 Subject: [PATCH 5/5] code format with spotless automatic --- .../test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java index b87f610dd..749d99270 100644 --- a/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java +++ b/xml-parser/src/test/java/org/uniprot/core/xml/ConverterXMLToFFTest.java @@ -18,7 +18,8 @@ import org.uniprot.core.xml.uniprot.UniProtEntryConverter; class ConverterXMLToFFTest { - //XSD --> https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot.xsd + // XSD --> + // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot.xsd @Test void testXMLToFF() throws Exception {