Skip to content

Commit

Permalink
Migrate to PetitParser2
Browse files Browse the repository at this point in the history
Add tests
Remove unnecessary code generator dependency
Add lowercaseCodes and uppercaseCodes to speed up parsing
  • Loading branch information
Hernán Morales Durand committed Aug 24, 2023
1 parent 76a0b4b commit 61ccedb
Show file tree
Hide file tree
Showing 49 changed files with 846 additions and 544 deletions.
20 changes: 3 additions & 17 deletions repository/BaselineOfBioSmalltalk/BaselineOfBioSmalltalk.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ BaselineOfBioSmalltalk >> baseline: spec [
projectSpecForCommonUtils: spec;
projectSpecForDataDrameInspector: spec;
projectSpecForProjectFramework: spec;
projectSpecForCodeGenerator: spec;
projectSpecForStringExtensions: spec;
projectSpecForFileDialog: spec;
projectSpecForPolyMath: spec;
Expand Down Expand Up @@ -283,16 +282,6 @@ BaselineOfBioSmalltalk >> preLoad [
super preLoad.
]

{ #category : #specs }
BaselineOfBioSmalltalk >> projectSpecForCodeGenerator: spec [

spec
baseline: 'CodeGenerator'
with: [ spec
repository: 'github://hernanmd/CodeGenerator/repository';
loads: #('Smalltalk-Generators') ]
]

{ #category : #specs }
BaselineOfBioSmalltalk >> projectSpecForCommonUtils: spec [

Expand Down Expand Up @@ -340,12 +329,9 @@ BaselineOfBioSmalltalk >> projectSpecForOsSubprocess: spec [
BaselineOfBioSmalltalk >> projectSpecForPetitParser: spec [

spec
project: 'PetitParser' with: [
spec
className: #ConfigurationOfPetitParser;
versionString: '2.0';
loads: #('Core' );
repository: 'http://smalltalkhub.com/mc/Moose/PetitParser/main/' ].
baseline: 'PetitParser2Core'
with: [ spec repository: 'github://kursjan/petitparser2' ].

]

{ #category : #specs }
Expand Down
2 changes: 1 addition & 1 deletion repository/BioBlast/BioQBlastReader.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ BioQBlastReader >> parserForErrorMsg [
Message ID#24 Error: Failed to read the Blast query: Nucleotide FASTA provided for protein sequence
Message ID#24 Error: Failed to read the Blast query: Gi/accession mismatch: requested nucleotide, found protein "

^ ( 'Message ID#' asParser negate plus , 'Message ID#' asParser ) ,
^ ( 'Message ID#' asPParser negate plus , 'Message ID#' asPParser ) ,
( PPPredicateObjectParser anyExceptAnyOf: '</p>' ) plus flatten foldRight: [: a : b | b ]

]
Expand Down
118 changes: 78 additions & 40 deletions repository/BioParserTests/BioAccessionParserTest.class.st
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Class {
#name : #BioAccessionParserTest,
#superclass : #BioAbstractParserTest,
#category : 'BioParserTests-Core'
#category : #'BioParserTests-Core'
}

{ #category : #accessing }
Expand All @@ -21,64 +21,102 @@ BioAccessionParserTest >> setUp [
{ #category : #testing }
BioAccessionParserTest >> testParseAccession01 [

self assert: (( self parser parse: 'gi|555|emb|X65215.1|' ) isKindOf: BioAccession).
self assert: (( self parser parse: 'gi|226437718|gb|AC150860.6|' ) isKindOf: BioAccession).
self assert: (( self parser parse: 'gi|207524544|gb|AC226190.2|' ) isKindOf: BioAccession).
self assert: (( self parser parse: 'gb|AC226190.2|' ) isKindOf: BioAccession).
self assert: (( self parser parse: 'AC150530.4' ) isKindOf: BioAccession).
self assert: (( self parser parse: 'AC150707' ) isKindOf: BioAccession).

self assert: (( self parser parse: '>gi|198282148|ref|NC_011206.1| Acidithiobacillus ferrooxidans ATCC 53993 chromosome, complete genome' ) isKindOf: BioAccession).
self assert: (( self parser parse: '>gi|104773257|ref|NC_008054.1| Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842, complete genome' ) isKindOf: BioAccession).



parseResult := self parser parse: 'gi|555|emb|X65215.1|'.
self assert: (parseResult isKindOf: BioAccession).
self assert: parseResult name equals: 'X65215'.
self assert: parseResult version equals: '1'.

]

{ #category : #testing }
BioAccessionParserTest >> testParseAccession02 [

self assert: ( self parser parse: 'gi|555|emb|X65215.1|' ) name = 'X65215' .
self assert: ( self parser parse: 'gi|226437718|gb|AC150860.6|' ) name = 'AC150860'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2|' ) name = 'AC226190'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2345|' ) name = 'AC226190'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2|ABCDEFG' ) name = 'AC226190'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2|ABCDEFG|' ) name = 'AC226190'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2|ABCDEFG|HIJKL' ) name = 'AC226190'.
self assert: ( self parser parse: 'gb|AC226190.2|' ) name = 'AC226190'.
self assert: ( self parser parse: 'AC150530.4' ) name = 'AC150530'.
self assert: ( self parser parse: 'AC150707' ) name = 'AC150707'
parseResult := self parser parse: 'gi|226437718|gb|AC150860.6|'.
self assert: (parseResult isKindOf: BioAccession).
self assert: parseResult name equals: 'AC150860'.
self assert: parseResult version equals: '6'.

]

{ #category : #testing }
BioAccessionParserTest >> testParseAccession03 [

self assert: ( self parser parse: 'gi|555|emb|X65215.1|' ) version = '1' .
self assert: ( self parser parse: 'gi|226437718|gb|AC150860.6|' ) version = '6'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2|' ) version = '2'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2345|' ) version = '2345'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2|ABCDEFG' ) version = '2'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2|ABCDEFG|' ) version = '2'.
self assert: ( self parser parse: 'gi|207524544|gb|AC226190.2|ABCDEFG|HIJKL' ) version = '2'.
self assert: ( self parser parse: 'gb|AC226190.2|' ) version = '2'.
self assert: ( self parser parse: 'AC150530.4' ) version = '4'.
self assert: ( self parser parse: 'AC150707' ) version isNil
parseResult := self parser parse: 'gi|207524544|gb|AC226190.2|'.
self assert: (parseResult isKindOf: BioAccession).
self assert: parseResult name equals: 'AC226190'.
self assert: parseResult version equals: '2'.
]

{ #category : #testing }
BioAccessionParserTest >> testParseAccession04 [
	"Parsing 'gb|AC226190.2|' must answer a BioAccession whose name is 'AC226190' and whose version is '2'."

	parseResult := self parser parse: 'gb|AC226190.2|'.
	self
		assert: (parseResult isKindOf: BioAccession);
		assert: parseResult name equals: 'AC226190';
		assert: parseResult version equals: '2'
]

{ #category : #testing }
BioAccessionParserTest >> testParseAccession05 [
	"Parsing the bare identifier 'AC150530.4' must answer a BioAccession whose name is 'AC150530' and whose version is '4'."

	parseResult := self parser parse: 'AC150530.4'.
	self
		assert: (parseResult isKindOf: BioAccession);
		assert: parseResult name equals: 'AC150530';
		assert: parseResult version equals: '4'
]

{ #category : #testing }
BioAccessionParserTest >> testParseAccession06 [
	"Parsing 'AC150707', which carries no '.version' suffix, must answer a BioAccession named 'AC150707' that reports no version."

	parseResult := self parser parse: 'AC150707'.
	self
		assert: (parseResult isKindOf: BioAccession);
		assert: parseResult name equals: 'AC150707';
		deny: parseResult hasVersion
]

{ #category : #testing }
BioAccessionParserTest >> testParseAccession07 [
	"A full header line starting with '>' and followed by a free-text description must still answer a BioAccession named 'NC_011206' with version '1'."

	parseResult := self parser parse: '>gi|198282148|ref|NC_011206.1| Acidithiobacillus ferrooxidans ATCC 53993 chromosome, complete genome'.
	self
		assert: (parseResult isKindOf: BioAccession);
		assert: parseResult name equals: 'NC_011206';
		assert: parseResult version equals: '1'
]

{ #category : #testing }
BioAccessionParserTest >> testParseAccession08 [
	"A full header line starting with '>' and followed by a free-text description must still answer a BioAccession named 'NC_008054' with version '1'."

	parseResult := self parser parse: '>gi|104773257|ref|NC_008054.1| Lactobacillus delbrueckii subsp. bulgaricus ATCC 11842, complete genome'.
	self
		assert: (parseResult isKindOf: BioAccession);
		assert: parseResult name equals: 'NC_008054';
		assert: parseResult version equals: '1'
]

{ #category : #testing }
BioAccessionParserTest >> testTokenizeAccession01 [

self assert: ( BioParser tokenizeAccession: 'gi|555|emb|X65215.1|' ) = #('X65215' '1') .
self assert: ( BioParser tokenizeAccession: 'gi|226437718|gb|AC150860.6|' ) = #('AC150860' '6').
self assert: ( BioParser tokenizeAccession: 'gi|207524544|gb|AC226190.2|' ) = #('AC226190' '2').
self assert: ( BioParser tokenizeAccession: 'gi|207524544|gb|AC226190.2345|' ) = #('AC226190' '2345').
self assert: ( BioParser tokenizeAccession: 'gb|AC226190.2|' ) = #('AC226190' '2').
self assert: ( BioParser tokenizeAccession: 'AC150530.4' ) = #('AC150530' '4').
self assert: ( BioParser tokenizeAccession: 'AC150707' ) = #('AC150707')
self
assert: (BioParser tokenizeAccession: 'gi|555|emb|X65215.1|')
equals: #( 'X65215' '1' ).
self
assert: (BioParser tokenizeAccession: 'gi|226437718|gb|AC150860.6|')
equals: #( 'AC150860' '6' ).
self
assert: (BioParser tokenizeAccession: 'gi|207524544|gb|AC226190.2|')
equals: #( 'AC226190' '2' ).
self
assert:
(BioParser tokenizeAccession: 'gi|207524544|gb|AC226190.2345|')
equals: #( 'AC226190' '2345' ).
self
assert: (BioParser tokenizeAccession: 'gb|AC226190.2|')
equals: #( 'AC226190' '2' ).
self
assert: (BioParser tokenizeAccession: 'AC150530.4')
equals: #( 'AC150530' '4' ).
self
assert: (BioParser tokenizeAccession: 'AC150707')
equals: #( 'AC150707' )
]

{ #category : #testing }
Expand Down
65 changes: 65 additions & 0 deletions repository/BioParserTests/BioDNANucleotideParserTest.class.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"
Unit tests for the parser obtained from `#dnaLetter asPParser`.

The tests check that the parser matches the one-letter strings a, c, g, t and n in both lower and upper case, does not match the empty string, and signals MessageNotUnderstood when handed a Character or nil instead of a String.
"
Class {
	#name : #BioDNANucleotideParserTest,
	#superclass : #BioAbstractParserTest,
	#category : #'BioParserTests-Basic'
}

{ #category : #accessing }
BioDNANucleotideParserTest >> setUp [
	"Install the parser under test, built from the #dnaLetter symbol."

	super setUp.
	parser := #dnaLetter asPParser
]

{ #category : #testing }
BioDNANucleotideParserTest >> testDNALetterMatchA [
	"The letter A is matched in lower and upper case."

	#( 'a' 'A' ) do: [ :letter |
		self assert: (parser matches: letter) ]
]

{ #category : #testing }
BioDNANucleotideParserTest >> testDNALetterMatchC [
	"The letter C is matched in lower and upper case."

	#( 'c' 'C' ) do: [ :letter |
		self assert: (parser matches: letter) ]
]

{ #category : #testing }
BioDNANucleotideParserTest >> testDNALetterMatchG [
	"The letter G is matched in lower and upper case."

	#( 'g' 'G' ) do: [ :letter |
		self assert: (parser matches: letter) ]
]

{ #category : #testing }
BioDNANucleotideParserTest >> testDNALetterMatchN [
	"The letter N is matched in upper and lower case."

	#( 'N' 'n' ) do: [ :letter |
		self assert: (parser matches: letter) ]
]

{ #category : #testing }
BioDNANucleotideParserTest >> testDNALetterMatchT [
	"The letter T is matched in lower and upper case."

	#( 't' 'T' ) do: [ :letter |
		self assert: (parser matches: letter) ]
]

{ #category : #testing }
BioDNANucleotideParserTest >> testDNANucelotideEmpty [
	"The empty string must not match.
	NOTE(review): the selector misspells 'Nucleotide'; it is kept as is so the test name stays stable."

	| emptyInput |
	emptyInput := String empty.
	self deny: (parser matches: emptyInput)
]

{ #category : #testing }
BioDNANucleotideParserTest >> testDNANucleotideCharacter [
	"Handing the parser a Character or nil instead of a String must signal MessageNotUnderstood."

	{ $a. nil } do: [ :nonString |
		self
			should: [ parser matches: nonString ]
			raise: MessageNotUnderstood ]
]
69 changes: 0 additions & 69 deletions repository/BioParserTests/BioDNAParserTest.class.st

This file was deleted.

44 changes: 44 additions & 0 deletions repository/BioParserTests/BioDNASequenceParserTest.class.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"
Unit tests for the parser obtained from `#dnaSequence asPParser`.

The tests check that the parser matches multi-letter and single-letter DNA strings in either case, and that it matches neither a string of digits nor the empty string.
"
Class {
	#name : #BioDNASequenceParserTest,
	#superclass : #BioAbstractParserTest,
	#category : #'BioParserTests-Basic'
}

{ #category : #accessing }
BioDNASequenceParserTest >> setUp [
	"Install the parser under test, built from the #dnaSequence symbol."

	super setUp.
	parser := #dnaSequence asPParser
]

{ #category : #testing }
BioDNASequenceParserTest >> testDNASeqMatchString [
	"A four-letter sequence is matched in lower and upper case."

	#( 'actg' 'ACTG' ) do: [ :sequence |
		self assert: (parser matches: sequence) ]
]

{ #category : #testing }
BioDNASequenceParserTest >> testDNASeqMatches [
	"A sequence made of a single letter is matched as well."

	#( 'a' 'A' 'N' 'n' ) do: [ :sequence |
		self assert: (parser matches: sequence) ]
]

{ #category : #testing }
BioDNASequenceParserTest >> testDNASeqNumber [
	"A string made only of digits must not match."

	| digits |
	digits := '8743'.
	self deny: (parser matches: digits)
]

{ #category : #testing }
BioDNASequenceParserTest >> testDNASeqParseEmpty [
	"The empty string must not match."

	| emptyInput |
	emptyInput := String empty.
	self deny: (parser matches: emptyInput)
]
Loading

0 comments on commit 61ccedb

Please sign in to comment.