Skip to content

Commit

Permalink
Add parser changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Hernán Morales Durand committed Aug 30, 2023
1 parent 8c34b6b commit 33c2012
Show file tree
Hide file tree
Showing 10 changed files with 265 additions and 118 deletions.
5 changes: 3 additions & 2 deletions repository/BioParserTests/BioAbstractFASTAParserTest.class.st
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Class {
#name : #BioAbstractFASTAParserTest,
#superclass : #BioAbstractParserTest,
#category : 'BioParserTests-Core'
#category : #'BioParserTests-Core'
}

{ #category : #'samples-single' }
Expand Down Expand Up @@ -39,7 +39,8 @@ BioAbstractFASTAParserTest >> fastaSeq04 [
^ '>YAL068C-7235.2170 Putative promoter sequence
TACGAGAATAATTTCTCATCATCCAGCTTTAACACAAAATTCGCA
CAGTTTTCGTTAAGAGAACTTAACATTTTCTTATGACGTAAATGA
AGTTTATATATAAATTTCCTTTTTATTGGA'
AGTTTATATATAAATTTCCTTTTTATTGGA
'
]

{ #category : #'samples-single' }
Expand Down
294 changes: 214 additions & 80 deletions repository/BioParserTests/BioFASTAParserTest.class.st

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion repository/BioParsers/BioAbstractParser.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ BioAbstractParser >> parseString: aString [
^ [ parser
parse: aString
onError: [: failure | self error: failure message ] ]
on: Exception
on: Error
do: [: ex |
self beFailed.
ex asString ].
Expand Down
28 changes: 15 additions & 13 deletions repository/BioParsers/BioFASTABasicParser.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,14 @@ BioFASTABasicParser >> buildTokens: aCollection [
BioFASTABasicParser >> fastaParser [

^ (
self parserForNonBreakingString ,
$> asPParser ,
(self noneOf: (Array with: Character cr with: Character lf)) plus flatten ,
#newline asPParser ,
(self perform: self fastaSequenceParser)
) plus end


(
(self noneOf: (Array with: Character cr with: Character lf)) plus flatten ,
#newline asPParser star ==> [ :nodes | nodes copyWithoutAll: { Character cr asString . Character lf asString } ]
) plus
) plus end
]

{ #category : #initialization }
Expand All @@ -70,33 +72,33 @@ BioFASTABasicParser >> initializeWith: aString [
" Private - See superimplementor's comment "

super initializeWith: aString.
self fastaSequenceParser: #parserForSequence.
self fastaSequenceParser: #parserForSingleSequence.
parser := self fastaParser.

]

{ #category : #'accessing-parsers' }
BioFASTABasicParser >> newLineParser [
" Answer a parser built on the #newline primitive parser, used by the receiver to match line breaks "

^ #newline asPParser plus optional
^ #newline asPParser
]

{ #category : #'accessing-parsers' }
BioFASTABasicParser >> parserForSequence [
BioFASTABasicParser >> parserForSingleSequence [
" Answer a parser for parsing sequences as usually found in FASTA formatted files "

^ (
(
#word asPParser plus flatten ,
self newLineParser ==> self removeNewlinesBlock
) min: 1
) optimize
self parserForNonBreakingString ,
self newLineParser optional ==> self removeNewlinesBlock
) plus
)
]

{ #category : #removing }
BioFASTABasicParser >> removeNewlinesBlock [
" Answer a one-argument block which answers a copy of its argument (a collection of parsed nodes) without its carriage return and line feed elements "

^ [ :nodes | nodes copyWithoutAll: { Character cr . Character lf } ]
^ [ :nodes | nodes copyWithoutAll: { Character cr asString . Character lf asString } ]
]

{ #category : #'accessing private' }
Expand Down
3 changes: 2 additions & 1 deletion repository/BioParsers/BioFASTAMultiParser.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Class {
}

{ #category : #'instance creation' }
BioFASTAMultiParser class >> onAmbigousGapped: anExpressionString [
BioFASTAMultiParser class >> onAmbiguousGapped: anExpressionString [

^ self basicNew
fastaSequenceParser: #parserForAmbibuousGappedSequence;
Expand Down Expand Up @@ -49,6 +49,7 @@ BioFASTAMultiParser >> parseResultClass [
BioFASTAMultiParser >> parserForAmbibuousGappedSequence [
" Configure the receiver's parser to parse DNA alignment (gaps are allowed) "

self halt.
^ (
(
#dnaAmbiguousLetterGapped asPParser plus flatten ,
Expand Down
21 changes: 6 additions & 15 deletions repository/BioParsers/BioParser.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ Class {
BioParser class >> parseAccession: aString [
" Parse aString containing an accession number and answer an accession instance "

^ ( BioAccessionParser on: aString ) parse
^ (BioAccessionParser on: aString) parse
]

{ #category : #'instance creation' }
Expand Down Expand Up @@ -109,37 +109,28 @@ BioParser class >> parseMultiFasta: aFastaString [
" Parse aFastaString representing a MultiFASTA sequence.
Answer a FastaMultiRecord object "

^ (BioFASTAMultiParser onAmbigousGapped: aFastaString) parse
^ (BioFASTAMultiParser onAmbiguousGapped: aFastaString) parse
]

{ #category : #'parse-fasta' }
BioParser class >> parseMultiFastaAlignment: aFastaString [
" Parse aFastaString, a <String> containing multiple sequences in FASTA format.
Answer a <BioFastaMultiRecord> object "

^ (BioFASTAMultiParser onAmbigousGapped: aFastaString) parse
^ (BioFASTAMultiParser onAmbiguousGapped: aFastaString) parse
]

{ #category : #'parse-fasta' }
BioParser class >> parseMultiFastaAlignmentFile: aFastaFullFileLocation [
" Parse aFastaFullFileLocation representing a <String> containing a file with multiple sequences in FASTA format. Answer a <BioAlignment> object "

^ (BioFASTAMultiParser onAmbigousGapped:
(self openFullFileNamed: aFastaFullFileLocation) contents) parse asAlignment
]

{ #category : #'parse-fasta' }
BioParser class >> parseMultiFastaAlignmentFileFast: aFastaFullFileLocation [
" Parse aFastaFullFileLocation representing a <String> containing a file with multiple sequences in FASTA format. Answer a <BioAlignment> object "

^ (BioFASTAMultiParser
onAmbigousGapped: (self openFullFileNamed: aFastaFullFileLocation)) parse asAlignment
^ (BioFASTAMultiParser onAmbiguousGapped:
(self openFullFileNamed: aFastaFullFileLocation) contents) parse asAlignment
]

{ #category : #'parse-fasta' }
BioParser class >> parseMultiFastaFile: aFastaFullFileLocation [
" Parser aFastaString representing a MultiFASTA sequence.
Answer a FastaMultiRecord object "
" Parse aFastaFullFileLocation (extensions .fa, .fasta, .fas, etc) representing a MultiFASTA sequence, i.e. a file which contains multiple fasta sequences. Answer a <BioFastaMultiRecord> object "

^ (BioFASTAMultiParser on: (self openFullFileNamed: aFastaFullFileLocation) contents) parse
]
Expand Down
10 changes: 7 additions & 3 deletions repository/BioParsers/BioSAXParser.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,12 @@ BioSAXParser >> current: anObject [
current := anObject
]

{ #category : #accessing }
BioSAXParser >> driver [
" Answer the result of sending #driver to the receiver's parser "

^ self parser driver
]

{ #category : #private }
BioSAXParser >> estimatedResultSize [
" Answer a <Number> of approximate found nodes in the receiver parsed file "
Expand Down Expand Up @@ -179,9 +185,7 @@ BioSAXParser >> parseDocument [
" Private - Parse receiver's input "

self bioLog: 'Start parsing document...' translated.
[ self driver reset ]
on: MessageNotUnderstood
do: [ :ex | self input reset ].
self input reset.
super parseDocument.
self bioLog: 'End parsing document' translated.
^ results
Expand Down
7 changes: 6 additions & 1 deletion repository/BioTools/BioIUPACAmbiguousRNA.class.st
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"
It represents the IUPAC RNA alphabet for ambiguous bases.
See class side for implementation details.
"
Class {
#name : #BioIUPACAmbiguousRNA,
#superclass : #BioIUPACRNA,
#category : 'BioTools-Alphabets'
#category : #'BioTools-Alphabets'
}

{ #category : #accessing }
Expand Down
7 changes: 6 additions & 1 deletion repository/BioTools/BioIUPACRNA.class.st
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"
Abstract class grouping codes for RNA alphabets.
See subclasses for specific details.
"
Class {
#name : #BioIUPACRNA,
#superclass : #BioIUPACAlphabet,
#category : 'BioTools-Alphabets'
#category : #'BioTools-Alphabets'
}

{ #category : #accessing }
Expand Down
6 changes: 5 additions & 1 deletion repository/BioTools/BioIUPACUnambiguousRNA.class.st
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
"
It represents the IUPAC RNA alphabet for unambiguous bases.
See class side for implementation details.
"
Class {
#name : #BioIUPACUnambiguousRNA,
#superclass : #BioIUPACRNA,
#category : 'BioTools-Alphabets'
#category : #'BioTools-Alphabets'
}

{ #category : #accessing }
Expand Down

0 comments on commit 33c2012

Please sign in to comment.