-
-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for mixed source corpora (#177)
- align all Scripture corpora to Original versification
- Loading branch information
Showing 41 changed files with 950 additions and 529 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,51 @@ | ||
namespace SIL.Machine.AspNetCore.Services; | ||
|
||
public enum CorpusType | ||
{ | ||
Text, | ||
Term | ||
} | ||
|
||
public class CorpusService : ICorpusService | ||
{ | ||
public IDictionary<CorpusType, ITextCorpus> CreateTextCorpus(IReadOnlyList<CorpusFile> files) | ||
public IEnumerable<ITextCorpus> CreateTextCorpora(IReadOnlyList<CorpusFile> files) | ||
{ | ||
IDictionary<CorpusType, ITextCorpus> corpora = new Dictionary<CorpusType, ITextCorpus>(); | ||
if (files.Count == 1 && files[0].Format == FileFormat.Paratext) | ||
List<ITextCorpus> corpora = []; | ||
|
||
List<Dictionary<string, IText>> textFileCorpora = []; | ||
foreach (CorpusFile file in files) | ||
{ | ||
corpora[CorpusType.Text] = new ParatextBackupTextCorpus(files[0].Location); | ||
corpora[CorpusType.Term] = new ParatextBackupTermsCorpus(files[0].Location, new string[] { "PN" }); | ||
switch (file.Format) | ||
{ | ||
case FileFormat.Text: | ||
// if there are multiple texts with the same id, then add it to a new corpus or the first | ||
// corpus that doesn't contain a text with that id | ||
Dictionary<string, IText>? corpus = textFileCorpora.FirstOrDefault(c => | ||
!c.ContainsKey(file.TextId) | ||
); | ||
if (corpus is null) | ||
{ | ||
corpus = []; | ||
textFileCorpora.Add(corpus); | ||
} | ||
corpus[file.TextId] = new TextFileText(file.TextId, file.Location); | ||
break; | ||
|
||
case FileFormat.Paratext: | ||
corpora.Add(new ParatextBackupTextCorpus(file.Location)); | ||
break; | ||
} | ||
} | ||
else | ||
foreach (Dictionary<string, IText> corpus in textFileCorpora) | ||
corpora.Add(new DictionaryTextCorpus(corpus.Values)); | ||
|
||
return corpora; | ||
} | ||
|
||
public IEnumerable<ITextCorpus> CreateTermCorpora(IReadOnlyList<CorpusFile> files) | ||
{ | ||
foreach (CorpusFile file in files) | ||
{ | ||
var texts = new List<IText>(); | ||
foreach (CorpusFile file in files) | ||
switch (file.Format) | ||
{ | ||
switch (file.Format) | ||
{ | ||
case FileFormat.Text: | ||
texts.Add(new TextFileText(file.TextId, file.Location)); | ||
break; | ||
} | ||
case FileFormat.Paratext: | ||
yield return new ParatextBackupTermsCorpus(file.Location, ["PN"]); | ||
break; | ||
} | ||
corpora[CorpusType.Text] = new DictionaryTextCorpus(texts); | ||
} | ||
return corpora; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.