Skip to content

Commit

Permalink
added title of section to the extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
sven-h committed Jun 5, 2019
1 parent cff322a commit 6ec9abe
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class GeneralNifExtractor (
val wikiPageExternalLinkProperty = context.ontology.properties("wikiPageExternalLink")
val wikiPageInterWikiLinkProperty = context.ontology.properties("wikiPageInterWikiLink")
val wikiPageInterLanguageLinkProperty = context.ontology.properties("wikiPageInterLanguageLink")
val labelProperty = context.ontology.properties("rdfs:label")



Expand Down Expand Up @@ -69,7 +70,7 @@ class GeneralNifExtractor (
private def writeLongAndShortAbstract(section: NifSection, text:String):ArrayBuffer[Quad] = {
var quads = ArrayBuffer[Quad]()
if (recordAbstracts && section.id == "abstract" && text.length > 0) {
val describingParagraphs = getParagraphsDescribingConcept(section, text)
val describingParagraphs = section.paragraphs//getParagraphsDescribingConcept(section, text)
if(describingParagraphs.size > 0){
quads += longQuad(wikiPage.uri, text.substring(describingParagraphs.head.begin.getOrElse(0), describingParagraphs.last.end.getOrElse(0)), sourceUrl) //text.substring(section.begin.getOrElse(0), section.end.getOrElse(0)), sourceUrl)
quads += shortQuad(wikiPage.uri, getShortAbstract(describingParagraphs, text), sourceUrl) // getShortAbstract(section.paragraphs, text), sourceUrl)
Expand Down Expand Up @@ -184,6 +185,21 @@ class GeneralNifExtractor (
if (section.next.isEmpty)
quads += nifStructure(topSectionUri, RdfNamespace.NIF.append("lastSection"), sectionUri, sourceUrl, null)

//adding title: when both title offsets are known, emit a nif:Title resource for the section heading
if(section.beginTitle.nonEmpty && section.endTitle.nonEmpty){
val titleUri = getNifIri("title", section.beginTitle.get, section.endTitle.get)
quads += nifStructure(titleUri, RdfNamespace.RDF.append("type"), RdfNamespace.NIF.append("Title"), sourceUrl, null)
quads += nifStructure(titleUri, RdfNamespace.NIF.append("referenceContext"), nifContextUri, sourceUrl, null)
quads += nifStructure(titleUri, RdfNamespace.NIF.append("beginIndex"), section.beginTitle.get.toString, sourceUrl, RdfNamespace.XSD.append("nonNegativeInteger"))
quads += nifStructure(titleUri, RdfNamespace.NIF.append("endIndex"), section.endTitle.get.toString, sourceUrl, RdfNamespace.XSD.append("nonNegativeInteger"))
quads += nifStructure(titleUri, RdfNamespace.NIF.append("superString"), sectionUri, sourceUrl, null)
if(writeLinkAnchors){
quads += nifStructure(titleUri, RdfNamespace.NIF.append("anchorOf"), section.id, sourceUrl, RdfNamespace.XSD.append("string"))
quads += nifStructure(sectionUri, labelProperty.uri, section.id.trim, sourceUrl, RdfNamespace.XSD.append("string"))
}

}

quads
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class NifExtractionAstVisitor(language : Language)
private var currentSection = new ListBuffer[Int]()

private var context = StringBuilder.newBuilder //contains the whole text of the wikipage
private var nifSection: NifSection = new NifSection(id = "abstract",ref = "", prev = None, next = None, top = None, sub = None, begin = Some(0), end = None, paragraphs = ListBuffer())
private var nifSection: NifSection = new NifSection(id = "abstract",ref = "", prev = None, next = None, top = None, sub = None, begin = Some(0), end = None, beginTitle = None, endTitle = None, paragraphs = ListBuffer())
private var nifParagraph: NifParagraph = new NifParagraph(begin = Some(0), end = None,links = ListBuffer())
private var extLinkNum :Int = 1

Expand All @@ -42,6 +42,8 @@ class NifExtractionAstVisitor(language : Language)
sub = None,
begin = Some(0),
end = None,
beginTitle = None,
endTitle = None,
paragraphs = ListBuffer()
)
nifSection = abstractSection
Expand Down Expand Up @@ -256,6 +258,8 @@ class NifExtractionAstVisitor(language : Language)
sub = None,
begin = None,
end = None,
beginTitle = None,
endTitle = None,
paragraphs = ListBuffer()
)
section.top match{
Expand All @@ -282,10 +286,10 @@ class NifExtractionAstVisitor(language : Language)
tocMap.append(nifSection)
//closeParagraphAndStartNew()
nifParagraph = new NifParagraph(begin = Some(0),end = Some(0),links = ListBuffer()) //dummy paragraph (all links in header will be put in dummy)
val startHeading = context.length
nifSection.beginTitle = Some(context.length)
iterate(s.getHeading)
val endHeading = context.length
nifSection.id = context.substring(startHeading, endHeading)
nifSection.endTitle = Some(context.length)
nifSection.id = context.substring(nifSection.beginTitle.get, nifSection.endTitle.get)

closeParagraphAndStartNew()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class NifSection(
var sub: Option[NifSection],
var begin: Option[Int],
var end: Option[Int],
var beginTitle: Option[Int],
var endTitle: Option[Int],
var paragraphs: ListBuffer[NifParagraph]
) {
def addParagraph(nifparagraph: NifParagraph): Unit = paragraphs += nifparagraph
Expand Down

0 comments on commit 6ec9abe

Please sign in to comment.