Skip to content

Commit

Permalink
InterLanguageLinksExtractor directly extracts owl:sameAs
Browse files Browse the repository at this point in the history
  • Loading branch information
sven-h committed Jan 21, 2018
1 parent e6f396d commit f83fa01
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ import scala.language.reflectiveCalls
*/
class InterLanguageLinksExtractor(context: { def ontology : Ontology; def language : Language }) extends PageNodeExtractor
{
private val interLanguageLinksProperty = context.ontology.properties("wikiPageInterLanguageLink")
private val sameAsProperty = context.ontology.properties("owl:sameAs")

override val datasets = Set(DBpediaDatasets.InterLanguageLinks)

private val namespaces = if (context.language == Language.Commons) ExtractorUtils.commonsNamespacesContainingMetadata
else Set(Namespace.Main, Namespace.Template, Namespace.Category)

private val quad = QuadBuilder.apply(context.language, DBpediaDatasets.InterLanguageLinks, interLanguageLinksProperty, null) _
private val quad = QuadBuilder.apply(context.language, DBpediaDatasets.InterLanguageLinks, sameAsProperty, null) _

override def extract(page : PageNode, subjectUri : String) : Seq[Quad] =
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package org.dbpedia.extraction.scripts

import java.io.File

import org.apache.commons.lang.StringEscapeUtils
import org.apache.commons.lang3.StringUtils
import org.dbpedia.extraction.config.ConfigUtils.parseLanguages
import org.dbpedia.extraction.util.RichFile.wrapFile
import org.dbpedia.extraction.util.{DateFinder, IOUtils}
Expand Down Expand Up @@ -50,7 +50,7 @@ object UnmodifiedFeederCacheGenerator {
new QuadMapper().readQuads(finder, "page-ids" + suffix, auto = true) {
quad =>
val pageID = quad.value.toInt
var pageTitle = StringEscapeUtils.escapeSql(quad.subject.split("dbpedia.org/resource/", 2)(1))
var pageTitle = escapeSql(quad.subject.split("dbpedia.org/resource/", 2)(1))
if (pageTitle.contains(':')) {
val splitPageTitle = pageTitle.split(":", 2)
val namespace = Namespace.get(language, splitPageTitle(0).replace("_", " "))
Expand All @@ -69,4 +69,9 @@ object UnmodifiedFeederCacheGenerator {

}
}

/**
  * Escapes text for use inside a single-quoted SQL string literal by doubling
  * every single quote (`'` becomes `''`). No other character is altered.
  *
  * NOTE(review): this only neutralises single quotes; it is not a general
  * defence against SQL injection — prefer parameterised statements where the
  * consumer of this string allows it.
  *
  * @param str the text to escape; may be null
  * @return the text with each `'` replaced by `''`, or null when `str` is null
  */
def escapeSql(str: String): String = {
  // Null-in/null-out is kept so existing callers observe the same contract.
  if (str == null) null
  else str.replace("'", "''") // literal (non-regex) replacement of all occurrences
}
}

0 comments on commit f83fa01

Please sign in to comment.