Skip to content

Commit

Permalink
updated Collection Download
Browse files Browse the repository at this point in the history
  • Loading branch information
eisenbahnplatte committed Jun 22, 2021
1 parent af59f46 commit 03d9cb3
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import org.apache.http.client.ResponseHandler
import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.{BasicResponseHandler, HttpClientBuilder}
import org.dbpedia.databus.client.filehandling.download.Downloader
import org.dbpedia.databus.client.sparql.QueryHandler
import org.dbpedia.databus.client.sparql.queries.DatabusQueries
import org.slf4j.LoggerFactory


Expand Down Expand Up @@ -53,13 +55,16 @@ object SourceHandler {
*/
def handleQuery(query: String, target: File, cache: File, format: String, compression: String, overwrite: Boolean=false):Unit = {

val queryStr = {
var queryStr = {
if (isCollection(query)) getQueryOfCollection(query)
else query
}

printTask("query", queryStr, target.pathAsString)

//necessary because collection queries query the permanent DBpedia URIs, not the actual download links
if(isCollection(query)) queryStr = DatabusQueries.queryDownloadURLOfDatabusFiles(QueryHandler.executeDownloadQuery(queryStr))

println("DOWNLOAD TOOL:")

val allSHAs = Downloader.downloadWithQuery(queryStr, cache, overwrite)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,24 @@ object Downloader {
results.foreach(fileIRI => {
val fileSHA = QueryHandler.getSHA256Sum(fileIRI)

if (overwrite) {
downloadFile(fileIRI, fileSHA, targetdir) match {
case Some(file: File) => allSHAs = allSHAs :+ fileSHA
case None => ""
}
}
else {
if (!FileUtil.checkIfFileInCache(targetdir, fileSHA)) {
if (fileSHA != "") {
if (overwrite) {
downloadFile(fileIRI, fileSHA, targetdir) match {
case Some(file: File) => allSHAs = allSHAs :+ fileSHA
case None => ""
case Some(file: File) => allSHAs = allSHAs :+ fileSHA
case None => ""
}
}
else {
println(s"$fileIRI --> already exists in Cache")
allSHAs = allSHAs :+ fileSHA
if (!FileUtil.checkIfFileInCache(targetdir, fileSHA)) {
downloadFile(fileIRI, fileSHA, targetdir) match {
case Some(file: File) => allSHAs = allSHAs :+ fileSHA
case None => ""
}
}
else {
println(s"$fileIRI --> already exists in Cache")
allSHAs = allSHAs :+ fileSHA
}
}
}
})
Expand Down Expand Up @@ -80,7 +82,7 @@ object Downloader {
if (!correctFileTransfer) {
println("file download had issues")
LoggerFactory.getLogger("Download-Logger").error(s"couldn't download file $url properly")
file.delete(true)
file.delete(swallowIOExceptions = true)
return None
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@ package org.dbpedia.databus.client.sparql

import java.io.FileNotFoundException
import java.net.URL

import better.files.File
import org.apache.commons.io.FileUtils
import org.apache.jena.JenaRuntime
import org.apache.jena.query._
import org.apache.jena.rdf.model.{Model, ModelFactory}
import org.apache.jena.riot.{RDFDataMgr, RDFLanguages}
import org.dbpedia.databus.client.sparql.queries.{DataIdQueries, DatabusQueries, MappingQueries}
import org.slf4j.LoggerFactory
import org.slf4j.{Logger, LoggerFactory}

object QueryHandler {

val service = "https://databus.dbpedia.org/repo/sparql"
val logger: Logger = LoggerFactory.getLogger(getClass)

def executeQuery(queryString: String, model:Model = ModelFactory.createDefaultModel()): Seq[QuerySolution] = {

Expand Down Expand Up @@ -54,9 +54,15 @@ object QueryHandler {
/**
 * Looks up the SHA-256 checksum for the given file URL by running the
 * `DatabusQueries.querySha256` query through `executeQuery`.
 *
 * Only the first solution is inspected, and the value bound to its first
 * variable is returned as a string.
 *
 * @param url URL of the file whose checksum is requested
 * @return the checksum string, or `""` (after logging an error) when the
 *         query returns no solution or the first solution has no variables
 */
def getSHA256Sum(url: String): String = {

  val results = executeQuery(DatabusQueries.querySha256(url))

  // headOption/hasNext replace the unguarded `head`/`next()` calls, so an
  // empty result set takes the fallback path instead of throwing before
  // any handler is reached.
  val shaSum = for {
    solution <- results.headOption
    varNames = solution.varNames()
    if varNames.hasNext
  } yield solution.getLiteral(varNames.next()).getString

  shaSum.getOrElse {
    logger.error(s"No Sha Sum found for $url")
    ""
  }

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,17 @@ object DatabusQueries {
| ?mapping <http://tmp-namespace.org/databusFixRequired> ?file .
|}
|""".stripMargin

/**
 * Builds a SPARQL SELECT query that binds `?file` to the `dcat:downloadURL`
 * of every distribution linked (through any predicate) to one of the given
 * Databus file IRIs.
 *
 * @param files IRIs to place in the VALUES block; each is wrapped as `(<iri>)`
 *              without any escaping, so callers must pass well-formed IRIs
 * @return the query text; for an empty `files` the VALUES block is empty
 */
def queryDownloadURLOfDatabusFiles(files: Seq[String]): String = {
  // Wrap each IRI on its own so an empty input yields no rows at all;
  // mkString with start/end markers would emit the bogus row "(<>)".
  val databusFilesString = files.map(file => s"(<$file>)").mkString(" ")
  s"""
     |PREFIX dcat: <http://www.w3.org/ns/dcat#>
     |
     |SELECT DISTINCT ?file WHERE {
     | VALUES (?databusfile) {$databusFilesString}
     | ?distribution ?o ?databusfile .
     | ?distribution dcat:downloadURL ?file .
     |}
     |""".stripMargin
}
}

0 comments on commit 03d9cb3

Please sign in to comment.