Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle Zip files on upload #905

Open
wants to merge 6 commits into
base: use-files-directly
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,18 @@
import com.conveyal.file.FileUtils;
import com.conveyal.gtfs.GTFSCache;
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.error.GTFSError;
import com.conveyal.gtfs.error.GeneralError;
import com.conveyal.gtfs.model.Stop;
import com.conveyal.gtfs.validator.PostLoadValidator;
import com.conveyal.osmlib.Node;
import com.conveyal.osmlib.OSM;
import com.conveyal.r5.analyst.progress.ProgressInputStream;
import com.conveyal.r5.analyst.cluster.TransportNetworkConfig;
import com.conveyal.r5.analyst.progress.ProgressInputStream;
import com.conveyal.r5.analyst.progress.Task;
import com.conveyal.r5.streets.OSMCache;
import com.conveyal.r5.util.ExceptionUtils;
import com.mongodb.QueryBuilder;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.bson.types.ObjectId;
import org.locationtech.jts.geom.Envelope;
import org.slf4j.Logger;
Expand Down Expand Up @@ -107,19 +105,25 @@ private Bundle create (Request req, Response res) {
// Do some initial synchronous work setting up the bundle to fail fast if the request is bad.
final Map<String, List<FileItem>> files = HttpUtils.getRequestFiles(req.raw());
final Bundle bundle = new Bundle();
final File osmPbfFile;
final List<File> gtfsZipFiles;
try {
bundle.name = files.get("bundleName").get(0).getString("UTF-8");
bundle.regionId = files.get("regionId").get(0).getString("UTF-8");

if (files.get("osmId") != null) {
osmPbfFile = null;
bundle.osmId = files.get("osmId").get(0).getString("UTF-8");
Bundle bundleWithOsm = Persistence.bundles.find(QueryBuilder.start("osmId").is(bundle.osmId).get()).next();
if (bundleWithOsm == null) {
throw AnalysisServerException.badRequest("Selected OSM does not exist.");
}
} else {
osmPbfFile = HttpUtils.saveFileItemLocally(files.get("osm").get(0));
}

if (files.get("feedGroupId") != null) {
gtfsZipFiles = null;
bundle.feedGroupId = files.get("feedGroupId").get(0).getString("UTF-8");
Bundle bundleWithFeed = Persistence.bundles.find(QueryBuilder.start("feedGroupId").is(bundle.feedGroupId).get()).next();
if (bundleWithFeed == null) {
Expand All @@ -134,6 +138,8 @@ private Bundle create (Request req, Response res) {
bundle.feeds = bundleWithFeed.feeds;
bundle.feedsComplete = bundleWithFeed.feedsComplete;
bundle.totalFeeds = bundleWithFeed.totalFeeds;
} else {
gtfsZipFiles = HttpUtils.saveFileItemsLocally(files.get("feedGroup"));
}
UserPermissions userPermissions = UserPermissions.from(req);
bundle.accessGroup = userPermissions.accessGroup;
Expand All @@ -155,16 +161,15 @@ private Bundle create (Request req, Response res) {
.withWorkProduct(BUNDLE, bundle._id, bundle.regionId)
.withAction(progressListener -> {
try {
if (bundle.osmId == null) {
if (osmPbfFile != null) {
// Process uploaded OSM.
bundle.osmId = new ObjectId().toString();
DiskFileItem fi = (DiskFileItem) files.get("osm").get(0);
// Here we perform minimal validation by loading the OSM, but don't retain the resulting MapDB.
OSM osm = new OSM(null);
osm.intersectionDetection = true;
// Number of entities in an OSM file is unknown, so derive progress from the number of bytes read.
// Wrapping in buffered input stream should reduce number of progress updates.
osm.readPbf(ProgressInputStream.forFileItem(fi, progressListener));
osm.readPbf(ProgressInputStream.forFile(osmPbfFile, progressListener));
// osm.readPbf(new BufferedInputStream(fi.getInputStream()));
Envelope osmBounds = new Envelope();
for (Node n : osm.nodes.values()) {
Expand All @@ -173,10 +178,10 @@ private Bundle create (Request req, Response res) {
osm.close();
checkWgsEnvelopeSize(osmBounds, "OSM data");
// Store the source OSM file. Note that we're not storing the derived MapDB file here.
fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), fi.getStoreLocation());
fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), osmPbfFile);
}

if (bundle.feedGroupId == null) {
if (gtfsZipFiles != null) {
// Process uploaded GTFS files
bundle.feedGroupId = new ObjectId().toString();

Expand All @@ -186,8 +191,7 @@ private Bundle create (Request req, Response res) {
bundle.feeds = new ArrayList<>();
bundle.totalFeeds = files.get("feedGroup").size();

for (FileItem fileItem : files.get("feedGroup")) {
File feedFile = ((DiskFileItem) fileItem).getStoreLocation();
for (File feedFile : gtfsZipFiles) {
ZipFile zipFile = new ZipFile(feedFile);
File tempDbFile = FileUtils.createScratchFile("db");
File tempDbpFile = new File(tempDbFile.getAbsolutePath() + ".p");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.conveyal.analysis.AnalysisServerException;
import com.conveyal.analysis.UserPermissions;
import com.conveyal.analysis.components.TaskScheduler;
import com.conveyal.analysis.datasource.DataSourceUtil;
import com.conveyal.analysis.grids.SeamlessCensusGridExtractor;
import com.conveyal.analysis.models.DataGroup;
import com.conveyal.analysis.models.OpportunityDataset;
Expand All @@ -29,7 +30,6 @@
import com.google.common.io.Files;
import com.mongodb.QueryBuilder;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.commons.io.FilenameUtils;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
Expand All @@ -55,7 +55,6 @@
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate;
import static com.conveyal.analysis.util.JsonUtil.toJson;
import static com.conveyal.file.FileCategory.GRIDS;
import static com.conveyal.r5.analyst.WebMercatorExtents.parseZoom;
Expand Down Expand Up @@ -327,31 +326,25 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res
// are recorded in a persistent purpose-built way rather than falling back on the UI's catch-all error window.
// TODO more standardized mechanism for tracking asynchronous tasks and catching exceptions on them
OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, sourceName);
addStatusAndRemoveOldStatuses(status);

// TODO should we delete this temporary directory at the end?
final File tmpDirectory = FileUtils.createScratchDirectory();
final List<File> files = new ArrayList<>();
final List<FileItem> fileItems;
final List<File> files;
final FileStorageFormat uploadFormat;
final Map<String, String> parameters;
try {
// Validate inputs and parameters, which will throw an exception if there's anything wrong with them.
// Call remove() rather than get() so that subsequent code will see only string parameters, not the files.
fileItems = formFields.remove("files");
for (var fi : fileItems) {
var tmpFile = new File(tmpDirectory, fi.getName());
Files.move(((DiskFileItem) fi).getStoreLocation(), tmpFile);
files.add(tmpFile);
}
uploadFormat = detectUploadFormatAndValidate(files);
files = HttpUtils.extractFilesFromFileItemsAndUnzip(formFields.remove("files"));
uploadFormat = DataSourceUtil.detectUploadFormatAndValidate(files);
parameters = extractStringParameters(formFields);
} catch (Exception e) {
status.completeWithError(e);
res.status(400);
return status;
}

// Add the status to the region wide tracker before we begin the heavy tasks.
addStatusAndRemoveOldStatuses(status);

// We are going to call several potentially slow blocking methods to create and persist new pointsets.
// This whole series of actions will be run sequentially but within an asynchronous Executor task.
// After enqueueing, the status is returned so the UI can track progress.
Expand Down Expand Up @@ -631,6 +624,7 @@ public static class OpportunityDatasetUploadStatus implements ProgressListener {
public Status status = Status.PROCESSING;
public String name;
public String message;
public String stackTrace;
public Date createdAt;
public Date completedAt;

Expand All @@ -647,7 +641,8 @@ private void completed (Status status) {
}

public void completeWithError (Exception e) {
message = "Unable to create opportunity dataset. " + ExceptionUtils.stackTraceString(e);
stackTrace = ExceptionUtils.stackTraceString(e);
message = "Unable to create opportunity dataset. " + e.getMessage();
completed(Status.ERROR);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,23 @@
import com.conveyal.analysis.UserPermissions;
import com.conveyal.analysis.models.DataSource;
import com.conveyal.analysis.persistence.AnalysisCollection;
import com.conveyal.analysis.util.HttpUtils;
import com.conveyal.file.FileStorage;
import com.conveyal.file.FileStorageFormat;
import com.conveyal.file.FileStorageKey;
import com.conveyal.r5.analyst.progress.ProgressListener;
import com.conveyal.r5.analyst.progress.TaskAction;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;

import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate;
import static com.conveyal.analysis.util.HttpUtils.getFormField;
import static com.conveyal.file.FileCategory.DATASOURCES;
import static com.conveyal.file.FileStorageFormat.SHP;
Expand Down Expand Up @@ -123,15 +121,9 @@ public static DataSourceUploadAction forFormFields (
// Extract required parameters. Throws AnalysisServerException on failure, e.g. if a field is missing.
final String sourceName = getFormField(formFields, "sourceName", true);
final String regionId = getFormField(formFields, "regionId", true);
final List<FileItem> fileItems = formFields.get("sourceFiles");
final List<File> files = new ArrayList<>();
final List<File> files = HttpUtils.extractFilesFromFileItemsAndUnzip(formFields.get("sourceFiles"));

for (var fi : fileItems) {
var dfi = (DiskFileItem) fi;
files.add(dfi.getStoreLocation());
}

FileStorageFormat format = detectUploadFormatAndValidate(files);
FileStorageFormat format = DataSourceUtil.detectUploadFormatAndValidate(files);
DataSourceIngester ingester = DataSourceIngester.forFormat(format);

String originalFileNames = files.stream().map(File::getName).collect(Collectors.joining(", "));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ public abstract class DataSourceUtil {
* @throws DataSourceException if the type of the upload can't be detected or preconditions are violated.
* @return the expected type of the uploaded file or files, never null.
*/
public static FileStorageFormat detectUploadFormatAndValidate (List<File> fileItems) {
if (isNullOrEmpty(fileItems)) {
public static FileStorageFormat detectUploadFormatAndValidate (List<File> files) {
if (isNullOrEmpty(files)) {
throw new DataSourceException("You must select some files to upload.");
}
Set<String> fileExtensions = extractFileExtensions(fileItems);
Set<String> fileExtensions = extractFileExtensions(files);
if (fileExtensions.isEmpty()) {
throw new DataSourceException("No file extensions seen, cannot detect upload type.");
}
checkFileCharacteristics(fileItems);
checkFileCharacteristics(files);
if (fileExtensions.contains("zip")) {
throw new DataSourceException("Upload of spatial .zip files not yet supported");
// TODO unzip and process unzipped files - will need to peek inside to detect GTFS uploads first.
Expand All @@ -45,7 +45,7 @@ public static FileStorageFormat detectUploadFormatAndValidate (List<File> fileIt
final Set<String> shapefileExtensions = Sets.newHashSet("shp", "dbf", "prj");
if ( ! Sets.intersection(fileExtensions, shapefileExtensions).isEmpty()) {
if (fileExtensions.containsAll(shapefileExtensions)) {
verifyBaseNamesSame(fileItems);
verifyBaseNamesSame(files);
// TODO check that any additional file is .shx, and that there are no more than 4 files.
} else {
throw new DataSourceException("You must multi-select at least SHP, DBF, and PRJ files for shapefile upload.");
Expand Down Expand Up @@ -113,5 +113,4 @@ private static void verifyBaseNamesSame (List<File> files) {
}
}
}

}
60 changes: 57 additions & 3 deletions src/main/java/com/conveyal/analysis/util/HttpUtils.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package com.conveyal.analysis.util;

import com.conveyal.analysis.AnalysisServerException;
import com.conveyal.file.FileUtils;
import com.conveyal.r5.util.ExceptionUtils;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;

import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -36,8 +38,8 @@ public static Map<String, List<FileItem>> getRequestFiles (HttpServletRequest re
// If we always saved the FileItems via write() or read them with getInputStream() they would not all need to
// be on disk.
try {
FileItemFactory fileItemFactory = new DiskFileItemFactory(0, null);
ServletFileUpload sfu = new ServletFileUpload(fileItemFactory);
DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(0, null);
ServletFileUpload sfu = new ServletFileUpload(diskFileItemFactory);
return sfu.parseParameterMap(req);
} catch (Exception e) {
throw AnalysisServerException.fileUpload(ExceptionUtils.stackTraceString(e));
Expand Down Expand Up @@ -66,4 +68,56 @@ public static String getFormField(Map<String, List<FileItem>> formFields, String
fieldName));
}
}

/**
 * Save the contents of each uploaded {@code FileItem} into a shared scratch directory, expanding
 * any zip archives in place. For zip uploads the extracted entries are returned instead of the
 * archive itself; all other files are returned as-is.
 * @param fileItems the uploaded form parts to persist locally (assumed non-null; TODO confirm callers guarantee this).
 * @return handles to the resulting plain files in the scratch directory.
 */
public static List<File> extractFilesFromFileItemsAndUnzip(List<FileItem> fileItems) {
    File directory = FileUtils.createScratchDirectory();
    List<File> files = new ArrayList<>();
    for (FileItem fi : fileItems) {
        File file = moveFileItemIntoDirectory(fi, directory);
        String name = file.getName();
        // Use a locale-independent lowercase conversion. With the default platform locale
        // (e.g. Turkish, where 'I' lowercases to a dotless i), toLowerCase() can fail to map
        // ".ZIP" to ".zip" and the archive would silently not be unzipped.
        if (name.toLowerCase(java.util.Locale.ROOT).endsWith(".zip")) {
            files.addAll(FileUtils.unzipFileIntoDirectory(file, directory));
        } else {
            files.add(file);
        }
    }
    return files;
}

/**
 * Persist the contents of every {@code FileItem} into a freshly created scratch directory.
 * @param fileItems the uploaded form parts to save.
 * @return handles to the saved files, in the same order as the input list.
 */
public static List<File> saveFileItemsLocally(List<FileItem> fileItems) {
    final File scratchDir = FileUtils.createScratchDirectory();
    final List<File> savedFiles = new ArrayList<>(fileItems.size());
    fileItems.forEach(item -> savedFiles.add(moveFileItemIntoDirectory(item, scratchDir)));
    return savedFiles;
}

/**
 * Persist a single {@code FileItem}'s contents into a new scratch directory.
 * @param fileItem the uploaded form part to save.
 * @return a handle to the saved file.
 */
public static File saveFileItemLocally(FileItem fileItem) {
    final File scratchDir = FileUtils.createScratchDirectory();
    return moveFileItemIntoDirectory(fileItem, scratchDir);
}

/**
 * Move the contents of a {@code FileItem} into the given directory by calling {@code write}.
 * {@code DiskFileItem}s whose contents are already on disk will use {@code renameTo}, which is
 * cheap. The client-supplied filename is reduced to its base name before use, so a crafted
 * name such as {@code ../../x} cannot write outside the target directory.
 * @param fileItem the uploaded form part whose contents should be moved.
 * @param directory the destination directory (assumed to already exist).
 * @return a handle to the file created inside {@code directory}.
 * @throws AnalysisServerException if {@code FileItem.write} fails for any reason.
 */
public static File moveFileItemIntoDirectory(FileItem fileItem, File directory) {
    // FileItem.getName() is untrusted client input: some browsers send a full client-side path,
    // and a malicious name containing "../" could otherwise escape the scratch directory.
    // new File(name).getName() strips all leading path components.
    String safeName = new File(fileItem.getName()).getName();
    File file = new File(directory, safeName);
    try {
        fileItem.write(file);
    } catch (Exception e) {
        throw new AnalysisServerException(e, "Error storing file in directory on disk. FileItem.write() failed.");
    }
    return file;
}
}
Loading