From 55276b764821df964c7fcb1fbf294b8e64fd759d Mon Sep 17 00:00:00 2001 From: Levin Herr Date: Fri, 11 Oct 2024 11:46:11 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Move=20Polylines=20to=20files=20whe?= =?UTF-8?q?n=20accessed=20(#2949)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Commands/DatabaseCleaner/Polylines.php | 30 ++++---- app/Console/Commands/PolylinesToFiles.php | 39 ++++++++++ .../Backend/Support/LocationController.php | 2 +- app/Models/PolyLine.php | 26 ++++++- app/Services/PolylineStorageService.php | 73 +++++++++++++++++++ config/trwl.php | 39 +++++----- storage/polylines/.gitignore | 2 + tests/Feature/Commands/CleanUpTest.php | 18 ++++- 8 files changed, 191 insertions(+), 38 deletions(-) create mode 100644 app/Console/Commands/PolylinesToFiles.php create mode 100644 app/Services/PolylineStorageService.php create mode 100644 storage/polylines/.gitignore diff --git a/app/Console/Commands/DatabaseCleaner/Polylines.php b/app/Console/Commands/DatabaseCleaner/Polylines.php index 8b38c911d..777b08897 100644 --- a/app/Console/Commands/DatabaseCleaner/Polylines.php +++ b/app/Console/Commands/DatabaseCleaner/Polylines.php @@ -13,25 +13,29 @@ class Polylines extends Command protected $description = 'Find and delete unused and old polylines from database'; public function handle(): int { - $start = microtime(true); - $rows = DB::table('poly_lines') - ->selectRaw('poly_lines.id, poly_lines.parent_id') - ->leftJoin('hafas_trips', 'poly_lines.id', '=', 'hafas_trips.polyline_id') - ->leftJoin( - 'poly_lines AS parent_poly_lines', - 'poly_lines.id', - '=', - 'parent_poly_lines.parent_id' - ) - ->whereRaw('hafas_trips.polyline_id IS NULL AND parent_poly_lines.parent_id IS NULL') - ->get(); + $start = microtime(true); + $rows = DB::table('poly_lines') + ->selectRaw('poly_lines.id, poly_lines.parent_id') + ->leftJoin('hafas_trips', 'poly_lines.id', '=', 'hafas_trips.polyline_id') + ->leftJoin( + 'poly_lines AS parent_poly_lines', + 'poly_lines.id', + '=', + 'parent_poly_lines.parent_id' + ) + ->whereRaw('hafas_trips.polyline_id IS NULL AND parent_poly_lines.parent_id IS NULL') + ->get(); $this->info('Found ' . $rows->count() . ' unused polylines.'); $affectedRows = 0; // get 100 rows at a time foreach ($rows->chunk(100) as $chunk) { $ids = $chunk->pluck('id')->toArray(); - $affectedRows += PolyLine::whereIn('id', $ids)->delete(); + $row = PolyLine::whereIn('id', $ids)->get(); + foreach ($row as $polyline) { + $polyline->delete(); + $affectedRows++; + } $this->output->write('.'); } $this->output->newLine(); diff --git a/app/Console/Commands/PolylinesToFiles.php b/app/Console/Commands/PolylinesToFiles.php new file mode 100644 index 000000000..c631c09bb --- /dev/null +++ b/app/Console/Commands/PolylinesToFiles.php @@ -0,0 +1,39 @@ +where('polyline', '!=', '{}') + ->get(); + $this->info('Found ' . $rows->count() . ' polylines.'); + $affectedRows = 0; + + // get 100 rows at a time + foreach ($rows->chunk(100) as $chunk) { + $ids = $chunk->pluck('id')->toArray(); + $affectedRows += PolyLine::whereIn('id', $ids)->get()->map(function($polyline) { + $polyline->polyline; // trigger the __get method + return $polyline; + })->count(); + $this->output->write('.'); + } + $this->output->newLine(); + + $time_elapsed_secs = microtime(true) - $start; + Log::debug($affectedRows . ' polylines converted in ' . $time_elapsed_secs . ' seconds.'); + $this->info($affectedRows . ' polylines converted in ' . $time_elapsed_secs . ' seconds.'); + return 0; + } +} diff --git a/app/Http/Controllers/Backend/Support/LocationController.php b/app/Http/Controllers/Backend/Support/LocationController.php index 0ca7ba564..3e84c9492 100644 --- a/app/Http/Controllers/Backend/Support/LocationController.php +++ b/app/Http/Controllers/Backend/Support/LocationController.php @@ -145,7 +145,7 @@ private function getDistanceFromGeoJson(stdClass $geoJson): int { * @throws JsonException */ private function getPolylineWithTimestamps(): stdClass { - if (isset($this->trip->polyline)) { + if (!empty($this->trip->polyline)) { // decode GeoJSON object from polyline $geoJsonObj = json_decode($this->trip->polyline->polyline, false, 512, JSON_THROW_ON_ERROR); } else { diff --git a/app/Models/PolyLine.php b/app/Models/PolyLine.php index 699301e1a..39bedd1cf 100644 --- a/app/Models/PolyLine.php +++ b/app/Models/PolyLine.php @@ -2,19 +2,25 @@ namespace App\Models; +use App\Services\PolylineStorageService; use Illuminate\Database\Eloquent\Model; use Illuminate\Database\Eloquent\Relations\HasMany; use Illuminate\Database\Eloquent\Relations\HasOne; class PolyLine extends Model { - - protected $fillable = ['hash', 'polyline', 'source', 'parent_id']; - protected $casts = [ + private PolylineStorageService $polylineStorageService; + protected $fillable = ['hash', 'polyline', 'source', 'parent_id']; + protected $casts = [ 'id' => 'integer', 'source' => 'string', //enum['hafas', 'brouter'] in database ]; + public function __construct(array $attributes = []) { + parent::__construct($attributes); + $this->polylineStorageService = new PolylineStorageService(); + } + public function trips(): HasMany { return $this->hasMany(Trip::class, 'polyline_id', 'id'); } @@ -22,4 +28,18 @@ public function trips(): HasMany { public function parent(): HasOne { return $this->hasOne(PolyLine::class, 'parent_id', 'id'); } + + public function __get($key) { + // check if the polyline is empty + if ($key === 'polyline') { + return $this->polylineStorageService->getOrCreate($this); + } + + return parent::__get($key); + } + + public function delete(): ?bool { + $this->polylineStorageService->delete($this->hash); + return parent::delete(); + } } diff --git a/app/Services/PolylineStorageService.php b/app/Services/PolylineStorageService.php new file mode 100644 index 000000000..c75bf7d66 --- /dev/null +++ b/app/Services/PolylineStorageService.php @@ -0,0 +1,73 @@ +disk = Storage::build([ + 'driver' => config('trwl.polyline_storage_driver'), + 'root' => storage_path(config('trwl.polyline_storage_path')), + ]); + } + + private function store(string $content, string $hash = null): bool { + $hash = $hash ?? md5($content); + + if ($this->disk->exists($this->storageName($hash))) { + return true; + } + return $this->disk->put($this->storageName($hash), $content); + } + + public function get(string $hash): string { + if ($this->content !== null) { + return $this->content; + } + if (!$this->disk->exists($this->storageName($hash))) { + return ''; + } + + return $this->disk->get($this->storageName($hash)); + } + + public function delete(string $hash): void { + $this->disk->delete($this->storageName($hash)); + } + + public function getOrCreate(PolyLine $polyLine): string { + $content = $polyLine->getAttribute('polyline'); + $hash = $polyLine->getAttribute('hash'); + + if (!$this->empty($content)) { + $success = $this->store($content, $hash); + + if ($success && config('trwl.polyline_clear_after_copy')) { + $polyLine->update(['polyline' => '{}']); + } + } + + return $this->get($hash); + } + + /** + * Get the storage name for a given hash. + * This breaks the hash into 4 characters and uses them as subdirectories + * to avoid having too many files in one directory. + */ + private function storageName(string $hash): string { + return substr($hash, 0, 2) . '/' . substr($hash, 2, 2) . '/' . $hash; + } + + private function empty(string $content): bool { + $content = trim($content); + return empty($content) || $content === '{}' || $content === '[]'; + } +} diff --git a/config/trwl.php b/config/trwl.php index 52569004f..6d8de23ba 100644 --- a/config/trwl.php +++ b/config/trwl.php @@ -1,27 +1,32 @@ env('POST_SOCIAL', false), + 'post_social' => env('POST_SOCIAL', false), # Mastodon - 'mastodon_domain' => env('MASTODON_DOMAIN'), - 'mastodon_id' => env('MASTODON_ID'), - 'mastodon_secret' => env('MASTODON_SECRET'), - 'mastodon_redirect' => env('MASTODON_REDIRECT'), - 'mastodon_appname' => env('MASTODON_APPNAME'), - 'mastodon_timeout_seconds' => env("MASTODON_TIMEOUT_SECONDS", 5), + 'mastodon_domain' => env('MASTODON_DOMAIN'), + 'mastodon_id' => env('MASTODON_ID'), + 'mastodon_secret' => env('MASTODON_SECRET'), + 'mastodon_redirect' => env('MASTODON_REDIRECT'), + 'mastodon_appname' => env('MASTODON_APPNAME'), + 'mastodon_timeout_seconds' => env("MASTODON_TIMEOUT_SECONDS", 5), # Brouter 'brouter' => env('BROUTER', true), - 'brouter_url' => env('BROUTER_URL', 'https://brouter.de/'), - 'brouter_timeout' => env('BROUTER_TIMEOUT', 10), + 'brouter_url' => env('BROUTER_URL', 'https://brouter.de/'), + 'brouter_timeout' => env('BROUTER_TIMEOUT', 10), + + # Polyline + 'polyline_storage_path' => env('POLYLINE_STORAGE_PATH', 'polylines'), + 'polyline_storage_driver' => env('POLYLINE_STORAGE_DRIVER', 'local'), + 'polyline_clear_after_copy' => env('POLYLINE_CLEAR_AFTER_COPY', false), # DB_REST - 'db_rest' => env('DB_REST', 'https://v5.db.transport.rest/'), - 'db_rest_timeout' => env('DB_REST_TIMEOUT', 10), + 'db_rest' => env('DB_REST', 'https://v5.db.transport.rest/'), + 'db_rest_timeout' => env('DB_REST_TIMEOUT', 10), # Points - 'base_points' => [ + 'base_points' => [ 'time_window' => [ # time windows before and after a journey to get points 'good_enough' => [ @@ -45,17 +50,17 @@ 'nationalExpress' => env('BASE_POINTS_TRAIN_NATIONALEXPRESS', 10), ] ], - 'refresh' => [ + 'refresh' => [ 'max_trips_per_minute' => env('REFRESH_TRIPS_PER_MINUTE', 1) ], - 'cache' => [ + 'cache' => [ 'global-statistics-retention-seconds' => env('GLOBAL_STATISTICS_CACHE_RETENTION_SECONDS', 60 * 60), 'leaderboard-retention-seconds' => env('LEADERBOARD_CACHE_RETENTION_SECONDS', 5 * 60) ], - 'year_in_review' => [ + 'year_in_review' => [ 'alert' => env('YEAR_IN_REVIEW_ALERT', false), 'backend' => env('YEAR_IN_REVIEW_BACKEND', false), ], - 'webhooks_active' => env('WEBHOOKS_ACTIVE', false), - 'webfinger_active' => env('WEBFINGER_ACTIVE', false), + 'webhooks_active' => env('WEBHOOKS_ACTIVE', false), + 'webfinger_active' => env('WEBFINGER_ACTIVE', false), ]; diff --git a/storage/polylines/.gitignore b/storage/polylines/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/storage/polylines/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/tests/Feature/Commands/CleanUpTest.php b/tests/Feature/Commands/CleanUpTest.php index 755407991..a146dcd82 100644 --- a/tests/Feature/Commands/CleanUpTest.php +++ b/tests/Feature/Commands/CleanUpTest.php @@ -8,10 +8,12 @@ use App\Models\Trip; use App\Models\User; use App\Notifications\StatusLiked; +use App\Services\PolylineStorageService; use Illuminate\Console\Command; use Illuminate\Foundation\Testing\RefreshDatabase; use Illuminate\Support\Facades\Password; use Illuminate\Support\Str; +use PHPUnit\Framework\MockObject\Exception; use Tests\FeatureTestCase; class CleanUpTest extends FeatureTestCase @@ -96,17 +98,25 @@ public function testUsersThatHaventAcceptedPrivacyPolicyWithinADayAreRemoved(): $this->assertDatabaseCount('users', 0); } + /** + * @throws Exception + */ public function testPolylineWithoutAnyReferenceAreDeleted(): void { $this->assertDatabaseCount('poly_lines', 0); + $service = new PolylineStorageService(); - PolyLine::create([ - 'hash' => Str::uuid(), - 'polyline' => json_encode(['some json data']), - ]); + $polyline = PolyLine::create([ + 'hash' => Str::uuid(), + 'polyline' => json_encode(['some json data']), + ]); + $content = $polyline->polyline; // this will store the polyline in the storage + $hash = $polyline->hash; $this->assertDatabaseCount('poly_lines', 1); + $this->assertSame($content, $service->get($hash)); $this->artisan('app:clean-db:polylines')->assertExitCode(Command::SUCCESS); $this->assertDatabaseCount('poly_lines', 0); + $this->assertSame('', $service->get($hash)); //create a polyline with a reference and a parent //Checkin Factory creates a trip which creates a polyline