Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calcul et sauvegarde de métadonnées pour les ressources GBFS #1905

Merged
merged 24 commits into from
Nov 16, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c01b452
WIP
AntoineAugusti Nov 5, 2021
1553a0e
Add simple test
AntoineAugusti Nov 5, 2021
b814395
More tests
AntoineAugusti Nov 8, 2021
32aeb07
Not async
AntoineAugusti Nov 8, 2021
e623cf4
Store metadata for resources
AntoineAugusti Nov 8, 2021
a9bbc9a
Clean code
AntoineAugusti Nov 8, 2021
6d0e421
Merge branch 'master' into gbfs-metadata
AntoineAugusti Nov 9, 2021
dc853fd
Move to dedicated file
AntoineAugusti Nov 9, 2021
2464339
Merge branch 'master' into gbfs-metadata
AntoineAugusti Nov 9, 2021
e2dc605
Add details about CORS
AntoineAugusti Nov 10, 2021
8749f62
Merge branch 'master' into gbfs-metadata
AntoineAugusti Nov 10, 2021
8841a99
Add tests for set_gbfs_feeds_metadata
AntoineAugusti Nov 10, 2021
d252108
remove aom from dataset
AntoineAugusti Nov 10, 2021
2503f49
Make sure verify_on_exit! is actually called
thbar Nov 15, 2021
b3cc5cd
Bypass the wrapper to simplify the tests & make them async
thbar Nov 15, 2021
d362712
Rename HTTPClient to HTTPValidatorClient (+ remove as: for clarity)
thbar Nov 15, 2021
670b351
Merge branch 'master' into gbfs-metadata
thbar Nov 15, 2021
3098490
Fix credo error
thbar Nov 15, 2021
0c20ff0
Merge branch 'gbfs-metadata' of github.com:etalab/transport-site into…
thbar Nov 15, 2021
30cc1da
Extract query to separate method to make it easier to invoke it from IEx
thbar Nov 15, 2021
2bc3228
Merge branch 'master' into gbfs-metadata
thbar Nov 15, 2021
181cc8f
Merge branch 'master' into gbfs-metadata
thbar Nov 15, 2021
0dcbe72
Update test port to reflect currently in use value & fix tests
thbar Nov 15, 2021
80b68d7
Add a bit of documentation
thbar Nov 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/shared/lib/validation/gbfs_validator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ defmodule Shared.Validation.GBFSValidator do
A structure holding validation results for a GBFS feed
"""
@enforce_keys [:has_errors, :errors_count, :version_detected, :version_validated]
@derive Jason.Encoder
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

defstruct has_errors: false, errors_count: nil, version_detected: nil, version_validated: nil

@type t :: %__MODULE__{
Expand Down
22 changes: 20 additions & 2 deletions apps/shared/test/validation/gbfs_validator_test.exs
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
defmodule GBFSValidatorTest do
use ExUnit.Case, async: true
use ExUnit.Case, async: false
doctest Shared.Validation.GBFSValidator

import Mox

alias Shared.Validation.GBFSValidator.Summary
alias Shared.Validation.GBFSValidator.Wrapper, as: GBFSValidator

setup :verify_on_exit!
setup do
  # A bare `:verify_on_exit!` atom is just an expression that evaluates to itself and
  # registers nothing: call the Mox helper so expectations are really verified
  # when each test exits.
  verify_on_exit!()

  # Do not use a mock for the GBFS Validator as we'll mock HTTP calls.
  # The previous implementation is restored once the test is done.
  old_value = Application.fetch_env!(:transport, :gbfs_validator_impl)
  on_exit(fn -> Application.put_env(:transport, :gbfs_validator_impl, old_value) end)
  Application.put_env(:transport, :gbfs_validator_impl, Shared.Validation.GBFSValidator.HTTPClient)
end
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similaire aux tests utilisant le "page cache" dans l'app GBFS

thbar marked this conversation as resolved.
Show resolved Hide resolved

test "validate GBFS feed" do
Transport.HTTPoison.Mock
Expand Down Expand Up @@ -42,4 +48,16 @@ defmodule GBFSValidatorTest do
{:error, error} = GBFSValidator.validate("https://example.com/gbfs.json")
assert String.starts_with?(error, "impossible to query GBFS Validator")
end

test "can encode summary" do
  # The struct derives `Jason.Encoder`, so it must serialise to a plain JSON object.
  summary = %Summary{
    version_validated: "1.1",
    version_detected: "1.1",
    has_errors: false,
    errors_count: 0
  }

  expected_json =
    ~s({"errors_count":0,"has_errors":false,"version_detected":"1.1","version_validated":"1.1"})

  assert expected_json == Jason.encode!(summary)
end
end
166 changes: 165 additions & 1 deletion apps/transport/lib/transport/data_checker.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ defmodule Transport.DataChecker do
"""
alias Datagouvfr.Client.{Datasets, Discussions}
alias Mailjet.Client
alias DB.{Dataset, Repo}
alias DB.{Dataset, Repo, Resource}
alias Shared.Validation.GBFSValidator.Summary, as: GBFSValidationSummary
alias Shared.Validation.GBFSValidator.Wrapper, as: GBFSValidator
import TransportWeb.Router.Helpers
import Ecto.Query
require Logger
Expand Down Expand Up @@ -80,6 +82,166 @@ defmodule Transport.DataChecker do
)
end

@doc """
Computes and stores metadata for every GBFS resource.

Resources are selected when they belong to an active `bike-scooter-sharing` dataset,
have a URL ending with `gbfs.json` or a `gbfs` format, and have a URL that does not
match `station|free_bike`.

For each of them the format is set to `gbfs` and the metadata is replaced by the result
of `compute_gbfs_feed_metadata/1`. A resource that cannot be saved is logged and does not
stop the processing of the remaining ones. Returns `:ok`.
"""
def set_gbfs_feeds_metadata do
  resources =
    Resource
    |> join(:inner, [r], d in Dataset, on: r.dataset_id == d.id)
    |> where([_r, d], d.type == "bike-scooter-sharing" and d.is_active)
    |> where([r, _d], like(r.url, "%gbfs.json") or r.format == "gbfs")
    |> where([r, _d], not fragment("? ~ ?", r.url, "station|free_bike"))
    |> Repo.all()

  Logger.info("Fetching details about #{length(resources)} GBFS feeds")

  # Only side effects are wanted here, so iterate eagerly instead of building a stream.
  Enum.each(resources, fn resource ->
    Logger.info("Fetching GBFS metadata for #{resource.url} (##{resource.id})")

    changeset =
      Resource.changeset(resource, %{format: "gbfs", metadata: compute_gbfs_feed_metadata(resource)})

    # Do not silently drop persistence failures: report them and carry on.
    case Repo.update(changeset) do
      {:ok, _updated_resource} ->
        :ok

      {:error, failed_changeset} ->
        Logger.error(
          "Cannot save GBFS metadata for resource ##{resource.id}: #{inspect(failed_changeset.errors)}"
        )
    end
  end)
end

@doc """
Fetches the GBFS document found at `resource.url` and builds a metadata map from it.

On success the map holds the keys `:validation`, `:feeds`, `:versions`, `:languages`,
`:system_details`, `:types` and `:ttl`.

When the HTTP response is not a 200, the body is not valid JSON or the decoded document
has no `"data"` object, the problem is logged and an empty map is returned.
"""
@spec compute_gbfs_feed_metadata(Resource.t()) :: map()
def compute_gbfs_feed_metadata(resource) do
  # The helpers below only accept payloads with a "data" key: check the shape here so
  # that a malformed document ends up in the `else` branch instead of raising.
  with {:ok, %{status_code: 200, body: body}} <- http_client().get(resource.url),
       {:ok, %{"data" => data} = json} when is_map(data) <- Jason.decode(body) do
    %{
      validation: gbfs_validation(resource),
      feeds: gbfs_feeds(json),
      versions: gbfs_versions(json),
      languages: gbfs_languages(json),
      system_details: gbfs_system_details(json),
      types: gbfs_types(json),
      ttl: gbfs_ttl(json)
    }
  else
    e ->
      Logger.error("Cannot compute GBFS metadata for #{resource.url}: #{inspect(e)}")
      %{}
  end
end

# Runs the GBFS validator against the resource URL.
# Returns the validation summary, or `nil` when the validator reports an error.
@spec gbfs_validation(Resource.t()) :: GBFSValidationSummary.t() | nil
defp gbfs_validation(resource) do
  validation_result = GBFSValidator.validate(resource.url)

  case validation_result do
    {:error, _reason} -> nil
    {:ok, %GBFSValidationSummary{} = validation_summary} -> validation_summary
  end
end

# Detects the kind of service described by the payload from the feeds it advertises:
# `free_bike_status` means free floating, `station_information` means stations.
# Returns `nil` (and logs an error) when neither feed is present.
defp gbfs_types(%{"data" => _data} = payload) do
  free_floating = gbfs_has_feed?(payload, "free_bike_status")
  with_stations = gbfs_has_feed?(payload, "station_information")

  case {free_floating, with_stations} do
    {true, true} ->
      ["free_floating", "stations"]

    {true, false} ->
      ["free_floating"]

    {false, true} ->
      ["stations"]

    {false, false} ->
      Logger.error("Cannot detect GBFS types for feed #{inspect(payload)}")
      nil
  end
end

# Picks the source of the feed's ttl: the `free_bike_status` feed when the service is
# free floating, otherwise the `station_information` feed, otherwise the ttl of the
# payload itself. The chosen value is then resolved by `gbfs_feed_ttl/1`.
defp gbfs_ttl(%{"data" => _data} = payload) do
  feeds = gbfs_first_feed(payload)

  ttl_source =
    case gbfs_types(payload) do
      nil -> payload["ttl"]
      ["stations"] -> gbfs_feed_url_by_name(feeds, "station_information")
      ["free_floating"] -> gbfs_feed_url_by_name(feeds, "free_bike_status")
      ["free_floating", "stations"] -> gbfs_feed_url_by_name(feeds, "free_bike_status")
    end

  gbfs_feed_ttl(ttl_source)
end

# Resolves a ttl from either a ready-to-use value or the URL of a feed.
#
# - a non-negative integer is returned as is
# - a binary is treated as a feed URL: the feed is fetched and its "ttl" key returned,
#   or `nil` (with an error log) when the request or the JSON decoding fails
# - anything else (for instance `nil` when the payload has no ttl or the feed has no URL)
#   yields `nil` instead of raising a `FunctionClauseError`
defp gbfs_feed_ttl(value) when is_integer(value) and value >= 0, do: value

defp gbfs_feed_ttl(feed_url) when is_binary(feed_url) do
  with {:ok, %{status_code: 200, body: body}} <- http_client().get(feed_url),
       {:ok, json} <- Jason.decode(body) do
    json["ttl"]
  else
    e ->
      Logger.error("Cannot get GBFS ttl details: #{inspect(e)}")
      nil
  end
end

defp gbfs_feed_ttl(_value), do: nil

# Fetches the `system_information` feed and extracts the system timezone and name.
# Returns `nil` when the feed is not advertised, or (with an error log) when it
# cannot be fetched or decoded.
defp gbfs_system_details(%{"data" => _data} = payload) do
  case payload |> gbfs_first_feed() |> gbfs_feed_url_by_name("system_information") do
    nil ->
      nil

    system_information_url ->
      with {:ok, %{status_code: 200, body: body}} <- http_client().get(system_information_url),
           {:ok, decoded} <- Jason.decode(body) do
        %{
          timezone: decoded["data"]["timezone"],
          name: decoded["data"]["name"]
        }
      else
        e ->
          Logger.error("Cannot get GBFS system_information details: #{inspect(e)}")
          nil
      end
  end
end

# Returns the "feeds" entry of one language section of the payload: "en" when present,
# then "fr", then the first language listed by `gbfs_languages/1`.
defp gbfs_first_feed(%{"data" => data} = payload) do
  language_section =
    data["en"] || data["fr"] || data[payload |> gbfs_languages() |> Enum.at(0)]

  language_section["feeds"]
end

# Lists the keys of the "data" object of the payload (used as language codes by callers).
defp gbfs_languages(%{"data" => sections_by_language}), do: Map.keys(sections_by_language)

# Lists the GBFS versions available for the feed, sorted in descending (string) order.
#
# - when no `gbfs_versions` feed is advertised, falls back to the "version" key of the
#   payload, or "1.0" when the key is absent
# - otherwise the `gbfs_versions` feed is fetched; `nil` is returned when the request
#   fails, the body is not valid JSON or the document has no `data.versions` list
#   (a malformed document used to raise in `Enum.map/2`)
@spec gbfs_versions(map()) :: [binary()] | nil
defp gbfs_versions(%{"data" => _data} = payload) do
  case payload |> gbfs_first_feed() |> gbfs_feed_url_by_name("gbfs_versions") do
    nil ->
      [Map.get(payload, "version", "1.0")]

    gbfs_versions_url ->
      with {:ok, %{status_code: 200, body: body}} <- http_client().get(gbfs_versions_url),
           {:ok, %{"data" => %{"versions" => versions}}} when is_list(versions) <-
             Jason.decode(body) do
        versions |> Enum.map(fn version -> version["version"] end) |> Enum.sort(:desc)
      else
        _ -> nil
      end
  end
end

# Returns the "url" of the first feed matching `name` (see `gbfs_feed_is_named?/2`),
# or `nil` when no feed matches.
@spec gbfs_feed_url_by_name(list(), binary()) :: binary() | nil
defp gbfs_feed_url_by_name(feeds, name) do
  matching_feed = Enum.find(feeds, &gbfs_feed_is_named?(&1, name))
  matching_feed["url"]
end

@doc """
Tells whether the `"name"` of a feed entry is `name`, or `name` followed by `.json`.
"""
@spec gbfs_feed_is_named?(map(), binary()) :: boolean()
def gbfs_feed_is_named?(map, name) do
  # Producers frequently (and wrongly) suffix feed names with `.json`:
  # accept that spelling too.
  accepted_names = [name, "#{name}.json"]
  map["name"] in accepted_names
end

@doc """
Tells whether `name` is one of the feed names returned by `gbfs_feeds/1` for the payload.
"""
@spec gbfs_has_feed?(map(), binary()) :: boolean()
def gbfs_has_feed?(%{"data" => _data} = payload, name) do
  payload |> gbfs_feeds() |> Enum.member?(name)
end

@doc """
Lists the names of the feeds advertised by a GBFS payload.

A trailing `.json` is removed from each name, as producers often add it by mistake.
Only the suffix is removed: a `.json` found elsewhere in a name is left untouched.
"""
@spec gbfs_feeds(map()) :: [binary()]
def gbfs_feeds(%{"data" => _data} = payload) do
  payload
  |> gbfs_first_feed()
  |> Enum.map(fn feed -> String.replace_suffix(feed["name"], ".json", "") end)
end

defp make_str({delay, datasets}) do
r_str =
datasets
Expand Down Expand Up @@ -184,4 +346,6 @@ defmodule Transport.DataChecker do
false
)
end

# HTTP client module used for every request made by this module, looked up through
# the shared wrapper on each call.
defp http_client do
  Transport.Shared.Wrapper.HTTPoison.impl()
end
end
1 change: 1 addition & 0 deletions apps/transport/test/support/mocks.ex
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Mock modules defined with Mox. Each `Mox.defmock/2` call declares the module named
# in the first argument as a mock of the behaviour given in `for:`.
Mox.defmock(Transport.ExAWS.Mock, for: ExAws.Behaviour)
Mox.defmock(Transport.HTTPoison.Mock, for: HTTPoison.Base)
Mox.defmock(Validation.Validator.Mock, for: Shared.Validation.Validator)
# Mock of the GBFS validator wrapper behaviour.
Mox.defmock(Shared.Validation.GBFSValidator.Mock, for: Shared.Validation.GBFSValidator.Wrapper)
Loading