Skip to content

Commit

Permalink
Calcul et sauvegarde de métadonnées pour les ressources GBFS (#1905)
Browse files Browse the repository at this point in the history
* WIP

* Add simple test

* More tests

* Not async

* Store metadata for resources

* Clean code

* Move to dedicated file

* Add details about CORS

* Add tests for set_gbfs_feeds_metadata

* remove aom from dataset

* Make sure verify_on_exit! is actually called

The previous use was just an atom expression, evaluated but not leading to ExUnit callback call.

* Bypass the wrapper to simplify the tests & make them async

* Rename HTTPClient to HTTPValidatorClient (+ remove as: for clarity)

* Fix credo error

* Extract query to separate method to make it easier to invoke it from IEx

* Merge branch 'master' into gbfs-metadata

* Update test port to reflect currently in use value & fix tests

* Add a bit of documentation

Co-authored-by: Thibaut Barrère <[email protected]>
  • Loading branch information
AntoineAugusti and thbar authored Nov 16, 2021
1 parent f2a5b5d commit 069d93b
Show file tree
Hide file tree
Showing 8 changed files with 488 additions and 6 deletions.
3 changes: 2 additions & 1 deletion apps/shared/lib/validation/gbfs_validator.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ defmodule Shared.Validation.GBFSValidator do
A structure holding validation results for a GBFS feed
"""
@enforce_keys [:has_errors, :errors_count, :version_detected, :version_validated]
@derive Jason.Encoder
defstruct has_errors: false, errors_count: nil, version_detected: nil, version_validated: nil

@type t :: %__MODULE__{
Expand All @@ -28,7 +29,7 @@ defmodule Shared.Validation.GBFSValidator do
def validate(url), do: impl().validate(url)
end

defmodule HTTPClient do
defmodule HTTPValidatorClient do
@moduledoc """
An HTTP GBFS Validator calling a third party API
"""
Expand Down
19 changes: 15 additions & 4 deletions apps/shared/test/validation/gbfs_validator_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ defmodule GBFSValidatorTest do

import Mox

alias Shared.Validation.GBFSValidator.Summary
alias Shared.Validation.GBFSValidator.Wrapper, as: GBFSValidator
alias Shared.Validation.GBFSValidator.{HTTPValidatorClient, Summary}

setup :verify_on_exit!

Expand All @@ -32,14 +31,26 @@ defmodule GBFSValidatorTest do
version_detected: "1.1",
version_validated: "1.1"
}
assert {:ok, ^expected} = GBFSValidator.validate("https://example.com/gbfs.json")
assert {:ok, ^expected} = HTTPValidatorClient.validate("https://example.com/gbfs.json")
end

test "on invalid server response" do
Transport.HTTPoison.Mock |> expect(:post, fn _url, _, _ -> {:ok, %HTTPoison.Response
{status_code: 500}} end)

{:error, error} = GBFSValidator.validate("https://example.com/gbfs.json")
{:error, error} = HTTPValidatorClient.validate("https://example.com/gbfs.json")
assert String.starts_with?(error, "impossible to query GBFS Validator")
end

test "can encode summary" do
assert """
{"errors_count":0,"has_errors":false,"version_detected":"1.1","version_validated":"1.1"}\
"""
== Jason.encode!(%Summary{
errors_count: 0,
has_errors: false,
version_detected: "1.1",
version_validated: "1.1"
})
end
end
228 changes: 228 additions & 0 deletions apps/transport/lib/transport/gbfs_metadata.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
defmodule Transport.GBFSMetadata do
@moduledoc """
Compute and store metadata for GBFS resources.
"""
alias DB.{Dataset, Repo, Resource}
alias Shared.Validation.GBFSValidator.Summary, as: GBFSValidationSummary
alias Shared.Validation.GBFSValidator.Wrapper, as: GBFSValidator
import Ecto.Query
require Logger

@doc """
It is a bit of work, currently, to extract the list of `gbfs.json` endpoints,
for instance because `format` is not enough alone to filter them.
See https://github.com/etalab/transport-site/issues/1891#issuecomment-958888421 for some background.
"""
def gbfs_feeds_query do
Resource
|> join(:inner, [r], d in Dataset, on: r.dataset_id == d.id)
|> where([_r, d], d.type == "bike-scooter-sharing" and d.is_active)
|> where([r, _d], like(r.url, "%gbfs.json") or r.format == "gbfs")
|> where([r, _d], not fragment("? ~ ?", r.url, "station|free_bike"))
end

def set_gbfs_feeds_metadata do
resources = gbfs_feeds_query() |> Repo.all()

Logger.info("Fetching details about #{Enum.count(resources)} GBFS feeds")

resources
|> Stream.map(fn resource ->
Logger.info("Fetching GBFS metadata for #{resource.url} (##{resource.id})")
changeset = Resource.changeset(resource, %{format: "gbfs", metadata: compute_feed_metadata(resource)})
Repo.update!(changeset)
end)
|> Stream.run()
end

@doc """
This function does 2 HTTP calls on a given resource url, and returns a report
with metadata and also validation status (using a third-party HTTP validator).
"""
@spec compute_feed_metadata(Resource.t()) :: map()
def compute_feed_metadata(resource) do
with {:ok, %{status_code: 200, body: body} = response} <-
http_client().get(resource.url, [{"origin", website_url()}]),
{:ok, json} <- Jason.decode(body) do
%{
validation: validation(resource),
has_cors: has_cors?(response),
is_cors_allowed: cors_headers_allows_self?(response),
feeds: feeds(json),
versions: versions(json),
languages: languages(json),
system_details: system_details(json),
types: types(json),
ttl: ttl(json)
}
else
e ->
Logger.error(inspect(e))
%{}
end
end

@spec validation(Resource.t()) :: GBFSValidationSummary.t() | nil
defp validation(resource) do
case GBFSValidator.validate(resource.url) do
{:ok, %GBFSValidationSummary{} = summary} -> summary
{:error, _} -> nil
end
end

defp types(%{"data" => _data} = payload) do
has_bike_status = has_feed?(payload, "free_bike_status")
has_station_information = has_feed?(payload, "station_information")

cond do
has_bike_status and has_station_information ->
["free_floating", "stations"]

has_bike_status ->
["free_floating"]

has_station_information ->
["stations"]

true ->
Logger.error("Cannot detect GBFS types for feed #{inspect(payload)}")
nil
end
end

defp cors_header_value(%HTTPoison.Response{headers: headers}) do
headers = headers |> Enum.into(%{}, fn {h, v} -> {String.downcase(h), v} end)
Map.get(headers, "access-control-allow-origin")
end

@doc """
Find the value of the `Access-Control-Allow-Origin` header
iex> Transport.GBFSMetadata.has_cors?(%HTTPoison.Response{headers: []})
false
iex> Transport.GBFSMetadata.has_cors?(%HTTPoison.Response{headers: [{"access-control-allow-origin", "*"}]})
true
iex> Transport.GBFSMetadata.has_cors?(%HTTPoison.Response{headers: [{"Access-Control-Allow-Origin", "*"}]})
true
"""
def has_cors?(%HTTPoison.Response{} = response) do
not is_nil(cors_header_value(response))
end

@doc """
Determines if the CORS header allows transport.data.gouv.fr
iex> Transport.GBFSMetadata.cors_headers_allows_self?(%HTTPoison.Response{headers: []})
false
iex> Transport.GBFSMetadata.cors_headers_allows_self?(%HTTPoison.Response{headers: [{"access-control-allow-origin", "*"}]})
true
iex> Transport.GBFSMetadata.cors_headers_allows_self?(%HTTPoison.Response{headers: [{"Access-Control-Allow-Origin", "*"}]})
true
iex> Transport.GBFSMetadata.cors_headers_allows_self?(%HTTPoison.Response{headers: [{"Access-Control-Allow-Origin", "http://127.0.0.1:5100"}]})
true
"""
def cors_headers_allows_self?(%HTTPoison.Response{} = response) do
Enum.member?([website_url(), "*"], cors_header_value(response))
end

defp ttl(%{"data" => _data} = payload) do
feed = payload |> first_feed()

value =
case types(payload) do
["free_floating", "stations"] -> feed |> feed_url_by_name("free_bike_status")
["free_floating"] -> feed |> feed_url_by_name("free_bike_status")
["stations"] -> feed |> feed_url_by_name("station_information")
nil -> payload["ttl"]
end

feed_ttl(value)
end

defp feed_ttl(value) when is_integer(value) and value >= 0, do: value

defp feed_ttl(feed_url) when is_binary(feed_url) do
with {:ok, %{status_code: 200, body: body}} <- http_client().get(feed_url),
{:ok, json} <- Jason.decode(body) do
json["ttl"]
else
e ->
Logger.error("Cannot get GBFS ttl details: #{inspect(e)}")
nil
end
end

defp system_details(%{"data" => _data} = payload) do
feed_url = payload |> first_feed() |> feed_url_by_name("system_information")

if not is_nil(feed_url) do
with {:ok, %{status_code: 200, body: body}} <- http_client().get(feed_url),
{:ok, json} <- Jason.decode(body) do
%{
timezone: json["data"]["timezone"],
name: json["data"]["name"]
}
else
e ->
Logger.error("Cannot get GBFS system_information details: #{inspect(e)}")
nil
end
end
end

defp first_feed(%{"data" => data} = payload) do
(data["en"] || data["fr"] || data[payload |> languages() |> Enum.at(0)])["feeds"]
end

defp languages(%{"data" => data}) do
Map.keys(data)
end

@spec versions(map()) :: [binary()] | nil
defp versions(%{"data" => _data} = payload) do
versions_url = payload |> first_feed() |> feed_url_by_name("gbfs_versions")

if is_nil(versions_url) do
[Map.get(payload, "version", "1.0")]
else
with {:ok, %{status_code: 200, body: body}} <- http_client().get(versions_url),
{:ok, json} <- Jason.decode(body) do
json["data"]["versions"] |> Enum.map(fn json -> json["version"] end) |> Enum.sort(:desc)
else
_ -> nil
end
end
end

@spec feed_url_by_name(list(), binary()) :: binary() | nil
defp feed_url_by_name(feeds, name) do
Enum.find(feeds, fn map -> feed_is_named?(map, name) end)["url"]
end

@spec feed_is_named?(map(), binary()) :: boolean()
def feed_is_named?(map, name) do
# Many people make the mistake of appending `.json` to feed names
# so try to match this as well
Enum.member?([name, "#{name}.json"], map["name"])
end

@spec has_feed?(map(), binary()) :: boolean()
def has_feed?(%{"data" => _data} = payload, name) do
Enum.member?(feeds(payload), name)
end

def feeds(%{"data" => _data} = payload) do
# Remove potential ".json" at the end of feed names as people
# often make this mistake
payload |> first_feed() |> Enum.map(fn feed -> String.replace(feed["name"], ".json", "") end)
end

defp http_client, do: Transport.Shared.Wrapper.HTTPoison.impl()
defp website_url, do: TransportWeb.Endpoint.url()
end
2 changes: 2 additions & 0 deletions apps/transport/lib/transport/scheduler.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ defmodule Transport.Scheduler do
{"@daily", {Transport.DataChecker, :outdated_data, []}},
# Set inactive data
{"@daily", {Transport.DataChecker, :inactive_data, []}},
# Compute metadata for GBFS resources
{"@daily", {Transport.GBFSMetadata, :set_gbfs_feeds_metadata, []}},
# Watch for new comments on datasets
{"@daily", {Transport.CommentsChecker, :check_for_new_comments, []}},
# Delete orphan community resources
Expand Down
1 change: 1 addition & 0 deletions apps/transport/test/support/mocks.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Mox.defmock(Transport.ExAWS.Mock, for: ExAws.Behaviour)
Mox.defmock(Transport.HTTPoison.Mock, for: HTTPoison.Base)
Mox.defmock(Validation.Validator.Mock, for: Shared.Validation.Validator)
Mox.defmock(Shared.Validation.GBFSValidator.Mock, for: Shared.Validation.GBFSValidator.Wrapper)
Mox.defmock(Transport.Rambo.Mock, for: Transport.RamboLauncher)
Loading

0 comments on commit 069d93b

Please sign in to comment.