Skip to content

Commit

Permalink
Add Depth Anything example
Browse files Browse the repository at this point in the history
  • Loading branch information
robertknight committed Jan 23, 2024
1 parent eea65f4 commit 7d12d21
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 0 deletions.
4 changes: 4 additions & 0 deletions rten-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ path = "src/imagenet.rs"
name = "yolo"
path = "src/yolo.rs"

[[bin]]
name = "depth_anything"
path = "src/depth_anything.rs"

# Text
[[bin]]
name = "bert_qa"
Expand Down
1 change: 1 addition & 0 deletions rten-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ The examples have been chosen to cover common tasks and popular models.
This example works with a wide variety of models, such as ResNet, MobileNet,
ConvNeXt, ViT.
- **deeplab** - Semantic segmentation of images using [DeepLabv3](https://arxiv.org/abs/1706.05587)
- **depth_anything** - Monocular depth estimation using [Depth Anything](https://github.com/LiheYoung/Depth-Anything)
- **detr** - Object detection using [DETR](https://research.facebook.com/publications/end-to-end-object-detection-with-transformers/)
- **yolo** - Object detection using [YOLO v8](https://github.com/ultralytics/ultralytics)

Expand Down
113 changes: 113 additions & 0 deletions rten-examples/src/depth_anything.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use std::collections::VecDeque;
use std::error::Error;
use std::fs;

use rten::{FloatOperators, Model};
use rten_imageio::{normalize_image, read_image, write_image};
use rten_tensor::prelude::*;
use rten_tensor::{NdTensor, Tensor};

/// Command-line arguments accepted by this example.
struct Args {
    /// Path of the model file to load.
    model: String,

    /// Path of the image to estimate depth for.
    image: String,

    /// Path the generated depth map image is written to.
    output: String,
}

/// Parse the command-line arguments for this example.
///
/// Positional values are consumed in order as `<model> <image> [<output>]`.
/// `--help` prints usage information and exits the process with status 0.
///
/// # Errors
///
/// Returns a [`lexopt::Error`] if an unexpected option is encountered, if a
/// value is not valid Unicode, or if the `model` or `image` argument is
/// missing.
fn parse_args() -> Result<Args, lexopt::Error> {
    use lexopt::prelude::*;

    let mut values = VecDeque::new();
    let mut parser = lexopt::Parser::from_env();

    while let Some(arg) = parser.next()? {
        match arg {
            Value(val) => values.push_back(val.string()?),
            Long("help") => {
                println!(
                    "Perform monocular depth estimation on an image.
Usage: {bin_name} <model> <image> [<output>]
Args:
<model> - Input Depth Anything model
<image> - Image to process
<output> - Path to save depth image to. Defaults to \"depth-map.png\".
",
                    // Fall back to this example's own binary name if the
                    // executable name is unavailable.
                    bin_name = parser.bin_name().unwrap_or("depth_anything")
                );
                std::process::exit(0);
            }
            _ => return Err(arg.unexpected()),
        }
    }

    let model = values.pop_front().ok_or("missing `model` arg")?;
    let image = values.pop_front().ok_or("missing `image` arg")?;
    // Only allocate the default path when no output argument was supplied.
    let output = values
        .pop_front()
        .unwrap_or_else(|| "depth-map.png".into());

    Ok(Args {
        model,
        image,
        output,
    })
}

/// Perform monocular depth estimation using [Depth Anything][depth_anything].
///
/// The ONNX models can be obtained from
/// <https://github.com/fabio-sim/Depth-Anything-ONNX>. See the
/// [releases](https://github.com/fabio-sim/Depth-Anything-ONNX/releases) page
/// for pre-trained model links. The small ("vits") model is recommended for
/// CPU inference.
///
/// After downloading the model, it can be run on an image using:
///
/// ```sh
/// tools/convert-onnx.py depth_anything.onnx
/// cargo run --release --bin depth_anything depth_anything.rten image.jpg
/// ```
///
/// This will generate a depth map as `depth-map.png`.
///
/// # Errors
///
/// Returns an error if argument parsing fails, if the model or image cannot
/// be read, or if inference, resizing or writing the output image fails.
///
/// [depth_anything]: <https://github.com/LiheYoung/Depth-Anything>
fn main() -> Result<(), Box<dyn Error>> {
    let args = parse_args()?;
    let model_bytes = fs::read(args.model)?;
    let model = Model::load(&model_bytes)?;

    let mut image: Tensor = read_image(&args.image)?.into();
    // Remember the original resolution so the depth map can be scaled back to
    // it after inference.
    let [_, orig_height, orig_width] = image.shape().try_into()?;
    normalize_image(image.nd_view_mut());
    image.insert_axis(0); // Add batch dim

    // Input size taken from README in https://github.com/fabio-sim/Depth-Anything-ONNX.
    let [input_h, input_w] = [518, 518];
    let image = image.resize_image([input_h, input_w])?;

    // Run model to estimate depth for each pixel.
    // Generates a (batch, depth, height, width) tensor, where `depth` == 1.
    let mut output: NdTensor<f32, 4> = model.run_one(image.view().into(), None)?.try_into()?;

    // Normalize depth values to be in the range [0, 1].
    let min = output
        .reduce_min(None, false /* keep_dims */)?
        .item()
        .copied()
        .expect("reduction over all axes should yield a single value");
    let max = output
        .reduce_max(None, false /* keep_dims */)?
        .item()
        .copied()
        .expect("reduction over all axes should yield a single value");

    // A constant depth map has `max == min`, which would turn every pixel into
    // 0/0 = NaN. Emit an all-zero map in that case instead.
    let range = max - min;
    if range > 0.0 {
        output.apply(|x| (x - min) / range);
    } else {
        output.apply(|_| 0.0);
    }

    // Resize output map back to original input size and write to file.
    let resized = output.resize_image([orig_height, orig_width])?;
    let resized = resized.slice::<3, _>(0);
    write_image(&args.output, resized)?;

    Ok(())
}

0 comments on commit 7d12d21

Please sign in to comment.