Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] initial u256 conversions commit #51

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ An attempt is made to ensure that the dataset schemas conform to a common set of
- By default, rows should contain enough information to be order-able
- Columns should be named by their JSON-RPC or ethers.rs defaults, except in cases where a much more explicit name is available
- To make joins across tables easier, a given piece of information should use the same datatype and column name across tables when possible
- Large ints such as `u256` should allow multiple conversions. A `value` column of type `u256` should allow: `value_binary`, `value_string`, `value_f64`, `value_decimal128`, `value_u64_high`, and `value_u64_low`
- Large ints such as `u256` should allow multiple conversions. A `value` column of type `u256` should allow: `value_binary`, `value_string`, `value_f32`, `value_f64`, `value_u32`, `value_u64`, and `value_d128`
- Columns related to non-identifying cryptographic signatures are omitted by default. For example, `state_root` of a block or `v`/`r`/`s` of a transaction
- Integer values that can never be negative should be stored as unsigned integers

Expand Down
5 changes: 5 additions & 0 deletions crates/cli/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ pub struct Args {
#[arg(long, value_name="COLS", num_args(0..), verbatim_doc_comment, help_heading="Content Options")]
pub columns: Option<Vec<String>>,

/// Set output datatype(s) of U256 integers
/// [default: binary, string, f64]
#[arg(long, num_args(1..), help_heading = "Content Options", verbatim_doc_comment)]
pub u256_types: Option<Vec<String>>,

/// Use hex string encoding for binary columns
#[arg(long, help_heading = "Content Options")]
pub hex: bool,
Expand Down
34 changes: 33 additions & 1 deletion crates/cli/src/parse/query.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use std::{collections::HashMap, sync::Arc};
use std::{
collections::{HashMap, HashSet},
sync::Arc,
};

use ethers::prelude::*;
use hex::FromHex;
Expand All @@ -7,6 +10,7 @@ use cryo_freeze::{ColumnEncoding, Datatype, FileFormat, MultiQuery, ParseError,

use super::{blocks, file_output, transactions};
use crate::args::Args;
use cryo_freeze::U256Type;

pub(crate) async fn parse_query(
args: &Args,
Expand Down Expand Up @@ -81,6 +85,33 @@ fn parse_datatypes(raw_inputs: &Vec<String>) -> Result<Vec<Datatype>, ParseError
fn parse_schemas(args: &Args) -> Result<HashMap<Datatype, Table>, ParseError> {
let datatypes = parse_datatypes(&args.datatype)?;
let output_format = file_output::parse_output_format(args)?;

let u256_types = if let Some(raw_u256_types) = &args.u256_types {
let mut u256_types: HashSet<U256Type> = HashSet::new();
for raw in raw_u256_types.iter() {
let u256_type = match raw.to_lowercase() {
raw if raw == "binary" => U256Type::Binary,
raw if raw == "string" => U256Type::String,
raw if raw == "str" => U256Type::String,
raw if raw == "f32" => U256Type::F32,
raw if raw == "float32" => U256Type::F32,
raw if raw == "f64" => U256Type::F64,
raw if raw == "float64" => U256Type::F64,
raw if raw == "float" => U256Type::F64,
raw if raw == "u32" => U256Type::U32,
raw if raw == "uint32" => U256Type::U32,
raw if raw == "u64" => U256Type::U64,
raw if raw == "uint64" => U256Type::U64,
raw if raw == "decimal128" => U256Type::Decimal128,
raw if raw == "d128" => U256Type::Decimal128,
_ => return Err(ParseError::ParseError("bad u256 type".to_string())),
};
u256_types.insert(u256_type);
}
u256_types
} else {
HashSet::from_iter(vec![U256Type::Binary, U256Type::String, U256Type::F64])
};
let binary_column_format = match args.hex | (output_format != FileFormat::Parquet) {
true => ColumnEncoding::Hex,
false => ColumnEncoding::Binary,
Expand All @@ -92,6 +123,7 @@ fn parse_schemas(args: &Args) -> Result<HashMap<Datatype, Table>, ParseError> {
.map(|datatype| {
datatype
.table_schema(
&u256_types,
&binary_column_format,
&args.include_columns,
&args.exclude_columns,
Expand Down
15 changes: 12 additions & 3 deletions crates/cli/src/summaries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use std::time::SystemTime;
use thousands::Separable;

use cryo_freeze::{
BlockChunk, Chunk, ChunkData, Datatype, FileOutput, FreezeSummary, MultiQuery, Source, Table,
TransactionChunk,
BlockChunk, Chunk, ChunkData, ColumnType, Datatype, FileOutput, FreezeSummary, MultiQuery,
Source, Table, TransactionChunk,
};

const TITLE_R: u8 = 0;
Expand Down Expand Up @@ -123,7 +123,16 @@ fn print_schema(name: &Datatype, schema: &Table) {
print_header("schema for ".to_string() + name.dataset().name());
for column in schema.columns() {
if let Some(column_type) = schema.column_type(column) {
print_bullet(column, column_type.as_str());
if column_type == ColumnType::UInt256 {
for uint256_type in schema.u256_types.iter() {
print_bullet(
column.to_owned() + uint256_type.suffix().as_str(),
uint256_type.to_columntype().as_str(),
);
}
} else {
print_bullet(column, column_type.as_str());
}
}
}
println!();
Expand Down
4 changes: 2 additions & 2 deletions crates/freeze/src/datasets/balance_diffs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ impl Dataset for BalanceDiffs {
("transaction_index", ColumnType::Binary),
("transaction_hash", ColumnType::Binary),
("address", ColumnType::Binary),
("from_value", ColumnType::Binary),
("to_value", ColumnType::Binary),
("from_value", ColumnType::UInt256),
("to_value", ColumnType::UInt256),
("chain_id", ColumnType::UInt64),
])
}
Expand Down
10 changes: 5 additions & 5 deletions crates/freeze/src/datasets/blocks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ use crate::{
types::{
conversions::{ToVecHex, ToVecU8},
BlockChunk, Blocks, CollectError, ColumnType, Dataset, Datatype, RowFilter, Source, Table,
TransactionChunk,
TransactionChunk, U256Type,
},
with_series, with_series_binary,
with_series, with_series_binary, with_series_u256,
};

pub(crate) type BlockTxGasTuple<TX> = Result<(Block<TX>, Option<Vec<u32>>), CollectError>;
Expand Down Expand Up @@ -321,7 +321,7 @@ pub(crate) struct TransactionColumns {
nonce: Vec<u64>,
from_address: Vec<Vec<u8>>,
to_address: Vec<Option<Vec<u8>>>,
value: Vec<String>,
value: Vec<U256>,
input: Vec<Vec<u8>>,
gas_limit: Vec<u32>,
gas_used: Vec<u32>,
Expand Down Expand Up @@ -364,7 +364,7 @@ impl TransactionColumns {
with_series!(cols, "nonce", self.nonce, schema);
with_series_binary!(cols, "from_address", self.from_address, schema);
with_series_binary!(cols, "to_address", self.to_address, schema);
with_series!(cols, "value", self.value, schema);
with_series_u256!(cols, "value", self.value, schema);
with_series_binary!(cols, "input", self.input, schema);
with_series!(cols, "gas_limit", self.gas_limit, schema);
with_series!(cols, "gas_used", self.gas_used, schema);
Expand Down Expand Up @@ -471,7 +471,7 @@ fn process_transaction(
columns.nonce.push(tx.nonce.as_u64());
}
if schema.has_column("value") {
columns.value.push(tx.value.to_string());
columns.value.push(tx.value);
}
if schema.has_column("input") {
columns.input.push(tx.input.to_vec());
Expand Down
2 changes: 1 addition & 1 deletion crates/freeze/src/datasets/native_transfers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ impl Dataset for NativeTransfers {
("transaction_hash", ColumnType::Binary),
("from_address", ColumnType::Binary),
("to_address", ColumnType::Binary),
("value", ColumnType::Binary),
("value", ColumnType::UInt256),
("chain_id", ColumnType::UInt64),
])
}
Expand Down
2 changes: 1 addition & 1 deletion crates/freeze/src/datasets/traces.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl Dataset for Traces {
HashMap::from_iter(vec![
("action_from", ColumnType::Binary),
("action_to", ColumnType::Binary),
("action_value", ColumnType::String),
("action_value", ColumnType::UInt256),
("action_gas", ColumnType::UInt32),
("action_input", ColumnType::Binary),
("action_call_type", ColumnType::String),
Expand Down
4 changes: 1 addition & 3 deletions crates/freeze/src/datasets/transactions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@ impl Dataset for Transactions {
("nonce", ColumnType::Int32),
("from_address", ColumnType::Binary),
("to_address", ColumnType::Binary),
("value", ColumnType::Decimal128),
("value_str", ColumnType::String),
("value_float", ColumnType::Float64),
("value", ColumnType::UInt256),
("input", ColumnType::Binary),
("gas_limit", ColumnType::UInt32),
("gas_used", ColumnType::UInt32),
Expand Down
73 changes: 73 additions & 0 deletions crates/freeze/src/types/dataframes/creation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,76 @@ macro_rules! with_series_binary {
}
};
}

/// convert a Vec of U256 values into one Series per representation requested by the
/// schema's `u256_types` set, pushing each Series onto `$all_series`
///
/// For a base column `$name`, each emitted Series is named `$name` plus the
/// representation's suffix (e.g. `value_binary`, `value_f64`). Does nothing when the
/// schema does not include `$name`.
#[macro_export]
macro_rules! with_series_u256 {
    ($all_series:expr, $name:expr, $value:expr, $schema:expr) => {
        if $schema.has_column($name) {
            // binary
            if $schema.u256_types.contains(&U256Type::Binary) {
                let name = $name.to_string() + U256Type::Binary.suffix().as_str();
                let name = name.as_str();

                let converted: Vec<Vec<u8>> = $value.iter().map(|v| v.to_vec_u8()).collect();
                // NOTE(review): this inspects the type of the *base* column ($name), not the
                // suffixed column — confirm ColumnType::Hex is the intended switch here
                // (the schema's `binary_type: ColumnEncoding` field may be the better signal)
                if let Some(ColumnType::Hex) = $schema.column_type($name) {
                    $all_series.push(Series::new(name, converted.to_vec_hex()));
                } else {
                    $all_series.push(Series::new(name, converted));
                }
            }

            // string (decimal digits via U256's Display impl)
            if $schema.u256_types.contains(&U256Type::String) {
                let name = $name.to_string() + U256Type::String.suffix().as_str();
                let name = name.as_str();

                let converted: Vec<String> = $value.iter().map(|v| v.to_string()).collect();
                $all_series.push(Series::new(name, converted));
            }

            // float32 (lossy; None when the decimal string fails to parse)
            if $schema.u256_types.contains(&U256Type::F32) {
                let name = $name.to_string() + U256Type::F32.suffix().as_str();
                let name = name.as_str();

                let converted: Vec<Option<f32>> =
                    $value.iter().map(|v| v.to_string().parse::<f32>().ok()).collect();
                $all_series.push(Series::new(name, converted));
            }

            // float64 (lossy; None when the decimal string fails to parse)
            if $schema.u256_types.contains(&U256Type::F64) {
                let name = $name.to_string() + U256Type::F64.suffix().as_str();
                let name = name.as_str();

                let converted: Vec<Option<f64>> =
                    $value.iter().map(|v| v.to_string().parse::<f64>().ok()).collect();
                $all_series.push(Series::new(name, converted));
            }

            // u32 — `as_u32()` panics when the value does not fit in 32 bits (common for
            // wei-denominated values), so guard with `bits()` and emit None on overflow,
            // mirroring the graceful handling of the float branches
            if $schema.u256_types.contains(&U256Type::U32) {
                let name = $name.to_string() + U256Type::U32.suffix().as_str();
                let name = name.as_str();

                let converted: Vec<Option<u32>> = $value
                    .iter()
                    .map(|v| if v.bits() <= 32 { Some(v.as_u32()) } else { None })
                    .collect();
                $all_series.push(Series::new(name, converted));
            }

            // u64 — same overflow guard as the u32 branch
            if $schema.u256_types.contains(&U256Type::U64) {
                let name = $name.to_string() + U256Type::U64.suffix().as_str();
                let name = name.as_str();

                let converted: Vec<Option<u64>> = $value
                    .iter()
                    .map(|v| if v.bits() <= 64 { Some(v.as_u64()) } else { None })
                    .collect();
                $all_series.push(Series::new(name, converted));
            }

            // decimal128 — not yet implemented (this PR is WIP); `todo!` states intent
            // more clearly than a bare panic
            if $schema.u256_types.contains(&U256Type::Decimal128) {
                todo!("DECIMAL128 not implemented")
            }
        }
    };
}
2 changes: 1 addition & 1 deletion crates/freeze/src/types/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl FileFormat {
}

/// Encoding for binary data in a column
#[derive(Clone, Eq, PartialEq)]
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum ColumnEncoding {
/// Raw binary encoding
Binary,
Expand Down
2 changes: 1 addition & 1 deletion crates/freeze/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub use conversions::{ToVecHex, ToVecU8};
pub use datatypes::*;
pub use files::{ColumnEncoding, FileFormat, FileOutput};
pub use queries::{MultiQuery, RowFilter, SingleQuery};
pub use schemas::{ColumnType, Table};
pub use schemas::{ColumnType, Table, U256Type};
pub use sources::{RateLimiter, Source};
pub(crate) use summaries::FreezeSummaryAgg;
pub use summaries::{FreezeChunkSummary, FreezeSummary};
Expand Down
68 changes: 67 additions & 1 deletion crates/freeze/src/types/schemas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@

/// sort order for rows
pub sort_columns: Option<Vec<String>>,

Check failure on line 18 in crates/freeze/src/types/schemas.rs

View workflow job for this annotation

GitHub Actions / clippy

cannot find type `HashSet` in this scope

error[E0412]: cannot find type `HashSet` in this scope --> crates/freeze/src/types/schemas.rs:18:21 | 18 | pub u256_types: HashSet<U256Type>, | ^^^^^^^ not found in this scope | help: consider importing this struct | 1 + use std::collections::HashSet; |
/// representations to use for u256 columns
pub u256_types: HashSet<U256Type>,

/// representation to use for binary columns
pub binary_type: ColumnEncoding,
}

impl Table {
Expand All @@ -34,17 +40,68 @@
}
}

/// Output representation for a `u256` column; a schema may request several of
/// these at once, each producing its own suffixed column (see `suffix()`).
#[derive(Hash, Clone, Debug, Eq, PartialEq)]
pub enum U256Type {
    /// raw 32-byte binary representation
    Binary,
    /// decimal string representation
    String,
    /// lossy 32-bit float representation
    F32,
    /// lossy 64-bit float representation
    F64,
    /// 32-bit unsigned integer representation (only meaningful for small values)
    U32,
    /// 64-bit unsigned integer representation (only meaningful for small values)
    U64,
    /// 128-bit decimal representation
    Decimal128,
}

impl U256Type {
    /// map this U256 representation to the `ColumnType` used in table schemas
    pub fn to_columntype(&self) -> ColumnType {
        match *self {
            Self::Binary => ColumnType::Binary,
            Self::String => ColumnType::String,
            Self::F32 => ColumnType::Float32,
            Self::F64 => ColumnType::Float64,
            Self::U32 => ColumnType::UInt32,
            Self::U64 => ColumnType::UInt64,
            Self::Decimal128 => ColumnType::Decimal128,
        }
    }

    /// column name suffix appended to the base column name for this representation
    pub fn suffix(&self) -> String {
        let tag = match *self {
            Self::Binary => "_binary",
            Self::String => "_string",
            Self::F32 => "_f32",
            Self::F64 => "_f64",
            Self::U32 => "_u32",
            Self::U64 => "_u64",
            Self::Decimal128 => "_d128",
        };
        tag.to_string()
    }
}

/// datatype of column
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ColumnType {
/// UInt32 column type
UInt32,
/// UInt64 column type
UInt64,
/// U256 column type
UInt256,
/// Int32 column type
Int32,
/// Int64 column type
Int64,
/// Float32 column type
Float32,
/// Float64 column type
Float64,
/// Decimal128 column type
Expand All @@ -63,8 +120,10 @@
match *self {
ColumnType::UInt32 => "uint32",
ColumnType::UInt64 => "uint64",
ColumnType::UInt256 => "uint256",
ColumnType::Int32 => "int32",
ColumnType::Int64 => "int64",
ColumnType::Float32 => "float32",
ColumnType::Float64 => "float64",
ColumnType::Decimal128 => "decimal128",
ColumnType::String => "string",
Expand All @@ -84,8 +143,9 @@

impl Datatype {
/// get schema for a particular datatype
pub fn table_schema(

Check failure on line 146 in crates/freeze/src/types/schemas.rs

View workflow job for this annotation

GitHub Actions / clippy

cannot find type `HashSet` in this scope

error[E0412]: cannot find type `HashSet` in this scope --> crates/freeze/src/types/schemas.rs:146:22 | 146 | u256_types: &HashSet<U256Type>, | ^^^^^^^ not found in this scope | help: consider importing this struct | 1 + use std::collections::HashSet; |
&self,
u256_types: &HashSet<U256Type>,
binary_column_format: &ColumnEncoding,
include_columns: &Option<Vec<String>>,
exclude_columns: &Option<Vec<String>>,
Expand All @@ -104,7 +164,13 @@
}
columns.insert((*column.clone()).to_string(), *ctype);
}
let schema = Table { datatype: *self, sort_columns: sort, columns };
let schema = Table {
datatype: *self,
sort_columns: sort,
columns,
u256_types: u256_types.clone(),
binary_type: binary_column_format.clone(),
};
Ok(schema)
}
}
Expand Down
Loading
Loading