-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Run the examples in the new format #18946
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
50f9345
60c2a9e
ec321cc
ec77fba
14a91c1
ca8300f
a4cac2f
5110e4c
f8c18f6
33f17d4
db30d51
2f5c63f
383993a
2c98086
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,10 +21,11 @@ | |
| //! | ||
| //! ## Usage | ||
| //! ```bash | ||
| //! cargo run --example builtin_functions -- [date_time|function_factory|regexp] | ||
| //! cargo run --example builtin_functions -- [all|date_time|function_factory|regexp] | ||
| //! ``` | ||
| //! | ||
| //! Each subcommand runs a corresponding example: | ||
| //! - `all` — run all examples included in this module | ||
| //! - `date_time` — examples of date-time related functions and queries | ||
| //! - `function_factory` — register `CREATE FUNCTION` handler to implement SQL macros | ||
| //! - `regexp` — examples of using regular expression functions | ||
|
|
@@ -38,6 +39,7 @@ use std::str::FromStr; | |
| use datafusion::error::{DataFusionError, Result}; | ||
|
|
||
| enum ExampleKind { | ||
| All, | ||
| DateTime, | ||
| FunctionFactory, | ||
| Regexp, | ||
|
|
@@ -46,6 +48,7 @@ enum ExampleKind { | |
| impl AsRef<str> for ExampleKind { | ||
| fn as_ref(&self) -> &str { | ||
| match self { | ||
| Self::All => "all", | ||
| Self::DateTime => "date_time", | ||
| Self::FunctionFactory => "function_factory", | ||
| Self::Regexp => "regexp", | ||
|
|
@@ -58,6 +61,7 @@ impl FromStr for ExampleKind { | |
|
|
||
| fn from_str(s: &str) -> Result<Self> { | ||
| match s { | ||
| "all" => Ok(Self::All), | ||
| "date_time" => Ok(Self::DateTime), | ||
| "function_factory" => Ok(Self::FunctionFactory), | ||
| "regexp" => Ok(Self::Regexp), | ||
|
|
@@ -67,12 +71,38 @@ impl FromStr for ExampleKind { | |
| } | ||
|
|
||
| impl ExampleKind { | ||
| const ALL: [Self; 3] = [Self::DateTime, Self::FunctionFactory, Self::Regexp]; | ||
| const ALL_VARIANTS: [Self; 4] = [ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When looking at the amount of boilerplate code, I think we can use strum to do the same thing: https://crates.io/crates/strum. I know that, in general, adding a new dependency is something we try to avoid, but given that strum is already in the workspace, using it in the examples seems reasonable to me. Specifically,
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was actually considering using `strum` as well. To avoid scope creep for this change, I’d prefer to finish this PR as-is.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this sounds like a good plan |
||
| Self::All, | ||
| Self::DateTime, | ||
| Self::FunctionFactory, | ||
| Self::Regexp, | ||
| ]; | ||
|
|
||
| const RUNNABLE_VARIANTS: [Self; 3] = | ||
| [Self::DateTime, Self::FunctionFactory, Self::Regexp]; | ||
|
|
||
| const EXAMPLE_NAME: &str = "builtin_functions"; | ||
|
|
||
| fn variants() -> Vec<&'static str> { | ||
| Self::ALL.iter().map(|x| x.as_ref()).collect() | ||
| Self::ALL_VARIANTS | ||
| .iter() | ||
| .map(|example| example.as_ref()) | ||
| .collect() | ||
| } | ||
|
|
||
| async fn run(&self) -> Result<()> { | ||
| match self { | ||
| ExampleKind::All => { | ||
| for example in ExampleKind::RUNNABLE_VARIANTS { | ||
| println!("Running example: {}", example.as_ref()); | ||
| Box::pin(example.run()).await?; | ||
| } | ||
| } | ||
| ExampleKind::DateTime => date_time::date_time().await?, | ||
| ExampleKind::FunctionFactory => function_factory::function_factory().await?, | ||
| ExampleKind::Regexp => regexp::regexp().await?, | ||
| } | ||
| Ok(()) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -89,11 +119,6 @@ async fn main() -> Result<()> { | |
| DataFusionError::Execution("Missing argument".to_string()) | ||
| })?; | ||
|
|
||
| match arg.parse::<ExampleKind>()? { | ||
| ExampleKind::DateTime => date_time::date_time().await?, | ||
| ExampleKind::FunctionFactory => function_factory::function_factory().await?, | ||
| ExampleKind::Regexp => regexp::regexp().await?, | ||
| } | ||
|
|
||
| Ok(()) | ||
| let example = arg.parse::<ExampleKind>()?; | ||
| example.run().await | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,9 +18,12 @@ | |
|
|
||
| //! See `main.rs` for how to run it. | ||
|
|
||
| use std::{fs::File, io::Write}; | ||
|
|
||
| use datafusion::common::{assert_batches_eq, assert_contains}; | ||
| use datafusion::error::Result; | ||
| use datafusion::prelude::*; | ||
| use tempfile::tempdir; | ||
|
|
||
| /// This example demonstrates how to use the regexp_* functions | ||
| /// | ||
|
|
@@ -32,12 +35,30 @@ use datafusion::prelude::*; | |
| /// https://docs.rs/regex/latest/regex/#grouping-and-flags | ||
| pub async fn regexp() -> Result<()> { | ||
| let ctx = SessionContext::new(); | ||
| ctx.register_csv( | ||
| "examples", | ||
| "datafusion/physical-expr/tests/data/regex.csv", | ||
| CsvReadOptions::new(), | ||
| ) | ||
| .await?; | ||
| // content from file 'datafusion/physical-expr/tests/data/regex.csv' | ||
| let csv_data = r#"values,patterns,replacement,flags | ||
| abc,^(a),bb\1bb,i | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why inline this content? It is fine, I am just curious.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I inlined the CSV because I ran into issues reading files that live outside the `datafusion-examples` directory. To keep the example self-contained and stable in CI, I embedded the data and wrote it to a temp file before registering it. Longer term, a cleaner solution might be to store example data directly inside the `datafusion-examples` directory. |
||
| ABC,^(A).*,B,i | ||
| aBc,(b|d),e,i | ||
| AbC,(B|D),e, | ||
| aBC,^(b|c),d, | ||
| 4000,\b4([1-9]\d\d|\d[1-9]\d|\d\d[1-9])\b,xyz, | ||
| 4010,\b4([1-9]\d\d|\d[1-9]\d|\d\d[1-9])\b,xyz, | ||
| Düsseldorf,[\p{Letter}-]+,München, | ||
| Москва,[\p{L}-]+,Moscow, | ||
| Köln,[a-zA-Z]ö[a-zA-Z]{2},Koln, | ||
| اليوم,^\p{Arabic}+$,Today,"#; | ||
| let dir = tempdir()?; | ||
| let file_path = dir.path().join("regex.csv"); | ||
| { | ||
| let mut file = File::create(&file_path)?; | ||
| // write CSV data | ||
| file.write_all(csv_data.as_bytes())?; | ||
| } // scope closes the file | ||
| let file_path = file_path.to_str().unwrap(); | ||
|
|
||
| ctx.register_csv("examples", file_path, CsvReadOptions::new()) | ||
| .await?; | ||
|
|
||
| // | ||
| // | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When I ran this script twice, I got an error the second time around:
The second run made this:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the issue is that some of the examples leave files around:
```
andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion2$ find datafusion-examples/examples/datafusion-examples
datafusion-examples/examples/datafusion-examples
datafusion-examples/examples/datafusion-examples/test_json
datafusion-examples/examples/datafusion-examples/test_json/lQqU6IGDpwHJyGQB_0.json
datafusion-examples/examples/datafusion-examples/test_json/KKsELYFJ4st3GUOa_0.json
datafusion-examples/examples/datafusion-examples/test_csv
datafusion-examples/examples/datafusion-examples/test_csv/9Alj5dF7w72vLpp0_0.csv.gz
datafusion-examples/examples/datafusion-examples/test_csv/4Bh8tCQllXXE43A9_0.csv.gz
datafusion-examples/examples/datafusion-examples/test_table
datafusion-examples/examples/datafusion-examples/test_table/Y2CVyUXOXRPhDhFA_0.parquet
datafusion-examples/examples/datafusion-examples/test_table/2nskzCtDZnEJR851_0.parquet
datafusion-examples/examples/datafusion-examples/test_parquet
datafusion-examples/examples/datafusion-examples/test_parquet/4CIKeX6U9Ik1YYgY_0.parquet
datafusion-examples/examples/datafusion-examples/test_parquet/ooUJiB7QMqY0BRdN_0.parquet
```
Maybe we could change the examples to use a temporary directory, or else have this script clean up before running.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good point! Let's do it.