diff --git a/datafusion/.gitignore b/datafusion/.gitignore new file mode 100644 index 000000000000..97d0badbd313 --- /dev/null +++ b/datafusion/.gitignore @@ -0,0 +1 @@ +_sources \ No newline at end of file diff --git a/datafusion/_downloads/3cce4d737d8c5814f5b50d859d21ba53/capitalized_example.csv b/datafusion/_downloads/3cce4d737d8c5814f5b50d859d21ba53/capitalized_example.csv new file mode 100644 index 000000000000..dbc8f5c5a0a6 --- /dev/null +++ b/datafusion/_downloads/3cce4d737d8c5814f5b50d859d21ba53/capitalized_example.csv @@ -0,0 +1,5 @@ +A,b,c +1,2,3 +1,10,5 +2,5,6 +2,1,4 \ No newline at end of file diff --git a/datafusion/_downloads/9f6fbc67bd5c63cb1fd7ba4efdf82d7a/example.csv b/datafusion/_downloads/9f6fbc67bd5c63cb1fd7ba4efdf82d7a/example.csv new file mode 100644 index 000000000000..0eadb69396b3 --- /dev/null +++ b/datafusion/_downloads/9f6fbc67bd5c63cb1fd7ba4efdf82d7a/example.csv @@ -0,0 +1,2 @@ +a,b,c +1,2,3 \ No newline at end of file diff --git a/datafusion/contributor-guide/roadmap.html b/datafusion/contributor-guide/roadmap.html index 5516bc76e6cf..0e85413f8296 100644 --- a/datafusion/contributor-guide/roadmap.html +++ b/datafusion/contributor-guide/roadmap.html @@ -421,7 +421,7 @@

Additional SQL Language Features

@@ -436,8 +436,8 @@

Datasources

diff --git a/datafusion/objects.inv b/datafusion/objects.inv index ce6713b92ca1..0b79f2c92d91 100644 Binary files a/datafusion/objects.inv and b/datafusion/objects.inv differ diff --git a/datafusion/searchindex.js b/datafusion/searchindex.js index a052178f56ed..0323e7713136 100644 --- a/datafusion/searchindex.js +++ b/datafusion/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["contributor-guide/communication", "contributor-guide/index", "contributor-guide/quarterly_roadmap", "contributor-guide/roadmap", "contributor-guide/specification/index", "contributor-guide/specification/invariants", "contributor-guide/specification/output-field-name-semantic", "index", "user-guide/cli", "user-guide/configs", "user-guide/dataframe", "user-guide/example-usage", "user-guide/expressions", "user-guide/faq", "user-guide/introduction", "user-guide/library", "user-guide/sql/aggregate_functions", "user-guide/sql/data_types", "user-guide/sql/ddl", "user-guide/sql/explain", "user-guide/sql/index", "user-guide/sql/information_schema", "user-guide/sql/scalar_functions", "user-guide/sql/select", "user-guide/sql/sql_status", "user-guide/sql/subqueries"], "filenames": ["contributor-guide/communication.md", "contributor-guide/index.md", "contributor-guide/quarterly_roadmap.md", "contributor-guide/roadmap.md", "contributor-guide/specification/index.rst", "contributor-guide/specification/invariants.md", "contributor-guide/specification/output-field-name-semantic.md", "index.rst", "user-guide/cli.md", "user-guide/configs.md", "user-guide/dataframe.md", "user-guide/example-usage.md", "user-guide/expressions.md", "user-guide/faq.md", "user-guide/introduction.md", "user-guide/library.md", "user-guide/sql/aggregate_functions.md", "user-guide/sql/data_types.md", "user-guide/sql/ddl.md", "user-guide/sql/explain.md", "user-guide/sql/index.rst", "user-guide/sql/information_schema.md", "user-guide/sql/scalar_functions.md", "user-guide/sql/select.md", "user-guide/sql/sql_status.md", 
"user-guide/sql/subqueries.md"], "titles": ["Communication", "Introduction", "Quarterly Roadmap", "Roadmap", "Specifications", "Invariants", "Output field name semantics", "Apache Arrow DataFusion", "DataFusion Command-line SQL Utility", "Configuration Settings", "DataFrame API", "Example Usage", "Expressions", "Frequently Asked Questions", "Introduction", "Using DataFusion as a library", "Aggregate Functions", "Data Types", "DDL", "EXPLAIN", "SQL Reference", "Information Schema", "Scalar Functions", "SELECT syntax", "Status", "Subqueries"], "terms": {"we": [0, 1, 3, 15, 23], "welcom": [0, 1, 3], "particip": 0, "from": [0, 1, 2, 3, 5, 6, 8, 9, 10, 15, 17, 18, 19, 20, 21, 22, 24, 25], "everyon": 0, "encourag": [0, 1], "you": [0, 1, 3, 8, 9, 11, 15, 16, 19, 23], "join": [0, 3, 5, 6, 9, 10, 20, 24], "u": [0, 1, 8], "ask": [0, 7], "get": [0, 1, 5, 15], "involv": 0, "all": [0, 1, 3, 5, 6, 8, 10, 11, 12, 15, 21, 22, 23, 24, 25], "apach": [0, 3, 8, 14], "arrow": [0, 1, 3, 5, 6, 8, 14, 17], "datafus": [0, 1, 5, 6, 9, 10, 11, 16, 17, 18, 21, 23, 25], "project": [0, 1, 2, 3, 5, 10, 19, 24], "i": [0, 1, 2, 3, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 22, 23, 25], "govern": [0, 3], "softwar": [0, 3, 8], "foundat": [0, 3, 14], "": [0, 3, 5, 7, 8, 9, 10, 14, 18, 25], "code": [0, 1, 5, 7], "conduct": [0, 7], "The": [0, 1, 3, 6, 8, 9, 10, 17, 18, 19, 22, 23, 25], "vast": 0, "major": [0, 1], "occur": [0, 17], "open": 0, "our": [0, 1, 5], "github": [0, 3, 8], "repositori": [0, 8], "us": [0, 1, 2, 3, 5, 7, 9, 10, 12, 13, 16, 17, 18, 19, 21, 22, 23, 25], "org": [0, 3, 8], "dev": [0, 1, 3, 8], "manag": [0, 1, 2], "releas": [0, 1, 15], "coordin": 0, "design": [0, 1, 2, 13, 14, 15], "discuss": 0, "subscrib": 0, "unsubscrib": 0, "archiv": 0, "when": [0, 1, 3, 5, 9, 10, 12, 17, 22, 23], "email": 0, "pleas": [0, 11, 16, 23], "make": [0, 1, 2, 3, 23], "sure": [0, 23], "prefix": 0, "subject": [0, 1], "line": [0, 1, 7], "tag": [0, 8], "e": [0, 1, 3, 5, 9, 16], "g": [0, 1, 3, 5, 9, 16], "new": [0, 
2, 3, 5, 7, 9, 10, 11], "api": [0, 1, 2, 3, 6, 7, 12, 14], "remot": [0, 3], "data": [0, 1, 2, 3, 6, 9, 10, 12, 13, 14, 16, 18, 20, 22, 23], "sourc": [0, 1, 2, 14, 15, 18, 19, 20, 22], "so": [0, 1, 3, 5, 8, 9, 10, 11], "appropri": [0, 8], "peopl": [0, 1, 3], "notic": 0, "messag": [0, 9], "offici": [0, 8], "asf": 0, "workspac": 0, "inform": [0, 8, 18, 19, 20], "thi": [0, 1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 15, 17, 21, 22, 23], "great": [0, 1], "place": 0, "meet": 0, "other": [0, 1, 3, 5, 8, 15, 20, 24], "contributor": [0, 1, 2], "guidanc": 0, "where": [0, 1, 3, 5, 10, 12, 16, 20, 24, 25], "contribut": [0, 1, 3], "rust": [0, 2, 3, 5, 7, 12, 14], "channel": 0, "also": [0, 1, 5, 8, 9, 13, 18, 22], "have": [0, 1, 3, 5, 10, 20, 24], "backup": 0, "server": 0, "invit": 0, "link": 0, "case": [0, 3, 8, 11, 12, 23, 24], "ar": [0, 1, 3, 8, 9, 10, 11, 12, 15, 16, 17, 18, 22, 23, 25], "abl": 0, "If": [0, 1, 9, 18, 19], "need": [0, 1, 5, 8, 14, 19, 22], "an": [0, 1, 5, 8, 10, 14, 15, 16, 18, 22, 25], "can": [0, 1, 3, 5, 8, 9, 10, 12, 14, 15, 16, 18, 21, 22, 23, 25], "one": [0, 1, 9, 10, 12, 25], "biweekli": 0, "everi": [0, 3, 5, 23], "thursdai": 0, "both": [0, 1, 2, 5, 6, 14, 23], "04": 0, "00": [0, 9, 22], "utc": [0, 8, 21, 22], "16": [0, 19], "start": [0, 1, 3, 15], "septemb": 0, "30": 0, "2021": [0, 18], "depend": [0, 5, 15], "item": [0, 3], "agenda": 0, "someon": 0, "being": [0, 1, 5], "willing": 0, "host": 0, "see": [0, 1, 3, 23], "add": [0, 2, 3, 7, 9, 10, 11, 15], "topic": 0, "what": [0, 1, 5, 22], "plan": [0, 2, 3, 6, 7, 9, 10, 11, 14, 15, 19, 22], "goal": [0, 2, 3], "help": [0, 1, 3, 8, 9], "put": [0, 11], "face": 0, "name": [0, 1, 3, 4, 7, 8, 9, 10, 11, 19, 21, 22, 23], "some": [0, 3, 5, 9, 10, 11, 12], "work": [0, 1, 2, 8, 9, 11, 14], "synchron": 0, "initi": [0, 2], "differ": [0, 1, 5, 9, 25], "stakehold": 0, "identifi": [0, 1, 5, 7, 22], "area": 0, "more": [0, 1, 2, 3, 5, 9, 12, 16, 19, 23, 25], "align": 0, "No": 0, "decis": 0, "made": [0, 11, 23], "anyth": 0, 
"substanc": 0, "issu": [0, 1, 3, 7], "googl": 0, "doc": [0, 1, 8, 10, 15], "send": 0, "summari": 0, "kind": 1, "ticket": 1, "report": [1, 5], "featur": [1, 15], "improv": [1, 2, 3], "review": [1, 3], "In": [1, 5, 11, 12, 23], "addit": [1, 2, 9, 10, 12], "submit": [1, 3], "healthi": 1, "tradit": 1, "commun": [1, 2, 3, 7], "member": 1, "each": [1, 5, 8, 9, 10], "do": [1, 5, 9, 15], "wai": [1, 8, 22], "well": [1, 5, 8, 10, 14, 15], "familiar": [1, 5], "relev": 1, "codebas": [1, 3, 5], "find": [1, 25], "curat": 1, "good": 1, "first": [1, 12, 15, 22], "list": [1, 3, 8, 12, 16, 18, 24], "anyon": 1, "veri": [1, 14], "activ": 1, "fast": [1, 14], "move": 1, "try": [1, 3, 8, 9, 19], "quickli": 1, "keep": 1, "backlog": 1, "down": [1, 3, 9, 24], "pace": 1, "up": [1, 5, 10], "after": [1, 3, 10, 15, 23], "approv": 1, "mani": [1, 12, 25], "commit": 1, "access": [1, 21], "your": [1, 8, 11, 14, 15], "bandwidth": 1, "current": [1, 5, 8, 12, 15, 17, 21, 22], "most": 1, "limit": [1, 2, 3, 10, 11, 19, 20, 24], "resourc": [1, 2], "highli": [1, 9], "broader": [1, 3], "wait": 1, "consid": [1, 11], "Such": 1, "learn": 1, "becom": [1, 3], "expert": 1, "lack": 1, "coverag": 1, "address": 1, "futur": [1, 5], "faster": 1, "effici": [1, 2, 3, 5, 14], "sinc": [1, 5, 9, 22], "worldwid": 1, "timezon": [1, 22], "who": 1, "comment": 1, "To": [1, 9, 11, 15, 21], "ensur": [1, 3, 5], "wish": 1, "ha": [1, 3, 10, 11], "opportun": 1, "committ": 1, "least": [1, 15], "24": 1, "hour": [1, 9, 22], "pass": [1, 5, 9, 15], "between": [1, 2, 3, 9, 16], "A": [1, 2, 3, 5, 10, 11, 16, 23, 25], "mean": [1, 5, 10, 16], "substanti": 1, "chang": [1, 23], "appli": [1, 6, 9, 10], "best": [1, 3], "judgment": 1, "determin": [1, 9], "constitut": 1, "minor": 1, "might": 1, "without": [1, 3, 5, 8], "delai": 1, "again": 1, "exampl": [1, 3, 5, 7, 9, 10, 12, 15, 23, 25], "potenti": 1, "small": [1, 9], "bug": 1, "fix": 1, "non": [1, 9, 23], "controversi": 1, "build": [1, 3, 5, 8, 10, 14, 15, 16], "relat": [1, 6, 25], "clippi": 1, 
"version": [1, 5, 8], "upgrad": 1, "etc": [1, 3, 5], "smaller": [1, 3], "section": [1, 5, 15], "describ": [1, 3, 5, 8, 10, 23], "wget": 1, "http": [1, 2, 8], "az792536": 1, "vo": 1, "msecnd": 1, "net": 1, "vm": 1, "vmbuild_20190311": 1, "virtualbox": 1, "msedg": 1, "win10": 1, "zip": 1, "choco": 1, "y": [1, 12, 23, 25], "git": [1, 8], "rustup": [1, 15], "visualcpp": 1, "tool": [1, 8], "bash": 1, "ex": [1, 11], "cargo": [1, 15], "compil": [1, 2, 5, 15], "requir": [1, 9, 15], "protobuf": [1, 3], "On": 1, "platform": [1, 3, 13], "system": [1, 5, 14, 16, 17], "packag": 1, "apt": [1, 3], "dnf": 1, "pacman": 1, "brew": [1, 3, 8], "want": [1, 14, 15, 23], "verifi": 1, "3": [1, 6, 9, 18, 22], "12": [1, 8, 18, 22], "greater": [1, 12, 22], "which": [1, 3, 5, 9, 13, 14, 16, 23], "introduc": 1, "support": [1, 2, 3, 8, 9, 14, 16, 17, 20, 21, 22, 23, 25], "explicit": [1, 2], "field": [1, 3, 4, 7, 22, 23], "presenc": 1, "older": [1, 15], "mai": [1, 5, 8, 9], "fail": [1, 9], "libprotoc": 1, "4": [1, 6, 18], "altern": 1, "binari": [1, 20], "download": [1, 8], "page": [1, 9], "built": [1, 3, 5, 8, 13], "written": [1, 9, 14], "standard": [1, 13, 16, 22], "toolkit": 1, "fmt": 1, "updat": [1, 15], "stabl": [1, 15], "latest": [1, 8, 15], "submodul": 1, "init": 1, "instruct": [1, 15], "ci": 1, "script": [1, 3], "rust_fmt": 1, "sh": 1, "rust_clippi": 1, "rust_toml_fmt": 1, "run": [1, 3, 5, 9, 13, 15], "them": [1, 10, 23], "onc": [1, 3], "rust_lint": 1, "sever": [1, 15], "level": [1, 3, 5, 9], "its": [1, 5, 8, 10, 12, 14, 22], "pyramid": 1, "tri": 1, "follow": [1, 5, 8, 9, 10, 11, 15, 17, 23, 25], "book": 1, "highlight": 1, "import": [1, 5, 10, 15], "modul": 1, "exist": [1, 5, 12, 18, 20], "individu": 1, "defin": [1, 5, 8, 15, 17, 23], "same": [1, 3, 5, 9, 10, 22], "file": [1, 2, 3, 5, 6, 8, 9, 11, 15, 18], "convent": 1, "There": [1, 3, 8], "public": [1, 21], "interfac": [1, 5, 14], "librari": [1, 7, 13], "directori": [1, 8, 15], "command": [1, 3, 7, 17, 19, 21], "p": [1, 8, 16], 
"sql_integr": 1, "One": [1, 9], "valid": [1, 22], "abil": 1, "larg": [1, 3], "assort": 1, "queri": [1, 2, 5, 6, 9, 10, 13, 14, 15, 16, 17, 18, 23, 24, 25], "against": [1, 8, 14, 15, 23], "driven": [1, 3], "benefit": 1, "includ": [1, 2, 5, 10, 23], "easier": 1, "write": [1, 2, 5, 10], "maintain": [1, 10], "process": [1, 3, 9, 13, 14], "migrat": [1, 3], "possibl": [1, 3, 8, 18], "contain": [1, 6, 8, 9], "har": 1, "certain": [1, 5], "compar": 1, "result": [1, 3, 5, 6, 8, 9, 10, 11, 15, 16, 18, 23, 25], "export": [1, 8], "postgres_db": 1, "postgres_us": 1, "postgres_host": 1, "localhost": 1, "postgres_port": 1, "5432": 1, "python": 1, "m": 1, "pip": 1, "setuptool": 1, "wheel": 1, "r": [1, 8, 10, 15, 17], "txt": 1, "pytest": 1, "v": [1, 8], "test_psql_par": 1, "py": 1, "creat": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 14, 17, 20, 24], "psql": 1, "d": [1, 5, 8], "h": [1, 8], "c": [1, 8, 11, 15, 23], "tabl": [1, 6, 8, 9, 11, 15, 17, 19, 20, 21, 23, 24, 25], "IF": [1, 18], "NOT": [1, 8, 18, 20], "c1": [1, 5, 6, 8, 10, 18], "charact": [1, 20], "vari": 1, "null": [1, 5, 8, 9, 12, 18, 22, 23], "c2": [1, 5, 6, 8, 10, 18], "integ": [1, 12, 17, 22, 23], "c3": [1, 8, 18], "smallint": [1, 8, 17, 18], "c4": [1, 8, 18], "c5": [1, 8, 18], "c6": [1, 8, 18], "bigint": [1, 8, 17, 18], "c7": [1, 8, 18], "c8": [1, 8, 18], "c9": [1, 8, 18], "c10": [1, 8, 18], "c11": [1, 8, 18], "doubl": [1, 6, 8, 11, 17, 18, 23], "precis": [1, 17], "c12": [1, 8, 18], "c13": [1, 8, 18], "copi": 1, "pwd": 1, "csv": [1, 10, 14, 18, 19, 24], "aggregate_test_100": [1, 8, 18], "WITH": [1, 8, 18, 20], "header": [1, 8, 18], "true": [1, 8, 9, 15, 21], "statist": [1, 9, 20], "micro": 1, "framework": [1, 3, 14], "evalu": [1, 5, 9, 10], "perform": [1, 2, 3, 5, 10, 11, 12, 13, 14, 17, 22], "path": [1, 5, 8, 18, 19], "particular": [1, 5], "optimis": 1, "effort": 1, "prevent": 1, "regress": 1, "within": [1, 13], "given": [1, 5, 6, 25], "bench": 1, "benchmark_nam": 1, "full": [1, 24], "found": [1, 25], "here": [1, 10, 12, 22, 
25], "advanc": [1, 3], "parquet_query_sql": 1, "These": [1, 3, 10, 12], "randomli": 1, "gener": [1, 2, 6, 20], "therefor": 1, "quick": 1, "variabl": [1, 8, 9], "parquet_fil": 1, "set": [1, 5, 7, 8, 10, 15, 16, 17, 21, 24, 25], "instead": [1, 8, 9], "multipl": [1, 2, 9], "custom": [1, 15, 17, 24], "dataset": 1, "automat": 1, "remov": [1, 18], "ani": [1, 5, 8, 9, 10, 14], "exit": [1, 8], "howev": [1, 3, 22], "interrupt": 1, "ctrl": 1, "analys": 1, "fact": 1, "preserv": [1, 10], "subsequ": 1, "valuabl": 1, "implement": [1, 2, 5, 12], "engin": [1, 3, 8, 14, 16], "below": [1, 12, 15, 25], "checklist": 1, "actual": [1, 5, 10], "string": [1, 5, 6, 17, 20, 24], "math": [1, 20, 24], "datetim": [1, 9, 17], "physic": [1, 3, 9, 19], "expr": [1, 12, 19], "src": [1, 15], "variant": 1, "builtinscalarfunct": 1, "entri": [1, 5], "fromstr": 1, "call": [1, 6, 10], "return_typ": 1, "expect": [1, 5, 16], "return": [1, 5, 10, 12, 16, 22, 23], "type": [1, 5, 8, 9, 10, 20, 22, 24], "incom": [1, 5], "signatur": 1, "number": [1, 3, 5, 9, 10, 16, 23], "argument": [1, 5, 6, 12, 22], "create_physical_expr": 1, "create_physical_fun": 1, "map": [1, 5, 17], "core": [1, 3, 8], "through": 1, "known": [1, 5], "expr_fn": 1, "unary_scalar_expr": 1, "macro": 1, "accumul": 1, "aggregateexpr": 1, "aggregatefunct": 1, "create_aggregate_expr": 1, "repres": [1, 5, 10], "logicalplan": [1, 5, 15], "node": [1, 3, 5, 15], "render": 1, "graphviz": 1, "save": 1, "output": [1, 4, 7, 9, 10, 19, 22], "display_graphviz": 1, "somehow": 1, "let": [1, 10, 11, 15], "mut": 1, "tmp": [1, 19], "dot": 1, "Then": [1, 15], "For": [1, 3, 8, 9, 15], "pdf": 1, "tpdf": 1, "formal": 1, "semant": [1, 4, 7], "behavior": [1, 11], "refer": [1, 5, 7, 10, 12, 16, 23], "resolv": 1, "ambigu": 1, "dure": [1, 2, 3, 5, 9, 16], "propos": 1, "fit": 1, "invari": [1, 4, 7], "store": [1, 8, 9, 16, 18], "folder": 1, "prettier": 1, "either": [1, 10, 23], "npm": 1, "global": 1, "npx": 1, "standalon": 1, "recommend": 1, "ad": [1, 23], "2": [1, 6, 8, 
11, 12, 15, 18, 19, 22, 23, 25], "0": [1, 8, 10, 11, 12, 15, 16, 19, 22, 23, 25], "ve": 1, "confirm": 1, "w": [1, 16], "cli": [1, 8, 9], "publish": [2, 3, 8, 15], "give": [2, 23], "visibl": 2, "prioriti": 2, "bind": 2, "io": [2, 8, 15], "read": [2, 3, 5, 9], "regist": [2, 5, 11, 15, 18], "format": [2, 3, 5, 7, 8, 9, 10, 14], "datafram": [2, 3, 6, 7, 12, 14], "sql": [2, 6, 7, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23], "option": [2, 9, 10, 21], "partit": [2, 5, 9, 10, 14, 18, 19, 24], "metadata": [2, 5, 9, 21], "schedul": 2, "predict": [2, 3], "observ": 2, "cpu": [2, 3, 15], "bound": 2, "develop": [2, 3, 5, 7], "stori": 2, "parallel": [2, 3, 13, 14, 24], "execut": [2, 5, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 21, 24], "memori": [2, 3, 9, 10, 13, 14, 15, 16, 18], "oper": [2, 5, 9, 13, 16, 17, 22, 24], "incorpor": 2, "row": [2, 8, 9, 10, 18, 23, 25], "aggreg": [2, 3, 5, 7, 10, 11, 15, 19, 20, 24], "benchmark": [2, 3, 7], "explor": 2, "jit": 2, "complex": [2, 5], "express": [2, 6, 7, 9, 10, 16, 20, 23, 24, 25], "llvm": 2, "inlin": 2, "function": [2, 3, 7, 8, 9, 10, 20, 24], "primari": 2, "sort": [2, 3, 10, 24], "merg": [2, 7, 16], "document": [2, 3, 5, 6, 7, 10, 12, 15, 23], "websit": 2, "streamprovid": 2, "trait": 2, "product": [2, 14, 23], "readi": 2, "shuffl": 2, "cleanup": 2, "fill": 2, "gap": 2, "task": [2, 3, 8], "exchang": 2, "better": [2, 3], "error": [2, 3, 5, 9, 11, 15], "handl": [2, 3], "failur": 2, "executor": [2, 3], "lost": 2, "restart": 2, "monitor": 2, "log": 2, "auto": 2, "scale": [2, 3, 17], "multi": [2, 15], "deploy": 2, "resili": 2, "fault": 2, "toler": 2, "ultim": 2, "shard": 2, "scalabl": 2, "cach": [2, 3], "group": [2, 3, 9, 10, 11, 19, 20], "base": [2, 5, 6, 9, 10, 12, 15, 18, 21, 23, 25], "alloc": [2, 15], "miss": 2, "sessioncontext": [2, 10, 11, 15], "editor": 2, "expos": [2, 9], "context": [2, 3, 5, 25], "hdf": 2, "timestamp": [2, 3, 17, 22, 24], "rang": 2, "predic": [2, 9, 24], "pushdown": [2, 24], "thread": [2, 13, 14, 15], "awar": [2, 11, 23], 
"sdk": 2, "experiment": [2, 17], "high": [3, 14], "It": [3, 5, 8, 12, 13, 15, 16, 18, 22, 23], "meant": 3, "restrict": 3, "rather": 3, "newcom": 3, "understand": 3, "head": 3, "inspir": 3, "part": [3, 10, 14], "model": [3, 5, 13, 14, 22], "entir": [3, 9], "volunt": 3, "befor": [3, 9, 10], "pr": [3, 7], "strongli": 3, "suggest": 3, "convers": [3, 5, 22], "mail": 3, "avoid": [3, 9], "surpris": 3, "embed": [3, 9], "choic": 3, "analyt": 3, "applic": [3, 9, 15], "leverag": [3, 14], "uniqu": [3, 16], "provid": [3, 8, 13, 14, 15, 18], "class": 3, "singl": [3, 5, 9, 10, 13, 16, 25], "declar": [3, 5], "compat": [3, 9, 24], "postgresql": [3, 6, 16], "similar": [3, 10, 22], "those": 3, "offer": [3, 5], "panda": [3, 10], "spark": [3, 6, 10, 16], "procedur": 3, "programmat": 3, "race": 3, "free": 3, "ergonom": 3, "extens": [3, 14], "point": [3, 14, 15], "layer": 3, "decim": [3, 17], "122": 3, "complet": 3, "statu": [3, 20], "arithmet": 3, "194": 3, "parser": [3, 24], "533": 3, "nest": [3, 24], "structur": 3, "struct": [3, 10, 12], "119": 3, "tpch": 3, "mileston": 3, "detail": [3, 8, 19], "sophist": 3, "order": [3, 8, 9, 15, 20, 24], "tokomak": 3, "440": 3, "finer": 3, "filesystem": 3, "s3": 3, "local": [3, 8], "907": 3, "1060": 3, "async": [3, 11, 15], "reader": [3, 5, 9], "chunk": 3, "prefetch": 3, "capabl": [3, 14], "arrow2": 3, "h2oai": 3, "db": 3, "147": 3, "348": 3, "grain": 3, "control": [3, 9], "587": 3, "usag": [3, 7, 15, 16], "54": 3, "tbd": 3, "note": [3, 5, 8, 10, 12, 15, 17, 22], "thought": 3, "vision": 3, "1096": 3, "abstract": 3, "repl": 3, "pars": [3, 9], "separ": [3, 6], "correctli": 3, "connect": [3, 8, 14], "subsystem": 3, "print": [3, 8, 9, 10, 11, 15], "out": [3, 8, 9, 10], "stat": 3, "debug": 3, "interact": [3, 8], "shell": 3, "nuget": 3, "registri": [3, 10], "easili": 3, "adopt": 3, "shorter": 3, "like": [3, 8, 9], "dfcli": 3, "distribut": [3, 13], "comput": [3, 5, 13, 16], "break": 3, "stage": 3, "across": 3, "avail": [3, 8, 10, 12, 21], "cluster": 3, 
"remain": 3, "suitabl": [3, 16], "serial": 3, "thei": [3, 5, 9], "agnost": 3, "than": [3, 8, 12, 22], "advantag": 3, "over": [3, 9, 10], "becaus": [3, 5], "doesn": [3, 18], "t": [3, 11, 18, 21, 22, 23], "eagerli": 3, "direction": 3, "acycl": 3, "graph": 3, "dag": 3, "configur": [3, 7, 21], "concurr": 3, "should": [3, 5, 6, 23], "push": [3, 9, 24], "logic": [3, 9, 10, 12, 14, 19], "accur": [3, 16], "could": [3, 9], "hash": [3, 19], "desir": 3, "load": [3, 9], "side": [3, 9, 23], "cannot": [3, 9], "until": 3, "enumer": 5, "plane": 5, "enforc": 5, "assum": 5, "recordbatch": [5, 10], "arrai": [5, 12, 17, 24], "top": [5, 9], "dynam": 5, "object": 5, "as_ani": 5, "downcast": 5, "itself": [5, 14], "static": [5, 15], "int32arrai": 5, "data_typ": [5, 21], "respect": 5, "alwai": 5, "time": [5, 9, 12, 15, 16, 20, 22, 24], "onli": [5, 9, 10, 12, 22, 23, 25], "runtim": 5, "program": 5, "principl": 5, "user": [5, 6, 14, 15, 18], "know": 5, "datatyp": [5, 17], "nullabl": [5, 9, 10], "flag": 5, "bool": 5, "whether": 5, "valu": [5, 6, 8, 9, 16, 18, 22, 23, 24, 25], "pf": 5, "lf": 5, "root": [5, 8, 12, 22], "vector": [5, 10], "record": [5, 6, 9], "batch": [5, 6, 8, 9], "how": [5, 6, 7], "deriv": 5, "input": [5, 9, 10, 16], "logical_field": 5, "lf1": 5, "lf2": 5, "plu": [5, 6], "b": [5, 6, 10, 11, 12, 19, 23], "none": [5, 9, 17, 19, 22], "get_supertyp": 5, "length": [5, 9, 12, 24], "u32": [5, 22], "tree": [5, 15], "compos": 5, "AS": [5, 8, 18, 22, 23], "sum12": 5, "scan": [5, 9, 18, 23], "u64": 5, "frozen": 5, "while": [5, 9], "child": 5, "physical_field": 5, "pf1": 5, "pf2": 5, "whose": 5, "element": 5, "sum": [5, 12, 16, 19, 25], "two": [5, 9, 10], "togeth": [5, 12], "smallest": 5, "str": 5, "count": [5, 12, 16, 23], "byte": [5, 9], "doe": [5, 9, 22], "sole": 5, "properti": 5, "index": [5, 9, 19], "associ": 5, "necessari": [5, 8, 18], "physicalplan": 5, "accept": [5, 12], "manner": 5, "hardwar": 5, "environ": [5, 7, 8, 9], "extra": 5, "paramet": [5, 22], "logical_plan": [5, 19], 
"param": 5, "subsect": 5, "min": [5, 9, 10, 11, 12, 16], "tableprovid": [5, 9, 15], "likewis": 5, "specif": [5, 6, 7, 14, 21], "my_op": 5, "thu": [5, 17], "word": 5, "understood": 5, "must": [5, 6, 9, 10, 11, 22], "guarante": 5, "select": [5, 6, 8, 9, 10, 11, 12, 15, 18, 19, 20, 21, 22, 25], "t1": [5, 6], "id": [5, 6, 10], "t2": [5, 6], "unambigu": 5, "violat": 5, "content": 5, "correspond": [5, 8], "particularli": 5, "intermediari": 5, "calcul": [5, 10], "i32": 5, "i64": 5, "computation": 5, "expens": 5, "panic": 5, "satisfi": 5, "match": [5, 10, 23, 25], "claim": 5, "int32": [5, 17], "safe": 5, "parquet": [5, 9, 10, 14, 18, 24], "branch": 5, "combin": 5, "equival": [5, 12, 22], "physical_schema": 5, "qualifi": [5, 6], "strip": 5, "strip_rel": 5, "f": [5, 6, 8], "reli": 5, "simpl": [5, 11], "corollari": 5, "triplet": 5, "rule": [5, 9], "physical_plan": [5, 19], "consum": [5, 9], "downscal": 5, "jeopard": 5, "referenc": [5, 23], "assumpt": 5, "about": [5, 21], "bare": 6, "df": [6, 10, 11, 15], "select_column": [6, 10], "compound": 6, "foo": [6, 8, 12], "bar": 6, "convert": [6, 8, 22], "lowercas": 6, "avg": [6, 12, 16], "wrap": 6, "quot": [6, 11, 23], "parenthes": 6, "operand": 6, "space": [6, 22], "1": [6, 8, 11, 12, 15, 16, 18, 19, 21, 22, 23, 25], "comma": 6, "vec": [6, 10, 11], "udf": [6, 10, 15, 24], "col": [6, 10, 11, 12], "schema": [6, 8, 9, 10, 17, 18, 20, 23, 24], "test": [6, 7, 8, 10, 11, 14, 15, 18], "sampl": 6, "int": [6, 8, 17, 18], "varchar": [6, 8, 17, 18], "5": [6, 12, 16, 18], "insert": [6, 9], "INTO": 6, "hello": [6, 12, 18], "world": 6, "ON": [6, 23], "mysql": 6, "8": [6, 8, 9, 22], "13": 6, "sqlite": 6, "ab": [6, 10, 12], "foo_bar": 6, "7": [6, 12], "introduct": 7, "capit": [7, 23], "util": 7, "frequent": 7, "question": 7, "crate": [7, 15], "pull": 7, "request": 7, "window": [7, 24], "setup": 7, "protoc": 7, "instal": [7, 15], "bootstrap": 7, "organ": 7, "scalar": [7, 20], "displai": [7, 12, 22], "graphic": 7, "md": 7, "roadmap": 7, "ballista": 
7, "quarterli": 7, "2022": [7, 18], "q2": 7, "tracker": 7, "allow": [8, 10, 12, 14, 23], "conveni": [8, 14], "own": [8, 15], "echo": 8, "v12": 8, "extern": [8, 17, 20], "locat": [8, 9, 18], "took": 8, "017": 8, "second": [8, 22], "column_1": [8, 23, 25], "column_2": [8, 23, 25], "012": 8, "easiest": 8, "spin": 8, "via": [8, 9], "pre": [8, 16], "ghcr": 8, "v2": 8, "manifest": 8, "100": [8, 10, 11, 16], "blob": [8, 17], "sha256": [8, 12], "9ecc8a01be47ceb9a53b39976696afa87c0a8": 8, "pkg": 8, "githubusercont": 8, "com": 8, "ghcr1": 8, "9ecc8a01be47ceb9a53b39976": 8, "pour": 8, "big_sur": 8, "bottl": 8, "tar": 8, "gz": 8, "usr": 8, "cellar": 8, "9": [8, 22], "17": 8, "4mb": 8, "imag": 8, "clone": 8, "dockerignor": 8, "delet": 8, "checkout": 8, "cd": [8, 15], "dockerfil": 8, "your_data_loc": 8, "client": 8, "size": [8, 9], "batch_siz": [8, 9, 21], "default": [8, 9, 12, 16, 22, 23], "tsv": 8, "json": [8, 10, 24], "nd": 8, "data_path": 8, "q": 8, "quiet": 8, "reduc": [8, 9], "quietli": 8, "rc": 8, "startup": 8, "datafusionrc": 8, "quit": 8, "statement": [8, 9, 10, 18, 19, 22], "taxi": [8, 18], "mnt": [8, 18], "nyctaxi": [8, 18], "tripdata": [8, 18], "float": [8, 17, 18], "aws_default_region": 8, "aws_access_key_id": 8, "aws_secret_access_kei": 8, "access_key_id": 8, "secret_access_kei": 8, "region": 8, "aws_endpoint": 8, "endpoint": 8, "aws_session_token": 8, "token": 8, "aws_container_credentials_relative_uri": 8, "aw": 8, "amazon": 8, "amazonec": 8, "developerguid": 8, "iam": 8, "role": 8, "html": 8, "aws_allow_http": 8, "permit": 8, "tl": 8, "cp": 8, "my": 8, "bucket": 8, "upload": 8, "east": 8, "v14": 8, "374": 8, "171": 8, "insid": 8, "listtabl": 8, "describet": 8, "table_nam": [8, 18, 21], "quietmod": 8, "fals": [8, 9, 19, 21], "search": 8, "show": [8, 10, 11, 15, 19, 21, 23, 24], "8192": [8, 9, 21], "coalesce_batch": [8, 9, 21], "coalesce_target_batch_s": [8, 9, 21], "4096": [8, 9, 19, 21], "time_zon": [8, 9, 21], "explain": [8, 9, 10, 20, 21], "logical_plan_onli": 
[8, 9, 21], "physical_plan_onli": [8, 9, 21], "optim": [8, 9, 10, 14, 21, 24], "filter_null_join_kei": [8, 9, 21], "skip_failed_rul": [8, 9, 21], "1024": 8, "seen": 8, "abov": 8, "look": 8, "upper": [8, 12], "_": 8, "would": [8, 9], "datafusion_execution_batch_s": [8, 9], "002": 8, "v13": 8, "011": 8, "000": 8, "005": 8, "sessionconfig": 9, "variou": 9, "aspect": 9, "construct": 9, "session": [9, 21], "from_env": 9, "kei": [9, 10], "transform": 9, "uppercas": 9, "period": 9, "replac": [9, 12, 18, 24], "underscor": 9, "accord": [9, 17], "cast": [9, 17, 22, 24], "utf8": [9, 17], "warn": 9, "emit": 9, "initialis": 9, "beforehand": 9, "affect": 9, "descript": 9, "catalog": [9, 18, 21], "uint64": [9, 16, 17], "especi": 9, "buffer": 9, "tini": 9, "too": 9, "much": 9, "consumpt": 9, "boolean": [9, 20], "examin": 9, "coalesc": [9, 12, 24], "larger": 9, "filter": [9, 10, 11, 12, 24, 25], "produc": [9, 23, 25], "target": [9, 15], "conjunct": 9, "enable_page_index": 9, "decod": 9, "metadata_size_hint": 9, "specifi": [9, 10, 18, 19, 22], "fetch": 9, "last": 9, "size_hint": 9, "optimist": 9, "footer": 9, "anoth": [9, 10], "encod": 9, "prune": [9, 18], "attempt": 9, "skip": 9, "max": [9, 12, 16, 23], "pushdown_filt": 9, "reorder_filt": 9, "opear": 9, "reorder": [9, 24], "heurist": 9, "minim": [9, 10], "cost": [9, 16], "skip_metadata": 9, "conflict": 9, "zone": 9, "extract": [9, 24], "some_tim": 9, "shift": [9, 12], "underlin": 9, "column": [9, 10, 11, 12, 16, 19, 21, 22, 23, 24], "overhead": 9, "fulli": 9, "hash_join_single_partition_threshold": 9, "1048576": 9, "maximum": [9, 23], "estim": 9, "hashjoin": 9, "collect": [9, 10], "max_pass": 9, "prefer_hash_join": 9, "prefer": 9, "sortmergejoin": 9, "efficientlythan": 9, "proce": 9, "next": 9, "caus": 9, "top_down_join_key_reord": 9, "typic": 10, "read_csv": [10, 11], "modifi": 10, "definit": 10, "prelud": [10, 11, 15], "ctx": [10, 11, 15], "csvreadopt": [10, 11, 15], "await": [10, 11, 15], "lt_eq": [10, 11, 12], "lazi": 10, 
"just": 10, "approach": 10, "overal": 10, "invok": 10, "distinct": [10, 16, 23], "duplic": 10, "except": [10, 22, 24], "exactli": 10, "intersect": [10, 24], "repartit": 10, "scheme": 10, "turn": 10, "arbitrari": 10, "union": [10, 20, 24], "union_distinct": 10, "with_column": 10, "with_column_renam": 10, "renam": 10, "stdout": 10, "disk": 10, "collect_partit": 10, "execute_stream": 10, "stream": [10, 16], "execute_stream_partit": 10, "per": 10, "show_limit": 10, "subset": [10, 18], "write_csv": 10, "write_json": 10, "write_parquet": 10, "explan": 10, "far": 10, "functionregistri": 10, "term": 10, "attribut": 10, "to_logical_plan": 10, "to_unoptimized_plan": 10, "unoptim": 10, "11": [11, 15, 18], "tokio": [11, 15], "main": 11, "fn": [11, 15], "register_csv": [11, 15], "BY": [11, 18, 19, 20, 24], "ok": [11, 15], "effect": [11, 23], "lower": [11, 12, 23], "letter": 11, "won": 11, "illustr": 11, "capitalized_exampl": 11, "method": 12, "chain": 12, "fluent": 12, "style": [12, 18], "AND": 12, "gt": 12, "lit": 12, "lt": 12, "123": [12, 22], "expr1": 12, "expr2": 12, "eq": 12, "gt_eq": 12, "not_eq": 12, "x": [12, 16, 19, 23, 25], "absolut": [12, 22], "aco": 12, "invers": [12, 22], "cosin": [12, 22], "asin": 12, "sine": [12, 22], "atan": 12, "tangent": [12, 22], "atan2": 12, "ceil": 12, "nearest": [12, 22], "equal": [12, 16, 22], "co": 12, "exp": 12, "exponenti": [12, 22], "floor": 12, "less": [12, 22], "ln": 12, "natur": [12, 22], "logarithm": [12, 22], "log10": 12, "10": [12, 22, 23], "log2": 12, "power": 12, "expon": 12, "rais": [12, 22], "round": 12, "signum": 12, "sign": [12, 22], "sin": 12, "sqrt": 12, "squar": [12, 22], "tan": 12, "trunc": 12, "truncat": [12, 22], "toward": [12, 22], "zero": [12, 22, 25], "OR": [12, 18], "xor": 12, "left": [12, 24], "right": [12, 24], "often": [12, 22], "substitut": [12, 22], "retriev": [12, 22], "otherwis": [12, 22], "end": [12, 15], "nullif": [12, 24], "value1": [12, 22], "value2": [12, 22], "ascii": [12, 24], "bit_length": [12, 
24], "btrim": [12, 24], "char_length": [12, 24], "character_length": [12, 24], "concat": [12, 24], "concat_w": [12, 24], "chr": [12, 24], "initcap": [12, 24], "lpad": [12, 24], "ltrim": [12, 24], "md5": 12, "octet_length": [12, 24], "repeat": [12, 24], "revers": [12, 24], "rpad": [12, 24], "rtrim": [12, 24], "digest": 12, "split_part": [12, 24], "starts_with": [12, 24], "strpo": [12, 24], "substr": [12, 24], "translat": [12, 24], "trim": [12, 24], "regexp_match": 12, "regexp_replac": [12, 24], "date_part": [12, 24], "date_trunc": 12, "from_unixtim": 12, "to_timestamp": [12, 24], "to_timestamp_milli": [12, 24], "to_timestamp_micro": [12, 24], "to_timestamp_second": [12, 24], "now": 12, "in_list": 12, "random": 12, "sha224": 12, "sha384": 12, "sha512": 12, "to_hex": [12, 24], "approx_distinct": [12, 24], "approx_median": [12, 24], "approx_percentile_cont": [12, 24], "approx_percentile_cont_with_weight": [12, 24], "count_distinct": 12, "cube": 12, "grouping_set": 12, "median": [12, 16], "rollup": 12, "in_subqueri": 12, "df1": 12, "df2": 12, "IN": [12, 20], "not_exist": 12, "not_in_subqueri": 12, "scalar_subqueri": 12, "create_udf": 12, "create_udaf": 12, "represent": 13, "columnar": 13, "kernel": 13, "common": [13, 24], "modern": 14, "pipelin": 14, "etl": 14, "databas": 14, "achiev": 14, "easi": 14, "Being": 14, "ecosystem": 14, "flight": 14, "rest": 14, "big": 14, "emb": 14, "almost": 14, "tailor": 14, "usecas": 14, "qualiti": 14, "hello_datafus": 15, "toml": 15, "path_to_your_csv_fil": 15, "udaf": [15, 24], "rewrit": 15, "executionplan": 15, "step": 15, "worth": 15, "profil": 15, "significantli": 15, "increas": 15, "simd": 15, "rt": 15, "snmalloc": 15, "lto": 15, "codegen": 15, "unit": 15, "global_alloc": 15, "snmalloc_r": 15, "final": 15, "nightli": 15, "toolchain": 15, "architectur": 15, "ideal": 15, "nativ": 15, "avx2": 15, "rustflag": 15, "array_agg": 16, "var": 16, "var_samp": 16, "var_pop": 16, "stddev": 16, "stddev_samp": 16, "stddev_pop": 16, "covar": 16, 
"covar_samp": 16, "covar_pop": 16, "corr": 16, "hyperloglog": 16, "alia": 16, "percentil": 16, "tdigest": 16, "float64": [16, 17], "inclus": 16, "raw": 16, "sketch": 16, "n": 16, "centroid": 16, "fewer": 16, "exact": 16, "higher": 16, "weight": 16, "stand": 16, "low": 16, "latenc": 16, "olap": 16, "flink": 16, "sqlparser": 17, "char": 17, "text": 17, "tinyint": 17, "int8": 17, "int16": 17, "int64": [17, 21, 22], "unsign": 17, "uint8": 17, "uint16": 17, "uint32": 17, "float32": 17, "real": 17, "decimal128": 17, "3523": 17, "date32": [17, 22], "time64": 17, "timeunit": 17, "nanosecond": [17, 22], "bytea": 17, "uuid": 17, "Not": 17, "yet": 17, "clob": 17, "varbinari": 17, "regclass": 17, "nvarchar": 17, "enum": 17, "interv": 17, "infer": [18, 23], "aggregate_simpl": 18, "manual": 18, "alreadi": 18, "hive": 18, "year": [18, 22], "month": [18, 22], "01": [18, 22], "valuet": 18, "memtabl": 18, "silent": 18, "ignor": 18, "nonexistent_t": 18, "virtual": 18, "view_nam": 18, "column1": 18, "6": 18, "column2": 18, "users_v": 18, "customer_a": 18, "verbos": 19, "plan_typ": 19, "groupbi": 19, "aggr": 19, "tablescan": 19, "projectionexec": 19, "aggregateexec": 19, "mode": 19, "finalpartit": 19, "gby": 19, "coalescebatchesexec": 19, "target_batch_s": 19, "repartitionexec": 19, "partial": 19, "roundrobinbatch": 19, "csvexec": 19, "has_head": 19, "metric": 19, "coalescepartitionsexec": 19, "hashaggregateexec": 19, "outputrow": 19, "sendtim": 19, "839560": 19, "fetchtim": 19, "122528525": 19, "repartitiontim": 19, "5327877": 19, "5660489": 19, "8012": 19, "numer": 20, "date": [20, 22, 24], "unsupport": 20, "syntax": [20, 25], "claus": [20, 24], "subqueri": [20, 24], "ddl": 20, "drop": 20, "view": [20, 21, 24], "analyz": 20, "approxim": [20, 24], "condit": [20, 23, 24], "regular": 20, "tempor": 20, "iso": 21, "information_schema": [21, 24], "table_catalog": 21, "table_schema": 21, "table_typ": 21, "column_nam": 21, "is_nul": 21, "NO": 21, "df_set": 21, "1997": 22, "31t09": 22, "26": 
22, "56": 22, "123z": 22, "rcf3339": 22, "05": 22, "31": 22, "09": 22, "close": 22, "er": 22, "et": 22, "offset": 22, "fraction": 22, "epoch": 22, "present": 22, "consist": 22, "to_timestamp_xx": 22, "millisecond": 22, "resolut": 22, "microsecond": 22, "secondsecond": 22, "subfield": 22, "date64": 22, "2020": 22, "08t12": 22, "week": 22, "37": 22, "dai": 22, "minut": 22, "03": 22, "postgr": [22, 24], "wherev": 22, "appear": 22, "chosen": 22, "accordingli": 23, "clarif": 23, "with_queri": 23, "select_expr": 23, "from_item": 23, "join_item": 23, "grouping_el": 23, "asc": 23, "desc": 23, "quantifi": 23, "By": 23, "person": 23, "ag": 23, "employe": 23, "keyword": 23, "even": 23, "cartesian": 23, "table1": 23, "table2": 23, "ascend": 23, "descend": 23, "neg": 23, "planner": 24, "constant": 24, "fold": 24, "coercion": 24, "post": 24, "try_cast": 24, "basic": 24, "inner": 24, "cross": 24, "empti": 24, "frame": 24, "primit": 24, "avro": 24, "correl": 25}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"commun": 0, "question": [0, 13], "mail": 0, "list": 0, "slack": 0, "discord": 0, "sync": 0, "up": 0, "video": 0, "call": 0, "introduct": [1, 14], "pull": 1, "request": 1, "merg": 1, "pr": 1, "develop": 1, "": 1, "guid": [1, 7], "window": 1, "setup": 1, "protoc": 1, "instal": [1, 8], "bootstrap": 1, "environ": 1, "test": 1, "organ": 1, "unit": 1, "rust": [1, 15], "integr": 1, "sqllogictest": 1, "sql": [1, 3, 8, 11, 20, 24], "postgr": 1, "depend": 1, "invok": 1, "runner": 1, "benchmark": 1, "criterion": 1, "parquet": [1, 8], "upstream": 1, "suit": 1, "how": 1, "add": 1, "new": [1, 15], "scalar": [1, 22, 25], "function": [1, 5, 6, 12, 15, 16, 22], "aggreg": [1, 12, 16], "displai": 1, "plan": [1, 5], "graphic": 1, "specif": [1, 4], "format": 1, "md": 1, "document": 1, "quarterli": 2, "roadmap": [2, 3], "2022": 2, "q2": 2, "datafus": [2, 3, 7, 8, 13, 14, 15], "core": 2, "ballista": [2, 3, 13], "extens": [2, 15], "contrib": 2, "python": [2, 3], "s3": [2, 8], "tui": 
2, "bigtabl": 2, "stream": 2, "addit": 3, "languag": 3, "featur": 3, "queri": [3, 8, 11], "optim": [3, 5, 15], "datasourc": 3, "runtim": 3, "infrastructur": 3, "resourc": 3, "manag": 3, "interfac": 3, "cli": 3, "move": 3, "schedul": 3, "implement": 3, "execut": 3, "time": [3, 17], "cost": 3, "base": [3, 22], "statist": [3, 16], "invari": 5, "ration": 5, "notat": 5, "logic": 5, "column": [5, 6], "physic": 5, "data": [5, 8, 11, 17, 24], "sourc": [5, 8, 24], "registri": 5, "planner": 5, "builder": 5, "relat": 5, "name": [5, 6], "tupl": 5, "field": [5, 6], "ar": 5, "uniqu": 5, "respons": 5, "valid": 5, "schema": [5, 21], "i": [5, 13], "consist": 5, "The": 5, "under": 5, "output": [5, 6, 11], "equal": 5, "semant": 6, "rule": 6, "appendic": 6, "exampl": [6, 8, 11], "comparison": [6, 12], "other": [6, 10, 12, 22], "system": 6, "project": [6, 15], "transform": [6, 10], "oper": [6, 12], "liter": [6, 12], "apach": [7, 13], "arrow": [7, 13], "tabl": [7, 18], "content": 7, "user": [7, 12], "contributor": 7, "command": 8, "line": 8, "util": 8, "run": [8, 11], "us": [8, 11, 14, 15], "cargo": [8, 11], "homebrew": 8, "maco": 8, "docker": 8, "usag": [8, 11], "regist": 8, "csv": [8, 11], "chang": 8, "configur": [8, 9, 15], "option": 8, "set": 9, "datafram": [10, 11], "api": [10, 11], "action": 10, "method": 10, "updat": 11, "toml": 11, "against": 11, "store": 11, "process": 11, "from": [11, 23], "both": 11, "identifi": [11, 12], "capit": 11, "express": [12, 22], "valu": 12, "boolean": [12, 17], "math": [12, 22], "bitwis": 12, "condit": [12, 22], "string": [12, 22], "regular": [12, 22], "tempor": [12, 22], "subqueri": [12, 25], "defin": 12, "frequent": 13, "ask": 13, "what": 13, "relationship": 13, "between": 13, "case": 14, "why": 14, "librari": 15, "creat": [15, 18], "default": 15, "main": 15, "version": 15, "compat": 15, "gener": [16, 24], "approxim": 16, "approx_distinct": 16, "approx_median": 16, "approx_percentile_cont": 16, "approx_percentile_cont_with_weight": 16, "type": 17, 
"charact": 17, "numer": 17, "date": 17, "binari": 17, "unsupport": 17, "ddl": 18, "extern": 18, "drop": 18, "view": 18, "explain": 19, "analyz": 19, "refer": 20, "inform": 21, "ab": 22, "x": 22, "aco": 22, "asin": 22, "atan": 22, "atan2": 22, "y": 22, "ceil": 22, "co": 22, "exp": 22, "floor": 22, "ln": 22, "log10": 22, "log2": 22, "power": 22, "expon": 22, "round": 22, "signum": 22, "sin": 22, "sqrt": 22, "tan": 22, "trunc": 22, "coalesc": 22, "nullif": 22, "ascii": 22, "bit_length": 22, "btrim": 22, "char_length": 22, "character_length": 22, "concat": 22, "concat_w": 22, "chr": 22, "initcap": 22, "left": [22, 23], "length": 22, "lower": 22, "lpad": 22, "ltrim": 22, "md5": 22, "octet_length": 22, "repeat": 22, "replac": 22, "revers": 22, "right": [22, 23], "rpad": 22, "rtrim": 22, "digest": 22, "split_part": 22, "starts_with": 22, "strpo": 22, "substr": 22, "translat": 22, "trim": 22, "upper": 22, "regexp_match": 22, "regexp_replac": 22, "to_timestamp": 22, "to_timestamp_milli": 22, "to_timestamp_micro": 22, "to_timestamp_second": 22, "extract": 22, "date_part": 22, "date_trunc": 22, "date_bin": 22, "from_unixtim": 22, "now": 22, "arrai": 22, "in_list": 22, "random": 22, "sha224": 22, "sha256": 22, "sha384": 22, "sha512": 22, "struct": 22, "to_hex": 22, "select": 23, "syntax": 23, "WITH": 23, "claus": 23, "where": 23, "join": 23, "inner": 23, "outer": 23, "full": 23, "cross": 23, "group": 23, "BY": 23, "have": 23, "union": 23, "order": 23, "limit": 23, "statu": 24, "support": 24, "exist": 25, "NOT": 25, "IN": 25}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["contributor-guide/communication", "contributor-guide/index", 
"contributor-guide/quarterly_roadmap", "contributor-guide/roadmap", "contributor-guide/specification/index", "contributor-guide/specification/invariants", "contributor-guide/specification/output-field-name-semantic", "index", "user-guide/cli", "user-guide/configs", "user-guide/dataframe", "user-guide/example-usage", "user-guide/expressions", "user-guide/faq", "user-guide/introduction", "user-guide/library", "user-guide/sql/aggregate_functions", "user-guide/sql/data_types", "user-guide/sql/ddl", "user-guide/sql/explain", "user-guide/sql/index", "user-guide/sql/information_schema", "user-guide/sql/scalar_functions", "user-guide/sql/select", "user-guide/sql/sql_status", "user-guide/sql/subqueries"], "filenames": ["contributor-guide/communication.md", "contributor-guide/index.md", "contributor-guide/quarterly_roadmap.md", "contributor-guide/roadmap.md", "contributor-guide/specification/index.rst", "contributor-guide/specification/invariants.md", "contributor-guide/specification/output-field-name-semantic.md", "index.rst", "user-guide/cli.md", "user-guide/configs.md", "user-guide/dataframe.md", "user-guide/example-usage.md", "user-guide/expressions.md", "user-guide/faq.md", "user-guide/introduction.md", "user-guide/library.md", "user-guide/sql/aggregate_functions.md", "user-guide/sql/data_types.md", "user-guide/sql/ddl.md", "user-guide/sql/explain.md", "user-guide/sql/index.rst", "user-guide/sql/information_schema.md", "user-guide/sql/scalar_functions.md", "user-guide/sql/select.md", "user-guide/sql/sql_status.md", "user-guide/sql/subqueries.md"], "titles": ["Communication", "Introduction", "Quarterly Roadmap", "Roadmap", "Specifications", "Invariants", "Output field name semantics", "Apache Arrow DataFusion", "DataFusion Command-line SQL Utility", "Configuration Settings", "DataFrame API", "Example Usage", "Expressions", "Frequently Asked Questions", "Introduction", "Using DataFusion as a library", "Aggregate Functions", "Data Types", "DDL", "EXPLAIN", "SQL Reference", 
"Information Schema", "Scalar Functions", "SELECT syntax", "Status", "Subqueries"], "terms": {"we": [0, 1, 3, 15, 23], "welcom": [0, 1, 3], "particip": 0, "from": [0, 1, 2, 3, 5, 6, 8, 9, 10, 15, 17, 18, 19, 20, 21, 22, 24, 25], "everyon": 0, "encourag": [0, 1], "you": [0, 1, 3, 8, 9, 11, 15, 16, 19, 23], "join": [0, 3, 5, 6, 9, 10, 20, 24], "u": [0, 1, 8], "ask": [0, 7], "get": [0, 1, 5, 15], "involv": 0, "all": [0, 1, 3, 5, 6, 8, 10, 11, 12, 15, 21, 22, 23, 24, 25], "apach": [0, 3, 8, 14], "arrow": [0, 1, 3, 5, 6, 8, 14, 17], "datafus": [0, 1, 5, 6, 9, 10, 11, 16, 17, 18, 21, 23, 25], "project": [0, 1, 2, 3, 5, 10, 19, 24], "i": [0, 1, 2, 3, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 22, 23, 25], "govern": [0, 3], "softwar": [0, 3, 8], "foundat": [0, 3, 14], "": [0, 3, 5, 7, 8, 9, 10, 14, 18, 25], "code": [0, 1, 5, 7], "conduct": [0, 7], "The": [0, 1, 3, 6, 8, 9, 10, 17, 18, 19, 22, 23, 25], "vast": 0, "major": [0, 1], "occur": [0, 17], "open": 0, "our": [0, 1, 5], "github": [0, 3, 8], "repositori": [0, 8], "us": [0, 1, 2, 3, 5, 7, 9, 10, 12, 13, 16, 17, 18, 19, 21, 22, 23, 25], "org": [0, 3, 8], "dev": [0, 1, 3, 8], "manag": [0, 1, 2], "releas": [0, 1, 15], "coordin": 0, "design": [0, 1, 2, 13, 14, 15], "discuss": 0, "subscrib": 0, "unsubscrib": 0, "archiv": 0, "when": [0, 1, 3, 5, 9, 10, 12, 17, 22, 23], "email": 0, "pleas": [0, 11, 16, 23], "make": [0, 1, 2, 3, 23], "sure": [0, 23], "prefix": 0, "subject": [0, 1], "line": [0, 1, 7], "tag": [0, 8], "e": [0, 1, 3, 5, 9, 16], "g": [0, 1, 3, 5, 9, 16], "new": [0, 2, 3, 5, 7, 9, 10, 11], "api": [0, 1, 2, 3, 6, 7, 12, 14], "remot": [0, 3], "data": [0, 1, 2, 3, 6, 9, 10, 12, 13, 14, 16, 18, 20, 22, 23], "sourc": [0, 1, 2, 14, 15, 18, 19, 20, 22], "so": [0, 1, 3, 5, 8, 9, 10, 11], "appropri": [0, 8], "peopl": [0, 1, 3], "notic": 0, "messag": [0, 9], "offici": [0, 8], "asf": 0, "workspac": 0, "inform": [0, 8, 9, 18, 19, 20], "thi": [0, 1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 15, 17, 21, 22, 23], "great": [0, 1], "place": 0, 
"meet": 0, "other": [0, 1, 3, 5, 8, 15, 20, 24], "contributor": [0, 1, 2], "guidanc": 0, "where": [0, 1, 3, 5, 10, 12, 16, 20, 24, 25], "contribut": [0, 1, 3], "rust": [0, 2, 3, 5, 7, 12, 14], "channel": 0, "also": [0, 1, 5, 8, 9, 13, 18, 22], "have": [0, 1, 3, 5, 10, 20, 24], "backup": 0, "server": 0, "invit": 0, "link": 0, "case": [0, 3, 8, 11, 12, 23, 24], "ar": [0, 1, 3, 8, 9, 10, 11, 12, 15, 16, 17, 18, 22, 23, 25], "abl": 0, "If": [0, 1, 9, 18, 19], "need": [0, 1, 5, 8, 14, 19, 22], "an": [0, 1, 5, 8, 10, 14, 15, 16, 18, 22, 25], "can": [0, 1, 3, 5, 8, 9, 10, 12, 14, 15, 16, 18, 21, 22, 23, 25], "one": [0, 1, 9, 10, 12, 25], "biweekli": 0, "everi": [0, 3, 5, 23], "thursdai": 0, "both": [0, 1, 2, 5, 6, 14, 23], "04": 0, "00": [0, 9, 22], "utc": [0, 8, 21, 22], "16": [0, 19], "start": [0, 1, 3, 15], "septemb": 0, "30": 0, "2021": [0, 18], "depend": [0, 5, 15], "item": [0, 3], "agenda": 0, "someon": 0, "being": [0, 1, 5], "willing": 0, "host": 0, "see": [0, 1, 3, 8, 23], "add": [0, 2, 3, 7, 9, 10, 11, 15], "topic": 0, "what": [0, 1, 5, 9, 22], "plan": [0, 2, 3, 6, 7, 9, 10, 11, 14, 15, 19, 22], "goal": [0, 2, 3], "help": [0, 1, 3, 8, 9], "put": [0, 11], "face": 0, "name": [0, 1, 3, 4, 7, 8, 9, 10, 11, 19, 21, 22, 23], "some": [0, 3, 5, 9, 10, 11, 12], "work": [0, 1, 2, 8, 9, 11, 14], "synchron": 0, "initi": [0, 2], "differ": [0, 1, 5, 9, 25], "stakehold": 0, "identifi": [0, 1, 5, 7, 22], "area": 0, "more": [0, 1, 2, 3, 5, 8, 9, 12, 16, 19, 23, 25], "align": 0, "No": 0, "decis": 0, "made": [0, 11, 23], "anyth": 0, "substanc": 0, "issu": [0, 1, 3, 7], "googl": 0, "doc": [0, 1, 8, 10, 15], "send": 0, "summari": 0, "kind": 1, "ticket": 1, "report": [1, 5], "featur": [1, 15], "improv": [1, 2, 3], "review": [1, 3], "In": [1, 5, 11, 12, 23], "addit": [1, 2, 9, 10, 12], "submit": [1, 3], "healthi": 1, "tradit": 1, "commun": [1, 2, 3, 7], "member": 1, "each": [1, 5, 8, 9, 10], "do": [1, 5, 9, 15], "wai": [1, 8, 22], "well": [1, 5, 8, 10, 14, 15], "familiar": [1, 5], 
"relev": 1, "codebas": [1, 3, 5], "find": [1, 25], "curat": 1, "good": 1, "first": [1, 12, 15, 22], "list": [1, 3, 8, 9, 12, 16, 18, 24], "anyon": 1, "veri": [1, 14], "activ": 1, "fast": [1, 14], "move": 1, "try": [1, 3, 8, 9, 19], "quickli": 1, "keep": 1, "backlog": 1, "down": [1, 3, 9, 24], "pace": 1, "up": [1, 5, 10], "after": [1, 3, 9, 10, 15, 23], "approv": 1, "mani": [1, 12, 25], "commit": 1, "access": [1, 9, 21], "your": [1, 8, 11, 14, 15], "bandwidth": 1, "current": [1, 5, 8, 12, 15, 17, 21, 22], "most": 1, "limit": [1, 2, 3, 10, 11, 19, 20, 24], "resourc": [1, 2], "highli": [1, 9], "broader": [1, 3], "wait": 1, "consid": [1, 11], "Such": 1, "learn": 1, "becom": [1, 3], "expert": 1, "lack": 1, "coverag": 1, "address": 1, "futur": [1, 5], "faster": 1, "effici": [1, 2, 3, 5, 9, 14], "sinc": [1, 5, 9, 22], "worldwid": 1, "timezon": [1, 22], "who": 1, "comment": 1, "To": [1, 9, 11, 15, 21], "ensur": [1, 3, 5], "wish": 1, "ha": [1, 3, 9, 10, 11], "opportun": 1, "committ": 1, "least": [1, 15], "24": 1, "hour": [1, 9, 22], "pass": [1, 5, 9, 15], "between": [1, 2, 3, 9, 16], "A": [1, 2, 3, 5, 10, 11, 16, 23, 25], "mean": [1, 5, 10, 16], "substanti": 1, "chang": [1, 23], "appli": [1, 6, 9, 10], "best": [1, 3], "judgment": 1, "determin": [1, 9], "constitut": 1, "minor": 1, "might": 1, "without": [1, 3, 5, 8], "delai": 1, "again": 1, "exampl": [1, 3, 5, 7, 9, 10, 12, 15, 23, 25], "potenti": 1, "small": [1, 9], "bug": 1, "fix": 1, "non": [1, 9, 23], "controversi": 1, "build": [1, 3, 5, 8, 10, 14, 15, 16], "relat": [1, 6, 25], "clippi": 1, "version": [1, 5, 8], "upgrad": 1, "etc": [1, 3, 5], "smaller": [1, 3], "section": [1, 5, 15], "describ": [1, 3, 5, 8, 10, 23], "wget": 1, "http": [1, 2, 8], "az792536": 1, "vo": 1, "msecnd": 1, "net": 1, "vm": 1, "vmbuild_20190311": 1, "virtualbox": 1, "msedg": 1, "win10": 1, "zip": 1, "choco": 1, "y": [1, 12, 23, 25], "git": [1, 8], "rustup": [1, 15], "visualcpp": 1, "tool": [1, 8], "bash": 1, "ex": [1, 11], "cargo": [1, 15], 
"compil": [1, 2, 5, 15], "requir": [1, 9, 15], "protobuf": [1, 3], "On": 1, "platform": [1, 3, 13], "system": [1, 5, 9, 14, 16, 17], "packag": 1, "apt": [1, 3], "dnf": 1, "pacman": 1, "brew": [1, 3, 8], "want": [1, 14, 15, 23], "verifi": 1, "3": [1, 6, 8, 9, 18, 22], "12": [1, 8, 18, 22], "greater": [1, 12, 22], "which": [1, 3, 5, 9, 13, 14, 16, 23], "introduc": 1, "support": [1, 2, 3, 8, 9, 14, 16, 17, 20, 21, 22, 23, 25], "explicit": [1, 2], "field": [1, 3, 4, 7, 22, 23], "presenc": 1, "older": [1, 15], "mai": [1, 5, 8, 9], "fail": [1, 9], "libprotoc": 1, "4": [1, 6, 8, 18], "altern": 1, "binari": [1, 20], "download": [1, 8], "page": [1, 9], "built": [1, 3, 5, 8, 13], "written": [1, 9, 14], "standard": [1, 13, 16, 22], "toolkit": 1, "fmt": 1, "updat": [1, 15], "stabl": [1, 15], "latest": [1, 8, 15], "submodul": 1, "init": 1, "instruct": [1, 15], "ci": 1, "script": [1, 3], "rust_fmt": 1, "sh": 1, "rust_clippi": 1, "rust_toml_fmt": 1, "run": [1, 3, 5, 9, 13, 15], "them": [1, 10, 23], "onc": [1, 3], "rust_lint": 1, "sever": [1, 15], "level": [1, 3, 5, 9], "its": [1, 5, 8, 10, 12, 14, 22], "pyramid": 1, "tri": 1, "follow": [1, 5, 8, 9, 10, 11, 15, 17, 23, 25], "book": 1, "highlight": 1, "import": [1, 5, 10, 15], "modul": 1, "exist": [1, 5, 12, 18, 20], "individu": 1, "defin": [1, 5, 8, 15, 17, 23], "same": [1, 3, 5, 9, 10, 22], "file": [1, 2, 3, 5, 6, 9, 11, 15, 18], "convent": 1, "There": [1, 3, 8], "public": [1, 9, 21], "interfac": [1, 5, 14], "librari": [1, 7, 13], "directori": [1, 8, 15], "command": [1, 3, 7, 17, 19, 21], "p": [1, 8, 16], "sql_integr": 1, "One": [1, 9], "valid": [1, 22], "abil": 1, "larg": [1, 3], "assort": 1, "queri": [1, 2, 5, 6, 9, 10, 13, 14, 15, 16, 17, 18, 23, 24, 25], "against": [1, 8, 14, 15, 23], "driven": [1, 3], "benefit": 1, "includ": [1, 2, 5, 10, 23], "easier": 1, "write": [1, 2, 5, 10], "maintain": [1, 10], "process": [1, 3, 9, 13, 14], "migrat": [1, 3], "possibl": [1, 3, 8, 18], "contain": [1, 6, 8, 9], "har": 1, "certain": [1, 
5], "compar": 1, "result": [1, 3, 5, 6, 8, 9, 10, 11, 15, 16, 18, 23, 25], "export": [1, 8], "postgres_db": 1, "postgres_us": 1, "postgres_host": 1, "localhost": 1, "postgres_port": 1, "5432": 1, "python": 1, "m": 1, "pip": 1, "setuptool": 1, "wheel": 1, "r": [1, 8, 10, 15, 17], "txt": 1, "pytest": 1, "v": [1, 8], "test_psql_par": 1, "py": 1, "creat": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 14, 17, 20, 24], "psql": 1, "d": [1, 5, 8], "h": [1, 8], "c": [1, 8, 11, 15, 23], "tabl": [1, 6, 8, 9, 11, 15, 17, 19, 20, 21, 23, 24, 25], "IF": [1, 18], "NOT": [1, 8, 18, 20], "c1": [1, 5, 6, 8, 10, 18], "charact": [1, 20], "vari": 1, "null": [1, 5, 8, 9, 12, 18, 22, 23], "c2": [1, 5, 6, 8, 10, 18], "integ": [1, 12, 17, 22, 23], "c3": [1, 8, 18], "smallint": [1, 8, 17, 18], "c4": [1, 8, 18], "c5": [1, 8, 18], "c6": [1, 8, 18], "bigint": [1, 8, 17, 18], "c7": [1, 8, 18], "c8": [1, 8, 18], "c9": [1, 8, 18], "c10": [1, 8, 18], "c11": [1, 8, 18], "doubl": [1, 6, 8, 11, 17, 18, 23], "precis": [1, 17], "c12": [1, 8, 18], "c13": [1, 8, 18], "copi": 1, "pwd": 1, "csv": [1, 10, 14, 18, 19, 24], "aggregate_test_100": [1, 8, 18], "WITH": [1, 8, 18, 20], "header": [1, 8, 9, 18], "true": [1, 8, 9, 15, 21], "statist": [1, 9, 20], "micro": 1, "framework": [1, 3, 14], "evalu": [1, 5, 9, 10], "perform": [1, 2, 3, 5, 10, 11, 12, 13, 14, 17, 22], "path": [1, 5, 8, 18, 19], "particular": [1, 5], "optimis": 1, "effort": 1, "prevent": 1, "regress": 1, "within": [1, 13], "given": [1, 5, 6, 25], "bench": 1, "benchmark_nam": 1, "full": [1, 24], "found": [1, 25], "here": [1, 10, 12, 22, 25], "advanc": [1, 3], "parquet_query_sql": 1, "These": [1, 3, 10, 12], "randomli": 1, "gener": [1, 2, 6, 20], "therefor": 1, "quick": 1, "variabl": [1, 8, 9], "parquet_fil": 1, "set": [1, 5, 7, 8, 10, 15, 16, 17, 21, 24, 25], "instead": [1, 8, 9], "multipl": [1, 2, 9], "custom": [1, 15, 17, 24], "dataset": 1, "automat": 1, "remov": [1, 18], "ani": [1, 5, 8, 9, 10, 14], "exit": [1, 8], "howev": [1, 3, 22], "interrupt": 1, 
"ctrl": 1, "analys": 1, "fact": 1, "preserv": [1, 10], "subsequ": 1, "valuabl": 1, "implement": [1, 2, 5, 12], "engin": [1, 3, 8, 14, 16], "below": [1, 8, 12, 15, 25], "checklist": 1, "actual": [1, 5, 10], "string": [1, 5, 6, 17, 20, 24], "math": [1, 20, 24], "datetim": [1, 9, 17], "physic": [1, 3, 9, 19], "expr": [1, 12, 19], "src": [1, 15], "variant": 1, "builtinscalarfunct": 1, "entri": [1, 5], "fromstr": 1, "call": [1, 6, 10], "return_typ": 1, "expect": [1, 5, 16], "return": [1, 5, 10, 12, 16, 22, 23], "type": [1, 5, 9, 10, 20, 22, 24], "incom": [1, 5], "signatur": 1, "number": [1, 3, 5, 9, 10, 16, 23], "argument": [1, 5, 6, 12, 22], "create_physical_expr": 1, "create_physical_fun": 1, "map": [1, 5, 17], "core": [1, 3, 8, 9], "through": 1, "known": [1, 5], "expr_fn": 1, "unary_scalar_expr": 1, "macro": 1, "accumul": 1, "aggregateexpr": 1, "aggregatefunct": 1, "create_aggregate_expr": 1, "repres": [1, 5, 10], "logicalplan": [1, 5, 15], "node": [1, 3, 5, 15], "render": 1, "graphviz": 1, "save": 1, "output": [1, 4, 7, 9, 10, 19, 22], "display_graphviz": 1, "somehow": 1, "let": [1, 10, 11, 15], "mut": 1, "tmp": [1, 19], "dot": 1, "Then": [1, 15], "For": [1, 3, 8, 9, 15], "pdf": 1, "tpdf": 1, "formal": 1, "semant": [1, 4, 7], "behavior": [1, 11], "refer": [1, 5, 7, 10, 12, 16, 23], "resolv": 1, "ambigu": 1, "dure": [1, 2, 3, 5, 9, 16], "propos": 1, "fit": 1, "invari": [1, 4, 7], "store": [1, 8, 9, 16, 18], "folder": 1, "prettier": 1, "either": [1, 10, 23], "npm": 1, "global": 1, "npx": 1, "standalon": 1, "recommend": 1, "ad": [1, 23], "2": [1, 6, 8, 11, 12, 15, 18, 19, 22, 23, 25], "0": [1, 8, 9, 10, 11, 12, 15, 16, 19, 22, 23, 25], "ve": 1, "confirm": 1, "w": [1, 16], "cli": [1, 8, 9], "publish": [2, 3, 8, 15], "give": [2, 23], "visibl": 2, "prioriti": 2, "bind": 2, "io": [2, 8, 15], "read": [2, 3, 5, 9], "regist": [2, 5, 11, 15, 18], "format": [2, 3, 5, 7, 8, 9, 10, 14], "datafram": [2, 3, 6, 7, 12, 14], "sql": [2, 6, 7, 9, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23], 
"option": [2, 9, 10, 21], "partit": [2, 5, 9, 10, 14, 18, 19, 24], "metadata": [2, 5, 9, 21], "schedul": 2, "predict": [2, 3], "observ": 2, "cpu": [2, 3, 9, 15], "bound": 2, "develop": [2, 3, 5, 7], "stori": 2, "parallel": [2, 3, 9, 13, 14, 24], "execut": [2, 5, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 21, 24], "memori": [2, 3, 9, 10, 13, 14, 15, 16, 18], "oper": [2, 5, 9, 13, 16, 17, 22, 24], "incorpor": 2, "row": [2, 8, 9, 10, 18, 23, 25], "aggreg": [2, 3, 5, 7, 9, 10, 11, 15, 19, 20, 24], "benchmark": [2, 3, 7], "explor": 2, "jit": 2, "complex": [2, 5], "express": [2, 6, 7, 9, 10, 16, 20, 23, 24, 25], "llvm": 2, "inlin": 2, "function": [2, 3, 7, 8, 9, 10, 20, 24], "primari": 2, "sort": [2, 3, 10, 24], "merg": [2, 7, 16], "document": [2, 3, 5, 6, 7, 10, 12, 15, 23], "websit": 2, "streamprovid": 2, "trait": 2, "product": [2, 14, 23], "readi": 2, "shuffl": 2, "cleanup": 2, "fill": 2, "gap": 2, "task": [2, 3, 8], "exchang": 2, "better": [2, 3], "error": [2, 3, 5, 9, 11, 15], "handl": [2, 3], "failur": 2, "executor": [2, 3], "lost": 2, "restart": 2, "monitor": 2, "log": 2, "auto": 2, "scale": [2, 3, 17], "multi": [2, 15], "deploy": 2, "resili": 2, "fault": 2, "toler": 2, "ultim": 2, "shard": 2, "scalabl": 2, "cach": [2, 3], "group": [2, 3, 9, 10, 11, 19, 20], "base": [2, 5, 6, 9, 10, 12, 15, 18, 21, 23, 25], "alloc": [2, 15], "miss": 2, "sessioncontext": [2, 10, 11, 15], "editor": 2, "expos": [2, 9], "context": [2, 3, 5, 25], "hdf": 2, "timestamp": [2, 3, 17, 22, 24], "rang": 2, "predic": [2, 9, 24], "pushdown": [2, 24], "thread": [2, 13, 14, 15], "awar": [2, 11, 23], "sdk": 2, "experiment": [2, 17], "high": [3, 14], "It": [3, 5, 8, 12, 13, 15, 16, 18, 22, 23], "meant": 3, "restrict": 3, "rather": 3, "newcom": 3, "understand": 3, "head": 3, "inspir": 3, "part": [3, 10, 14], "model": [3, 5, 13, 14, 22], "entir": [3, 9], "volunt": 3, "befor": [3, 9, 10], "pr": [3, 7], "strongli": 3, "suggest": 3, "convers": [3, 5, 22], "mail": 3, "avoid": [3, 9], "surpris": 3, "embed": 
[3, 9], "choic": 3, "analyt": 3, "applic": [3, 9, 15], "leverag": [3, 9, 14], "uniqu": [3, 16], "provid": [3, 8, 9, 13, 14, 15, 18], "class": 3, "singl": [3, 5, 8, 9, 10, 13, 16, 25], "declar": [3, 5], "compat": [3, 8, 9, 24], "postgresql": [3, 6, 16], "similar": [3, 10, 22], "those": 3, "offer": [3, 5], "panda": [3, 10], "spark": [3, 6, 10, 16], "procedur": 3, "programmat": 3, "race": 3, "free": 3, "ergonom": 3, "extens": [3, 14], "point": [3, 14, 15], "layer": 3, "decim": [3, 17], "122": 3, "complet": 3, "statu": [3, 20], "arithmet": 3, "194": 3, "parser": [3, 24], "533": 3, "nest": [3, 24], "structur": 3, "struct": [3, 10, 12], "119": 3, "tpch": 3, "mileston": 3, "detail": [3, 8, 19], "sophist": 3, "order": [3, 8, 9, 15, 20, 24], "tokomak": 3, "440": 3, "finer": 3, "filesystem": 3, "s3": 3, "local": [3, 8], "907": 3, "1060": 3, "async": [3, 11, 15], "reader": [3, 5, 9], "chunk": 3, "prefetch": 3, "capabl": [3, 14], "arrow2": 3, "h2oai": 3, "db": 3, "147": 3, "348": 3, "grain": 3, "control": [3, 9], "587": 3, "usag": [3, 7, 15, 16], "54": 3, "tbd": 3, "note": [3, 5, 8, 10, 12, 15, 17, 22], "thought": 3, "vision": 3, "1096": 3, "abstract": 3, "repl": 3, "pars": [3, 9], "separ": [3, 6], "correctli": 3, "connect": [3, 8, 14], "subsystem": 3, "print": [3, 8, 9, 10, 11, 15], "out": [3, 8, 9, 10], "stat": 3, "debug": 3, "interact": [3, 8], "shell": 3, "nuget": 3, "registri": [3, 10], "easili": 3, "adopt": 3, "shorter": 3, "like": [3, 8, 9], "dfcli": 3, "distribut": [3, 13], "comput": [3, 5, 13, 16], "break": 3, "stage": 3, "across": 3, "avail": [3, 8, 10, 12, 21], "cluster": 3, "remain": 3, "suitabl": [3, 16], "serial": 3, "thei": [3, 5, 9], "agnost": 3, "than": [3, 8, 9, 12, 22], "advantag": 3, "over": [3, 9, 10], "becaus": [3, 5], "doesn": [3, 18], "t": [3, 11, 18, 21, 22, 23], "eagerli": 3, "direction": 3, "acycl": 3, "graph": 3, "dag": 3, "configur": [3, 7, 21], "concurr": [3, 9], "should": [3, 5, 6, 9, 23], "push": [3, 9, 24], "logic": [3, 9, 10, 12, 14, 19], 
"accur": [3, 16], "could": [3, 9], "hash": [3, 19], "desir": 3, "load": [3, 9], "side": [3, 9, 23], "cannot": [3, 9], "until": 3, "enumer": 5, "plane": 5, "enforc": 5, "assum": 5, "recordbatch": [5, 10], "arrai": [5, 12, 17, 24], "top": [5, 9], "dynam": 5, "object": 5, "as_ani": 5, "downcast": 5, "itself": [5, 14], "static": [5, 15], "int32arrai": 5, "data_typ": [5, 21], "respect": 5, "alwai": 5, "time": [5, 9, 12, 15, 16, 20, 22, 24], "onli": [5, 9, 10, 12, 22, 23, 25], "runtim": 5, "program": 5, "principl": 5, "user": [5, 6, 14, 15, 18], "know": 5, "datatyp": [5, 17], "nullabl": [5, 9, 10], "flag": 5, "bool": 5, "whether": 5, "valu": [5, 6, 8, 9, 16, 18, 22, 23, 24, 25], "pf": 5, "lf": 5, "root": [5, 8, 12, 22], "vector": [5, 10], "record": [5, 6, 9], "batch": [5, 6, 8, 9], "how": [5, 6, 7], "deriv": 5, "input": [5, 9, 10, 16], "logical_field": 5, "lf1": 5, "lf2": 5, "plu": [5, 6], "b": [5, 6, 8, 10, 11, 12, 19, 23], "none": [5, 17, 19, 22], "get_supertyp": 5, "length": [5, 9, 12, 24], "u32": [5, 22], "tree": [5, 15], "compos": 5, "AS": [5, 8, 18, 22, 23], "sum12": 5, "scan": [5, 9, 18, 23], "u64": 5, "frozen": 5, "while": [5, 9], "child": 5, "physical_field": 5, "pf1": 5, "pf2": 5, "whose": 5, "element": 5, "sum": [5, 12, 16, 19, 25], "two": [5, 9, 10], "togeth": [5, 12], "smallest": 5, "str": 5, "count": [5, 12, 16, 23], "byte": [5, 9], "doe": [5, 9, 22], "sole": 5, "properti": 5, "index": [5, 9, 19], "associ": 5, "necessari": [5, 8, 18], "physicalplan": 5, "accept": [5, 12], "manner": 5, "hardwar": 5, "environ": [5, 7, 8, 9], "extra": 5, "paramet": [5, 22], "logical_plan": [5, 19], "param": 5, "subsect": 5, "min": [5, 9, 10, 11, 12, 16], "tableprovid": [5, 9, 15], "likewis": 5, "specif": [5, 6, 7, 14, 21], "my_op": 5, "thu": [5, 17], "word": 5, "understood": 5, "must": [5, 6, 9, 10, 11, 22], "guarante": 5, "select": [5, 6, 9, 10, 11, 12, 15, 18, 19, 20, 21, 22, 25], "t1": [5, 6], "id": [5, 6, 10], "t2": [5, 6], "unambigu": 5, "violat": 5, "content": 5, 
"correspond": [5, 8], "particularli": 5, "intermediari": 5, "calcul": [5, 10], "i32": 5, "i64": 5, "computation": 5, "expens": 5, "panic": 5, "satisfi": 5, "match": [5, 10, 23, 25], "claim": 5, "int32": [5, 17], "safe": 5, "parquet": [5, 9, 10, 14, 18, 24], "branch": 5, "combin": 5, "equival": [5, 12, 22], "physical_schema": 5, "qualifi": [5, 6], "strip": 5, "strip_rel": 5, "f": [5, 6, 8], "reli": 5, "simpl": [5, 11], "corollari": 5, "triplet": 5, "rule": [5, 9], "physical_plan": [5, 19], "consum": [5, 9], "downscal": 5, "jeopard": 5, "referenc": [5, 23], "assumpt": 5, "about": [5, 21], "bare": 6, "df": [6, 10, 11, 15], "select_column": [6, 10], "compound": 6, "foo": [6, 12], "bar": 6, "convert": [6, 8, 22], "lowercas": 6, "avg": [6, 12, 16], "wrap": 6, "quot": [6, 8, 11, 23], "parenthes": 6, "operand": 6, "space": [6, 22], "1": [6, 8, 11, 12, 15, 16, 18, 19, 21, 22, 23, 25], "comma": 6, "vec": [6, 10, 11], "udf": [6, 10, 15, 24], "col": [6, 10, 11, 12], "schema": [6, 8, 9, 10, 17, 18, 20, 23, 24], "test": [6, 7, 8, 10, 11, 14, 15, 18], "sampl": 6, "int": [6, 8, 17, 18], "varchar": [6, 8, 17, 18], "5": [6, 12, 16, 18], "insert": [6, 9], "INTO": 6, "hello": [6, 12, 18], "world": 6, "ON": [6, 23], "mysql": 6, "8": [6, 8, 9, 22], "13": 6, "sqlite": 6, "ab": [6, 10, 12], "foo_bar": 6, "7": [6, 12], "introduct": 7, "capit": [7, 23], "util": 7, "frequent": 7, "question": 7, "crate": [7, 15], "pull": 7, "request": 7, "window": [7, 9, 24], "setup": 7, "protoc": 7, "instal": [7, 15], "bootstrap": 7, "organ": 7, "scalar": [7, 20], "displai": [7, 9, 12, 22], "graphic": 7, "md": 7, "roadmap": 7, "ballista": 7, "quarterli": 7, "2022": [7, 18], "q2": 7, "tracker": 7, "conveni": [8, 14], "own": [8, 15], "echo": 8, "compress": 8, "avro": [8, 24], "json": [8, 10, 24], "v17": 8, "took": 8, "007": 8, "second": [8, 22], "l": 8, "data_dir": 8, "data2": 8, "v16": 8, "easiest": 8, "spin": 8, "via": [8, 9], "pre": [8, 16], "ghcr": 8, "v2": 8, "manifest": 8, "100": [8, 10, 11, 16], "blob": 
[8, 17], "sha256": [8, 12], "9ecc8a01be47ceb9a53b39976696afa87c0a8": 8, "pkg": 8, "githubusercont": 8, "com": 8, "ghcr1": 8, "9ecc8a01be47ceb9a53b39976": 8, "pour": 8, "big_sur": 8, "bottl": 8, "tar": 8, "gz": 8, "usr": 8, "cellar": 8, "9": [8, 22], "17": 8, "4mb": 8, "imag": 8, "clone": 8, "dockerignor": 8, "delet": 8, "checkout": 8, "cd": [8, 15], "dockerfil": 8, "your_data_loc": 8, "client": 8, "size": [8, 9], "batch_siz": [8, 9, 21], "default": [8, 9, 12, 16, 22, 23], "tsv": 8, "nd": 8, "data_path": 8, "q": 8, "quiet": 8, "reduc": [8, 9], "quietli": 8, "rc": 8, "startup": 8, "datafusionrc": 8, "enclos": 8, "shown": 8, "back": 8, "explicitli": 8, "extern": [8, 17, 20], "statement": [8, 9, 10, 18, 19, 22], "taxi": [8, 18], "locat": [8, 9, 18], "mnt": [8, 18], "nyctaxi": [8, 18], "tripdata": [8, 18], "float": [8, 17, 18], "aws_default_region": 8, "aws_access_key_id": 8, "aws_secret_access_kei": 8, "access_key_id": 8, "secret_access_kei": 8, "region": 8, "aws_endpoint": 8, "endpoint": 8, "aws_session_token": 8, "token": 8, "aws_container_credentials_relative_uri": 8, "aw": 8, "amazon": 8, "amazonec": 8, "developerguid": 8, "iam": 8, "role": 8, "html": 8, "aws_allow_http": 8, "permit": 8, "tl": 8, "cp": 8, "my": 8, "bucket": 8, "upload": 8, "east": 8, "v14": 8, "374": 8, "column_1": [8, 23, 25], "column_2": [8, 23, 25], "171": 8, "insid": 8, "quit": 8, "listtabl": 8, "describet": 8, "table_nam": [8, 18, 21], "quietmod": 8, "fals": [8, 9, 19, 21], "search": 8, "show": [8, 10, 11, 15, 19, 21, 23, 24], "8192": [8, 9, 21], "coalesce_batch": [8, 9, 21], "coalesce_target_batch_s": [8, 21], "4096": [8, 19, 21], "time_zon": [8, 9, 21], "explain": [8, 9, 10, 20, 21], "logical_plan_onli": [8, 9, 21], "physical_plan_onli": [8, 9, 21], "optim": [8, 9, 10, 14, 21, 24], "filter_null_join_kei": [8, 9, 21], "skip_failed_rul": [8, 9, 21], "1024": 8, "seen": 8, "abov": 8, "look": 8, "upper": [8, 12], "_": 8, "would": [8, 9], "datafusion_execution_batch_s": [8, 9], "v12": 8, "002": 8, 
"v13": 8, "011": 8, "000": 8, "005": 8, "sessionconfig": 9, "variou": 9, "aspect": 9, "construct": 9, "session": [9, 21], "from_env": 9, "kei": [9, 10], "transform": 9, "uppercas": 9, "period": 9, "replac": [9, 12, 18, 24], "underscor": 9, "accord": [9, 17], "cast": [9, 17, 22, 24], "utf8": [9, 17], "warn": 9, "emit": 9, "initialis": 9, "beforehand": 9, "affect": 9, "descript": 9, "catalog": [9, 18, 21], "create_default_catalog_and_schema": 9, "increas": [9, 15], "default_catalog": 9, "impact": 9, "specifi": [9, 10, 18, 19, 22], "default_schema": 9, "information_schema": [9, 21, 24], "virtual": [9, 18], "has_head": [9, 19], "especi": 9, "buffer": 9, "tini": 9, "too": 9, "much": 9, "consumpt": 9, "examin": 9, "coalesc": [9, 12, 24], "larger": 9, "filter": [9, 10, 11, 12, 24, 25], "produc": [9, 23, 25], "target": [9, 15], "collect_statist": 9, "collect": [9, 10], "target_partit": 9, "zone": 9, "extract": [9, 24], "some_tim": 9, "shift": [9, 12], "underli": 9, "enable_page_index": 9, "decod": 9, "prune": [9, 18], "attempt": 9, "skip": 9, "max": [9, 12, 16, 23], "skip_metadata": 9, "conflict": 9, "metadata_size_hint": 9, "fetch": 9, "last": 9, "size_hint": 9, "optimist": 9, "footer": 9, "anoth": [9, 10], "encod": 9, "pushdown_filt": 9, "reorder_filt": 9, "reorder": [9, 24], "heurist": 9, "minim": [9, 10], "cost": [9, 16], "enable_round_robin_repartit": 9, "round": [9, 12], "robin": 9, "repartit": [9, 10], "column": [9, 10, 11, 12, 16, 19, 21, 22, 23, 24], "overhead": 9, "fulli": 9, "repartition_aggreg": 9, "repartition_join": 9, "repartition_window": 9, "proce": 9, "next": 9, "caus": 9, "max_pass": 9, "top_down_join_key_reord": 9, "prefer_hash_join": 9, "prefer": 9, "hashjoin": 9, "sortmergejoin": 9, "hash_join_single_partition_threshold": 9, "1048576": 9, "maximum": [9, 23], "estim": 9, "typic": 10, "read_csv": [10, 11], "modifi": 10, "definit": 10, "prelud": [10, 11, 15], "ctx": [10, 11, 15], "csvreadopt": [10, 11, 15], "await": [10, 11, 15], "lt_eq": [10, 11, 12], 
"lazi": 10, "just": 10, "approach": 10, "allow": [10, 12, 14, 23], "overal": 10, "invok": 10, "distinct": [10, 16, 23], "duplic": 10, "except": [10, 22, 24], "exactli": 10, "intersect": [10, 24], "scheme": 10, "turn": 10, "arbitrari": 10, "union": [10, 20, 24], "union_distinct": 10, "with_column": 10, "with_column_renam": 10, "renam": 10, "stdout": 10, "disk": 10, "collect_partit": 10, "execute_stream": 10, "stream": [10, 16], "execute_stream_partit": 10, "per": 10, "show_limit": 10, "subset": [10, 18], "write_csv": 10, "write_json": 10, "write_parquet": 10, "explan": 10, "far": 10, "functionregistri": 10, "term": 10, "attribut": 10, "to_logical_plan": 10, "to_unoptimized_plan": 10, "unoptim": 10, "11": [11, 15, 18], "tokio": [11, 15], "main": 11, "fn": [11, 15], "register_csv": [11, 15], "BY": [11, 18, 19, 20, 24], "ok": [11, 15], "effect": [11, 23], "lower": [11, 12, 23], "letter": 11, "won": 11, "illustr": 11, "capitalized_exampl": 11, "method": 12, "chain": 12, "fluent": 12, "style": [12, 18], "AND": 12, "gt": 12, "lit": 12, "lt": 12, "123": [12, 22], "expr1": 12, "expr2": 12, "eq": 12, "gt_eq": 12, "not_eq": 12, "x": [12, 16, 19, 23, 25], "absolut": [12, 22], "aco": 12, "invers": [12, 22], "cosin": [12, 22], "asin": 12, "sine": [12, 22], "atan": 12, "tangent": [12, 22], "atan2": 12, "ceil": 12, "nearest": [12, 22], "equal": [12, 16, 22], "co": 12, "exp": 12, "exponenti": [12, 22], "floor": 12, "less": [12, 22], "ln": 12, "natur": [12, 22], "logarithm": [12, 22], "log10": 12, "10": [12, 22, 23], "log2": 12, "power": 12, "expon": 12, "rais": [12, 22], "signum": 12, "sign": [12, 22], "sin": 12, "sqrt": 12, "squar": [12, 22], "tan": 12, "trunc": 12, "truncat": [12, 22], "toward": [12, 22], "zero": [12, 22, 25], "OR": [12, 18], "xor": 12, "left": [12, 24], "right": [12, 24], "often": [12, 22], "substitut": [12, 22], "retriev": [12, 22], "otherwis": [12, 22], "end": [12, 15], "nullif": [12, 24], "value1": [12, 22], "value2": [12, 22], "ascii": [12, 24], 
"bit_length": [12, 24], "btrim": [12, 24], "char_length": [12, 24], "character_length": [12, 24], "concat": [12, 24], "concat_w": [12, 24], "chr": [12, 24], "initcap": [12, 24], "lpad": [12, 24], "ltrim": [12, 24], "md5": 12, "octet_length": [12, 24], "repeat": [12, 24], "revers": [12, 24], "rpad": [12, 24], "rtrim": [12, 24], "digest": 12, "split_part": [12, 24], "starts_with": [12, 24], "strpo": [12, 24], "substr": [12, 24], "translat": [12, 24], "trim": [12, 24], "regexp_match": 12, "regexp_replac": [12, 24], "date_part": [12, 24], "date_trunc": 12, "from_unixtim": 12, "to_timestamp": [12, 24], "to_timestamp_milli": [12, 24], "to_timestamp_micro": [12, 24], "to_timestamp_second": [12, 24], "now": 12, "in_list": 12, "random": 12, "sha224": 12, "sha384": 12, "sha512": 12, "to_hex": [12, 24], "approx_distinct": [12, 24], "approx_median": [12, 24], "approx_percentile_cont": [12, 24], "approx_percentile_cont_with_weight": [12, 24], "count_distinct": 12, "cube": 12, "grouping_set": 12, "median": [12, 16], "rollup": 12, "in_subqueri": 12, "df1": 12, "df2": 12, "IN": [12, 20], "not_exist": 12, "not_in_subqueri": 12, "scalar_subqueri": 12, "create_udf": 12, "create_udaf": 12, "represent": 13, "columnar": 13, "kernel": 13, "common": [13, 24], "modern": 14, "pipelin": 14, "etl": 14, "databas": 14, "achiev": 14, "easi": 14, "Being": 14, "ecosystem": 14, "flight": 14, "rest": 14, "big": 14, "emb": 14, "almost": 14, "tailor": 14, "usecas": 14, "qualiti": 14, "hello_datafus": 15, "toml": 15, "path_to_your_csv_fil": 15, "udaf": [15, 24], "rewrit": 15, "executionplan": 15, "step": 15, "worth": 15, "profil": 15, "significantli": 15, "simd": 15, "rt": 15, "snmalloc": 15, "lto": 15, "codegen": 15, "unit": 15, "global_alloc": 15, "snmalloc_r": 15, "final": 15, "nightli": 15, "toolchain": 15, "architectur": 15, "ideal": 15, "nativ": 15, "avx2": 15, "rustflag": 15, "array_agg": 16, "var": 16, "var_samp": 16, "var_pop": 16, "stddev": 16, "stddev_samp": 16, "stddev_pop": 16, "covar": 
16, "covar_samp": 16, "covar_pop": 16, "corr": 16, "uint64": [16, 17], "hyperloglog": 16, "alia": 16, "percentil": 16, "tdigest": 16, "float64": [16, 17], "inclus": 16, "raw": 16, "sketch": 16, "n": 16, "centroid": 16, "fewer": 16, "exact": 16, "higher": 16, "weight": 16, "stand": 16, "low": 16, "latenc": 16, "olap": 16, "flink": 16, "sqlparser": 17, "char": 17, "text": 17, "tinyint": 17, "int8": 17, "int16": 17, "int64": [17, 21, 22], "unsign": 17, "uint8": 17, "uint16": 17, "uint32": 17, "float32": 17, "real": 17, "decimal128": 17, "3523": 17, "date32": [17, 22], "time64": 17, "timeunit": 17, "nanosecond": [17, 22], "bytea": 17, "uuid": 17, "Not": 17, "yet": 17, "clob": 17, "varbinari": 17, "regclass": 17, "nvarchar": 17, "enum": 17, "interv": 17, "infer": [18, 23], "aggregate_simpl": 18, "manual": 18, "alreadi": 18, "hive": 18, "year": [18, 22], "month": [18, 22], "01": [18, 22], "valuet": 18, "memtabl": 18, "silent": 18, "ignor": 18, "nonexistent_t": 18, "view_nam": 18, "column1": 18, "6": 18, "column2": 18, "users_v": 18, "customer_a": 18, "verbos": 19, "plan_typ": 19, "groupbi": 19, "aggr": 19, "tablescan": 19, "projectionexec": 19, "aggregateexec": 19, "mode": 19, "finalpartit": 19, "gby": 19, "coalescebatchesexec": 19, "target_batch_s": 19, "repartitionexec": 19, "partial": 19, "roundrobinbatch": 19, "csvexec": 19, "metric": 19, "coalescepartitionsexec": 19, "hashaggregateexec": 19, "outputrow": 19, "sendtim": 19, "839560": 19, "fetchtim": 19, "122528525": 19, "repartitiontim": 19, "5327877": 19, "5660489": 19, "8012": 19, "numer": 20, "date": [20, 22, 24], "boolean": 20, "unsupport": 20, "syntax": [20, 25], "claus": [20, 24], "subqueri": [20, 24], "ddl": 20, "drop": 20, "view": [20, 21, 24], "analyz": 20, "approxim": [20, 24], "condit": [20, 23, 24], "regular": 20, "tempor": 20, "iso": 21, "table_catalog": 21, "table_schema": 21, "table_typ": 21, "column_nam": 21, "is_nul": 21, "NO": 21, "df_set": 21, "1997": 22, "31t09": 22, "26": 22, "56": 22, "123z": 
22, "rcf3339": 22, "05": 22, "31": 22, "09": 22, "close": 22, "er": 22, "et": 22, "offset": 22, "fraction": 22, "epoch": 22, "present": 22, "consist": 22, "to_timestamp_xx": 22, "millisecond": 22, "resolut": 22, "microsecond": 22, "secondsecond": 22, "subfield": 22, "date64": 22, "2020": 22, "08t12": 22, "week": 22, "37": 22, "dai": 22, "minut": 22, "03": 22, "postgr": [22, 24], "wherev": 22, "appear": 22, "chosen": 22, "accordingli": 23, "clarif": 23, "with_queri": 23, "select_expr": 23, "from_item": 23, "join_item": 23, "grouping_el": 23, "asc": 23, "desc": 23, "quantifi": 23, "By": 23, "person": 23, "ag": 23, "employe": 23, "keyword": 23, "even": 23, "cartesian": 23, "table1": 23, "table2": 23, "ascend": 23, "descend": 23, "neg": 23, "planner": 24, "constant": 24, "fold": 24, "coercion": 24, "post": 24, "try_cast": 24, "basic": 24, "inner": 24, "cross": 24, "empti": 24, "frame": 24, "primit": 24, "correl": 25}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"commun": 0, "question": [0, 13], "mail": 0, "list": 0, "slack": 0, "discord": 0, "sync": 0, "up": 0, "video": 0, "call": 0, "introduct": [1, 14], "pull": 1, "request": 1, "merg": 1, "pr": 1, "develop": 1, "": 1, "guid": [1, 7], "window": 1, "setup": 1, "protoc": 1, "instal": [1, 8], "bootstrap": 1, "environ": 1, "test": 1, "organ": 1, "unit": 1, "rust": [1, 15], "integr": 1, "sqllogictest": 1, "sql": [1, 3, 8, 11, 20, 24], "postgr": 1, "depend": 1, "invok": 1, "runner": 1, "benchmark": 1, "criterion": 1, "parquet": [1, 8], "upstream": 1, "suit": 1, "how": 1, "add": 1, "new": [1, 15], "scalar": [1, 22, 25], "function": [1, 5, 6, 12, 15, 16, 22], "aggreg": [1, 12, 16], "displai": 1, "plan": [1, 5], "graphic": 1, "specif": [1, 4], "format": 1, "md": 1, "document": 1, "quarterli": 2, "roadmap": [2, 3], "2022": 2, "q2": 2, "datafus": [2, 3, 7, 8, 13, 14, 15], "core": 2, "ballista": [2, 3, 13], "extens": [2, 15], "contrib": 2, "python": [2, 3], "s3": [2, 8], "tui": 2, "bigtabl": 2, "stream": 2, 
"addit": 3, "languag": 3, "featur": 3, "queri": [3, 8, 11], "optim": [3, 5, 15], "datasourc": 3, "runtim": 3, "infrastructur": 3, "resourc": 3, "manag": 3, "interfac": 3, "cli": 3, "move": 3, "schedul": 3, "implement": 3, "execut": 3, "time": [3, 17], "cost": 3, "base": [3, 22], "statist": [3, 16], "invari": 5, "ration": 5, "notat": 5, "logic": 5, "column": [5, 6], "physic": 5, "data": [5, 8, 11, 17, 24], "sourc": [5, 8, 24], "registri": 5, "planner": 5, "builder": 5, "relat": 5, "name": [5, 6], "tupl": 5, "field": [5, 6], "ar": 5, "uniqu": 5, "respons": 5, "valid": 5, "schema": [5, 21], "i": [5, 13], "consist": 5, "The": 5, "under": 5, "output": [5, 6, 11], "equal": 5, "semant": 6, "rule": 6, "appendic": 6, "exampl": [6, 8, 11], "comparison": [6, 12], "other": [6, 10, 12, 22], "system": 6, "project": [6, 15], "transform": [6, 10], "oper": [6, 12], "liter": [6, 12], "apach": [7, 13], "arrow": [7, 13], "tabl": [7, 18], "content": 7, "user": [7, 12], "contributor": 7, "command": 8, "line": 8, "util": 8, "run": [8, 11], "us": [8, 11, 14, 15], "cargo": [8, 11], "homebrew": 8, "maco": 8, "docker": 8, "usag": [8, 11], "select": [8, 23], "file": 8, "directli": 8, "regist": 8, "csv": [8, 11], "chang": 8, "configur": [8, 9, 15], "option": 8, "set": 9, "datafram": [10, 11], "api": [10, 11], "action": 10, "method": 10, "updat": 11, "toml": 11, "against": 11, "store": 11, "process": 11, "from": [11, 23], "both": 11, "identifi": [11, 12], "capit": 11, "express": [12, 22], "valu": 12, "boolean": [12, 17], "math": [12, 22], "bitwis": 12, "condit": [12, 22], "string": [12, 22], "regular": [12, 22], "tempor": [12, 22], "subqueri": [12, 25], "defin": 12, "frequent": 13, "ask": 13, "what": 13, "relationship": 13, "between": 13, "case": 14, "why": 14, "librari": 15, "creat": [15, 18], "default": 15, "main": 15, "version": 15, "compat": 15, "gener": [16, 24], "approxim": 16, "approx_distinct": 16, "approx_median": 16, "approx_percentile_cont": 16, "approx_percentile_cont_with_weight": 
16, "type": 17, "charact": 17, "numer": 17, "date": 17, "binari": 17, "unsupport": 17, "ddl": 18, "extern": 18, "drop": 18, "view": 18, "explain": 19, "analyz": 19, "refer": 20, "inform": 21, "ab": 22, "x": 22, "aco": 22, "asin": 22, "atan": 22, "atan2": 22, "y": 22, "ceil": 22, "co": 22, "exp": 22, "floor": 22, "ln": 22, "log10": 22, "log2": 22, "power": 22, "expon": 22, "round": 22, "signum": 22, "sin": 22, "sqrt": 22, "tan": 22, "trunc": 22, "coalesc": 22, "nullif": 22, "ascii": 22, "bit_length": 22, "btrim": 22, "char_length": 22, "character_length": 22, "concat": 22, "concat_w": 22, "chr": 22, "initcap": 22, "left": [22, 23], "length": 22, "lower": 22, "lpad": 22, "ltrim": 22, "md5": 22, "octet_length": 22, "repeat": 22, "replac": 22, "revers": 22, "right": [22, 23], "rpad": 22, "rtrim": 22, "digest": 22, "split_part": 22, "starts_with": 22, "strpo": 22, "substr": 22, "translat": 22, "trim": 22, "upper": 22, "regexp_match": 22, "regexp_replac": 22, "to_timestamp": 22, "to_timestamp_milli": 22, "to_timestamp_micro": 22, "to_timestamp_second": 22, "extract": 22, "date_part": 22, "date_trunc": 22, "date_bin": 22, "from_unixtim": 22, "now": 22, "arrai": 22, "in_list": 22, "random": 22, "sha224": 22, "sha256": 22, "sha384": 22, "sha512": 22, "struct": 22, "to_hex": 22, "syntax": 23, "WITH": 23, "claus": 23, "where": 23, "join": 23, "inner": 23, "outer": 23, "full": 23, "cross": 23, "group": 23, "BY": 23, "have": 23, "union": 23, "order": 23, "limit": 23, "statu": 24, "support": 24, "exist": 25, "NOT": 25, "IN": 25}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file diff --git a/datafusion/user-guide/cli.html b/datafusion/user-guide/cli.html index 
87c27135e671..d30c06e18c3b 100644 --- a/datafusion/user-guide/cli.html +++ b/datafusion/user-guide/cli.html @@ -294,6 +294,11 @@ Usage +
  • + + Selecting files directly + +
  • Registering Parquet Data Sources @@ -367,26 +372,43 @@ -->

    DataFusion Command-line SQL Utility

    -

    The DataFusion CLI is a command-line interactive SQL utility that allows -queries to be executed against any supported data files. It is a convenient way to +

    The DataFusion CLI is a command-line interactive SQL utility for executing +queries against any supported data files. It is a convenient way to try DataFusion out with your own data sources, and test out its SQL support.

    Example

    Create a CSV file to query.

    -
    $ echo "1,2" > data.csv
    +
    $ echo "a,b" > data.csv
    +$ echo "1,2" >> data.csv
     
    -
    $ datafusion-cli
    -DataFusion CLI v12.0.0
    -❯ CREATE EXTERNAL TABLE foo STORED AS CSV LOCATION 'data.csv';
    -0 rows in set. Query took 0.017 seconds.
    -❯ select * from foo;
    -+----------+----------+
    -| column_1 | column_2 |
    -+----------+----------+
    -| 1        | 2        |
    -+----------+----------+
    -1 row in set. Query took 0.012 seconds.
    +

    Query that single file (the CLI also supports parquet, compressed csv, avro, json and more)

    +
    $ datafusion-cli
    +DataFusion CLI v17.0.0
    +❯ select * from 'data.csv';
    ++---+---+
    +| a | b |
    ++---+---+
    +| 1 | 2 |
    ++---+---+
    +1 row in set. Query took 0.007 seconds.
    +
    +
    +

    You can also query directories of files with compatible schemas:

    +
    $ ls data_dir/
    +data.csv   data2.csv
    +
    +
    +
    $ datafusion-cli
    +DataFusion CLI v17.0.0
    +❯ select * from 'data_dir';
    ++---+---+
    +| a | b |
    ++---+---+
    +| 3 | 4 |
    +| 1 | 2 |
    ++---+---+
    +2 rows in set. Query took 0.007 seconds.
     
    @@ -430,6 +452,7 @@

    Run using Docker

    Usage

    +

    See the current usage using datafusion-cli --help:

    +
    +

    Selecting files directly

    +

    Files can be queried directly by enclosing the file or +directory name in single ' quotes as shown in the example.

    +

    It is also possible to create a table backed by files explicitly +via CREATE EXTERNAL TABLE as shown below.

    +

    Registering Parquet Data Sources

    Parquet data sources can be registered by executing a CREATE EXTERNAL TABLE SQL statement. It is not necessary to provide schema information for Parquet files.

    diff --git a/datafusion/user-guide/configs.html b/datafusion/user-guide/configs.html index 3ae271b6fb0d..b340d1022082 100644 --- a/datafusion/user-guide/configs.html +++ b/datafusion/user-guide/configs.html @@ -321,116 +321,130 @@

    Configuration Settings

    key

    -

    type

    default

    description

    +

    datafusion.catalog.create_default_catalog_and_schema

    +

    true

    +

    Whether the default catalog and schema should be created automatically.

    + +

    datafusion.catalog.default_catalog

    +

    datafusion

    +

    The default catalog name - this impacts what SQL queries use if not specified

    + +

    datafusion.catalog.default_schema

    +

    public

    +

    The default schema name - this impacts what SQL queries use if not specified

    + +

    datafusion.catalog.information_schema

    +

    false

    +

    Should DataFusion provide access to information_schema virtual tables for displaying schema information

    +

    datafusion.catalog.location

    -

    Utf8

    NULL

    -

    Location scanned to load tables for default schema, defaults to None

    +

    Location scanned to load tables for default schema

    -

    datafusion.catalog.type

    -

    Utf8

    +

    datafusion.catalog.format

    NULL

    -

    Type of TableProvider to use when loading default schema. Defaults to None

    +

    Type of TableProvider to use when loading default schema

    -

    datafusion.execution.batch_size

    -

    UInt64

    +

    datafusion.catalog.has_header

    +

    false

    +

    If the file has a header

    + +

    datafusion.execution.batch_size

    8192

    -

    Default batch size while creating new batches, it’s especially useful for buffer-in-memory batches since creating tiny batches would results in too much metadata memory consumption.

    +

    Default batch size while creating new batches, it’s especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption

    -

    datafusion.execution.coalesce_batches

    -

    Boolean

    +

    datafusion.execution.coalesce_batches

    true

    -

    When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting ‘datafusion.execution.coalesce_target_batch_size’.

    - -

    datafusion.execution.coalesce_target_batch_size

    -

    UInt64

    -

    4096

    -

    Target batch size when coalescing batches. Uses in conjunction with the configuration setting ‘datafusion.execution.coalesce_batches’.

    +

    When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting ‘datafusion.execution.batch_size’

    -

    datafusion.execution.parquet.enable_page_index

    -

    Boolean

    +

    datafusion.execution.collect_statistics

    false

    -

    If true, uses parquet data page level metadata (Page Index) statistics to reduce the number of rows decoded.

    +

    Should DataFusion collect statistics after listing files

    -

    datafusion.execution.parquet.metadata_size_hint

    -

    UInt64

    -

    NULL

    -

    If specified, the parquet reader will try and fetch the last size_hint bytes of the parquet file optimistically. If not specified, two read are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer.

    +

    datafusion.execution.target_partitions

    +

    0

    +

    Number of partitions for query execution. Increasing partitions can increase concurrency. Defaults to the number of cpu cores on the system

    -

    datafusion.execution.parquet.pruning

    -

    Boolean

    -

    true

    -

    If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file.

    +

    datafusion.execution.time_zone

    +

    +00:00

    +

    The default time zone. Some functions, e.g. EXTRACT(HOUR from SOME_TIME), shift the underlying datetime according to this time zone, and then extract the hour

    -

    datafusion.execution.parquet.pushdown_filters

    -

    Boolean

    +

    datafusion.execution.parquet.enable_page_index

    false

    -

    If true, filter expressions are be applied during the parquet decoding operation to reduce the number of rows decoded.

    +

    If true, uses parquet data page level metadata (Page Index) statistics to reduce the number of rows decoded.

    -

    datafusion.execution.parquet.reorder_filters

    -

    Boolean

    -

    false

    -

    If true, filter expressions evaluated during the parquet decoding opearation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query.

    +

    datafusion.execution.parquet.pruning

    +

    true

    +

    If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file

    datafusion.execution.parquet.skip_metadata

    -

    Boolean

    true

    -

    If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata.

    - -

    datafusion.execution.time_zone

    -

    Utf8

    -

    +00:00

    -

    The session time zone which some function require e.g. EXTRACT(HOUR from SOME_TIME) shift the underline datetime according to the time zone,

    +

    If true, the parquet reader skips the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata

    -

    then extract the hour.

    -

    -

    -

    +

    datafusion.execution.parquet.metadata_size_hint

    +

    NULL

    +

    If specified, the parquet reader will try and fetch the last size_hint bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer

    -

    datafusion.explain.logical_plan_only

    -

    Boolean

    +

    datafusion.execution.parquet.pushdown_filters

    false

    -

    When set to true, the explain statement will only print logical plans.

    +

    If true, filter expressions are applied during the parquet decoding operation to reduce the number of rows decoded

    -

    datafusion.explain.physical_plan_only

    -

    Boolean

    +

    datafusion.execution.parquet.reorder_filters

    false

    -

    When set to true, the explain statement will only print physical plans.

    +

    If true, filter expressions evaluated during the parquet decoding operation will be reordered heuristically to minimize the cost of evaluation. If false, the filters are applied in the same order as written in the query

    + +

    datafusion.optimizer.enable_round_robin_repartition

    +

    true

    +

    When set to true, the physical plan optimizer will try to add round robin repartition to increase parallelism to leverage more CPU cores

    datafusion.optimizer.filter_null_join_keys

    -

    Boolean

    false

    When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down.

    -

    datafusion.optimizer.hash_join_single_partition_threshold

    -

    UInt64

    -

    1048576

    -

    The maximum estimated size in bytes for one input side of a HashJoin will be collected into a single partition

    +

    datafusion.optimizer.repartition_aggregations

    +

    true

    +

    Should DataFusion repartition data using the aggregate keys to execute aggregates in parallel using the provided target_partitions level

    -

    datafusion.optimizer.max_passes

    -

    UInt64

    -

    3

    -

    Number of times that the optimizer will attempt to optimize the plan

    +

    datafusion.optimizer.repartition_joins

    +

    true

    +

    Should DataFusion repartition data using the join keys to execute joins in parallel using the provided target_partitions level

    -

    datafusion.optimizer.prefer_hash_join

    -

    Boolean

    +

    datafusion.optimizer.repartition_windows

    true

    -

    When set to true, the physical plan optimizer will prefer HashJoin over SortMergeJoin. HashJoin can work more efficientlythan SortMergeJoin but consumes more memory. Defaults to true

    +

    Should DataFusion repartition data using the partition keys to execute window functions in parallel using the provided target_partitions level

    datafusion.optimizer.skip_failed_rules

    -

    Boolean

    true

    -

    When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail.

    +

    When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail

    + +

    datafusion.optimizer.max_passes

    +

    3

    +

    Number of times that the optimizer will attempt to optimize the plan

    + +

    datafusion.optimizer.top_down_join_key_reordering

    +

    true

    +

    When set to true, the physical plan optimizer will run a top down process to reorder the join keys

    -

    datafusion.optimizer.top_down_join_key_reordering

    -

    Boolean

    +

    datafusion.optimizer.prefer_hash_join

    true

    -

    When set to true, the physical plan optimizer will run a top down process to reorder the join keys. Defaults to true

    +

    When set to true, the physical plan optimizer will prefer HashJoin over SortMergeJoin. HashJoin can work more efficiently than SortMergeJoin but consumes more memory

    + +

    datafusion.optimizer.hash_join_single_partition_threshold

    +

    1048576

    +

    The maximum estimated size in bytes for one input side of a HashJoin that will be collected into a single partition

    + +

    datafusion.explain.logical_plan_only

    +

    false

    +

    When set to true, the explain statement will only print logical plans

    + +

    datafusion.explain.physical_plan_only

    +

    false

    +

    When set to true, the explain statement will only print physical plans

    diff --git a/datafusion/user-guide/dataframe.html b/datafusion/user-guide/dataframe.html index 69afc2a941dc..8ac113dc5408 100644 --- a/datafusion/user-guide/dataframe.html +++ b/datafusion/user-guide/dataframe.html @@ -338,7 +338,7 @@

    DataFrame API
    let ctx = SessionContext::new();
    -let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
    +let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
     let df = df.filter(col("a").lt_eq(col("b")))?
                .aggregate(vec![col("a")], vec![min(col("b"))])?
                .limit(0, Some(100))?;
    diff --git a/datafusion/user-guide/example-usage.html b/datafusion/user-guide/example-usage.html
    index e3b40604abc6..a997783dfbfd 100644
    --- a/datafusion/user-guide/example-usage.html
    +++ b/datafusion/user-guide/example-usage.html
    @@ -364,7 +364,7 @@
     -->
     

    Example Usage

    -

    In this example some simple processing is performed on the example.csv file.

    +

    In this example some simple processing is performed on the example.csv file.

    Update Cargo.toml

    Add the following to your Cargo.toml file:

    @@ -381,7 +381,7 @@

    Run a SQL query against data stored in a CSV:async fn main() -> datafusion::error::Result<()> { // register the table let ctx = SessionContext::new(); - ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?; + ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; // create a plan to run a SQL query let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?; @@ -401,7 +401,7 @@

    Use the DataFrame API to process data stored in a CSV:async fn main() -> datafusion::error::Result<()> { // create the dataframe let ctx = SessionContext::new(); - let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?; + let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; let df = df.filter(col("a").lt_eq(col("b")))? .aggregate(vec![col("a")], vec![min(col("b"))])? @@ -428,7 +428,7 @@

    Output from both examples

    Identifiers and Capitalization

    Please be aware that all identifiers are effectively made lower-case in SQL, so if your csv file has capital letters (ex: Name) you must put your column name in double quotes or the examples won’t work.

    -

    To illustrate this behavior, consider the capitalized_example.csv file:

    +

    To illustrate this behavior, consider the capitalized_example.csv file:

    Run a SQL query against data stored in a CSV: