From c510b0180e722cac9dacb0e59f470b3861a3b5a1 Mon Sep 17 00:00:00 2001
From: Daniel Rizk <rizkytennis@gmail.com>
Date: Thu, 26 Sep 2024 09:15:21 -0400
Subject: [PATCH 1/4] enhance window frame

---
 src/joins_sq.jl | 230 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 201 insertions(+), 29 deletions(-)

diff --git a/src/joins_sq.jl b/src/joins_sq.jl
index ea804db..163b8ab 100644
--- a/src/joins_sq.jl
+++ b/src/joins_sq.jl
@@ -25,41 +25,95 @@ end
 $docstring_left_join
 """
 macro left_join(sqlquery, join_table, lhs_column, rhs_column)
-    # Convert column references to string
+    # Convert column references to strings
     lhs_col_str = string(lhs_column)
     rhs_col_str = string(rhs_column)
-    join_table = QuoteNode(join_table)
+    # Removed the QuoteNode wrapping to allow evaluation of join_table
+    # join_table = QuoteNode(join_table)
 
     return quote
         sq = $(esc(sqlquery))
+        jq = $(esc(join_table))  # Evaluate join_table
+
         if isa(sq, SQLQuery)
             needs_new_cte = !isempty(sq.select) || !isempty(sq.where) || sq.is_aggregated || !isempty(sq.ctes)
             if needs_new_cte
                 sq.cte_count += 1
                 cte_name = "cte_" * string(sq.cte_count)
-
                 most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from
 
-                join_sql = " " * most_recent_source * ".*, " * get_join_columns(sq.db, string($(esc(join_table))), $lhs_col_str) * gbq_join_parse(most_recent_source) *
-                            " LEFT JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)
+                if isa(jq, SQLQuery)
+                    # Handle when join_table is an SQLQuery
+                    needs_new_cte_jq = !isempty(jq.select) || !isempty(jq.where) || jq.is_aggregated || !isempty(jq.ctes)
+                    if needs_new_cte_jq
+                        jq.cte_count += 1
+                        cte_name_jq = "jcte_" * string(jq.cte_count)
+                        most_recent_source_jq = !isempty(jq.ctes) ? "jcte_" * string(jq.cte_count - 1) : jq.from
+                        select_sql_jq = "SELECT * FROM " * most_recent_source_jq
+                        new_cte_jq = CTE(name=cte_name_jq, select=select_sql_jq)
+                        push!(jq.ctes, new_cte_jq)
+                        jq.from = cte_name_jq
+                    end
+                    # Combine CTEs and metadata
+                    sq.ctes = vcat(sq.ctes, jq.ctes)
+                    sq.metadata = vcat(sq.metadata, jq.metadata)
+                    join_table_name = jq.from
+                else
+                    # When join_table is a table name
+                    join_table_name = string(jq)
+                    if current_sql_mode[] != :athena
+                        new_metadata = get_table_metadata(sq.db, join_table_name)
+                    else
+                        new_metadata = get_table_metadata_athena(sq.db, join_table_name, sq.athena_params)
+                    end
+                    sq.metadata = vcat(sq.metadata, new_metadata)
+                end
+
+                join_sql = " " * most_recent_source * ".*, " *
+                           get_join_columns(sq.db, join_table_name, $lhs_col_str) * gbq_join_parse(most_recent_source) *
+                           " LEFT JOIN " * join_table_name * " ON " *
+                           gbq_join_parse(join_table_name) * "." * $lhs_col_str * " = " *
+                           gbq_join_parse(most_recent_source) * "." * $rhs_col_str
 
                 # Create and add the new CTE
                 new_cte = CTE(name=cte_name, select=join_sql)
                 push!(sq.ctes, new_cte)
-
                 # Update the FROM clause
                 sq.from = cte_name
             else
-                join_clause = " LEFT JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
+                if isa(jq, SQLQuery)
+                    # Handle when join_table is an SQLQuery
+                    needs_new_cte_jq = !isempty(jq.select) || !isempty(jq.where) || jq.is_aggregated || !isempty(jq.ctes)
+                        
+                    if needs_new_cte_jq
+                        jq.cte_count += 1
+                        cte_name_jq = "ncte_" * string(jq.cte_count)
+                        most_recent_source_jq = !isempty(jq.ctes) ? "cte_" * string(jq.cte_count - 1) : jq.from
+                        select_sql_jq = "SELECT * FROM " * most_recent_source_jq
+                        new_cte_jq = CTE(name=cte_name_jq, select=select_sql_jq)
+                        push!(jq.ctes, new_cte_jq)
+                        jq.from = cte_name_jq
+                    end
+                    # Combine CTEs and metadata
+                    sq.ctes = vcat(sq.ctes, jq.ctes)
+                    sq.metadata = vcat(sq.metadata, jq.metadata)
+                    join_table_name = jq.from
+                else
+                    # When join_table is a table name
+                    join_table_name = string(jq)
+                    if current_sql_mode[] != :athena
+                        new_metadata = get_table_metadata(sq.db, join_table_name)
+                    else
+                        new_metadata = get_table_metadata_athena(sq.db, join_table_name, sq.athena_params)
+                    end
+                    sq.metadata = vcat(sq.metadata, new_metadata)
+                end
+
+                join_clause = " LEFT JOIN " * join_table_name * " ON " *
+                              gbq_join_parse(join_table_name) * "." * $lhs_col_str * " = " *
+                              gbq_join_parse(sq.from) * "." * $rhs_col_str
                 sq.from *= join_clause
             end
-
-            if current_sql_mode[] != :athena
-                new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
-            else
-                new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
-            end
-            sq.metadata = vcat(sq.metadata, new_metadata)
         else
             error("Expected sqlquery to be an instance of SQLQuery")
         end
@@ -67,6 +121,7 @@ macro left_join(sqlquery, join_table, lhs_column, rhs_column)
     end
 end
 
+
 """
 $docstring_right_join
 """
@@ -167,42 +222,92 @@ end
 $docstring_full_join
 """
 macro full_join(sqlquery, join_table, lhs_column, rhs_column)
-    # Convert column references to string
+    # Convert column references to strings
     lhs_col_str = string(lhs_column)
     rhs_col_str = string(rhs_column)
-    join_table = QuoteNode(join_table)
 
     return quote
         sq = $(esc(sqlquery))
+        jq = $(esc(join_table))  # Evaluate join_table
+
         if isa(sq, SQLQuery)
             needs_new_cte = !isempty(sq.select) || !isempty(sq.where) || sq.is_aggregated || !isempty(sq.ctes)
-
             if needs_new_cte
                 sq.cte_count += 1
                 cte_name = "cte_" * string(sq.cte_count)
-
                 most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from
 
-                join_sql = " " * most_recent_source * ".*, " * get_join_columns(sq.db, string($(esc(join_table))), $lhs_col_str) * gbq_join_parse(most_recent_source) *
-                            " FULL JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)
+                if isa(jq, SQLQuery)
+                    # Handle when join_table is an SQLQuery
+                    needs_new_cte_jq = !isempty(jq.select) || !isempty(jq.where) || jq.is_aggregated || !isempty(jq.ctes)
+                    if needs_new_cte_jq
+                        jq.cte_count += 1
+                        cte_name_jq = "jcte_" * string(jq.cte_count)
+                        most_recent_source_jq = !isempty(jq.ctes) ? "jcte_" * string(jq.cte_count - 1) : jq.from
+                        select_sql_jq = "SELECT * FROM " * most_recent_source_jq
+                        new_cte_jq = CTE(name=cte_name_jq, select=select_sql_jq)
+                        push!(jq.ctes, new_cte_jq)
+                        jq.from = cte_name_jq
+                    end
+                    # Combine CTEs and metadata
+                    sq.ctes = vcat(sq.ctes, jq.ctes)
+                    sq.metadata = vcat(sq.metadata, jq.metadata)
+                    join_table_name = jq.from
+                else
+                    # When join_table is a table name
+                    join_table_name = string(jq)
+                    if current_sql_mode[] != :athena
+                        new_metadata = get_table_metadata(sq.db, join_table_name)
+                    else
+                        new_metadata = get_table_metadata_athena(sq.db, join_table_name, sq.athena_params)
+                    end
+                    sq.metadata = vcat(sq.metadata, new_metadata)
+                end
+
+                join_sql = " " * most_recent_source * ".*, " *
+                           get_join_columns(sq.db, join_table_name, $lhs_col_str) * gbq_join_parse(most_recent_source) *
+                           " FULL JOIN " * join_table_name * " ON " *
+                           gbq_join_parse(join_table_name) * "." * $lhs_col_str * " = " *
+                           gbq_join_parse(most_recent_source) * "." * $rhs_col_str
 
                 # Create and add the new CTE
                 new_cte = CTE(name=cte_name, select=join_sql)
                 push!(sq.ctes, new_cte)
-
                 # Update the FROM clause
                 sq.from = cte_name
             else
-                join_clause = " FULL JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
+                if isa(jq, SQLQuery)
+                    # Handle when join_table is an SQLQuery
+                    needs_new_cte_jq = !isempty(jq.select) || !isempty(jq.where) || jq.is_aggregated || !isempty(jq.ctes)
+                    if needs_new_cte_jq
+                        jq.cte_count += 1
+                        cte_name_jq = "ncte_" * string(jq.cte_count)
+                        most_recent_source_jq = !isempty(jq.ctes) ? "cte_" * string(jq.cte_count - 1) : jq.from
+                        select_sql_jq = "SELECT * FROM " * most_recent_source_jq
+                        new_cte_jq = CTE(name=cte_name_jq, select=select_sql_jq)
+                        push!(jq.ctes, new_cte_jq)
+                        jq.from = cte_name_jq
+                    end
+                    # Combine CTEs and metadata
+                    sq.ctes = vcat(sq.ctes, jq.ctes)
+                    sq.metadata = vcat(sq.metadata, jq.metadata)
+                    join_table_name = jq.from
+                else
+                    # When join_table is a table name
+                    join_table_name = string(jq)
+                    if current_sql_mode[] != :athena
+                        new_metadata = get_table_metadata(sq.db, join_table_name)
+                    else
+                        new_metadata = get_table_metadata_athena(sq.db, join_table_name, sq.athena_params)
+                    end
+                    sq.metadata = vcat(sq.metadata, new_metadata)
+                end
+
+                join_clause = " FULL JOIN " * join_table_name * " ON " *
+                              gbq_join_parse(join_table_name) * "." * $lhs_col_str * " = " *
+                              gbq_join_parse(sq.from) * "." * $rhs_col_str
                 sq.from *= join_clause
             end
-
-            if current_sql_mode[] != :athena
-                new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
-            else
-                new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
-            end
-            sq.metadata = vcat(sq.metadata, new_metadata)
         else
             error("Expected sqlquery to be an instance of SQLQuery")
         end
@@ -211,6 +316,7 @@ macro full_join(sqlquery, join_table, lhs_column, rhs_column)
 end
 
 
+
 """
 $docstring_semi_join
 """
@@ -305,3 +411,69 @@ macro anti_join(sqlquery, join_table, lhs_column, rhs_column)
         sq
     end
 end
+
+"""
+$docstring_union
+"""
+macro union(sqlquery, union_query)
+    return quote
+        sq = $(esc(sqlquery))
+        uq = $(esc(union_query))
+
+        if isa(sq, SQLQuery)
+            # Determine if sq needs a new CTE
+            needs_new_cte_sq = !isempty(sq.select) || !isempty(sq.where) || sq.is_aggregated || !isempty(sq.ctes)
+            if needs_new_cte_sq
+                sq.cte_count += 1
+                cte_name_sq = "cte_" * string(sq.cte_count)
+                most_recent_source_sq = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from
+                select_sql_sq = "SELECT * FROM " * most_recent_source_sq
+                new_cte_sq = CTE(name=cte_name_sq, select=select_sql_sq)
+                push!(sq.ctes, new_cte_sq)
+                sq.from = cte_name_sq
+            end
+
+            # Prepare the union query
+            if isa(uq, SQLQuery)
+                # Determine if uq needs a new CTE
+                needs_new_cte_uq = !isempty(uq.select) || !isempty(uq.where) || uq.is_aggregated || !isempty(uq.ctes)
+                if needs_new_cte_uq
+                    uq.cte_count += 1
+                    cte_name_uq = "cte_" * string(uq.cte_count)
+                    most_recent_source_uq = !isempty(uq.ctes) ? "cte_" * string(uq.cte_count - 1) : uq.from
+                    select_sql_uq = "SELECT * FROM " * most_recent_source_uq
+                    new_cte_uq = CTE(name=cte_name_uq, select=select_sql_uq)
+                    push!(uq.ctes, new_cte_uq)
+                    uq.from = cte_name_uq
+                end
+
+                # Combine the queries using UNION
+                union_sql = "SELECT * FROM " * sq.from * " UNION SELECT * FROM " * uq.from
+
+                # Merge CTEs and metadata
+                sq.ctes = vcat(sq.ctes, uq.ctes)
+                sq.metadata = vcat(sq.metadata, uq.metadata)
+            else
+                # Treat uq as a table name
+                union_sql = "SELECT * FROM " * sq.from * " UNION SELECT * FROM " * string(uq)
+                # Update metadata
+                if current_sql_mode[] != :athena
+                    new_metadata = get_table_metadata(sq.db, string(uq))
+                else
+                    new_metadata = get_table_metadata_athena(sq.db, string(uq), sq.athena_params)
+                end
+                sq.metadata = vcat(sq.metadata, new_metadata)
+            end
+
+            # Create a new CTE for the union
+            sq.cte_count += 1
+            union_cte_name = "cte_" * string(sq.cte_count)
+            union_cte = CTE(name=union_cte_name, select=union_sql)
+            push!(sq.ctes, union_cte)
+            sq.from = union_cte_name
+        else
+            error("Expected sqlquery to be an instance of SQLQuery")
+        end
+        sq
+    end
+end
\ No newline at end of file

From faf07da160256a08f08ca9a20284870b92e99ff5 Mon Sep 17 00:00:00 2001
From: Daniel Rizk <rizkytennis@gmail.com>
Date: Thu, 26 Sep 2024 09:18:34 -0400
Subject: [PATCH 2/4] fix connect for duckdb file

---
 README.md                                  |  15 +-
 docs/examples/UserGuide/from_queryex.jl    |   2 +-
 docs/examples/UserGuide/getting_started.jl |   4 +-
 docs/examples/UserGuide/ibis_comp.jl       |   2 -
 docs/src/index.md                          |   5 +-
 src/TBD_macros.jl                          |  86 -----------
 src/TidierDB.jl                            |   7 +-
 src/docstrings.jl                          |  56 ++++++-
 src/windows.jl                             | 169 +++++++++++++++++++++
 9 files changed, 237 insertions(+), 109 deletions(-)
 create mode 100644 src/windows.jl

diff --git a/README.md b/README.md
index 14f8c6e..74984b3 100644
--- a/README.md
+++ b/README.md
@@ -44,9 +44,10 @@ TidierDB.jl currently supports the following top-level macros:
 | **Helper Functions**             | `across`, `desc`, `if_else`, `case_when`, `n`, `starts_with`, `ends_with`, `contains`, `as_float`, `as_integer`, `as_string`, `is_missing`, `missing_if`, `replace_missing` |
 | **TidierStrings.jl Functions** | `str_detect`, `str_replace`, `str_replace_all`, `str_remove_all`, `str_remove`                                                                                               |
 | **TidierDates.jl Functions**   | `year`, `month`, `day`, `hour`, `min`, `second`, `floor_date`, `difftime`                                                                                                   |
-| **Aggregate Functions**          | `mean`, `minimum`, `maximum`, `std`, `sum`, `cumsum`, `cor`, `cov`, `var`, 
+| **Aggregate Functions**          | `mean`, `minimum`, `maximum`, `std`, `sum`, `cumsum`, `cor`, `cov`, `var`, all SQL aggregate
 
-`@summarize` supports any SQL aggregate function in addition to the list above. Simply write the function as written in SQL syntax and it will work.                                                                                                    |
+`@summarize` supports any SQL aggregate function in addition to the list above. Simply write the function as written in SQL syntax and it will work.
+`@mutate` supports nearly all functions built into SQL as well.                                                                                                    
 
 When using the DuckDB backend, if `db_table` recieves a file path (`.parquet`, `.json`, `.csv`, `iceberg` or `delta`), it does not copy it into memory. This allows for queries on files too big for memory. `db_table` also supports S3 bucket locations via DuckDB.
 
@@ -67,7 +68,9 @@ import TidierDB as DB
 db = DB.connect(DB.duckdb());
 path_or_name = "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv"
 
-@chain DB.db_table(db, path_or_name) begin
+mtcars = DB.db_table(db, path_or_name);
+
+@chain DB.t(mtcars) begin
     DB.@filter(!starts_with(model, "M"))
     DB.@group_by(cyl)
     DB.@summarize(mpg = mean(mpg))
@@ -97,7 +100,7 @@ end
 We cannot do this using TidierDB. However, we can call `@pivot_longer()` from TidierData *after* the result of the query has been instantiated as a DataFrame, like this: 
 
 ```julia
-@chain DB.db_table(db, path_or_name) begin
+@chain DB.t(mtcars) begin
     DB.@filter(!starts_with(model, "M"))
     DB.@group_by(cyl)
     DB.@summarize(mpg = mean(mpg))
@@ -136,7 +139,7 @@ end
 We can replace `DB.collect()` with `DB.@show_query` to reveal the underlying SQL query being generated by TidierDB. To handle complex queries, TidierDB makes heavy use of Common Table Expressions (CTE), which are a useful tool to organize long queries.
 
 ```julia
-@chain DB.db_table(db, path_or_name) begin
+@chain DB.t(mtcars) begin
     DB.@filter(!starts_with(model, "M"))
     DB.@group_by(cyl)
     DB.@summarize(mpg = mean(mpg))
@@ -176,7 +179,7 @@ SELECT *
 ## TidierDB is already quite fully-featured, supporting advanced TidierData functions like `across()` for multi-column selection.
 
 ```julia
-@chain DB.db_table(db, path_or_name) begin
+@chain DB.t(mtcars) begin
     DB.@group_by(cyl)
     DB.@summarize(across((starts_with("a"), ends_with("s")), (mean, sum)))
     DB.@collect
diff --git a/docs/examples/UserGuide/from_queryex.jl b/docs/examples/UserGuide/from_queryex.jl
index 8a39597..f92c451 100644
--- a/docs/examples/UserGuide/from_queryex.jl
+++ b/docs/examples/UserGuide/from_queryex.jl
@@ -1,4 +1,4 @@
-# While using TidierDB, you may need to generate part of a query and reuse it multiple times. `from_query()` enables a query portion to be reused multiple times as shown below.
+# While using TidierDB, you may need to generate part of a query and reuse it multiple times. `from_query()` or `t()` enable a query portion to be reused multiple times as shown below.
 
 # ```julia
 # import TidierDB as DB
diff --git a/docs/examples/UserGuide/getting_started.jl b/docs/examples/UserGuide/getting_started.jl
index fef2524..e3e6774 100644
--- a/docs/examples/UserGuide/getting_started.jl
+++ b/docs/examples/UserGuide/getting_started.jl
@@ -53,10 +53,10 @@
 # If you are working with a backend where compute cost is important, it will be important to minimize using `db_table` as this will requery for metadata each time. 
 # Compute costs are relevant to backends such as AWS, databricks and Snowflake. 
 
-# To do this, save the results of `db_table` and use them with `from_query`. Using `from_query` pulls the relevant information (metadata, con, etc) from the mutable SQLquery struct, allowing you to repeatedly query and collect the table without requerying for the metadata each time
+# To do this, save the results of `db_table` and use them with `t`. Using `t` pulls the relevant information (metadata, con, etc) from the mutable SQLquery struct, allowing you to repeatedly query and collect the table without requerying for the metadata each time
 # ```julia
 # table = DB.db_table(con, "path")
-# @chain DB.from_query(table) begin
+# @chain DB.t(table) begin
 #     ## data wrangling here 
 # end 
 # ```
diff --git a/docs/examples/UserGuide/ibis_comp.jl b/docs/examples/UserGuide/ibis_comp.jl
index 5e122c4..be99876 100644
--- a/docs/examples/UserGuide/ibis_comp.jl
+++ b/docs/examples/UserGuide/ibis_comp.jl
@@ -17,8 +17,6 @@
 # ```julia
 # using TidierDB
 # db = connect(duckdb())
-# # This next line is optional, but it will let us avoid writing `db_table` or `from_query` for each query
-# t(table) = from_query(table)
 # ```
 # Of note, TidierDB does not yet have an "interactive mode" so each example result will be collected.
 
diff --git a/docs/src/index.md b/docs/src/index.md
index ad9fc8c..b3a4fc9 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -39,9 +39,10 @@ TidierDB.jl currently supports:
 | **Helper Functions**             | `across`, `desc`, `if_else`, `case_when`, `n`, `starts_with`, `ends_with`, `contains`, `as_float`, `as_integer`, `as_string`, `is_missing`, `missing_if`, `replace_missing` |
 | **TidierStrings.jl Functions** | `str_detect`, `str_replace`, `str_replace_all`, `str_remove_all`, `str_remove`                                                                                               |
 | **TidierDates.jl Functions**   | `year`, `month`, `day`, `hour`, `min`, `second`, `floor_date`, `difftime`                                                                                                   |
-| **Aggregate Functions**          | `mean`, `minimum`, `maximum`, `std`, `sum`, `cumsum`, `cor`, `cov`, `var`, 
+| **Aggregate Functions**          | `mean`, `minimum`, `maximum`, `std`, `sum`, `cumsum`, `cor`, `cov`, `var`, all SQL aggregate
 
-`@summarize` supports any SQL aggregate function in addition to the list above. Simply write the function as written in SQL syntax and it will work.                                                                                                    |
+`@summarize` supports any SQL aggregate function in addition to the list above. Simply write the function as written in SQL syntax and it will work.
+`@mutate` supports nearly all functions built into SQL as well.                                                                                                          
 
 
 When using the DuckDB backend, if `db_table` recieves a file path ( `.parquet`, `.json`, `.csv`, `iceberg` or `delta`), it does not copy it into memory. This allows for queries on files too big for memory. `db_table` also supports S3 bucket locations via DuckDB.
diff --git a/src/TBD_macros.jl b/src/TBD_macros.jl
index 8609f31..70630dc 100644
--- a/src/TBD_macros.jl
+++ b/src/TBD_macros.jl
@@ -552,92 +552,6 @@ macro rename(sqlquery, renamings...)
     end
 end
 
-"""
-$docstring_window_order
-"""
-macro window_order(sqlquery, order_by_expr...)
-    order_by_expr = parse_interpolation2.(order_by_expr)
-
-    return quote
-        sq = $(esc(sqlquery))
-        if isa(sq, SQLQuery)
-            # Convert order_by_expr to SQL order by string
-            order_by_sql = join([expr_to_sql(expr, sq) for expr in $(esc(order_by_expr))], ", ")
-            
-            # Update the orderBy field of the SQLQuery instance
-            sq.window_order = order_by_sql
-            
-            # If this is the first operation after an aggregation, wrap current state in a CTE
-            if sq.post_aggregation
-                sq.post_aggregation = false
-                cte_name = "cte_" * string(sq.cte_count + 1)
-                cte_sql = "SELECT * FROM " * sq.from
-                
-                if !isempty(sq.where)
-                    cte_sql *= " WHERE " * sq.where
-                end
-                
-                new_cte = CTE(name=cte_name, select=cte_sql, from=sq.from)
-                push!(sq.ctes, new_cte)
-                sq.cte_count += 1
-                
-                # Reset the from to reference the new CTE
-                sq.from = cte_name
-            end
-            
-            # Note: Actual window functions would be applied in subsequent @mutate calls or similar,
-            # potentially using the orderBy set here for their OVER() clauses.
-        else
-            error("Expected sqlquery to be an instance of SQLQuery")
-        end
-        sq
-    end
-end
-
-"""
-$docstring_window_frame
-"""
-macro window_frame(sqlquery, frame_start::Int, frame_end::Int)
-    return quote
-        sq = $(esc(sqlquery))
-        if isa(sq, SQLQuery)
-            # Validate frame_start and frame_end
-            if $frame_end < $frame_start
-                error("frame_end must be greater than or equal to frame_start")
-            end
-
-            # Calculate absolute values for frame_start and frame_end
-            abs_frame_start = abs($frame_start)
-            abs_frame_end = abs($frame_end)
-
-            # Determine the direction and clause for frame_start
-            frame_start_clause = if $frame_start < 0
-                string(abs_frame_start, " PRECEDING")
-            else
-                string(abs_frame_start, " FOLLOWING")
-            end
-
-            # Determine the direction and clause for frame_end
-            frame_end_clause = if $frame_end < 0
-                string(abs_frame_end, " PRECEDING")
-            else
-                string(abs_frame_end, " FOLLOWING")
-            end
-
-            # Construct the window frame clause
-            frame_clause = string("ROWS BETWEEN ", frame_start_clause, " AND ", frame_end_clause)
-
-            # Update the windowFrame field of the SQLQuery instance
-            sq.windowFrame = frame_clause
-        else
-            error("Expected sqlquery to be an instance of SQLQuery")
-        end
-        sq
-    end
-end
-
-
-
 
 macro show_query(sqlquery)
     return quote
diff --git a/src/TidierDB.jl b/src/TidierDB.jl
index d41af89..e0181b3 100644
--- a/src/TidierDB.jl
+++ b/src/TidierDB.jl
@@ -18,7 +18,8 @@ using GZip
  @distinct, @left_join, @right_join, @inner_join, @count, @window_order, @window_frame, @show_query, @collect, @slice_max, 
  @slice_min, @slice_sample, @rename, copy_to, duckdb_open, duckdb_connect, @semi_join, @full_join, 
  @anti_join, connect, from_query, @interpolate, add_interp_parameter!, update_con,  @head, 
- clickhouse, duckdb, sqlite, mysql, mssql, postgres, athena, snowflake, gbq, oracle, databricks, SQLQuery, show_tables, t
+ clickhouse, duckdb, sqlite, mysql, mssql, postgres, athena, snowflake, gbq, oracle, databricks, SQLQuery, show_tables,
+ t, @union
 
  abstract type SQLBackend end
 
@@ -58,7 +59,7 @@ include("parsing_oracle.jl")
 include("parsing_databricks.jl")
 include("joins_sq.jl")
 include("slices_sq.jl")
-
+include("windows.jl")
 
 
 
@@ -413,7 +414,7 @@ end
 function connect(::duckdb, token::String)
     if token == "md:" 
         return DBInterface.connect(DuckDB.DB, "md:")
-    elseif endswith(token, ".duckdb")
+    elseif endswith(token, ".duckdb") || endswith(token, ".duck.db")
         return DuckDB.DB(token)
     else
         return DBInterface.connect(DuckDB.DB, "md:$token")
diff --git a/src/docstrings.jl b/src/docstrings.jl
index 557c31d..37e2273 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -20,7 +20,9 @@ julia> db = connect(duckdb());
 
 julia> copy_to(db, df, "df_mem");
 
-julia> @chain db_table(db, :df_mem) begin
+julia> df_mem = db_table(db, :df_mem);
+
+julia> @chain t(df_mem) begin
          @select(groups:percent)
          @collect
        end
@@ -39,7 +41,7 @@ julia> @chain db_table(db, :df_mem) begin
    9 │ bb          4      0.9
   10 │ aa          5      1.0
 
-julia> @chain db_table(db, :df_mem) begin
+julia> @chain t(df_mem) begin
          @select(contains("e"))
          @collect
        end
@@ -925,20 +927,30 @@ julia> df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9],
 julia> db = connect(duckdb());
 
 julia> copy_to(db, df, "df_mem");
+
+julia> @chain db_table(db, :df_mem) begin
+        @group_by groups
+        @window_frame(3)
+        @window_order(desc(percent))
+        @mutate(avg = mean(value))
+       #@show_query 
+       end;
 ```
 """
 
 const docstring_window_frame = 
 """
-    @window_frame(sql_query, frame_start::Int, frame_end::Int)
+    @window_frame(sql_query, args...)
 
 Define the window frame for window functions in a SQL query, specifying the range of rows to include in the calculation relative to the current row.
 
 # Arguments
-sql_query: The SQL query to operate on, expected to be an instance of SQLQuery.
-- `frame_start`: The starting point of the window frame. A positive value indicates the start after the current row (FOLLOWING), a negative value indicates before the current row (PRECEDING), and 0 indicates the current row.
-- `frame_end`: The ending point of the window frame. A positive value indicates the end after the current row (FOLLOWING), a negative value indicates before the current row (PRECEDING), and 0 indicates the current row.
-
+- `sqlquery::SQLQuery`: The SQLQuery instance to which the window frame will be applied.
+- `args...`: A variable number of arguments specifying the frame boundaries. These can be:
+    - `from`: The starting point of the frame. Can be a positive or negative integer, 0 or empty. When empty, it will use UNBOUNDED
+    - `to`: The ending point of the frame. Can be a positive or negative integer,  0 or empty. When empty, it will use UNBOUNDED
+    - if only one integer is provided without specifying `to` or `from` it will default to from, and to will be UNBOUNDED.
+    - if no arguments are given, both will be UNBOUNDED
 # Examples 
 ```jldoctest
 julia> df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9], 
@@ -949,6 +961,36 @@ julia> df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9],
 julia> db = connect(duckdb());
 
 julia> copy_to(db, df, "df_mem");
+
+julia> df_mem = db_table(db, :df_mem);
+
+julia> @chain t(df_mem) begin
+        @group_by groups
+        @window_frame(3)
+        @mutate(avg = mean(percent))
+        #@show_query
+       end;
+
+julia> @chain t(df_mem) begin
+        @group_by groups
+        @window_frame(-3, 3)
+        @mutate(avg = mean(percent))
+        #@show_query
+       end;
+
+julia> @chain t(df_mem) begin
+        @group_by groups
+        @window_frame(to = -3)
+        @mutate(avg = mean(percent))
+        #@show_query
+       end;
+
+julia> @chain t(df_mem) begin
+        @group_by groups
+        @window_frame()
+        @mutate(avg = mean(percent))
+        #@show_query
+       end;
 ```
 """
 
diff --git a/src/windows.jl b/src/windows.jl
new file mode 100644
index 0000000..bae4f3a
--- /dev/null
+++ b/src/windows.jl
@@ -0,0 +1,169 @@
+"""
+$docstring_window_order
+"""
+macro window_order(sqlquery, order_by_expr...)
+    order_by_expr = parse_interpolation2.(order_by_expr)
+
+    return quote
+        sq = $(esc(sqlquery))
+        if isa(sq, SQLQuery)
+            # Convert order_by_expr to SQL order by string
+            order_specs = String[]
+            for expr in $(esc(order_by_expr))
+                if isa(expr, Expr) && expr.head == :call && expr.args[1] == :desc
+                    # Column specified with `desc()`, indicating descending order
+                    push!(order_specs, string(expr.args[2]) * " DESC")
+                elseif isa(expr, Symbol)
+                    # Plain column symbol, indicating ascending order
+                    push!(order_specs, string(expr) * " ASC")
+                else
+                    throw("Unsupported column specification in @window_order: $expr")
+                end
+            end
+            order_by_sql = join(order_specs, ", ")
+            
+            # Update the window_order field of the SQLQuery instance
+            sq.window_order = order_by_sql
+            
+            # If this is the first operation after an aggregation, wrap current state in a CTE
+            if sq.post_aggregation
+                sq.post_aggregation = false
+                cte_name = "cte_" * string(sq.cte_count + 1)
+                cte_sql = "SELECT * FROM " * sq.from
+                
+                if !isempty(sq.where)
+                    cte_sql *= " WHERE " * sq.where
+                end
+                
+                new_cte = CTE(name=cte_name, select=cte_sql, from=sq.from)
+                push!(sq.ctes, new_cte)
+                sq.cte_count += 1
+                
+                # Reset the from to reference the new CTE
+                sq.from = cte_name
+            end
+            
+            # Note: Actual window functions would be applied in subsequent @mutate calls or similar,
+            # potentially using the orderBy set here for their OVER() clauses.
+        else
+            error("Expected sqlquery to be an instance of SQLQuery")
+        end
+        sq
+    end
+end
+
+"""
+$docstring_window_frame
+"""
+macro window_frame(sqlquery, args...)
+    sqlquery_expr = esc(sqlquery)
+    # Initialize expressions for from and to values
+    frame_from_expr = nothing
+    frame_to_expr = nothing
+
+    # Process the arguments at macro expansion time
+    for arg in args
+        if isa(arg, Expr) && arg.head == :(=)
+            # Named argument
+            arg_name = arg.args[1]
+            arg_value = arg.args[2]
+            if arg_name == :from
+                frame_from_expr = arg_value
+            elseif arg_name == :to
+                frame_to_expr = arg_value
+            else
+                error("Unknown keyword argument: $(arg_name)")
+            end
+        else
+            # Positional argument
+            if frame_from_expr === nothing
+                frame_from_expr = arg
+            elseif frame_to_expr === nothing
+                frame_to_expr = arg
+            else
+                error("Too many positional arguments")
+            end
+        end
+    end
+
+    # Now generate the code that computes the frame clauses at runtime
+    return quote
+        sq = $sqlquery_expr
+        if isa(sq, SQLQuery)
+            # Evaluate frame_from_value and frame_to_value
+            frame_from_value = $(frame_from_expr !== nothing ? esc(frame_from_expr) : :(nothing))
+            frame_to_value = $(frame_to_expr !== nothing ? esc(frame_to_expr) : :(nothing))
+
+            # Initialize frame_start_clause and frame_end_clause
+            frame_start_clause = ""
+            frame_end_clause = ""
+
+            if frame_from_value !== nothing && frame_to_value === nothing
+                # Only from is specified
+                frame_start_value = frame_from_value
+                frame_start_clause = if frame_start_value == 0
+                    "CURRENT ROW"
+                elseif frame_start_value < 0
+                    string(abs(frame_start_value), " PRECEDING")
+                elseif frame_start_value > 0
+                    string(abs(frame_start_value), " FOLLOWING")
+                else
+                    error("Invalid frame_from_value")
+                end
+                # Set frame_end_clause to "UNBOUNDED FOLLOWING"
+                frame_end_clause = "UNBOUNDED FOLLOWING"
+            elseif frame_from_value === nothing && frame_to_value !== nothing
+                # Only to is specified
+                frame_end_value = frame_to_value
+                frame_end_clause = if frame_end_value == 0
+                    "CURRENT ROW"
+                elseif frame_end_value < 0
+                    string(abs(frame_end_value), " PRECEDING")
+                elseif frame_end_value > 0
+                    string(abs(frame_end_value), " FOLLOWING")
+                else
+                    error("Invalid frame_to_value")
+                end
+                # Set frame_start_clause to "UNBOUNDED PRECEDING"
+                frame_start_clause = "UNBOUNDED PRECEDING"
+            elseif frame_from_value !== nothing && frame_to_value !== nothing
+                # Both from and to are specified
+                frame_start_value = frame_from_value
+                frame_start_clause = if frame_start_value == 0
+                    "CURRENT ROW"
+                elseif frame_start_value < 0
+                    string(abs(frame_start_value), " PRECEDING")
+                elseif frame_start_value > 0
+                    string(abs(frame_start_value), " FOLLOWING")
+                else
+                    error("Invalid frame_from_value")
+                end
+
+                frame_end_value = frame_to_value
+                frame_end_clause = if frame_end_value == 0
+                    "CURRENT ROW"
+                elseif frame_end_value < 0
+                    string(abs(frame_end_value), " PRECEDING")
+                elseif frame_end_value > 0
+                    string(abs(frame_end_value), " FOLLOWING")
+                else
+                    error("Invalid frame_to_value")
+                end
+            else
+                # Neither from nor to is specified
+                frame_start_clause = "UNBOUNDED PRECEDING"
+                frame_end_clause = "UNBOUNDED FOLLOWING"
+
+            end
+
+            # Construct the window frame clause
+            frame_clause = "ROWS BETWEEN $(frame_start_clause) AND $(frame_end_clause)"
+
+            # Update the windowFrame field of the SQLQuery instance
+            sq.windowFrame = frame_clause
+        else
+            error("Expected sqlquery to be an instance of SQLQuery")
+        end
+        sq
+    end
+end

From 52754b42fd6914056aba05cc70d1cfa81c24a6d1 Mon Sep 17 00:00:00 2001
From: Daniel Rizk <rizkytennis@gmail.com>
Date: Thu, 26 Sep 2024 09:31:21 -0400
Subject: [PATCH 3/4] revert join changes

---
 src/TidierDB.jl |   2 +-
 src/joins_sq.jl | 230 ++++++------------------------------------------
 2 files changed, 30 insertions(+), 202 deletions(-)

diff --git a/src/TidierDB.jl b/src/TidierDB.jl
index e0181b3..9cd1e9b 100644
--- a/src/TidierDB.jl
+++ b/src/TidierDB.jl
@@ -19,7 +19,7 @@ using GZip
  @slice_min, @slice_sample, @rename, copy_to, duckdb_open, duckdb_connect, @semi_join, @full_join, 
  @anti_join, connect, from_query, @interpolate, add_interp_parameter!, update_con,  @head, 
  clickhouse, duckdb, sqlite, mysql, mssql, postgres, athena, snowflake, gbq, oracle, databricks, SQLQuery, show_tables,
- t, @union
+ t
 
  abstract type SQLBackend end
 
diff --git a/src/joins_sq.jl b/src/joins_sq.jl
index 163b8ab..ea804db 100644
--- a/src/joins_sq.jl
+++ b/src/joins_sq.jl
@@ -25,95 +25,41 @@ end
 $docstring_left_join
 """
 macro left_join(sqlquery, join_table, lhs_column, rhs_column)
-    # Convert column references to strings
+    # Convert column references to string
     lhs_col_str = string(lhs_column)
     rhs_col_str = string(rhs_column)
-    # Removed the QuoteNode wrapping to allow evaluation of join_table
-    # join_table = QuoteNode(join_table)
+    join_table = QuoteNode(join_table)
 
     return quote
         sq = $(esc(sqlquery))
-        jq = $(esc(join_table))  # Evaluate join_table
-
         if isa(sq, SQLQuery)
             needs_new_cte = !isempty(sq.select) || !isempty(sq.where) || sq.is_aggregated || !isempty(sq.ctes)
             if needs_new_cte
                 sq.cte_count += 1
                 cte_name = "cte_" * string(sq.cte_count)
+
                 most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from
 
-                if isa(jq, SQLQuery)
-                    # Handle when join_table is an SQLQuery
-                    needs_new_cte_jq = !isempty(jq.select) || !isempty(jq.where) || jq.is_aggregated || !isempty(jq.ctes)
-                    if needs_new_cte_jq
-                        jq.cte_count += 1
-                        cte_name_jq = "jcte_" * string(jq.cte_count)
-                        most_recent_source_jq = !isempty(jq.ctes) ? "jcte_" * string(jq.cte_count - 1) : jq.from
-                        select_sql_jq = "SELECT * FROM " * most_recent_source_jq
-                        new_cte_jq = CTE(name=cte_name_jq, select=select_sql_jq)
-                        push!(jq.ctes, new_cte_jq)
-                        jq.from = cte_name_jq
-                    end
-                    # Combine CTEs and metadata
-                    sq.ctes = vcat(sq.ctes, jq.ctes)
-                    sq.metadata = vcat(sq.metadata, jq.metadata)
-                    join_table_name = jq.from
-                else
-                    # When join_table is a table name
-                    join_table_name = string(jq)
-                    if current_sql_mode[] != :athena
-                        new_metadata = get_table_metadata(sq.db, join_table_name)
-                    else
-                        new_metadata = get_table_metadata_athena(sq.db, join_table_name, sq.athena_params)
-                    end
-                    sq.metadata = vcat(sq.metadata, new_metadata)
-                end
-
-                join_sql = " " * most_recent_source * ".*, " *
-                           get_join_columns(sq.db, join_table_name, $lhs_col_str) * gbq_join_parse(most_recent_source) *
-                           " LEFT JOIN " * join_table_name * " ON " *
-                           gbq_join_parse(join_table_name) * "." * $lhs_col_str * " = " *
-                           gbq_join_parse(most_recent_source) * "." * $rhs_col_str
+                join_sql = " " * most_recent_source * ".*, " * get_join_columns(sq.db, string($(esc(join_table))), $lhs_col_str) * gbq_join_parse(most_recent_source) *
+                            " LEFT JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)
 
                 # Create and add the new CTE
                 new_cte = CTE(name=cte_name, select=join_sql)
                 push!(sq.ctes, new_cte)
+
                 # Update the FROM clause
                 sq.from = cte_name
             else
-                if isa(jq, SQLQuery)
-                    # Handle when join_table is an SQLQuery
-                    needs_new_cte_jq = !isempty(jq.select) || !isempty(jq.where) || jq.is_aggregated || !isempty(jq.ctes)
-                        
-                    if needs_new_cte_jq
-                        jq.cte_count += 1
-                        cte_name_jq = "ncte_" * string(jq.cte_count)
-                        most_recent_source_jq = !isempty(jq.ctes) ? "cte_" * string(jq.cte_count - 1) : jq.from
-                        select_sql_jq = "SELECT * FROM " * most_recent_source_jq
-                        new_cte_jq = CTE(name=cte_name_jq, select=select_sql_jq)
-                        push!(jq.ctes, new_cte_jq)
-                        jq.from = cte_name_jq
-                    end
-                    # Combine CTEs and metadata
-                    sq.ctes = vcat(sq.ctes, jq.ctes)
-                    sq.metadata = vcat(sq.metadata, jq.metadata)
-                    join_table_name = jq.from
-                else
-                    # When join_table is a table name
-                    join_table_name = string(jq)
-                    if current_sql_mode[] != :athena
-                        new_metadata = get_table_metadata(sq.db, join_table_name)
-                    else
-                        new_metadata = get_table_metadata_athena(sq.db, join_table_name, sq.athena_params)
-                    end
-                    sq.metadata = vcat(sq.metadata, new_metadata)
-                end
-
-                join_clause = " LEFT JOIN " * join_table_name * " ON " *
-                              gbq_join_parse(join_table_name) * "." * $lhs_col_str * " = " *
-                              gbq_join_parse(sq.from) * "." * $rhs_col_str
+                join_clause = " LEFT JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
                 sq.from *= join_clause
             end
+
+            if current_sql_mode[] != :athena
+                new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
+            else
+                new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
+            end
+            sq.metadata = vcat(sq.metadata, new_metadata)
         else
             error("Expected sqlquery to be an instance of SQLQuery")
         end
@@ -121,7 +67,6 @@ macro left_join(sqlquery, join_table, lhs_column, rhs_column)
     end
 end
 
-
 """
 $docstring_right_join
 """
@@ -222,92 +167,42 @@ end
 $docstring_full_join
 """
 macro full_join(sqlquery, join_table, lhs_column, rhs_column)
-    # Convert column references to strings
+    # Convert column references to string
     lhs_col_str = string(lhs_column)
     rhs_col_str = string(rhs_column)
+    join_table = QuoteNode(join_table)
 
     return quote
         sq = $(esc(sqlquery))
-        jq = $(esc(join_table))  # Evaluate join_table
-
         if isa(sq, SQLQuery)
             needs_new_cte = !isempty(sq.select) || !isempty(sq.where) || sq.is_aggregated || !isempty(sq.ctes)
+
             if needs_new_cte
                 sq.cte_count += 1
                 cte_name = "cte_" * string(sq.cte_count)
+
                 most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from
 
-                if isa(jq, SQLQuery)
-                    # Handle when join_table is an SQLQuery
-                    needs_new_cte_jq = !isempty(jq.select) || !isempty(jq.where) || jq.is_aggregated || !isempty(jq.ctes)
-                    if needs_new_cte_jq
-                        jq.cte_count += 1
-                        cte_name_jq = "jcte_" * string(jq.cte_count)
-                        most_recent_source_jq = !isempty(jq.ctes) ? "jcte_" * string(jq.cte_count - 1) : jq.from
-                        select_sql_jq = "SELECT * FROM " * most_recent_source_jq
-                        new_cte_jq = CTE(name=cte_name_jq, select=select_sql_jq)
-                        push!(jq.ctes, new_cte_jq)
-                        jq.from = cte_name_jq
-                    end
-                    # Combine CTEs and metadata
-                    sq.ctes = vcat(sq.ctes, jq.ctes)
-                    sq.metadata = vcat(sq.metadata, jq.metadata)
-                    join_table_name = jq.from
-                else
-                    # When join_table is a table name
-                    join_table_name = string(jq)
-                    if current_sql_mode[] != :athena
-                        new_metadata = get_table_metadata(sq.db, join_table_name)
-                    else
-                        new_metadata = get_table_metadata_athena(sq.db, join_table_name, sq.athena_params)
-                    end
-                    sq.metadata = vcat(sq.metadata, new_metadata)
-                end
-
-                join_sql = " " * most_recent_source * ".*, " *
-                           get_join_columns(sq.db, join_table_name, $lhs_col_str) * gbq_join_parse(most_recent_source) *
-                           " FULL JOIN " * join_table_name * " ON " *
-                           gbq_join_parse(join_table_name) * "." * $lhs_col_str * " = " *
-                           gbq_join_parse(most_recent_source) * "." * $rhs_col_str
+                join_sql = " " * most_recent_source * ".*, " * get_join_columns(sq.db, string($(esc(join_table))), $lhs_col_str) * gbq_join_parse(most_recent_source) *
+                            " FULL JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)
 
                 # Create and add the new CTE
                 new_cte = CTE(name=cte_name, select=join_sql)
                 push!(sq.ctes, new_cte)
+
                 # Update the FROM clause
                 sq.from = cte_name
             else
-                if isa(jq, SQLQuery)
-                    # Handle when join_table is an SQLQuery
-                    needs_new_cte_jq = !isempty(jq.select) || !isempty(jq.where) || jq.is_aggregated || !isempty(jq.ctes)
-                    if needs_new_cte_jq
-                        jq.cte_count += 1
-                        cte_name_jq = "ncte_" * string(jq.cte_count)
-                        most_recent_source_jq = !isempty(jq.ctes) ? "cte_" * string(jq.cte_count - 1) : jq.from
-                        select_sql_jq = "SELECT * FROM " * most_recent_source_jq
-                        new_cte_jq = CTE(name=cte_name_jq, select=select_sql_jq)
-                        push!(jq.ctes, new_cte_jq)
-                        jq.from = cte_name_jq
-                    end
-                    # Combine CTEs and metadata
-                    sq.ctes = vcat(sq.ctes, jq.ctes)
-                    sq.metadata = vcat(sq.metadata, jq.metadata)
-                    join_table_name = jq.from
-                else
-                    # When join_table is a table name
-                    join_table_name = string(jq)
-                    if current_sql_mode[] != :athena
-                        new_metadata = get_table_metadata(sq.db, join_table_name)
-                    else
-                        new_metadata = get_table_metadata_athena(sq.db, join_table_name, sq.athena_params)
-                    end
-                    sq.metadata = vcat(sq.metadata, new_metadata)
-                end
-
-                join_clause = " FULL JOIN " * join_table_name * " ON " *
-                              gbq_join_parse(join_table_name) * "." * $lhs_col_str * " = " *
-                              gbq_join_parse(sq.from) * "." * $rhs_col_str
+                join_clause = " FULL JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
                 sq.from *= join_clause
             end
+
+            if current_sql_mode[] != :athena
+                new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
+            else
+                new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
+            end
+            sq.metadata = vcat(sq.metadata, new_metadata)
         else
             error("Expected sqlquery to be an instance of SQLQuery")
         end
@@ -316,7 +211,6 @@ macro full_join(sqlquery, join_table, lhs_column, rhs_column)
 end
 
 
-
 """
 $docstring_semi_join
 """
@@ -411,69 +305,3 @@ macro anti_join(sqlquery, join_table, lhs_column, rhs_column)
         sq
     end
 end
-
-"""
-$docstring_union
-"""
-macro union(sqlquery, union_query)
-    return quote
-        sq = $(esc(sqlquery))
-        uq = $(esc(union_query))
-
-        if isa(sq, SQLQuery)
-            # Determine if sq needs a new CTE
-            needs_new_cte_sq = !isempty(sq.select) || !isempty(sq.where) || sq.is_aggregated || !isempty(sq.ctes)
-            if needs_new_cte_sq
-                sq.cte_count += 1
-                cte_name_sq = "cte_" * string(sq.cte_count)
-                most_recent_source_sq = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from
-                select_sql_sq = "SELECT * FROM " * most_recent_source_sq
-                new_cte_sq = CTE(name=cte_name_sq, select=select_sql_sq)
-                push!(sq.ctes, new_cte_sq)
-                sq.from = cte_name_sq
-            end
-
-            # Prepare the union query
-            if isa(uq, SQLQuery)
-                # Determine if uq needs a new CTE
-                needs_new_cte_uq = !isempty(uq.select) || !isempty(uq.where) || uq.is_aggregated || !isempty(uq.ctes)
-                if needs_new_cte_uq
-                    uq.cte_count += 1
-                    cte_name_uq = "cte_" * string(uq.cte_count)
-                    most_recent_source_uq = !isempty(uq.ctes) ? "cte_" * string(uq.cte_count - 1) : uq.from
-                    select_sql_uq = "SELECT * FROM " * most_recent_source_uq
-                    new_cte_uq = CTE(name=cte_name_uq, select=select_sql_uq)
-                    push!(uq.ctes, new_cte_uq)
-                    uq.from = cte_name_uq
-                end
-
-                # Combine the queries using UNION
-                union_sql = "SELECT * FROM " * sq.from * " UNION SELECT * FROM " * uq.from
-
-                # Merge CTEs and metadata
-                sq.ctes = vcat(sq.ctes, uq.ctes)
-                sq.metadata = vcat(sq.metadata, uq.metadata)
-            else
-                # Treat uq as a table name
-                union_sql = "SELECT * FROM " * sq.from * " UNION SELECT * FROM " * string(uq)
-                # Update metadata
-                if current_sql_mode[] != :athena
-                    new_metadata = get_table_metadata(sq.db, string(uq))
-                else
-                    new_metadata = get_table_metadata_athena(sq.db, string(uq), sq.athena_params)
-                end
-                sq.metadata = vcat(sq.metadata, new_metadata)
-            end
-
-            # Create a new CTE for the union
-            sq.cte_count += 1
-            union_cte_name = "cte_" * string(sq.cte_count)
-            union_cte = CTE(name=union_cte_name, select=union_sql)
-            push!(sq.ctes, union_cte)
-            sq.from = union_cte_name
-        else
-            error("Expected sqlquery to be an instance of SQLQuery")
-        end
-        sq
-    end
-end
\ No newline at end of file

From 16f68ceb2e4a6bcbb41d6f47dd17bec23bbe3d9f Mon Sep 17 00:00:00 2001
From: Daniel Rizk <rizkytennis@gmail.com>
Date: Fri, 27 Sep 2024 11:00:29 -0400
Subject: [PATCH 4/4] fixes `connect` for duckdb and adds docs for database
 file conxn

---
 NEWS.md                                    |  4 ++++
 Project.toml                               |  2 +-
 docs/examples/UserGuide/getting_started.jl | 10 ++++++++--
 src/TBD_macros.jl                          |  2 +-
 src/TidierDB.jl                            | 13 ++++---------
 src/docstrings.jl                          | 13 ++++++-------
 6 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 00f212d..18b67b2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # TidierDB.jl updates
 
+## v0.3.5 - 2024-09-
+- improves DuckDB `connect()` interface and documentation 
+- enhances `@window_frame` to allow for just a `to` or `from` argument, as well as autodetection for `preceding`, `following` and `unbounded` for the frame boundaries. 
+
 ## v0.3.4 - 2024 2024-09-23
 TidierDB works with nearly any exisiting SQL function, now there are docs about it.
 - Docs on using any exisiting SQL function in TidierDB
diff --git a/Project.toml b/Project.toml
index 846ca7f..30fcf70 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "TidierDB"
 uuid = "86993f9b-bbba-4084-97c5-ee15961ad48b"
 authors = ["Daniel Rizk <rizk.daniel.12@gmail.com> and contributors"]
-version = "0.3.4"
+version = "0.3.5"
 
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
diff --git a/docs/examples/UserGuide/getting_started.jl b/docs/examples/UserGuide/getting_started.jl
index e3e6774..94428a2 100644
--- a/docs/examples/UserGuide/getting_started.jl
+++ b/docs/examples/UserGuide/getting_started.jl
@@ -22,7 +22,13 @@
 # conn = DB.connect(DB.duckdb())
 # ```
 
-# You can also use establish a connection through an alternate method that you preferred, and use that as your connection as well. 
+# ## Connect to a local database file
+# You can also connect to an existing database by passing the database file path as a string.
+# ```julia
+# db = DB.connect(DB.duckdb(), "mydb.duckdb")
+# ```
+
+# You can also establish any DuckDB connection through an alternate method that you prefer, and use that as your connection as well. 
 
 # ## Package Extensions 
 # The following backends utilize package extensions. To use one of backends listed below, you will need to write `using Library`
@@ -42,9 +48,9 @@
 # `db_table` starts the underlying SQL query struct, in addition to pulling the table metadata and storing it there. Storing metadata is what enables a lazy interface that also supports tidy selection.  
 # - `db_table` has two required arguments: `connection` and `table`
 # - `table` can be a table name on a database or a path/url to file to read.  When passing `db_table` a path or url, the table is not copied into memory.
+#   - Of note, `db_table` only support direct file paths to a table. It does not support database file paths such as `dbname.duckdb` or `dbname.sqlite`. Such files must be used with `connect` first.
 # - With DuckDB and ClickHouse, if you have a folder of multiple files to read, you can use `*` read in all files matching the pattern. 
 # - For example, the below would read all files that end in `.csv` in the given folder.
-# ```julia
 # db_table(db, "folder/path/*.csv")
 # ``` 
 # `db_table` also supports iceberg, delta, and S3 file paths via DuckDB.
diff --git a/src/TBD_macros.jl b/src/TBD_macros.jl
index 70630dc..3ba33e5 100644
--- a/src/TBD_macros.jl
+++ b/src/TBD_macros.jl
@@ -690,5 +690,5 @@ end
 $docstring_show_tables
 """
 function show_tables(con::Union{DuckDB.DB, DuckDB.Connection})
-    return DataFrame(DBInterface.execute(con, "SHOW TABLES"))
+    return DataFrame(DBInterface.execute(con, "SHOW ALL TABLES"))
 end
diff --git a/src/TidierDB.jl b/src/TidierDB.jl
index 9cd1e9b..520efe6 100644
--- a/src/TidierDB.jl
+++ b/src/TidierDB.jl
@@ -181,8 +181,9 @@ function get_table_metadata(conn::Union{DuckDB.DB, DuckDB.Connection}, table_nam
     result = DuckDB.execute(conn, query) |> DataFrame
     result[!, :current_selxn] .= 1
     table_name = if occursin(r"[:/]", table_name)
-         split(basename(table_name), '.')[1]
-        #"'$table_name'"
+        split(basename(table_name), '.')[1]
+        elseif occursin(".", table_name)
+        split(basename(table_name), '.')[end]
     else
         table_name
     end
@@ -412,13 +413,7 @@ function connect(::duckdb, db_type::Symbol; access_key::String="", secret_key::S
 end
 
 function connect(::duckdb, token::String)
-    if token == "md:" 
-        return DBInterface.connect(DuckDB.DB, "md:")
-    elseif endswith(token, ".duckdb") || endswith(token, ".duck.db")
-        return DuckDB.DB(token)
-    else
-        return DBInterface.connect(DuckDB.DB, "md:$token")
-    end 
+     return DBInterface.connect(DuckDB.DB, token)
 end
 
 end
diff --git a/src/docstrings.jl b/src/docstrings.jl
index 37e2273..fb5f7c6 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -1036,7 +1036,10 @@ This function establishes a database connection based on the specified backend a
 # Connect to AWS via DuckDB
 # aws_db = connect2(duckdb(), :aws, aws_access_key_id=get(ENV, "AWS_ACCESS_KEY_ID", "access_key"), aws_secret_access_key=get(ENV, "AWS_SECRET_ACCESS_KEY", "secret_access key"), aws_region=get(ENV, "AWS_DEFAULT_REGION", "us-east-1"))
 # Connect to MotherDuck
-# connect(duckdb(), "token") for first connection, vs connect(duckdb(), "md:") for reconnection
+# connect(duckdb(), ""md://..."") for first connection, vs connect(duckdb(), "md:") for reconnection
+# Connect to exisiting database file
+# connect(duckdb(), "path/to/database.duckdb")
+# Open an in-memory database
 julia> db = connect(duckdb())
 DuckDB.Connection(":memory:")
 ```
@@ -1094,7 +1097,7 @@ const docstring_db_table =
     db_table(database, table_name, athena_params, delta = false, iceberg = false)
 
 `db_table` starts the underlying SQL query struct, adding the metadata and table. If paths are passed directly to db_table instead of a 
-name it will not copy it to memory, but rather ready directly from the file.
+name it will not copy it to memory, but rather ready directly from the file. `db_table` only supports direct file paths to a table. It does not support database file paths such as `dbname.duckdb` or `dbname.sqlite`. Such files must be used with `connect first`
 
 # Arguments
 - `database`: The Database or connection object
@@ -1221,11 +1224,7 @@ Shows tables available in database. currently supports DuckDB, databricks, Snowf
 ```jldoctest
 julia> db = connect(duckdb());
 
-julia> show_tables(db) # there are no tables in when first loading so df below is empty.
-0×1 DataFrame
- Row │ name   
-     │ String 
-─────┴────────
+julia> show_tables(db);
 ```
 """