From 6359d35b142584a409e556ea7c0af4899471fa90 Mon Sep 17 00:00:00 2001 From: Sean Marshallsay Date: Sat, 22 Aug 2015 11:00:28 +0100 Subject: [PATCH 1/4] Initial implementation of window function. --- src/SQLite.jl | 11 +++++---- src/window.jl | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 src/window.jl diff --git a/src/SQLite.jl b/src/SQLite.jl index 12e3a0d..19330f8 100644 --- a/src/SQLite.jl +++ b/src/SQLite.jl @@ -88,9 +88,12 @@ type SQLiteStmt{T} sql::T end -sqliteprepare(db,sql,stmt,null) = +include("window.jl") +export window + +sqliteprepare(db,sql,stmt,null) = @CHECK db sqlite3_prepare_v2(db.handle,utf8(sql),stmt,null) -sqliteprepare(db::SQLiteDB{UTF16String},sql,stmt,null) = +sqliteprepare(db::SQLiteDB{UTF16String},sql,stmt,null) = @CHECK db sqlite3_prepare16_v2(db.handle,utf16(sql),stmt,null) function SQLiteStmt{T}(db::SQLiteDB{T},sql::AbstractString) @@ -180,7 +183,7 @@ const SERIALIZATION = UInt8[0x11,0x01,0x02,0x0d,0x53,0x65,0x72,0x69,0x61,0x6c,0x function sqldeserialize(r) ret = ccall(:memcmp, Int32, (Ptr{UInt8},Ptr{UInt8}, UInt), SERIALIZATION, r, min(18,length(r))) - + if ret == 0 v = deserialize(IOBuffer(r)) return v.object @@ -205,7 +208,7 @@ function query(db::SQLiteDB,sql::AbstractString, values=[]) end while status == SQLITE_ROW for i = 1:ncols - t = sqlite3_column_type(stmt.handle,i-1) + t = sqlite3_column_type(stmt.handle,i-1) if t == SQLITE_INTEGER r = sqlite3_column_int64(stmt.handle,i-1) elseif t == SQLITE_FLOAT diff --git a/src/window.jl b/src/window.jl new file mode 100644 index 0000000..3cbe8dc --- /dev/null +++ b/src/window.jl @@ -0,0 +1,64 @@ +addrange(i::Integer, r::UnitRange) = (i + r.start):(i + r.stop) +addrange(i::Integer, r::StepRange) = (i + r.start):r.step:(i + r.stop) + +function fetchrow(stmt::SQLiteStmt, ncols::Integer) + row = Any[] + for col in 1:ncols + t = sqlite3_column_type(stmt.handle,col-1) + if t == SQLITE_INTEGER + r = sqlite3_column_int64(stmt.handle,col-1) + elseif t == SQLITE_FLOAT + r = sqlite3_column_double(stmt.handle,col-1) + elseif t == SQLITE_TEXT + #TODO: have a way to return text16? + r = bytestring(sqlite3_column_text(stmt.handle,col-1)) + elseif t == SQLITE_BLOB + blob = sqlite3_column_blob(stmt.handle,col-1) + b = sqlite3_column_bytes(stmt.handle,col-1) + buf = zeros(UInt8,b) + unsafe_copy!(pointer(buf), convert(Ptr{UInt8},blob), b) + r = sqldeserialize(buf) + else + r = NULL + end + push!(row, r) + end + status = sqlite3_step(stmt.handle) + status, row +end + +# TODO: wrapping this in a macro would avoid the slowness of first-class functions +function window{S<:String}( + db::SQLiteDB, cb::Base.Callable, range::OrdinalRange, + table::String, columns::Vector{S}, data..., +) + @assert !isempty(columns) "you must specifiy at least one column" + # TODO: should this be robust against injection attacks? how? + nrows = query(db, string("SELECT COUNT(*) FROM ", table))[1][1] + stmt = SQLiteStmt(db, string("SELECT ", join(columns, ", "), " FROM ", table)) + status = execute(stmt) + ncols = Int64(sqlite3_column_count(stmt.handle)) + # TODO: we alread know the size of this so do everything in place + # TODO: this is the table elements not the results + # TODO: would it be less confusing to use a Vector of Vectors rather than a Matrix + results = Array(Any, (0, ncols)) + actual_results = Any[] + latest_row = 0 + for start_row in 1:(nrows + range.start - range.stop) + window_results = Array(Any, (0, ncols)) + # find relevent rows for window + for row in addrange(start_row-1, range) + # only load rows as they are needed + # TODO: is this really an optimisation? + while row > latest_row && status == SQLITE_ROW + status, row_results = fetchrow(stmt, ncols) + results = vcat(results, row_results') + latest_row += 1 + end + status == SQLITE_ROW || status == SQLITE_DONE || sqliteerror(stmt.db) + window_results = vcat(window_results, results[row, :]) + end + push!(actual_results, cb(window_results, range, data)) + end + actual_results +end From 052b0ac925bc4abf81c4b0cc1166a4ab02c1132e Mon Sep 17 00:00:00 2001 From: Sean Marshallsay Date: Tue, 25 Aug 2015 19:30:10 +0100 Subject: [PATCH 2/4] Do more in place. --- src/window.jl | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/window.jl b/src/window.jl index 3cbe8dc..59a7a35 100644 --- a/src/window.jl +++ b/src/window.jl @@ -28,37 +28,34 @@ function fetchrow(stmt::SQLiteStmt, ncols::Integer) end # TODO: wrapping this in a macro would avoid the slowness of first-class functions -function window{S<:String}( +function window{S<:AbstractString}( db::SQLiteDB, cb::Base.Callable, range::OrdinalRange, - table::String, columns::Vector{S}, data..., + table::AbstractString, columns::Vector{S}, data..., ) @assert !isempty(columns) "you must specifiy at least one column" - # TODO: should this be robust against injection attacks? how? nrows = query(db, string("SELECT COUNT(*) FROM ", table))[1][1] stmt = SQLiteStmt(db, string("SELECT ", join(columns, ", "), " FROM ", table)) status = execute(stmt) - ncols = Int64(sqlite3_column_count(stmt.handle)) - # TODO: we alread know the size of this so do everything in place - # TODO: this is the table elements not the results - # TODO: would it be less confusing to use a Vector of Vectors rather than a Matrix - results = Array(Any, (0, ncols)) - actual_results = Any[] + ncols = sqlite3_column_count(stmt.handle) + # TODO: don't keep rows that are no longer needed + tablerows = [Array(Any, ncols) for _ in 1:nrows] + results = Any[] latest_row = 0 for start_row in 1:(nrows + range.start - range.stop) - window_results = Array(Any, (0, ncols)) + # TODO: with work we can do this in place aswell + curwindow = Any[] # find relevent rows for window for row in addrange(start_row-1, range) # only load rows as they are needed - # TODO: is this really an optimisation? while row > latest_row && status == SQLITE_ROW status, row_results = fetchrow(stmt, ncols) - results = vcat(results, row_results') latest_row += 1 + copy!(tablerows[latest_row], row_results) end status == SQLITE_ROW || status == SQLITE_DONE || sqliteerror(stmt.db) - window_results = vcat(window_results, results[row, :]) + push!(curwindow, tablerows[row]) end - push!(actual_results, cb(window_results, range, data)) + push!(results, cb(curwindow, range, data)) end - actual_results + results end From a634656107bb124ef00cc96d6e4861d9072c5a47 Mon Sep 17 00:00:00 2001 From: Sean Marshallsay Date: Wed, 26 Aug 2015 20:00:34 +0100 Subject: [PATCH 3/4] Only store rows that are needed. If a row is the first row in a window then that will be the last time it is used so it can be `shift!`ed so that the GC can get rid of it. --- src/window.jl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/window.jl b/src/window.jl index 59a7a35..32e1928 100644 --- a/src/window.jl +++ b/src/window.jl @@ -27,7 +27,6 @@ function fetchrow(stmt::SQLiteStmt, ncols::Integer) status, row end -# TODO: wrapping this in a macro would avoid the slowness of first-class functions function window{S<:AbstractString}( db::SQLiteDB, cb::Base.Callable, range::OrdinalRange, table::AbstractString, columns::Vector{S}, data..., @@ -37,25 +36,27 @@ function window{S<:AbstractString}( stmt = SQLiteStmt(db, string("SELECT ", join(columns, ", "), " FROM ", table)) status = execute(stmt) ncols = sqlite3_column_count(stmt.handle) - # TODO: don't keep rows that are no longer needed - tablerows = [Array(Any, ncols) for _ in 1:nrows] + # TODO: we can calculate how many rows we need and do this in place + tablerows = Array{Any,1}[] results = Any[] latest_row = 0 for start_row in 1:(nrows + range.start - range.stop) - # TODO: with work we can do this in place aswell - curwindow = Any[] + # TODO: we can do this in place aswell + curwindow = Array{Any,1}[] # find relevent rows for window - for row in addrange(start_row-1, range) + for row in range # only load rows as they are needed - while row > latest_row && status == SQLITE_ROW - status, row_results = fetchrow(stmt, ncols) + while latest_row < row + start_row - 1 && status == SQLITE_ROW + status, row_values = fetchrow(stmt, ncols) latest_row += 1 - copy!(tablerows[latest_row], row_results) + push!(tablerows, row_values) end status == SQLITE_ROW || status == SQLITE_DONE || sqliteerror(stmt.db) push!(curwindow, tablerows[row]) end push!(results, cb(curwindow, range, data)) + # get rid of rows we no longer need + shift!(tablerows) end results end From 53e13428379cc0997279a3c004a2da9f42ddd3fe Mon Sep 17 00:00:00 2001 From: Sean Marshallsay Date: Mon, 31 Aug 2015 12:43:54 +0100 Subject: [PATCH 4/4] Don't use varargs for data. Forcing the user to structure the data variable themselves means they know how to unpack it in the callback. --- src/window.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/window.jl b/src/window.jl index 32e1928..b1e682e 100644 --- a/src/window.jl +++ b/src/window.jl @@ -29,7 +29,7 @@ end function window{S<:AbstractString}( db::SQLiteDB, cb::Base.Callable, range::OrdinalRange, - table::AbstractString, columns::Vector{S}, data..., + table::AbstractString, columns::Vector{S}, data=nothing, ) @assert !isempty(columns) "you must specifiy at least one column" nrows = query(db, string("SELECT COUNT(*) FROM ", table))[1][1]