Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/sql-tapd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/mickamy/sql-tap/proxy"
"github.com/mickamy/sql-tap/proxy/mysql"
"github.com/mickamy/sql-tap/proxy/postgres"
"github.com/mickamy/sql-tap/query"
"github.com/mickamy/sql-tap/server"
"github.com/mickamy/sql-tap/web"
)
Expand Down Expand Up @@ -156,6 +157,9 @@ func run(

go func() {
for ev := range p.Events() {
if ev.Query != "" {
ev.NormalizedQuery = query.Normalize(ev.Query)
}
if det != nil && isSelectQuery(ev.Op, ev.Query) {
r := det.Record(ev.Query, ev.StartTime)
ev.NPlus1 = r.Matched
Expand Down
39 changes: 24 additions & 15 deletions gen/tap/v1/tap.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions proto/tap/v1/tap.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ message QueryEvent {
string error = 8;
string tx_id = 9;
bool n_plus_1 = 10;
string normalized_query = 11;
}

message WatchRequest {}
Expand Down
21 changes: 11 additions & 10 deletions proxy/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,17 @@ func (o Op) String() string {

// Event represents a captured database query event.
type Event struct {
ID string
Op Op
Query string
Args []string
StartTime time.Time
Duration time.Duration
RowsAffected int64
Error string
TxID string
NPlus1 bool
ID string
Op Op
Query string
Args []string
StartTime time.Time
Duration time.Duration
RowsAffected int64
Error string
TxID string
NPlus1 bool
// NormalizedQuery is the placeholder form of Query. The sql-tapd event
// loop fills it from query.Normalize(Query) when Query is non-empty.
NormalizedQuery string
}

// Proxy is the common interface for DB protocol proxies.
Expand Down
115 changes: 115 additions & 0 deletions query/normalize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package query

import "strings"

// Normalize rewrites a SQL statement into a canonical shape so that queries
// differing only in their literal values compare equal.
//
// The rewrite is a single left-to-right pass over the bytes of sql:
//   - a single-quoted string literal becomes '?'
//   - a standalone numeric literal (digits and dots delimited by boundary
//     characters) becomes ?
//   - $N parameters are copied through unchanged
//   - each run of whitespace becomes one space, and leading and trailing
//     whitespace is dropped
//
// An empty input yields an empty result.
func Normalize(sql string) string {
	if len(sql) == 0 {
		return ""
	}

	var out strings.Builder
	out.Grow(len(sql))

	lastWasSpace := false
	for pos := 0; pos < len(sql); {
		c := sql[pos]
		switch {
		case c == '\'':
			pos = normalizeString(&out, sql, pos)
			lastWasSpace = false

		case c == '$' && pos+1 < len(sql) && isDigit(sql[pos+1]):
			pos = keepParam(&out, sql, pos)
			lastWasSpace = false

		case isSpace(c):
			// Emit at most one space per run, and none before the first token.
			if out.Len() > 0 && !lastWasSpace {
				out.WriteByte(' ')
				lastWasSpace = true
			}
			pos++

		default:
			// A digit only starts a literal when it is not glued to a
			// preceding identifier character (e.g. the 1 in t1).
			if isDigit(c) && (pos == 0 || isNumBoundary(sql[pos-1])) {
				if end, ok := normalizeNumber(&out, sql, pos); ok {
					pos = end
					lastWasSpace = false
					continue
				}
			}
			out.WriteByte(c)
			pos++
			lastWasSpace = false
		}
	}

	// Whitespace was already collapsed, so at most one trailing space remains.
	return strings.TrimRight(out.String(), " ")
}

// normalizeString emits the placeholder '?' for the single-quoted literal
// that opens at sql[pos] and returns the index just past that literal.
//
// A doubled quote ('') inside the literal is treated as an escaped quote and
// does not terminate it. If the literal is never closed, the returned index
// is at or beyond len(sql).
func normalizeString(b *strings.Builder, sql string, pos int) int {
	b.WriteString("'?'")

	end := pos + 1 // skip the opening quote
	for end < len(sql) {
		if sql[end] != '\'' {
			end++
			continue
		}
		if end+1 < len(sql) && sql[end+1] == '\'' {
			// Escaped quote: step over both characters.
			end += 2
			continue
		}
		// Closing quote.
		return end + 1
	}
	return end
}

// keepParam copies a $N positional parameter to b unchanged: it writes '$'
// followed by the run of digits after sql[pos], and returns the index of the
// first byte after those digits.
func keepParam(b *strings.Builder, sql string, pos int) int {
	end := pos + 1
	for end < len(sql) && isDigit(sql[end]) {
		end++
	}
	b.WriteByte('$')
	b.WriteString(sql[pos+1 : end])
	return end
}

// normalizeNumber tries to replace the numeric literal starting at sql[pos]
// with a single ? placeholder.
//
// It scans past the following digits and dots. If the scan stops at the end
// of sql or at a boundary character, it writes ? to b and returns the index
// after the literal together with true. Otherwise the digits belong to a
// larger token; nothing is written and it returns (0, false).
func normalizeNumber(b *strings.Builder, sql string, pos int) (int, bool) {
	end := pos + 1
	for ; end < len(sql); end++ {
		if c := sql[end]; !isDigit(c) && c != '.' {
			break
		}
	}
	if end < len(sql) && !isNumBoundary(sql[end]) {
		return 0, false
	}
	b.WriteByte('?')
	return end, true
}

// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	return '0' <= c && c <= '9'
}

// isSpace reports whether c is a space, tab, line feed, or carriage return.
func isSpace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r':
		return true
	default:
		return false
	}
}

// isNumBoundary reports whether c may sit directly next to a standalone
// numeric literal: whitespace, a comma, a parenthesis, a comparison or
// arithmetic operator, or a semicolon.
func isNumBoundary(c byte) bool {
	switch c {
	case ',', '(', ')', '=', '<', '>', '+', '-', '*', '/', ';':
		return true
	}
	return isSpace(c)
}
40 changes: 40 additions & 0 deletions query/normalize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package query_test

import (
"testing"

"github.com/mickamy/sql-tap/query"
)

// TestNormalize runs query.Normalize over a table of inputs and compares each
// result with the expected normalized form. Every case runs as a parallel
// subtest.
func TestNormalize(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name string
		in   string
		want string
	}

	cases := []testCase{
		{name: "empty", in: "", want: ""},
		{name: "string literal", in: "SELECT id FROM users WHERE name = 'alice'", want: "SELECT id FROM users WHERE name = '?'"},
		{name: "escaped quote", in: "WHERE name = 'it''s'", want: "WHERE name = '?'"},
		{name: "numeric literal", in: "SELECT id, name FROM users WHERE id = 42", want: "SELECT id, name FROM users WHERE id = ?"},
		{name: "float literal", in: "WHERE score > 3.14", want: "WHERE score > ?"},
		{name: "pg param kept", in: "WHERE id = $1 AND name = $2", want: "WHERE id = $1 AND name = $2"},
		{name: "in list", in: "WHERE id IN (1, 2, 3)", want: "WHERE id IN (?, ?, ?)"},
		{name: "mixed", in: "WHERE id = 42 AND name = 'bob' AND status = $1", want: "WHERE id = ? AND name = '?' AND status = $1"},
		{name: "whitespace collapse", in: "SELECT id\n\tFROM users", want: "SELECT id FROM users"},
		{name: "leading trailing space", in: " SELECT 1 ", want: "SELECT ?"},
		{name: "no replace in identifier", in: "SELECT t1.id FROM t1", want: "SELECT t1.id FROM t1"},
		{name: "negative number", in: "WHERE x = -5", want: "WHERE x = -?"},
		{name: "multiple string literals", in: "INSERT INTO t (a, b) VALUES ('x', 'y')", want: "INSERT INTO t (a, b) VALUES ('?', '?')"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			if got := query.Normalize(tc.in); got != tc.want {
				t.Errorf("Normalize(%q)\n got %q\n want %q", tc.in, got, tc.want)
			}
		})
	}
}
21 changes: 11 additions & 10 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,17 @@ func eventToProto(ev proxy.Event) *tapv1.QueryEvent {
args[i] = sanitizeUTF8(a)
}
return &tapv1.QueryEvent{
Id: ev.ID,
Op: int32(ev.Op),
Query: sanitizeUTF8(ev.Query),
Args: args,
StartTime: timestamppb.New(ev.StartTime),
Duration: durationpb.New(ev.Duration),
RowsAffected: ev.RowsAffected,
Error: sanitizeUTF8(ev.Error),
TxId: ev.TxID,
NPlus_1: ev.NPlus1,
Id: ev.ID,
Op: int32(ev.Op),
Query: sanitizeUTF8(ev.Query),
Args: args,
StartTime: timestamppb.New(ev.StartTime),
Duration: durationpb.New(ev.Duration),
RowsAffected: ev.RowsAffected,
Error: sanitizeUTF8(ev.Error),
TxId: ev.TxID,
NPlus_1: ev.NPlus1,
NormalizedQuery: sanitizeUTF8(ev.NormalizedQuery),
}
}

Expand Down
Loading