Skip to content

Commit

Permalink
improvements to "from" operator (#5378)
Browse files Browse the repository at this point in the history
This commit improves the from operator and brings it into closer alignment
with SQL.  We also deprecated pool, get, and file; they can still be used,
but they appear as "from" when running super compile.

Targets of the from operator are now self-describing, and names are
interpreted as files when running locally (super -c) and as pools when
running on a lake (super db).  URLs continue to work in both cases.

Const expressions continue to work but now need to be enclosed
in brackets, e.g., "from [poolConst]".  This makes it easy
to distinguish a file reference, e.g., "from foo.json", from a
record deref, e.g., "from [this.pool]".  Currently these expressions
must evaluate at compile time, but this sets us up to configure
"from" to have a parent and make this all dynamic. Note you can also
now say 'from ["a.json", "b.json"]', which will do a merge of
the scans (not sequential eval).

The semantic pass now does existence checks of files and pools,
so "super compile" tests needed to be updated to "touch" files etc.

We will update docs to reflect these changes after we add the parent
concept to "from" and have a bit of experience with the new syntax and
semantics.
  • Loading branch information
mccanne authored Oct 28, 2024
1 parent b5cd8ee commit eebbb56
Show file tree
Hide file tree
Showing 32 changed files with 3,588 additions and 3,443 deletions.
8 changes: 4 additions & 4 deletions cmd/super/ztests/from-file-error.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
script: |
! super -I query.zed
! super -I query.spq
inputs:
- name: query.zed
- name: query.spq
data: |
file a.zson
file a.jsup
outputs:
- name: stderr
regexp: |
a.zson: file does not exist
a.jsup: file:///.*/a.jsup: file does not exist.*
6 changes: 2 additions & 4 deletions cmd/super/ztests/from-pool-error.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,5 @@ script: |
outputs:
- name: stderr
data: |
super: "from pool" cannot be used without a lake at line 1, column 8:
from ( pool a )
~~~~~~
regexp: |
a: file:///.*/a: file does not exist
97 changes: 49 additions & 48 deletions compiler/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,22 @@ type Name struct {
Loc `json:"loc"`
}

type Pattern interface {
type FromEntity interface {
Node
PatternAST()
fromEntityAST()
}

func (*Glob) PatternAST() {}
func (*Regexp) PatternAST() {}
func (*Name) PatternAST() {}
// ExprEntity is a FromEntity whose target is given by an expression (Expr)
// rather than by a name, glob, or regexp.
// NOTE(review): presumably the AST form of the bracketed syntax described in
// the commit message, e.g. "from [poolConst]" — confirm against the parser.
type ExprEntity struct {
Kind string `json:"kind" unpack:""`
Expr Expr `json:"expr"`
Loc `json:"loc"`
}

// Marker methods: each pointer type below provides the unexported
// fromEntityAST method required by the FromEntity interface. The bodies
// are empty.
func (*Glob) fromEntityAST() {}
func (*Regexp) fromEntityAST() {}
func (*ExprEntity) fromEntityAST() {}
func (*LakeMeta) fromEntityAST() {}
func (*Name) fromEntityAST() {}

type RecordExpr struct {
Kind string `json:"kind" unpack:""`
Expand Down Expand Up @@ -492,11 +500,6 @@ type (
Kind string `json:"kind" unpack:""`
Loc `json:"loc"`
}
From struct {
Kind string `json:"kind" unpack:""`
Trunks []Seq `json:"trunks"`
Loc `json:"loc"`
}
Load struct {
Kind string `json:"kind" unpack:""`
Pool *Name `json:"pool"`
Expand Down Expand Up @@ -524,55 +527,56 @@ type (
}
)

// Source structure

type (
File struct {
Kind string `json:"kind" unpack:""`
Path Pattern `json:"path"`
Format *Name `json:"format"`
SortKeys []SortExpr `json:"sort_keys"`
Loc `json:"loc"`
}
HTTP struct {
Kind string `json:"kind" unpack:""`
URL Pattern `json:"url"`
Format *Name `json:"format"`
SortKeys []SortExpr `json:"sort_keys"`
Method *Name `json:"method"`
Headers *RecordExpr `json:"headers"`
Body *Name `json:"body"`
Loc `json:"loc"`
}
Pool struct {
Kind string `json:"kind" unpack:""`
Spec PoolSpec `json:"spec"`
Loc `json:"loc"`
// From is the AST node for the from operator. Entity identifies the
// target being read and Args holds the target's arguments as a FromArgs
// value.
From struct {
Kind string `json:"kind" unpack:""`
Entity FromEntity `json:"entity"`
Args FromArgs `json:"args"`
Loc `json:"loc"`
}
// LakeMeta is a from entity that refers to a meta name (Meta); it
// provides the fromEntityAST marker method.
// NOTE(review): MetaPos presumably records the source position of the
// meta name — confirm against the parser.
LakeMeta struct {
Kind string `json:"kind" unpack:""`
MetaPos int `json:"meta_pos"`
Meta *Name `json:"meta"`
Loc `json:"loc"`
}
// Delete is an AST node with no fields of its own beyond the Kind
// discriminator and the embedded Loc.
Delete struct {
Kind string `json:"kind" unpack:""`
Loc `json:"loc"` // dummy field, not needed except to implement Node
}
)

type PoolSpec struct {
Pool Pattern `json:"pool"`
Commit *Name `json:"commit"`
Meta *Name `json:"meta"`
Tap bool `json:"tap"`
// PoolArgs is the FromArgs variant carrying a commit name, a meta name,
// and a tap flag.
// NOTE(review): presumably used when the from target is a pool — confirm
// against the semantic pass.
type PoolArgs struct {
Kind string `json:"kind" unpack:""`
Commit *Name `json:"commit"`
Meta *Name `json:"meta"`
Tap bool `json:"tap"`
Loc `json:"loc"`
}

type Source interface {
// FormatArg is the FromArgs variant that carries only a format name.
// NOTE(review): presumably used for file targets — confirm against the
// semantic pass.
type FormatArg struct {
Kind string `json:"kind" unpack:""`
Format *Name `json:"format"`
Loc `json:"loc"`
}

// HTTPArgs is the FromArgs variant carrying a format name together with
// the method, headers, and body of an HTTP request.
// NOTE(review): presumably used when the from target is a URL — confirm
// against the semantic pass.
type HTTPArgs struct {
Kind string `json:"kind" unpack:""`
Format *Name `json:"format"`
Method *Name `json:"method"`
Headers *RecordExpr `json:"headers"`
Body *Name `json:"body"`
Loc `json:"loc"`
}

type FromArgs interface {
Node
Source()
fromArgs()
}

func (*Pool) Source() {}
func (*File) Source() {}
func (*HTTP) Source() {}
func (*Pass) Source() {}
func (*Delete) Source() {}
// Marker methods: each pointer type below provides the unexported fromArgs
// method required by the FromArgs interface. The bodies are empty.
func (*PoolArgs) fromArgs() {}
func (*FormatArg) fromArgs() {}
func (*HTTPArgs) fromArgs() {}

type SortExpr struct {
Kind string `json:"kind" unpack:""`
Expand Down Expand Up @@ -615,9 +619,6 @@ func (*Cut) OpAST() {}
func (*Drop) OpAST() {}
func (*Head) OpAST() {}
func (*Tail) OpAST() {}
func (*Pool) OpAST() {}
func (*File) OpAST() {}
func (*HTTP) OpAST() {}
func (*Pass) OpAST() {}
func (*Uniq) OpAST() {}
func (*Summarize) OpAST() {}
Expand Down
9 changes: 6 additions & 3 deletions compiler/ast/unpack.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ var unpacker = unpack.New(
Explode{},
Enum{},
Error{},
ExprEntity{},
FieldExpr{},
File{},
FormatArg{},
From{},
FString{},
FStringExpr{},
Expand All @@ -38,7 +39,7 @@ var unpacker = unpack.New(
Summarize{},
Grep{},
Head{},
HTTP{},
HTTPArgs{},
ID{},
ImpliedValue{},
IndexExpr{},
Expand All @@ -53,7 +54,7 @@ var unpacker = unpack.New(
OverExpr{},
Parallel{},
Pass{},
Pool{},
PoolArgs{},
Primitive{},
Put{},
Record{},
Expand Down Expand Up @@ -94,6 +95,8 @@ var unpacker = unpack.New(
Where{},
Yield{},
Sample{},
Delete{},
LakeMeta{},
)

// UnmarshalOp transforms a JSON representation of an operator into an Op.
Expand Down
22 changes: 10 additions & 12 deletions compiler/dag/op.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,20 +206,18 @@ type (
SortKeys order.SortKeys `json:"sort_keys"`
}
FileScan struct {
Kind string `json:"kind" unpack:""`
Path string `json:"path"`
Format string `json:"format"`
SortKeys order.SortKeys `json:"sort_keys"`
Filter Expr `json:"filter"`
Kind string `json:"kind" unpack:""`
Path string `json:"path"`
Format string `json:"format"`
Filter Expr `json:"filter"`
}
HTTPScan struct {
Kind string `json:"kind" unpack:""`
URL string `json:"url"`
Format string `json:"format"`
SortKeys order.SortKeys `json:"sort_keys"`
Method string `json:"method"`
Headers map[string][]string `json:"headers"`
Body string `json:"body"`
Kind string `json:"kind" unpack:""`
URL string `json:"url"`
Format string `json:"format"`
Method string `json:"method"`
Headers map[string][]string `json:"headers"`
Body string `json:"body"`
}
PoolScan struct {
Kind string `json:"kind" unpack:""`
Expand Down
4 changes: 2 additions & 2 deletions compiler/optimizer/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,9 +416,9 @@ func (o *Optimizer) sortKeysOfSource(op dag.Op) (order.SortKeys, error) {
case *dag.DefaultScan:
return op.SortKeys, nil
case *dag.FileScan:
return op.SortKeys, nil
return nil, nil
case *dag.HTTPScan:
return op.SortKeys, nil
return nil, nil
case *dag.PoolScan:
return o.sortKey(op.ID)
case *dag.Lister:
Expand Down
Loading

0 comments on commit eebbb56

Please sign in to comment.