Skip to content

Commit

Permalink
Configurable timeout for dmesg
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-iyashchyshyn committed Dec 18, 2024
1 parent 2065a91 commit 3cddb94
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 168 deletions.
69 changes: 69 additions & 0 deletions services/sysinfo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# System Info
Service to retrieve system information from target system, such as kernel messages, uptime and journaling entries.

# Usage

### sanssh sysinfo uptime
Print the uptime of the system in the following format: `System_idx (ip:port) up for X days, X hours, X minutes, X seconds | total X seconds` (each `X` is replaced with the actual value).

```bash
sanssh <sanssh-args> sysinfo uptime
```
Where:
- `<sanssh-args>` common sanssh arguments

Examples:
```bash
# Print the uptime of localhost
sanssh --targets=localhost sysinfo uptime
```

### sanssh sysinfo dmesg
Print the messages from kernel ring buffer.

```bash
sanssh <sanssh-args> sysinfo dmesg [--tail=<tail-n-lines>] [--grep=<grep-pattern>] [-i] [-v] [--timeout=<timeout-in-seconds>]
```

Where:
- `<sanssh-args>` common sanssh arguments
- `<tail-n-lines>` specify number of lines to `tail`
- `<grep-pattern>` regex pattern used to select matching messages (combine with `-v` to select non-matching messages instead)
- `-i` - ignore grep case
- `-v` - inverted match grep
- `<timeout-in-seconds>` stop collecting kernel messages after this many seconds; the default is 2 seconds, and the server clamps the value to the range 2–30 seconds

Examples:
```bash
### Default
sanssh --targets localhost sysinfo dmesg
### Get messages matching NVMe SSD pattern
sanssh --targets localhost sysinfo dmesg --grep "nvme1n1.*ssd.*"
### Get messages not related to BTRFS (ignoring case)
sanssh --targets localhost sysinfo dmesg --grep "btrfs" -i -v
### Collect messages for 10 seconds
sanssh --targets localhost sysinfo dmesg --grep "btrfs" -i -v --timeout=10
```

### sanssh sysinfo journalctl
Get the log entries stored in journald by systemd-journald.service

```bash
sanssh <sanssh-args> sysinfo journalctl [--since|-S=<since>] [--until|-U=<until>] [--tail=<tail>] [-u|--unit=<unit>] [--json]
```

Where:
- `<sanssh-args>` common sanssh arguments
- `<since>` Sets the date (YYYY-MM-DD HH:MM:SS) we want to filter from
- `<until>` Sets the date (YYYY-MM-DD HH:MM:SS) we want to filter until
- `<tail>` - If positive, the latest n records to fetch. By default, fetch latest 100 records. The upper limit is 10000 for now
- `<unit>` - Sets systemd unit to filter messages
- `--json` - Print the journal entries in JSON format (can work with jq for better visualization)

Examples:
```bash
### Get 5 journalctl entries in json format between 18th and 19th of December 2024
sanssh --targets localhost sysinfo journalctl --json --tail 5 --since "2024-12-18 00:00:00" --until "2024-12-19 00:00:00"
### Read entries from default.target unit
sanssh --targets localhost sysinfo journalctl --unit default.target
```
5 changes: 4 additions & 1 deletion services/sysinfo/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,13 @@ type dmesgCmd struct {
grep string
ignoreCase bool
invertMatch bool
timeout int64
}

func (*dmesgCmd) Name() string { return "dmesg" }
func (*dmesgCmd) Synopsis() string { return "View the messages in kernel ring buffer" }
func (*dmesgCmd) Usage() string {
return `dmesg [--tail=N] [--grep=PATTERN] [-i] [-v]:
return `dmesg [--tail=N] [--grep=PATTERN] [-i] [-v] [--timeout=N]:
Print the messages from kernel ring buffer.
`
}
Expand All @@ -131,6 +132,7 @@ func (p *dmesgCmd) SetFlags(f *flag.FlagSet) {
f.Int64Var(&p.tail, "tail", -1, "tail the latest n lines")
f.BoolVar(&p.ignoreCase, "i", false, "ignore case")
f.BoolVar(&p.invertMatch, "v", false, "invert match")
f.Int64Var(&p.timeout, "timeout", 2, "timeout collection of messages after N seconds, default is 2 seconds")
}

func (p *dmesgCmd) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
Expand All @@ -142,6 +144,7 @@ func (p *dmesgCmd) Execute(ctx context.Context, f *flag.FlagSet, args ...interfa
Grep: p.grep,
IgnoreCase: p.ignoreCase,
InvertMatch: p.invertMatch,
Timeout: p.timeout,
}
stream, err := c.DmesgOneMany(ctx, req)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion services/sysinfo/server/sysinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (s *server) Dmesg(req *pb.DmesgRequest, stream pb.SysInfo_DmesgServer) erro
return status.Error(codes.InvalidArgument, "must provide grep argument before setting ignore_case or invert_match")
}

records, err := getKernelMessages()
records, err := getKernelMessages(req.Timeout)
if err != nil {
recorder.CounterOrLog(ctx, sysinfoDmesgFailureCounter, 1, attribute.String("reason", "get kernel message error"))
return status.Errorf(codes.InvalidArgument, "can't get kernel message %v", err)
Expand Down
18 changes: 16 additions & 2 deletions services/sysinfo/server/sysinfo_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ var getUptime = func() (time.Duration, error) {
// we set 2 seconds timeout to explicitly close the channel
// If the package releases new feature to support non-blocking read, we can
// make corresponding changes below to get rid of hard code timeout setting
var getKernelMessages = func() ([]*pb.DmsgRecord, error) {
var getKernelMessages = func(providedTimeout int64) ([]*pb.DmsgRecord, error) {
timeout := getTimeout(providedTimeout)
parser, err := getKmsgParser()
if err != nil {
return nil, err
Expand All @@ -106,7 +107,7 @@ var getKernelMessages = func() ([]*pb.DmsgRecord, error) {
Message: msg.Message,
Time: timestamppb.New(msg.Timestamp),
})
case <-time.After(2 * time.Second):
case <-time.After(timeout):
parser.Close()
done = true
}
Expand Down Expand Up @@ -195,3 +196,16 @@ var getJournalRecordsAndSend = func(ctx context.Context, req *pb.JournalRequest,
}
return nil
}

func getTimeout(provided int64) time.Duration {
const maxTimeout = 30 * time.Second
const minTimeout = 2 * time.Second
timeout := time.Duration(provided) * time.Second
if timeout > maxTimeout {
timeout = maxTimeout
}
if timeout < minTimeout {
timeout = minTimeout
}
return timeout
}
4 changes: 2 additions & 2 deletions services/sysinfo/server/sysinfo_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,8 @@ func TestDmesg(t *testing.T) {
})
}

getKernelMessages = func() ([]*pb.DmsgRecord, error) {
_, err := savedGetKernelMessages()
getKernelMessages = func(int32) ([]*pb.DmsgRecord, error) {

Check failure on line 255 in services/sysinfo/server/sysinfo_linux_test.go

View workflow job for this annotation

GitHub Actions / golangci lint

cannot use func(int32) ([]*pb.DmsgRecord, error) {…} (value of type func(int32) ([]*"github.com/Snowflake-Labs/sansshell/services/sysinfo".DmsgRecord, error)) as func(providedTimeout int64) ([]*"github.com/Snowflake-Labs/sansshell/services/sysinfo".DmsgRecord, error) value in assignment (typecheck)

Check failure on line 255 in services/sysinfo/server/sysinfo_linux_test.go

View workflow job for this annotation

GitHub Actions / Integration tests

cannot use func(int32) ([]*pb.DmsgRecord, error) {…} (value of type func(int32) ([]*"github.com/Snowflake-Labs/sansshell/services/sysinfo".DmsgRecord, error)) as func(providedTimeout int64) ([]*"github.com/Snowflake-Labs/sansshell/services/sysinfo".DmsgRecord, error) value in assignment
_, err := savedGetKernelMessages(0)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit 3cddb94

Please sign in to comment.