Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Writing data leads to panic #4080

Open
ruiz-code opened this issue Mar 3, 2022 · 4 comments
Open

Writing data leads to panic #4080

ruiz-code opened this issue Mar 3, 2022 · 4 comments

Comments

@ruiz-code
Copy link

ruiz-code commented Mar 3, 2022

I deployed a three-node cluster with the following node configs:

m3_1.4.2_linux_amd64.tar.gz

node1

"coordinator":
  "listenAddress": "0.0.0.0:7201"
  "local":
    "namespaces":
    - "namespace": "default"
      "retention": "48h"
      "type": "unaggregated"
  "logging":
    "level": "info"
  "metrics":
    "extended": "none"
    "prometheus":
      "handlerPath": "/metrics"
      "listenAddress": "0.0.0.0:7203"
    "samplingRate": 1
    "sanitization": "prometheus"
    "scope":
      "prefix": "coordinator"
  "tagOptions":
    "idScheme": "quoted"
"db":
  "cache":
    "postingsList":
      "size": 262144
    "series":
      "policy": "lru"
  "client":
    "readConsistencyLevel": "one"
    "writeConsistencyLevel": "one"
  "clusterListenAddress": "0.0.0.0:9001"
  "commitlog":
    "flushEvery": "1s"
    "flushMaxBytes": 524288
    "queue":
      "calculationType": "fixed"
      "size": 2097152
  discovery:
    "config":
      "service":
        "cacheDir": "/var/lib/m3kv"
        "env": "default_env"
        "etcdClusters":
        - "endpoints":
          - "http://127.0.0.1:2379"
          "zone": "embedded"
        "service": "m3db"
        "zone": "embedded"
  "debugListenAddress": "0.0.0.0:9004"
  "filesystem":
    "filePathPrefix": "/var/lib/m3db"
  "gcPercentage": 100
  "httpClusterListenAddress": "0.0.0.0:9003"
  "httpNodeListenAddress": "0.0.0.0:9002"
  "listenAddress": "0.0.0.0:9000"
  "logging":
    "level": "info"
  "hostID":
    "resolver": "hostname"
  "metrics":
    "extended": "detailed"
    "prometheus":
      "handlerPath": "/metrics"
    "samplingRate": 1
    "sanitization": "prometheus"
  "writeNewSeriesAsync": true
  "writeNewSeriesBackoffDuration": "2ms"

node2

"coordinator":
  "listenAddress": "0.0.0.0:7201"
  "local":
    "namespaces":
    - "namespace": "default"
      "retention": "48h"
      "type": "unaggregated"
  "logging":
    "level": "info"
  "metrics":
    "extended": "none"
    "prometheus":
      "handlerPath": "/metrics"
      "listenAddress": "0.0.0.0:7203"
    "samplingRate": 1
    "sanitization": "prometheus"
    "scope":
      "prefix": "coordinator"
  "tagOptions":
    "idScheme": "quoted"
"db":
  "cache":
    "postingsList":
      "size": 262144
    "series":
      "policy": "lru"
  "client":
    "readConsistencyLevel": "one"
    "writeConsistencyLevel": "one"
  "clusterListenAddress": "0.0.0.0:9001"
  "commitlog":
    "flushEvery": "1s"
    "flushMaxBytes": 524288
    "queue":
      "calculationType": "fixed"
      "size": 2097152
  discovery:
    "config":
      "service":
        "cacheDir": "/var/lib/m3kv"
        "env": "default_env"
        "etcdClusters":
        - "endpoints":
          - "http://127.0.0.1:2379"
          "zone": "embedded"
        "service": "m3db"
        "zone": "embedded"
  "debugListenAddress": "0.0.0.0:9004"
  "filesystem":
    "filePathPrefix": "/var/lib/m3db"
  "gcPercentage": 100
  "httpClusterListenAddress": "0.0.0.0:9003"
  "httpNodeListenAddress": "0.0.0.0:9002"
  "listenAddress": "0.0.0.0:9000"
  "hostID":
    "resolver": "hostname"
  "logging":
    "level": "info"
  "writeNewSeriesAsync": true
  "writeNewSeriesBackoffDuration": "20ms"

node3

"coordinator":
  "listenAddress": "0.0.0.0:7201"
  "local":
    "namespaces":
    - "namespace": "default"
      "retention": "48h"
      "type": "unaggregated"
  "logging":
    "level": "info"
  "metrics":
    "extended": "none"
    "prometheus":
      "handlerPath": "/metrics"
      "listenAddress": "0.0.0.0:7203"
    "samplingRate": 1
    "sanitization": "prometheus"
    "scope":
      "prefix": "coordinator"
  "tagOptions":
    "idScheme": "quoted"
"db":
  "cache":
    "postingsList":
      "size": 262144
    "series":
      "policy": "lru"
  "client":
    "readConsistencyLevel": "one"
    "writeConsistencyLevel": "one"
  "clusterListenAddress": "0.0.0.0:9001"
  "commitlog":
    "flushEvery": "1s"
    "flushMaxBytes": 524288
    "queue":
      "calculationType": "fixed"
      "size": 2097152
  discovery:
    "config":
      "service":
        "cacheDir": "/var/lib/m3kv"
        "env": "default_env"
        "etcdClusters":
        - "endpoints":
          - "http://127.0.0.1:2379"
          "zone": "embedded"
        "service": "m3db"
        "zone": "embedded"
  "debugListenAddress": "0.0.0.0:9004"
  "filesystem":
    "filePathPrefix": "/var/lib/m3db"
  "gcPercentage": 100
  "httpClusterListenAddress": "0.0.0.0:9003"
  "httpNodeListenAddress": "0.0.0.0:9002"
  "listenAddress": "0.0.0.0:9000"
  "hostID":
    "resolver": "hostname"
  "logging":
    "level": "info"
  "writeNewSeriesAsync": true
  "writeNewSeriesBackoffDuration": "20ms"

start node cmd:

M3DB_HOST_ID=node1 ./m3dbnode -f config.yml
M3DB_HOST_ID=node2 ./m3dbnode -f config.yml
M3DB_HOST_ID=node3 ./m3dbnode -f config.yml

create database

curl -X POST http://localhost:7201/api/v1/database/create -d '{
  "type": "cluster",
  "namespaceName": "default",
  "retentionTime": "48h",
  "numShards": "6",
  "replicationFactor": "2",
  "hosts": [
        {
            "id": "node1",
            "isolationGroup": "test1",
            "zone": "embedded",
            "weight": 100,
            "address": "10.10.10.1",
            "port": 9000
        },
        {
            "id": "node1",
            "isolationGroup": "test2",
            "zone": "embedded",
            "weight": 100,
            "address": "10.10.10.2",
            "port": 9000
        },
        {
            "id": "node3",
            "isolationGroup": "test3",
            "zone": "embedded",
            "weight": 100,
            "address": "10.10.10.3",
            "port": 9000
        }
    ]
}'

initial namespace

curl -X POST http://localhost:7201/api/v1/services/m3db/namespace/ready -d '{
  "name": "default"
}'

When I write to the db:

curl -X POST http://localhost:7201/api/v1/json/write -d '{
  "tags":
    {
      "__name__": "third_avenue",
      "city": "new_york",
      "checkout": "1"
    },
    "timestamp": '\"$(date "+%s")\"',
    "value": 5347.26
}'

I get the panic below. Because there is too much output, I have only posted the start and the end.

SIGILL: illegal instruction
PC=0xdcdc60 m=5 sigcode=2
instruction bytes: 0xf 0x1 0xf9 0x48 0x81 0xe1 0xff 0x0 0x0 0x0 0x48 0x89 0x4c 0x24 0x8 0xc3

goroutine 6123 [running]:
github.com/m3db/m3/src/x/sync.getCore(0x36, 0xc0030dc160, 0x12df07d, 0x3, 0x0, 0x0, 0x1, 0x2389d60, 0xc000728900, 0x234e8c0, ...)
        /go/src/github.com/m3db/m3/src/x/sync/cpu_linux_amd64.s:9 fp=0xc0030dc0e8 sp=0xc0030dc0e0 pc=0xdcdc60
github.com/m3db/m3/src/x/sync.CPUCore(0x3)
        /go/src/github.com/m3db/m3/src/x/sync/index_cpu.go:49 +0x3f fp=0xc0030dc100 sp=0xc0030dc0e8 pc=0xdcb9df
github.com/m3db/m3/src/dbnode/storage.(*dbShardInsertQueue).Insert(0xc04e021080, 0xc05908f980, 0x0, 0x16d8bfdd4cbc4200, 0x40b4e3428f5c28f6, 0x2, 0x0, 0x0, 0x0, 0x0, ...)
        /go/src/github.com/m3db/m3/src/dbnode/storage/shard_insert_queue.go:293 +0x3d fp=0xc0030dc170 sp=0xc0030dc100 pc=0x12df07d
github.com/m3db/m3/src/dbnode/storage.(*dbShard).insertSeriesAsyncBatched(0xc000728900, 0x2396c50, 0xc002dd2c30, 0x0, 0xc058a16900, 0x39, 0x40, 0x0, 0x0, 0x0, ...)
        /go/src/github.com/m3db/m3/src/dbnode/storage/shard.go:1315 +0x178 fp=0xc0030dc3c0 sp=0xc0030dc170 pc=0x12d1e38
github.com/m3db/m3/src/dbnode/storage.(*dbShard).writeAndIndex(0xc000728900, 0x23a5b00, 0xc026269570, 0x2396c50, 0xc002dd2c30, 0x0, 0xc058a16900, 0x39, 0x40, 0x0, ...)
        /go/src/github.com/m3db/m3/src/dbnode/storage/shard.go:986 +0x258 fp=0xc0030dc950 sp=0xc0030dc3c0 pc=0x12cf818
github.com/m3db/m3/src/dbnode/storage.(*dbShard).WriteTagged(0xc000728900, 0x23a5b00, 0xc026269570, 0x2396c50, 0xc002dd2c30, 0x0, 0xc058a16900, 0x39, 0x40, 0x0, ...)
        /go/src/github.com/m3db/m3/src/dbnode/storage/shard.go:873 +0x19d fp=0xc0030dcd18 sp=0xc0030dc950 pc=0x12cf23d
github.com/m3db/m3/src/dbnode/storage.(*dbNamespace).WriteTagged(0xc0500e3500, 0x23a5b00, 0xc026269570, 0x2396c50, 0xc002dd2c30, 0x0, 0xc058a16900, 0x39, 0x40, 0x0, ...)
        /go/src/github.com/m3db/m3/src/dbnode/storage/namespace.go:768 +0x2e7 fp=0xc0030dd150 sp=0xc0030dcd18 pc=0x12ad8a7
github.com/m3db/m3/src/dbnode/storage.(*db).writeBatch(0xc000865dc0, 0x23a5b00, 0xc026269570, 0x2396c50, 0xc002dd2c00, 0x7f3f525efc38, 0xc00fa71140, 0x234e660, 0xc0139b62a0, 0xc058a16901, ...)
        /go/src/github.com/m3db/m3/src/dbnode/storage/database.go:932 +0x3cb fp=0xc0030dd7c8 sp=0xc0030dd150 pc=0x1279ccb
github.com/m3db/m3/src/dbnode/storage.(*db).WriteTaggedBatch(0xc000865dc0, 0x23a5b00, 0xc026269570, 0x2396c50, 0xc002dd2c00, 0x7f3f525efc38, 0xc00fa71140, 0x234e660, 0xc0139b62a0, 0x0, ...)
        /go/src/github.com/m3db/m3/src/dbnode/storage/database.go:890 +0x99 fp=0xc0030dd838 sp=0xc0030dd7c8 pc=0x12798b9
github.com/m3db/m3/src/dbnode/storage/cluster.(*clusterDB).WriteTaggedBatch(0xc04ff16d20, 0x23a5b00, 0xc026269570, 0x2396c50, 0xc002dd2c00, 0x7f3f525efc38, 0xc00fa71140, 0x234e660, 0xc0139b62a0, 0xc033d50002, ...)
        <autogenerated>:1 +0xa9 fp=0xc0030dd8a0 sp=0xc0030dd838 pc=0x1913709
github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/node.(*service).WriteTaggedBatchRaw(0xc0075c4a00, 0x7f3f517a9998, 0xc058cdb930, 0xc058cfb9b0, 0x0, 0x0)
        /go/src/github.com/m3db/m3/src/dbnode/network/server/tchannelthrift/node/service.go:2031 +0x7fb fp=0xc0030dda18 sp=0xc0030dd8a0 pc=0x18bc83b
github.com/m3db/m3/src/dbnode/generated/thrift/rpc.(*tchanNodeServer).handleWriteTaggedBatchRaw(0xc013a2e7a0, 0x7f3f517a9998, 0xc058cdb930, 0x23b3900, 0xc03b910090, 0xc03d315ae0, 0x48b7f4, 0x30dd620, 0xc03b1d2400, 0x0)
        /go/src/github.com/m3db/m3/src/dbnode/generated/thrift/rpc/tchan-rpc.go:2015 +0xc7 fp=0xc0030dda70 sp=0xc0030dda18 pc=0xfe6ec7
github.com/m3db/m3/src/dbnode/generated/thrift/rpc.(*tchanNodeServer).Handle(0xc013a2e7a0, 0x7f3f517a9998, 0xc058cdb930, 0xc058c7c5a6, 0x13, 0x23b3900, 0xc03b910090, 0xc058cfb920, 0xc03d315b88, 0x418df3, ...)
        /go/src/github.com/m3db/m3/src/dbnode/generated/thrift/rpc/tchan-rpc.go:1162 +0x112a fp=0xc0030ddaf0 sp=0xc0030dda70 pc=0xfe1d6a
github.com/uber/tchannel-go/thrift.(*Server).handle(0xc013a20690, 0x238b960, 0xc058cdb8a0, 0x237a870, 0xc013a2e7a0, 

...

goroutine 6073 [runnable]:
internal/poll.runtime_pollWait(0x7f3f50502f78, 0x72, 0xffffffffffffffff)
        /usr/local/go/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc0586f5398, 0x72, 0x0, 0x10, 0xffffffffffffffff)
        /usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
        /usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc0586f5380, 0xc0557e87f0, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/internal/poll/fd_unix.go:166 +0x1d5
net.(*netFD).Read(0xc0586f5380, 0xc0557e87f0, 0x10, 0x10, 0xc05914b7f0, 0xc058a35140, 0xc0590ad4a0)
        /usr/local/go/src/net/fd_posix.go:55 +0x4f
net.(*conn).Read(0xc054f08a58, 0xc0557e87f0, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/net/net.go:183 +0x91
io.ReadAtLeast(0x2352400, 0xc054f08a58, 0xc0557e87f0, 0x10, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/io/io.go:328 +0x87
io.ReadFull(...)
        /usr/local/go/src/io/io.go:347
github.com/uber/tchannel-go.(*Connection).readFrames(0xc03d1d9340, 0xc00000009a)
        /go/src/github.com/m3db/m3/vendor/github.com/uber/tchannel-go/connection.go:660 +0xd9
created by github.com/uber/tchannel-go.(*Channel).newConnection
        /go/src/github.com/m3db/m3/vendor/github.com/uber/tchannel-go/connection.go:374 +0xf7c

goroutine 5226 [runnable]:
internal/poll.runtime_pollWait(0x7f3f4fd56dd0, 0x72, 0xffffffffffffffff)
        /usr/local/go/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc0586f5718, 0x72, 0x0, 0x10, 0xffffffffffffffff)
        /usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
        /usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc0586f5700, 0xc055ab00f0, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/internal/poll/fd_unix.go:166 +0x1d5
net.(*netFD).Read(0xc0586f5700, 0xc055ab00f0, 0x10, 0x10, 0xc05914b820, 0xc058a351a0, 0xc0590ad550)
        /usr/local/go/src/net/fd_posix.go:55 +0x4f
net.(*conn).Read(0xc054f08aa0, 0xc055ab00f0, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/net/net.go:183 +0x91
io.ReadAtLeast(0x2352400, 0xc054f08aa0, 0xc055ab00f0, 0x10, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/io/io.go:328 +0x87
io.ReadFull(...)
        /usr/local/go/src/io/io.go:347
github.com/uber/tchannel-go.(*Connection).readFrames(0xc03e08bb80, 0xc00000009d)
        /go/src/github.com/m3db/m3/vendor/github.com/uber/tchannel-go/connection.go:660 +0xd9
created by github.com/uber/tchannel-go.(*Channel).newConnection
        /go/src/github.com/m3db/m3/vendor/github.com/uber/tchannel-go/connection.go:374 +0xf7c

goroutine 6086 [select]:
github.com/uber/tchannel-go.(*Connection).writeFrames(0xc035af7b80, 0xc000000098)
        /go/src/github.com/m3db/m3/vendor/github.com/uber/tchannel-go/connection.go:737 +0xa5
created by github.com/uber/tchannel-go.(*Channel).newConnection
        /go/src/github.com/m3db/m3/vendor/github.com/uber/tchannel-go/connection.go:375 +0xfa9

goroutine 6100 [runnable]:
internal/poll.runtime_pollWait(0x7f3f4fd56778, 0x72, 0xffffffffffffffff)
        /usr/local/go/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc0586f5a98, 0x72, 0x0, 0x10, 0xffffffffffffffff)
        /usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
        /usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc0586f5a80, 0xc055ab0110, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/internal/poll/fd_unix.go:166 +0x1d5
net.(*netFD).Read(0xc0586f5a80, 0xc055ab0110, 0x10, 0x10, 0xc059380010, 0xc058eda5a0, 0xc03b9d8580)
        /usr/local/go/src/net/fd_posix.go:55 +0x4f
net.(*conn).Read(0xc054f08ad8, 0xc055ab0110, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/net/net.go:183 +0x91
io.ReadAtLeast(0x2352400, 0xc054f08ad8, 0xc055ab0110, 0x10, 0x10, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/io/io.go:328 +0x87
io.ReadFull(...)
        /usr/loca
gs     0x0

I have already set the kernel parameters with sysctl:

sysctl -w vm.max_map_count=3000000
sysctl -w vm.swappiness=1
sysctl -w fs.file-max=3000000
sysctl -w fs.nr_open=3000000
@ruiz-code
Copy link
Author

ruiz-code commented Mar 3, 2022

Is there anyone who could help me? Thank you so much.

@ruiz-code
Copy link
Author

ruiz-code commented Mar 3, 2022

@robskillington help, please !!! Maybe the instruction set is wrong on my machine

// func getCore() int
TEXT ·getCore(SB), NOSPLIT, $0
	// RDTSCP
	BYTE $0x0f; BYTE $0x01; BYTE $0xf9

	// Linux puts core ID in the bottom byte.
	ANDQ $0xff, CX
	MOVQ CX, ret+0(FP)
	RET

@kendrickclark
Copy link

I too have this issue

@amritanshu-pandey
Copy link

I am also facing this issue! Will add more details tomorrow.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants