
Case Study: Expanding a Kafka Cluster's PV

Background

The Alibaba Cloud account ran out of credit, which took down all three pay-as-you-go master nodes. After the account was topped up, one of the Kafka clusters failed to come back up.

Troubleshooting

The log of the failing broker pod contained the following fatal error.
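A minimal way to pull it, assuming the broker in trouble is kafka-1 in the zhonggu namespace (the same names used in the kubectl exec command further down):

kubectl logs kafka-1 -n zhonggu --tail=100

The relevant part of the output: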

[2019-11-07 20:59:58,934] FATAL [Kafka Server 1], Fatal error during KafkaServer startup. Prepare to shutdown (kafka.server.KafkaServer)
java.lang.InternalError: a fault occurred in a recent unsafe memory access operation in compiled Java code
at kafka.log.TimeIndex$$anonfun$maybeAppend$1.apply$mcV$sp(TimeIndex.scala:119)
at kafka.log.TimeIndex$$anonfun$maybeAppend$1.apply(TimeIndex.scala:107)
at kafka.log.TimeIndex$$anonfun$maybeAppend$1.apply(TimeIndex.scala:107)
at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:213)
at kafka.log.TimeIndex.maybeAppend(TimeIndex.scala:107)
at kafka.log.LogSegment$$anonfun$recover$1.apply(LogSegment.scala:238)
at kafka.log.LogSegment$$anonfun$recover$1.apply(LogSegment.scala:224)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at kafka.log.LogSegment.recover(LogSegment.scala:224)
at kafka.log.Log.recoverLog(Log.scala:305)
at kafka.log.Log.loadSegments(Log.scala:279)
at kafka.log.Log.<init>(Log.scala:117)
at kafka.log.LogManager$$anonfun$loadLogs$2$$anonfun$3$$anonfun$apply$10$$anonfun$apply$1.apply$mcV$sp(LogManager.scala:157)
at kafka.utils.CoreUtils$$anon$1.run(CoreUtils.scala:57)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

The InternalError raised while appending to a memory-mapped time index usually means the broker could no longer write to its data volume, i.e. the disk is full. Exec into the pod to check the space:

kubectl exec -it kafka-1 -n zhonggu -- /bin/sh
...
/dev/vdd ext4 20G 20G 20M 100% /var/lib/kafka
...
# The data volume is indeed 100% full
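Before expanding, it can also be worth seeing what is actually eating the space. A quick check from inside the same pod, assuming the segments live under log.dirs=/var/lib/kafka/data as configured in the manifest below:

# Rough per-topic-partition usage on the data volume
du -sh /var/lib/kafka/data/*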

Solution: Expand the PV

First, add the following field to the StorageClass manifest:

allowVolumeExpansion: true
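For context, a minimal sketch of where this field sits, assuming the alicloud-disk-ssd class referenced by the PVC template below; the provisioner and parameters shown are typical for Alibaba Cloud disks and should be replaced with whatever the cluster actually uses:

apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: alicloud-disk-ssd
provisioner: diskplugin.csi.alibabacloud.com  # assumption: CSI disk driver; older clusters use the flexvolume provisioner alicloud/disk
parameters:
  type: cloud_ssd
reclaimPolicy: Retain  # assumption: Retain keeps the disk (and its data) when the PVC is deleted, which the steps below rely on
allowVolumeExpansion: true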

This marks the storage class as allowing its volumes to be expanded. With that in place: enlarge the PV, delete the old PVC, raise the storage request in the PVC template, and redeploy the application. A rough command sketch of those steps follows, and the full kafka.yaml used for the redeploy is attached after it.
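A hedged sketch of those steps as commands. The PV name is a placeholder, datadir-kafka-1 is the claim name a StatefulSet called kafka with a datadir claim template produces for broker 1, and the 30Gi target matches the template below:

# Allow expansion on the existing class (same effect as editing its manifest)
kubectl patch storageclass alicloud-disk-ssd -p '{"allowVolumeExpansion": true}'

# After resizing the underlying cloud disk, record the new size on the PV object
kubectl patch pv <pv-name> -p '{"spec":{"capacity":{"storage":"30Gi"}}}'

# Remove the old claim; StatefulSets never delete these on their own
kubectl delete pvc datadir-kafka-1 -n zhonggu

# Bump storage under volumeClaimTemplates in kafka.yaml, then redeploy.
# Note: a Released PV may need its spec.claimRef cleared before the new claim can bind to it.
kubectl apply -f kafka.yaml

Attached: kafka.yaml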

apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app: kafka
  name: kafka
  namespace: zhonggu
spec:
  podManagementPolicy: OrderedReady
  replicas: 3
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: kafka
  serviceName: kafka-svc
  template:
    metadata:
      labels:
        app: kafka
    spec:
      affinity:
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - zk
                topologyKey: kubernetes.io/hostname
              weight: 1
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: app
                    operator: In
                    values:
                      - kafka
              topologyKey: kubernetes.io/hostname
      containers:
        - command:
            - sh
            - '-c'
            - >-
              exec kafka-server-start.sh /opt/kafka/config/server.properties
              --override broker.id=${HOSTNAME##*-} --override
              listeners=PLAINTEXT://:9093 --override
              zookeeper.connect=zk-0.zk-svc.zhonggu.svc.cluster.local:2181,zk-1.zk-svc.zhonggu.svc.cluster.local:2181,zk-2.zk-svc.zhonggu.svc.cluster.local:2181
              --override log.dirs=/var/lib/kafka/data --override
              auto.create.topics.enable=true --override
              auto.leader.rebalance.enable=true --override background.threads=10
              --override compression.type=producer --override
              delete.topic.enable=true --override
              leader.imbalance.check.interval.seconds=300 --override
              leader.imbalance.per.broker.percentage=10 --override
              log.flush.interval.messages=9223372036854775807 --override
              log.flush.offset.checkpoint.interval.ms=60000 --override
              log.flush.scheduler.interval.ms=9223372036854775807 --override
              log.retention.bytes=102400 --override log.retention.hours=10
              --override log.roll.hours=168 --override log.roll.jitter.hours=0
              --override log.segment.bytes=1073741824 --override
              log.segment.delete.delay.ms=60000 --override
              message.max.bytes=10485760 --override min.insync.replicas=1
              --override num.io.threads=8 --override num.network.threads=3
              --override num.recovery.threads.per.data.dir=1 --override
              num.replica.fetchers=1 --override offset.metadata.max.bytes=4096
              --override offsets.commit.required.acks=-1 --override
              offsets.commit.timeout.ms=5000 --override
              offsets.load.buffer.size=5242880 --override
              offsets.retention.check.interval.ms=600000 --override
              offsets.retention.minutes=1440 --override
              offsets.topic.compression.codec=0 --override
              offsets.topic.num.partitions=50 --override
              offsets.topic.replication.factor=3 --override
              offsets.topic.segment.bytes=104857600 --override
              queued.max.requests=500 --override
              quota.consumer.default=9223372036854775807 --override
              quota.producer.default=9223372036854775807 --override
              replica.fetch.min.bytes=1 --override replica.fetch.wait.max.ms=500
              --override replica.high.watermark.checkpoint.interval.ms=5000
              --override replica.lag.time.max.ms=10000 --override
              replica.socket.receive.buffer.bytes=65536 --override
              replica.socket.timeout.ms=30000 --override
              request.timeout.ms=30000 --override
              socket.receive.buffer.bytes=102400 --override
              socket.request.max.bytes=104857600 --override
              socket.send.buffer.bytes=102400 --override
              unclean.leader.election.enable=true --override
              zookeeper.session.timeout.ms=6000 --override
              zookeeper.set.acl=false --override
              broker.id.generation.enable=true --override
              connections.max.idle.ms=600000 --override
              controlled.shutdown.enable=true --override
              controlled.shutdown.max.retries=3 --override
              controlled.shutdown.retry.backoff.ms=5000 --override
              controller.socket.timeout.ms=30000 --override
              default.replication.factor=3 --override
              fetch.purgatory.purge.interval.requests=1000 --override
              group.max.session.timeout.ms=300000 --override
              group.min.session.timeout.ms=6000 --override
              inter.broker.protocol.version=0.10.2-IV0 --override
              log.cleaner.backoff.ms=15000 --override
              log.cleaner.dedupe.buffer.size=134217728 --override
              log.cleaner.delete.retention.ms=86400000 --override
              log.cleaner.enable=true --override
              log.cleaner.io.buffer.load.factor=0.9 --override
              log.cleaner.io.buffer.size=524288 --override
              log.cleaner.io.max.bytes.per.second=1.7976931348623157E308
              --override log.cleaner.min.cleanable.ratio=0.5 --override
              log.cleaner.min.compaction.lag.ms=0 --override
              log.cleaner.threads=1 --override log.cleanup.policy=delete
              --override log.index.interval.bytes=4096 --override
              log.index.size.max.bytes=10485760 --override
              log.message.timestamp.difference.max.ms=9223372036854775807
              --override log.message.timestamp.type=CreateTime --override
              log.preallocate=false --override
              log.retention.check.interval.ms=300000 --override
              max.connections.per.ip=2147483647 --override num.partitions=3
              --override producer.purgatory.purge.interval.requests=1000
              --override replica.fetch.backoff.ms=1000 --override
              replica.fetch.max.bytes=1048576 --override
              replica.fetch.response.max.bytes=10485760 --override
              reserved.broker.max.id=1000
          env:
            - name: TZ
              value: Asia/Shanghai
            - name: KAFKA_HEAP_OPTS
              value: '-Xmx4g -Xms4g'
            - name: KAFKA_OPTS
              value: '-Dlogging.level=INFO'
          image: 'registry.cn-hangzhou.aliyuncs.com/daianla/k8skafka:v1'
          imagePullPolicy: Always
          name: k8skafka
          ports:
            - containerPort: 9093
              name: server
              protocol: TCP
          readinessProbe:
            exec:
              command:
                - sh
                - '-c'
                - >-
                  /opt/kafka/bin/kafka-broker-api-versions.sh
                  --bootstrap-server=localhost:9093
            failureThreshold: 3
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            requests:
              cpu: 500m
              memory: 2Gi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - mountPath: /var/lib/kafka
              name: datadir
      dnsPolicy: ClusterFirst
      imagePullSecrets:
        - name: kube-registry
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 300
  updateStrategy:
    type: OnDelete
  volumeClaimTemplates:
    - metadata:
        name: datadir
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 30Gi
        storageClassName: alicloud-disk-ssd
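
After the redeploy, two quick sanity checks (names follow the claim template and namespace above):

# The claim should report the new size once it is bound
kubectl get pvc -n zhonggu

# And the broker should see the extra room on its data mount
kubectl exec -it kafka-1 -n zhonggu -- df -h /var/lib/kafka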