From 7d33227d706409d3c68a4f45ed09cbe361927c97 Mon Sep 17 00:00:00 2001 From: v-zhangjc9 Date: Sat, 12 Oct 2024 17:17:15 +0800 Subject: [PATCH] =?UTF-8?q?feat(monitor):=20=E5=A2=9E=E5=8A=A0=E5=85=B3?= =?UTF-8?q?=E4=BA=8Ehudi=E8=A1=A8=E6=96=87=E4=BB=B6=E6=95=B0=E7=9A=84?= =?UTF-8?q?=E7=9B=91=E6=8E=A7=E6=8C=87=E6=A0=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/httpRequests/http-requests-log.http | 325 +++++++++--------- .../command/pro/commands/HudiCommand.java | 45 +++ .../service/common/Constants.java | 6 + .../service/monitor/metric/HudiMetrics.java | 94 +++++ .../service/monitor/metric/PulsarMetrics.java | 2 +- test/test.http | 6 +- 6 files changed, 316 insertions(+), 162 deletions(-) create mode 100644 service-monitor/src/main/java/com/lanyuanxiaoyao/service/monitor/metric/HudiMetrics.java diff --git a/.idea/httpRequests/http-requests-log.http b/.idea/httpRequests/http-requests-log.http index bcab1dc..73704de 100644 --- a/.idea/httpRequests/http-requests-log.http +++ b/.idea/httpRequests/http-requests-log.http @@ -1,3 +1,168 @@ +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s20.hdp.dc:19521/hdfs/count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +content-length: 0 + +<> 2024-10-12T154854.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s12.hdp.dc:25961/hdfs/count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=EE0952421D19909D0A80BB5A1216DE93 +content-length: 0 + +<> 2024-10-12T154825.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s12.hdp.dc:25961/hdfs/count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=EE0952421D19909D0A80BB5A1216DE93 +content-length: 0 + +<> 2024-10-12T154754.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s12.hdp.dc:25961/hdfs/count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=EE0952421D19909D0A80BB5A1216DE93 +content-length: 0 + +<> 2024-10-12T154616.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s12.hdp.dc:25961/hdfs/list?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=EE0952421D19909D0A80BB5A1216DE93 +content-length: 0 + +<> 2024-10-12T154529.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s12.hdp.dc:25961/hdfs/list?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=EE0952421D19909D0A80BB5A1216DE93 +content-length: 0 + +<> 2024-10-12T151839.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s12.hdp.dc:25961/hdfs/file_count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=EE0952421D19909D0A80BB5A1216DE93 +content-length: 0 + +<> 2024-10-12T151753.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s12.hdp.dc:25961/hdfs/file_count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=EE0952421D19909D0A80BB5A1216DE93 +content-length: 0 + +<> 2024-10-12T151727.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s12.hdp.dc:25961/hdfs/count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +content-length: 0 + +<> 2024-10-12T151704.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s22.hdp.dc:13241/hdfs/count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=D4B48AD7708DF28D7AFA0A74B26CF45A +content-length: 0 + +<> 2024-10-12T151540.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s22.hdp.dc:13241/hdfs/count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=D4B48AD7708DF28D7AFA0A74B26CF45A +content-length: 0 + +<> 2024-10-12T151442.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s22.hdp.dc:13241/hdfs/count?root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=D4B48AD7708DF28D7AFA0A74B26CF45A +content-length: 0 + +<> 2024-10-12T151417.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s22.hdp.dc:13241/hdfs/count?hdfs=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +Cookie: JSESSIONID=D4B48AD7708DF28D7AFA0A74B26CF45A +content-length: 0 + +<> 2024-10-12T151409.500.txt + +### + +GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s22.hdp.dc:13241/hdfs?hdfs=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data +Authorization: Basic AxhEbscwsJDbYMH2 cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4 +User-Agent: IntelliJ HTTP Client/IntelliJ IDEA 2024.1.4 +Accept-Encoding: br, deflate, gzip, x-gzip +Accept: */* +content-length: 0 + +<> 2024-10-12T151340.404.json + +### + GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:31719/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID Connection: Keep-Alive User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) @@ -358,163 +523,3 @@ Accept-Encoding: br,deflate,gzip,x-gzip ### -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:33535/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_dg/acct_item_760&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=08E78CE8926806AAB5D110D0FE9B05F7 -Accept-Encoding: br,deflate,gzip,x-gzip - -<> 2024-05-28T164901.200.txt - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:33535/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_dg/acct_item_760&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=08E78CE8926806AAB5D110D0FE9B05F7 -Accept-Encoding: br,deflate,gzip,x-gzip - -<> 2024-05-28T164758.200.txt - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:33535/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_dg/acct_item_760&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=08E78CE8926806AAB5D110D0FE9B05F7 -Accept-Encoding: br,deflate,gzip,x-gzip - -<> 2024-05-28T164303.200.txt - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:33535/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_dg/acct_item_760&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=08E78CE8926806AAB5D110D0FE9B05F7 -Accept-Encoding: br,deflate,gzip,x-gzip - -<> 2024-05-28T164220.200.txt - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:33535/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_dg/acct_item_760&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=08E78CE8926806AAB5D110D0FE9B05F7 -Accept-Encoding: br,deflate,gzip,x-gzip - -<> 2024-05-28T164107.200.txt - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s10.hdp.dc:33535/task/law_enforcement?pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650&pulsar_topic=persistent://odcp/acct_dg/acct_item_760&start_time=1716858000000&end_time=1716861600000&primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=C5D2666661F27F68E53223FE5B74AF35 -Accept-Encoding: br,deflate,gzip,x-gzip - -<> 2024-05-28T163410.200.txt - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/queue/queue/clear?name=compaction-queue-pre -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=8516C92140B5118AF9AA61025D0F8C93 -Accept-Encoding: br,deflate,gzip,x-gzip - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/all -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=7A4C34E0240A98C1186F3A2551BC5E80 -Accept-Encoding: br,deflate,gzip,x-gzip - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_web/cloud/list -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=F5F155198FAF72435339CC2E21B873CC -Accept-Encoding: br,deflate,gzip,x-gzip - -<> 2024-05-09T170723.200.json - -### - -GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/hudi_api/api/message_id?flink_job_id=1542097984132706304&alias=crm_cfguse_mkt_cam_strategy_rel -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=F5F155198FAF72435339CC2E21B873CC -Accept-Encoding: br,deflate,gzip,x-gzip - -### - -POST http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s8.hdp.dc:15391/hdfs/write?root=hdfs://b2/apps/datalake/test/test.txt&overwrite=true -Content-Type: text/plain -Content-Length: 738 -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=E6FF5447C8553BA4268979B8C5779363 -Accept-Encoding: br,deflate,gzip,x-gzip - -\#Properties saved on 2023-12-26T09:18:39.583Z -\#Tue Dec 26 17:18:39 CST 2023 -hoodie.table.precombine.field=update_ts -hoodie.datasource.write.drop.partition.columns=false -hoodie.table.partition.fields=CITY_ID -hoodie.table.type=MERGE_ON_READ -hoodie.archivelog.folder=archived -hoodie.compaction.payload.class=org.apache.hudi.common.model.OverwriteWithLatestAvroPayload -hoodie.timeline.layout.version=1 -hoodie.table.version=4 -hoodie.table.recordkey.fields=_key -hoodie.datasource.write.partitionpath.urlencode=false -hoodie.table.name=dws_account -hoodie.table.keygenerator.class=org.apache.hudi.keygen.SimpleKeyGenerator -hoodie.table.timeline.timezone=LOCAL -hoodie.datasource.write.hive_style_partitioning=false -hoodie.table.checksum=989688289 - -### - -POST http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s8.hdp.dc:15391/hdfs/write?root=hdfs://b2/apps/datalake/test/test.txt&overwrite=true -Content-Length: 11 -Content-Type: */*; charset=UTF-8 -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=D12E206603C453F1429C0B7DF1519A4B -Accept-Encoding: br,deflate,gzip,x-gzip - -Hello world - -### - -POST http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s8.hdp.dc:34469/hdfs/write?root=hdfs://b2/apps/datalake/test/test.txt&overwrite=true -Content-Length: 11 -Content-Type: */*; charset=UTF-8 -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=D12E206603C453F1429C0B7DF1519A4B -Accept-Encoding: br,deflate,gzip,x-gzip - -Hello world - -### - -POST http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s8.hdp.dc:34469/hdfs/write?root=hdfs://b2/apps/datalake/test/test.txt -Content-Length: 11 -Content-Type: */*; charset=UTF-8 -Connection: Keep-Alive -User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.10) -Cookie: JSESSIONID=D12E206603C453F1429C0B7DF1519A4B -Accept-Encoding: br,deflate,gzip,x-gzip - -Hello world - -<> 2024-05-08T095641.500.txt - -### - diff --git a/service-command-pro/src/main/java/com/lanyuanxiaoyao/service/command/pro/commands/HudiCommand.java b/service-command-pro/src/main/java/com/lanyuanxiaoyao/service/command/pro/commands/HudiCommand.java index ff0eb72..e3765f1 100644 --- a/service-command-pro/src/main/java/com/lanyuanxiaoyao/service/command/pro/commands/HudiCommand.java +++ b/service-command-pro/src/main/java/com/lanyuanxiaoyao/service/command/pro/commands/HudiCommand.java @@ -205,6 +205,41 @@ public class HudiCommand { fileSystem.close(); } + @ShellMethod("Max meta files") + public void maxMetaFiles() throws IOException { + MutableList

list = Lists.mutable.

empty().asSynchronized(); + FileSystem fileSystem = FileSystem.get(new Configuration()); + infoService + .tableMetaList() + .collect(TableMeta::getHudi) + .collect(TableMeta.HudiMeta::getTargetHdfsPath) + .asParallel(ExecutorProvider.EXECUTORS_20, 1) + .forEach(hdfs -> { + Path root = new Path(hdfs, ".hoodie"); + try { + FileStatus[] statuses = fileSystem.listStatus(root); + long num = 0; + for (FileStatus status : statuses) { + if (status.isFile()) { + num++; + } + if (StrUtil.containsIgnoreCase(status.getPath().toString(), "INVALID")) { + logger.info("{}", status.getPath().toString()); + } + } + list.add(new P(num, hdfs)); + logger.info("Count: {} Hdfs: {}", num, hdfs); + } catch (IOException e) { + logger.warn("List file error", e); + } + }); + MutableList

listP = list.select(p -> p.count > 1000); + for (P maxP : listP) { + logger.info("Max: {} Hdfs: {}", maxP.count, maxP.hdfs); + } + fileSystem.close(); + } + @ShellMethod("Get timeline instants") public void timelineInstant(@ShellOption(help = "root hdfs path") String hdfs) { hudiService.timelineHdfsAllActive(hdfs).forEach(instant -> logger.info(instant.toString())); @@ -229,4 +264,14 @@ public class HudiCommand { public interface Runnable { void run(LongAdder counter); } + + private static final class P { + String hdfs; + long count; + + public P(long count, String hdfs) { + this.count = count; + this.hdfs = hdfs; + } + } } diff --git a/service-common/src/main/java/com/lanyuanxiaoyao/service/common/Constants.java b/service-common/src/main/java/com/lanyuanxiaoyao/service/common/Constants.java index eff8179..14f0488 100644 --- a/service-common/src/main/java/com/lanyuanxiaoyao/service/common/Constants.java +++ b/service-common/src/main/java/com/lanyuanxiaoyao/service/common/Constants.java @@ -128,6 +128,12 @@ public interface Constants { String METRICS_PULSAR_PREFIX = METRICS_PREFIX + "_pulsar"; String METRICS_PULSAR_BACKLOG = METRICS_PULSAR_PREFIX + "_backlog"; + String METRICS_HUDI_TABLE = METRICS_PREFIX + "_hudi_table"; + String METRICS_HUDI_TABLE_FILE_COUNT = METRICS_HUDI_TABLE + "_file_count"; + String METRICS_HUDI_TABLE_FILE_COUNT_AVERAGE_PER_TABLE = METRICS_HUDI_TABLE_FILE_COUNT + "_average_per_table"; + String METRICS_HUDI_TABLE_TIMELINE_FILE_COUNT = METRICS_HUDI_TABLE + "_timeline_file_count"; + String METRICS_HUDI_TABLE_TIMELINE_FILE_COUNT_AVERAGE_PER_TABLE = METRICS_HUDI_TABLE_TIMELINE_FILE_COUNT + "_average_per_table"; + String METRICS_LABEL_FLINK_JOB_ID = "flink_job_id"; String METRICS_LABEL_FLINK_JOB_NAME = "flink_job_name"; String METRICS_LABEL_FLINK_NATIVE_JOB_ID = "flink_native_job_id"; diff --git a/service-monitor/src/main/java/com/lanyuanxiaoyao/service/monitor/metric/HudiMetrics.java b/service-monitor/src/main/java/com/lanyuanxiaoyao/service/monitor/metric/HudiMetrics.java new file mode 100644 index 0000000..a9e3d8e --- /dev/null +++ b/service-monitor/src/main/java/com/lanyuanxiaoyao/service/monitor/metric/HudiMetrics.java @@ -0,0 +1,94 @@ +package com.lanyuanxiaoyao.service.monitor.metric; + +import cn.hutool.core.util.StrUtil; +import com.lanyuanxiaoyao.service.common.Constants; +import com.lanyuanxiaoyao.service.configuration.ExecutorProvider; +import com.lanyuanxiaoyao.service.forest.service.HudiService; +import com.lanyuanxiaoyao.service.forest.service.InfoService; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Tag; +import java.util.concurrent.atomic.AtomicLong; +import org.eclipse.collections.api.factory.Lists; +import org.eclipse.collections.api.factory.Maps; +import org.eclipse.collections.api.map.MutableMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Service; + +/** + * Hudi表相关指标 + * + * @author lanyuanxiaoyao + * @date 2024-03-05 + */ +@SuppressWarnings("SpringJavaInjectionPointsAutowiringInspection") +@Service +public class HudiMetrics extends Metrics { + private static final Logger logger = LoggerFactory.getLogger(HudiMetrics.class); + + private final MeterRegistry registry; + private final InfoService infoService; + private final HudiService hudiService; + + private final MutableMap fileCountCacheMap; + private final MutableMap timelineFileCountCacheMap; + + public HudiMetrics(MeterRegistry registry, InfoService infoService, HudiService hudiService) { + this.registry = registry; + this.infoService = infoService; + this.hudiService = hudiService; + + fileCountCacheMap = Maps.mutable.empty(); + timelineFileCountCacheMap = Maps.mutable.empty(); + } + + @Scheduled(cron = "0 30 * * * ?") + @Override + public void update() { + infoService.tableMetaList() + .asParallel(ExecutorProvider.EXECUTORS_10, 1) + .reject(meta -> StrUtil.isBlank(meta.getPulsarAddress())) + .forEach(meta -> { + try { + AtomicLong filecountCache = fileCountCacheMap.getIfAbsentPut( + meta.getAlias(), + registry.gauge( + Constants.METRICS_HUDI_TABLE_FILE_COUNT, + Lists.immutable.of( + Tag.of(Constants.METRICS_LABEL_FLINK_JOB_ID, meta.getJob().getId().toString()), + Tag.of(Constants.METRICS_LABEL_ALIAS, meta.getAlias()), + Tag.of(Constants.METRICS_LABEL_SCHEMA, meta.getSchema()), + Tag.of(Constants.METRICS_LABEL_TABLE, meta.getTable()) + ), + new AtomicLong(0) + ) + ); + AtomicLong timelineFileCountCache = timelineFileCountCacheMap.getIfAbsentPut( + meta.getAlias(), + registry.gauge( + Constants.METRICS_HUDI_TABLE_TIMELINE_FILE_COUNT, + Lists.immutable.of( + Tag.of(Constants.METRICS_LABEL_FLINK_JOB_ID, meta.getJob().getId().toString()), + Tag.of(Constants.METRICS_LABEL_ALIAS, meta.getAlias()), + Tag.of(Constants.METRICS_LABEL_SCHEMA, meta.getSchema()), + Tag.of(Constants.METRICS_LABEL_TABLE, meta.getTable()) + ), + new AtomicLong(0) + ) + ); + + String hdfs = meta.getHudi().getTargetHdfsPath(); + if (hudiService.existsHudiTable(hdfs)) { + Long count = hudiService.fileCount(hdfs); + filecountCache.set(count); + + String timelineHdfs = hdfs + "/.hoodie"; + timelineFileCountCache.set(hudiService.fileCount(timelineHdfs)); + } + } catch (Exception exception) { + logger.warn("Get file count fail for {}", meta.getAlias(), exception); + } + }); + } +} diff --git a/service-monitor/src/main/java/com/lanyuanxiaoyao/service/monitor/metric/PulsarMetrics.java b/service-monitor/src/main/java/com/lanyuanxiaoyao/service/monitor/metric/PulsarMetrics.java index 47329bf..b4fb64c 100644 --- a/service-monitor/src/main/java/com/lanyuanxiaoyao/service/monitor/metric/PulsarMetrics.java +++ b/service-monitor/src/main/java/com/lanyuanxiaoyao/service/monitor/metric/PulsarMetrics.java @@ -75,7 +75,7 @@ public class PulsarMetrics extends Metrics { infoService.savePulsarBacklog(meta.getJob().getId(), meta.getAlias(), backlog); } } catch (Exception exception) { - logger.warn("Update pulsar backlog fail for " + meta.getAlias(), exception); + logger.warn("Update pulsar backlog fail for {}", meta.getAlias(), exception); } }); } diff --git a/test/test.http b/test/test.http index 486b02c..e0fb75f 100644 --- a/test/test.http +++ b/test/test.http @@ -133,4 +133,8 @@ hoodie.table.checksum=989688289 GET http://{{username}}:{{password}}@b12s10.hdp.dc:31719/task/law_enforcement? pulsar_url=pulsar://132.122.115.158:16650,132.122.115.159:16650,132.122.115.160:16650,132.122.115.161:16650,132.122.115.167:16650,132.122.115.168:16650& pulsar_topic=persistent://odcp/acct_sz/acct_item_755&start_time=1716858000000&end_time=1716861600000& - primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID \ No newline at end of file + primary_keys=ACCT_ITEM_ID&partition_keys=ACCT_ID + +### Count files +GET http://{{username}}:{{password}}@b12s20.hdp.dc:19521/hdfs/count? + root=hdfs://b2/apps/datalake/hive/dws_wsyyt/external_table_hudi/dws_tb_jt_servuser_data \ No newline at end of file