Ceph Health Problem
Kurti2k
16 Posts
November 7, 2018, 10:00 am
Quote from Kurti2k on November 7, 2018, 10:00 am
Hi,
Ceph Health
[root@localhost ~]# ceph -s
cluster:
id: 86c6888d-4b3e-4c72-a605-770ae4a3495a
health: HEALTH_WARN
Reduced data availability: 72 pgs inactive, 64 pgs incomplete
services:
mon: 3 daemons, quorum peta-1,peta-2,peta-3
mgr: peta-1(active), standbys: peta-2
osd: 54 osds: 54 up, 54 in
data:
pools: 1 pools, 4096 pgs
objects: 1896k objects, 7357 GB
usage: 9867 GB used, 36181 GB / 46048 GB avail
pgs: 0.195% pgs unknown
1.562% pgs not active
4024 active+clean
64 incomplete
8 unknown
[root@localhost ~]#
How can I fix this?
with best regards
Marcel
Hi,
Ceph Health
[root@localhost ~]# ceph -s
cluster:
id: 86c6888d-4b3e-4c72-a605-770ae4a3495a
health: HEALTH_WARN
Reduced data availability: 72 pgs inactive, 64 pgs incomplete
services:
mon: 3 daemons, quorum peta-1,peta-2,peta-3
mgr: peta-1(active), standbys: peta-2
osd: 54 osds: 54 up, 54 in
data:
pools: 1 pools, 4096 pgs
objects: 1896k objects, 7357 GB
usage: 9867 GB used, 36181 GB / 46048 GB avail
pgs: 0.195% pgs unknown
1.562% pgs not active
4024 active+clean
64 incomplete
8 unknown
[root@localhost ~]#
How can I fix this?
with best regards
Marcel
Last edited on November 7, 2018, 10:02 am by Kurti2k · #1
admin
2,930 Posts
November 7, 2018, 11:08 am
Quote from admin on November 7, 2018, 11:08 am
Did anything lead to this? Were any OSDs deleted?
Is it stuck at this, or is it slowly recovering?
If it is stuck, query a stuck PG and see what it reports in its recovery status.
Did anything lead to this? Were any OSDs deleted?
Is it stuck at this, or is it slowly recovering?
If it is stuck, query a stuck PG and see what it reports in its recovery status.
Last edited on November 7, 2018, 11:09 am by admin · #2
Kurti2k
16 Posts
November 7, 2018, 12:47 pm
Quote from Kurti2k on November 7, 2018, 12:47 pm
No OSD is bad or was removed.
root@peta-2:/etc/ceph# ceph health detail
HEALTH_WARN Reduced data availability: 72 pgs inactive, 64 pgs incomplete
PG_AVAILABILITY Reduced data availability: 72 pgs inactive, 64 pgs incomplete
pg 1.16 is incomplete, acting [48,44]
pg 1.6d is incomplete, acting [2,50]
pg 1.7a is incomplete, acting [34,17]
pg 1.7f is incomplete, acting [29,14]
pg 1.155 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.1ce is incomplete, acting [53,3]
pg 1.1eb is incomplete, acting [9,19]
pg 1.1fb is incomplete, acting [30,46]
pg 1.22b is incomplete, acting [48,26]
pg 1.2c1 is incomplete, acting [42,15]
pg 1.302 is incomplete, acting [29,18]
pg 1.319 is incomplete, acting [21,33]
pg 1.348 is incomplete, acting [29,5]
pg 1.356 is incomplete, acting [4,39]
pg 1.363 is incomplete, acting [42,17]
pg 1.37d is incomplete, acting [48,22]
pg 1.396 is incomplete, acting [1,41]
pg 1.3b8 is incomplete, acting [42,22]
pg 1.3fe is incomplete, acting [14,25]
pg 1.409 is incomplete, acting [29,17]
pg 1.449 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.46f is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.4d8 is incomplete, acting [45,4]
pg 1.4f7 is incomplete, acting [39,23]
pg 1.520 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.558 is incomplete, acting [50,23]
pg 1.651 is incomplete, acting [51,11]
pg 1.692 is incomplete, acting [22,29]
pg 1.6a6 is incomplete, acting [7,18]
pg 1.6cf is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.6d8 is incomplete, acting [2,47]
pg 1.6e8 is incomplete, acting [9,33]
pg 1.6f1 is incomplete, acting [0,38]
pg 1.76c is incomplete, acting [7,52]
pg 1.7b7 is incomplete, acting [40,21]
pg 1.7b9 is incomplete, acting [14,25]
pg 1.7cf is incomplete, acting [0,50]
pg 1.7d8 is incomplete, acting [29,17]
pg 1.7fa is incomplete, acting [33,41]
pg 1.844 is incomplete, acting [38,2]
pg 1.8b3 is incomplete, acting [47,2]
pg 1.909 is incomplete, acting [7,37]
pg 1.94f is incomplete, acting [29,16]
pg 1.964 is incomplete, acting [37,4]
pg 1.974 is stuck inactive since forever, current state incomplete, last acting [33,53]
pg 1.eeb is incomplete, acting [21,44]
pg 1.f2c is incomplete, acting [4,47]
pg 1.f95 is incomplete, acting [2,29]
pg 1.fb8 is incomplete, acting [2,25]
pg 1.fe7 is incomplete, acting [0,52]
pg 1.ff8 is incomplete, acting [7,50]
ceph pg 1.ff8 query
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0
},
"up": [],
"acting": [],
"blocked_by": [],
"up_primary": -1,
"acting_primary": -1
},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "50",
"pgid": "1.ff8",
"last_update": "0'0",
"last_complete": "0'0",
"log_tail": "0'0",
"last_user_version": 0,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 0,
"epoch_pool_created": 0,
"last_epoch_started": 0,
"last_interval_started": 0,
"last_epoch_clean": 0,
"last_interval_clean": 0,
"last_epoch_split": 0,
"last_epoch_marked_full": 0,
"same_up_since": 0,
"same_interval_since": 0,
"same_primary_since": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000"
},
"stats": {
"version": "0'0",
"reported_seq": "0",
"reported_epoch": "0",
"state": "unknown",
"last_fresh": "0.000000",
"last_change": "0.000000",
"last_active": "0.000000",
"last_peered": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_became_peered": "0.000000",
"last_unstale": "0.000000",
"last_undegraded": "0.000000",
"last_fullsized": "0.000000",
"mapping_epoch": 0,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0
},
"up": [],
"acting": [],
"blocked_by": [],
"up_primary": -1,
"acting_primary": -1
},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
}
],
"recovery_state": [
{
"name": "Started/Primary/Peering/Incomplete",
"enter_time": "2018-11-07 09:22:42.983356",
"comment": "not enough complete instances of this PG"
},
{
"name": "Started/Primary/Peering",
"enter_time": "2018-11-07 09:22:42.947534",
"past_intervals": [
{
"first": "2098",
"last": "6839",
"all_participants": [
{
"osd": 7
},
{
"osd": 20
},
{
"osd": 48
},
{
"osd": 50
}
],
"intervals": [
{
"first": "4262",
"last": "4271",
"acting": "48"
},
{
"first": "4272",
"last": "4441",
"acting": "20"
},
{
"first": "6837",
"last": "6839",
"acting": "7"
}
]
}
],
"probing_osds": [
"7",
"20",
"48",
"50"
],
"down_osds_we_would_probe": [],
"peering_blocked_by": [],
"peering_blocked_by_detail": [
{
"detail": "peering_blocked_by_history_les_bound"
}
]
},
{
"name": "Started",
"enter_time": "2018-11-07 09:22:42.947468"
}
],
"agent_state": {}
}
best regards
marcel
No OSD is bad or was removed.
root@peta-2:/etc/ceph# ceph health detail
HEALTH_WARN Reduced data availability: 72 pgs inactive, 64 pgs incomplete
PG_AVAILABILITY Reduced data availability: 72 pgs inactive, 64 pgs incomplete
pg 1.16 is incomplete, acting [48,44]
pg 1.6d is incomplete, acting [2,50]
pg 1.7a is incomplete, acting [34,17]
pg 1.7f is incomplete, acting [29,14]
pg 1.155 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.1ce is incomplete, acting [53,3]
pg 1.1eb is incomplete, acting [9,19]
pg 1.1fb is incomplete, acting [30,46]
pg 1.22b is incomplete, acting [48,26]
pg 1.2c1 is incomplete, acting [42,15]
pg 1.302 is incomplete, acting [29,18]
pg 1.319 is incomplete, acting [21,33]
pg 1.348 is incomplete, acting [29,5]
pg 1.356 is incomplete, acting [4,39]
pg 1.363 is incomplete, acting [42,17]
pg 1.37d is incomplete, acting [48,22]
pg 1.396 is incomplete, acting [1,41]
pg 1.3b8 is incomplete, acting [42,22]
pg 1.3fe is incomplete, acting [14,25]
pg 1.409 is incomplete, acting [29,17]
pg 1.449 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.46f is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.4d8 is incomplete, acting [45,4]
pg 1.4f7 is incomplete, acting [39,23]
pg 1.520 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.558 is incomplete, acting [50,23]
pg 1.651 is incomplete, acting [51,11]
pg 1.692 is incomplete, acting [22,29]
pg 1.6a6 is incomplete, acting [7,18]
pg 1.6cf is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.6d8 is incomplete, acting [2,47]
pg 1.6e8 is incomplete, acting [9,33]
pg 1.6f1 is incomplete, acting [0,38]
pg 1.76c is incomplete, acting [7,52]
pg 1.7b7 is incomplete, acting [40,21]
pg 1.7b9 is incomplete, acting [14,25]
pg 1.7cf is incomplete, acting [0,50]
pg 1.7d8 is incomplete, acting [29,17]
pg 1.7fa is incomplete, acting [33,41]
pg 1.844 is incomplete, acting [38,2]
pg 1.8b3 is incomplete, acting [47,2]
pg 1.909 is incomplete, acting [7,37]
pg 1.94f is incomplete, acting [29,16]
pg 1.964 is incomplete, acting [37,4]
pg 1.974 is stuck inactive since forever, current state incomplete, last acting [33,53]
pg 1.eeb is incomplete, acting [21,44]
pg 1.f2c is incomplete, acting [4,47]
pg 1.f95 is incomplete, acting [2,29]
pg 1.fb8 is incomplete, acting [2,25]
pg 1.fe7 is incomplete, acting [0,52]
pg 1.ff8 is incomplete, acting [7,50]
ceph pg 1.ff8 query
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0
},
"up": [],
"acting": [],
"blocked_by": [],
"up_primary": -1,
"acting_primary": -1
},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "50",
"pgid": "1.ff8",
"last_update": "0'0",
"last_complete": "0'0",
"log_tail": "0'0",
"last_user_version": 0,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 0,
"epoch_pool_created": 0,
"last_epoch_started": 0,
"last_interval_started": 0,
"last_epoch_clean": 0,
"last_interval_clean": 0,
"last_epoch_split": 0,
"last_epoch_marked_full": 0,
"same_up_since": 0,
"same_interval_since": 0,
"same_primary_since": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000"
},
"stats": {
"version": "0'0",
"reported_seq": "0",
"reported_epoch": "0",
"state": "unknown",
"last_fresh": "0.000000",
"last_change": "0.000000",
"last_active": "0.000000",
"last_peered": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_became_peered": "0.000000",
"last_unstale": "0.000000",
"last_undegraded": "0.000000",
"last_fullsized": "0.000000",
"mapping_epoch": 0,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0
},
"up": [],
"acting": [],
"blocked_by": [],
"up_primary": -1,
"acting_primary": -1
},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
}
],
"recovery_state": [
{
"name": "Started/Primary/Peering/Incomplete",
"enter_time": "2018-11-07 09:22:42.983356",
"comment": "not enough complete instances of this PG"
},
{
"name": "Started/Primary/Peering",
"enter_time": "2018-11-07 09:22:42.947534",
"past_intervals": [
{
"first": "2098",
"last": "6839",
"all_participants": [
{
"osd": 7
},
{
"osd": 20
},
{
"osd": 48
},
{
"osd": 50
}
],
"intervals": [
{
"first": "4262",
"last": "4271",
"acting": "48"
},
{
"first": "4272",
"last": "4441",
"acting": "20"
},
{
"first": "6837",
"last": "6839",
"acting": "7"
}
]
}
],
"probing_osds": [
"7",
"20",
"48",
"50"
],
"down_osds_we_would_probe": [],
"peering_blocked_by": [],
"peering_blocked_by_detail": [
{
"detail": "peering_blocked_by_history_les_bound"
}
]
},
{
"name": "Started",
"enter_time": "2018-11-07 09:22:42.947468"
}
],
"agent_state": {}
}
best regards
marcel
Last edited on November 7, 2018, 12:51 pm by Kurti2k · #3
admin
2,930 Posts
November 7, 2018, 3:53 pm
Quote from admin on November 7, 2018, 3:53 pm
Try restarting the following OSDs: 7, 20, 48, 50 and see if it helps.
The problem lies in:
"detail": "peering_blocked_by_history_les_bound"
You can search for the meaning of this error and proposed fixes.
You can also look at the logs of the above OSDs.
Try restarting the following OSDs: 7, 20, 48, 50 and see if it helps.
The problem lies in:
"detail": "peering_blocked_by_history_les_bound"
You can search for the meaning of this error and proposed fixes.
You can also look at the logs of the above OSDs.
Last edited on November 7, 2018, 4:32 pm by admin · #4
Kurti2k
16 Posts
November 8, 2018, 1:13 pm
Quote from Kurti2k on November 8, 2018, 1:13 pm
// closed
I have reinstalled with the new release.
The error came from a hard reboot after power switching.
best regards
Marcel
// closed
I have reinstalled with the new release.
The error came from a hard reboot after power switching.
best regards
Marcel
Ceph Health Problem
Kurti2k
16 Posts
Quote from Kurti2k on November 7, 2018, 10:00 am
Hi,
Ceph Health
[root@localhost ~]# ceph -s
cluster:
id: 86c6888d-4b3e-4c72-a605-770ae4a3495a
health: HEALTH_WARN
Reduced data availability: 72 pgs inactive, 64 pgs incomplete
services:
mon: 3 daemons, quorum peta-1,peta-2,peta-3
mgr: peta-1(active), standbys: peta-2
osd: 54 osds: 54 up, 54 in
data:
pools: 1 pools, 4096 pgs
objects: 1896k objects, 7357 GB
usage: 9867 GB used, 36181 GB / 46048 GB avail
pgs: 0.195% pgs unknown
1.562% pgs not active
4024 active+clean
64 incomplete
8 unknown
[root@localhost ~]#
How can I fix this?
With best regards,
Marcel
Hi,
Ceph Health
cluster:
id: 86c6888d-4b3e-4c72-a605-770ae4a3495a
health: HEALTH_WARN
Reduced data availability: 72 pgs inactive, 64 pgs incomplete
services:
mon: 3 daemons, quorum peta-1,peta-2,peta-3
mgr: peta-1(active), standbys: peta-2
osd: 54 osds: 54 up, 54 in
data:
pools: 1 pools, 4096 pgs
objects: 1896k objects, 7357 GB
usage: 9867 GB used, 36181 GB / 46048 GB avail
pgs: 0.195% pgs unknown
1.562% pgs not active
4024 active+clean
64 incomplete
8 unknown
[root@localhost ~]#
admin
2,930 Posts
Quote from admin on November 7, 2018, 11:08 am
Did anything lead to this? Were any OSDs deleted?
Is it stuck at this, or is it slowly recovering?
If it is stuck, query a stuck PG and see what it reports in its recovery status.
Did anything lead to this? Were any OSDs deleted?
Is it stuck at this, or is it slowly recovering?
If it is stuck, query a stuck PG and see what it reports in its recovery status.
Kurti2k
16 Posts
Quote from Kurti2k on November 7, 2018, 12:47 pm
No OSD is bad or was removed.
root@peta-2:/etc/ceph# ceph health detail
HEALTH_WARN Reduced data availability: 72 pgs inactive, 64 pgs incomplete
PG_AVAILABILITY Reduced data availability: 72 pgs inactive, 64 pgs incomplete
pg 1.16 is incomplete, acting [48,44]
pg 1.6d is incomplete, acting [2,50]
pg 1.7a is incomplete, acting [34,17]
pg 1.7f is incomplete, acting [29,14]
pg 1.155 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.1ce is incomplete, acting [53,3]
pg 1.1eb is incomplete, acting [9,19]
pg 1.1fb is incomplete, acting [30,46]
pg 1.22b is incomplete, acting [48,26]
pg 1.2c1 is incomplete, acting [42,15]
pg 1.302 is incomplete, acting [29,18]
pg 1.319 is incomplete, acting [21,33]
pg 1.348 is incomplete, acting [29,5]
pg 1.356 is incomplete, acting [4,39]
pg 1.363 is incomplete, acting [42,17]
pg 1.37d is incomplete, acting [48,22]
pg 1.396 is incomplete, acting [1,41]
pg 1.3b8 is incomplete, acting [42,22]
pg 1.3fe is incomplete, acting [14,25]
pg 1.409 is incomplete, acting [29,17]
pg 1.449 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.46f is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.4d8 is incomplete, acting [45,4]
pg 1.4f7 is incomplete, acting [39,23]
pg 1.520 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.558 is incomplete, acting [50,23]
pg 1.651 is incomplete, acting [51,11]
pg 1.692 is incomplete, acting [22,29]
pg 1.6a6 is incomplete, acting [7,18]
pg 1.6cf is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.6d8 is incomplete, acting [2,47]
pg 1.6e8 is incomplete, acting [9,33]
pg 1.6f1 is incomplete, acting [0,38]
pg 1.76c is incomplete, acting [7,52]
pg 1.7b7 is incomplete, acting [40,21]
pg 1.7b9 is incomplete, acting [14,25]
pg 1.7cf is incomplete, acting [0,50]
pg 1.7d8 is incomplete, acting [29,17]
pg 1.7fa is incomplete, acting [33,41]
pg 1.844 is incomplete, acting [38,2]
pg 1.8b3 is incomplete, acting [47,2]
pg 1.909 is incomplete, acting [7,37]
pg 1.94f is incomplete, acting [29,16]
pg 1.964 is incomplete, acting [37,4]
pg 1.974 is stuck inactive since forever, current state incomplete, last acting [33,53]
pg 1.eeb is incomplete, acting [21,44]
pg 1.f2c is incomplete, acting [4,47]
pg 1.f95 is incomplete, acting [2,29]
pg 1.fb8 is incomplete, acting [2,25]
pg 1.fe7 is incomplete, acting [0,52]
pg 1.ff8 is incomplete, acting [7,50]
ceph pg 1.ff8 query
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0
},
"up": [],
"acting": [],
"blocked_by": [],
"up_primary": -1,
"acting_primary": -1
},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "50",
"pgid": "1.ff8",
"last_update": "0'0",
"last_complete": "0'0",
"log_tail": "0'0",
"last_user_version": 0,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 0,
"epoch_pool_created": 0,
"last_epoch_started": 0,
"last_interval_started": 0,
"last_epoch_clean": 0,
"last_interval_clean": 0,
"last_epoch_split": 0,
"last_epoch_marked_full": 0,
"same_up_since": 0,
"same_interval_since": 0,
"same_primary_since": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000"
},
"stats": {
"version": "0'0",
"reported_seq": "0",
"reported_epoch": "0",
"state": "unknown",
"last_fresh": "0.000000",
"last_change": "0.000000",
"last_active": "0.000000",
"last_peered": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_became_peered": "0.000000",
"last_unstale": "0.000000",
"last_undegraded": "0.000000",
"last_fullsized": "0.000000",
"mapping_epoch": 0,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0
},
"up": [],
"acting": [],
"blocked_by": [],
"up_primary": -1,
"acting_primary": -1
},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
}
],
"recovery_state": [
{
"name": "Started/Primary/Peering/Incomplete",
"enter_time": "2018-11-07 09:22:42.983356",
"comment": "not enough complete instances of this PG"
},
{
"name": "Started/Primary/Peering",
"enter_time": "2018-11-07 09:22:42.947534",
"past_intervals": [
{
"first": "2098",
"last": "6839",
"all_participants": [
{
"osd": 7
},
{
"osd": 20
},
{
"osd": 48
},
{
"osd": 50
}
],
"intervals": [
{
"first": "4262",
"last": "4271",
"acting": "48"
},
{
"first": "4272",
"last": "4441",
"acting": "20"
},
{
"first": "6837",
"last": "6839",
"acting": "7"
}
]
}
],
"probing_osds": [
"7",
"20",
"48",
"50"
],
"down_osds_we_would_probe": [],
"peering_blocked_by": [],
"peering_blocked_by_detail": [
{
"detail": "peering_blocked_by_history_les_bound"
}
]
},
{
"name": "Started",
"enter_time": "2018-11-07 09:22:42.947468"
}
],
"agent_state": {}
}
best regards
marcel
No OSD is bad or was removed.
root@peta-2:/etc/ceph# ceph health detail
HEALTH_WARN Reduced data availability: 72 pgs inactive, 64 pgs incomplete
PG_AVAILABILITY Reduced data availability: 72 pgs inactive, 64 pgs incomplete
pg 1.16 is incomplete, acting [48,44]
pg 1.6d is incomplete, acting [2,50]
pg 1.7a is incomplete, acting [34,17]
pg 1.7f is incomplete, acting [29,14]
pg 1.155 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.1ce is incomplete, acting [53,3]
pg 1.1eb is incomplete, acting [9,19]
pg 1.1fb is incomplete, acting [30,46]
pg 1.22b is incomplete, acting [48,26]
pg 1.2c1 is incomplete, acting [42,15]
pg 1.302 is incomplete, acting [29,18]
pg 1.319 is incomplete, acting [21,33]
pg 1.348 is incomplete, acting [29,5]
pg 1.356 is incomplete, acting [4,39]
pg 1.363 is incomplete, acting [42,17]
pg 1.37d is incomplete, acting [48,22]
pg 1.396 is incomplete, acting [1,41]
pg 1.3b8 is incomplete, acting [42,22]
pg 1.3fe is incomplete, acting [14,25]
pg 1.409 is incomplete, acting [29,17]
pg 1.449 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.46f is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.4d8 is incomplete, acting [45,4]
pg 1.4f7 is incomplete, acting [39,23]
pg 1.520 is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.558 is incomplete, acting [50,23]
pg 1.651 is incomplete, acting [51,11]
pg 1.692 is incomplete, acting [22,29]
pg 1.6a6 is incomplete, acting [7,18]
pg 1.6cf is stuck inactive for 103011.099283, current state unknown, last acting []
pg 1.6d8 is incomplete, acting [2,47]
pg 1.6e8 is incomplete, acting [9,33]
pg 1.6f1 is incomplete, acting [0,38]
pg 1.76c is incomplete, acting [7,52]
pg 1.7b7 is incomplete, acting [40,21]
pg 1.7b9 is incomplete, acting [14,25]
pg 1.7cf is incomplete, acting [0,50]
pg 1.7d8 is incomplete, acting [29,17]
pg 1.7fa is incomplete, acting [33,41]
pg 1.844 is incomplete, acting [38,2]
pg 1.8b3 is incomplete, acting [47,2]
pg 1.909 is incomplete, acting [7,37]
pg 1.94f is incomplete, acting [29,16]
pg 1.964 is incomplete, acting [37,4]
pg 1.974 is stuck inactive since forever, current state incomplete, last acting [33,53]
pg 1.eeb is incomplete, acting [21,44]
pg 1.f2c is incomplete, acting [4,47]
pg 1.f95 is incomplete, acting [2,29]
pg 1.fb8 is incomplete, acting [2,25]
pg 1.fe7 is incomplete, acting [0,52]
pg 1.ff8 is incomplete, acting [7,50]
ceph pg 1.ff8 query
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0
},
"up": [],
"acting": [],
"blocked_by": [],
"up_primary": -1,
"acting_primary": -1
},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
{
"peer": "50",
"pgid": "1.ff8",
"last_update": "0'0",
"last_complete": "0'0",
"log_tail": "0'0",
"last_user_version": 0,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 0,
"epoch_pool_created": 0,
"last_epoch_started": 0,
"last_interval_started": 0,
"last_epoch_clean": 0,
"last_interval_clean": 0,
"last_epoch_split": 0,
"last_epoch_marked_full": 0,
"same_up_since": 0,
"same_interval_since": 0,
"same_primary_since": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000"
},
"stats": {
"version": "0'0",
"reported_seq": "0",
"reported_epoch": "0",
"state": "unknown",
"last_fresh": "0.000000",
"last_change": "0.000000",
"last_active": "0.000000",
"last_peered": "0.000000",
"last_clean": "0.000000",
"last_became_active": "0.000000",
"last_became_peered": "0.000000",
"last_unstale": "0.000000",
"last_undegraded": "0.000000",
"last_fullsized": "0.000000",
"mapping_epoch": 0,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 0,
"last_epoch_clean": 0,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "0.000000",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "0.000000",
"last_clean_scrub_stamp": "0.000000",
"log_size": 0,
"ondisk_log_size": 0,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 0,
"num_objects": 0,
"num_object_clones": 0,
"num_object_copies": 0,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 0,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 0,
"num_write_kb": 0,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0
},
"up": [],
"acting": [],
"blocked_by": [],
"up_primary": -1,
"acting_primary": -1
},
"empty": 1,
"dne": 1,
"incomplete": 0,
"last_epoch_started": 0,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
}
],
"recovery_state": [
{
"name": "Started/Primary/Peering/Incomplete",
"enter_time": "2018-11-07 09:22:42.983356",
"comment": "not enough complete instances of this PG"
},
{
"name": "Started/Primary/Peering",
"enter_time": "2018-11-07 09:22:42.947534",
"past_intervals": [
{
"first": "2098",
"last": "6839",
"all_participants": [
{
"osd": 7
},
{
"osd": 20
},
{
"osd": 48
},
{
"osd": 50
}
],
"intervals": [
{
"first": "4262",
"last": "4271",
"acting": "48"
},
{
"first": "4272",
"last": "4441",
"acting": "20"
},
{
"first": "6837",
"last": "6839",
"acting": "7"
}
]
}
],
"probing_osds": [
"7",
"20",
"48",
"50"
],
"down_osds_we_would_probe": [],
"peering_blocked_by": [],
"peering_blocked_by_detail": [
{
"detail": "peering_blocked_by_history_les_bound"
}
]
},
{
"name": "Started",
"enter_time": "2018-11-07 09:22:42.947468"
}
],
"agent_state": {}
}
best regards
marcel
admin
2,930 Posts
Quote from admin on November 7, 2018, 3:53 pm
Try restarting the following OSDs: 7, 20, 48, 50 and see if it helps.
The problem lies in:
"detail": "peering_blocked_by_history_les_bound"
You can search for the meaning of this error and proposed fixes.
You can also look at the logs of the above OSDs.
Try restarting the following OSDs: 7, 20, 48, 50 and see if it helps.
The problem lies in:
"detail": "peering_blocked_by_history_les_bound"
You can search for the meaning of this error and proposed fixes.
You can also look at the logs of the above OSDs.
Kurti2k
16 Posts
Quote from Kurti2k on November 8, 2018, 1:13 pm
// closed
I have reinstalled with the new release.
The error came from a hard reboot after power switching.
best regards
Marcel
// closed
I have reinstalled with the new release.
The error came from a hard reboot after power switching.
best regards
Marcel