You might encounter a problem on the Search node in Analytics where the replicas show as down. To find the root cause, start by reviewing /var/log/versa/solr/solr.log.
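For a quick first pass, you can scan the log for recent errors. The exact messages vary by failure, so treat this only as a starting point:
# Look for recent ERROR/exception entries; adjust the line count as needed
sudo tail -n 200 /var/log/versa/solr/solr.log | grep -iE "error|exception"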
Caveat: make sure time is in sync across all nodes, including the Search and Analytics nodes.
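To verify, compare the clock on each Analytics and Search node; the second check below assumes NTP is in use:
date
# Assumes NTP; use the equivalent command if a different time service is deployed
ntpq -p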
The error can also be seen in the "vsh dbstatus" output.
Check the cluster_status output from the Search node, which will look like the example below.
Look at the state of each log collection, e.g. alarmlogs, searchlogs, and mapDataLogs.
sudo /opt/versa/scripts/van-install/cluster_install.sh solr cluster_status
Output:
{
"responseHeader":{
"status":0,
"QTime":2},
"cluster":{
"collections":{
"alarmlogs":{
"pullReplicas":"0",
"replicationFactor":"1",
"shards":{"shard1":{
"range":"80000000-7fffffff",
"state":"active",
"replicas":{"core_node8":{
"core":"alarmlogs_shard1_replica_n7",
"node_name":"10.128.128.181:8983_solr",
"base_url":"http://10.128.128.181:8983/solr",
"state":"down",
"type":"NRT",
"force_set_state":"false",
"leader":"true"}},
"health":"RED"}},
"router":{"name":"compositeId"},
"maxShardsPerNode":"1",
"autoAddReplicas":"false",
"nrtReplicas":"1",
"tlogReplicas":"0",
"health":"RED",
"znodeVersion":35,
"aliases":["alarmlogsRead",
"globallogs"],
"configName":"van_configs"},
"searchlogs":{
"pullReplicas":"0",
"replicationFactor":"1",
"shards":{"shard1":{
"range":"80000000-7fffffff",
"state":"active",
"replicas":{"core_node10":{
"core":"searchlogs_shard1_replica_n9",
"node_name":"10.128.128.181:8983_solr",
"base_url":"http://10.128.128.181:8983/solr",
"state":"down",
"type":"NRT",
"force_set_state":"false",
"leader":"true"}},
"health":"RED"}},
"router":{"name":"compositeId"},
"maxShardsPerNode":"1",
"autoAddReplicas":"false",
"nrtReplicas":"1",
"tlogReplicas":"0",
"health":"RED",
"znodeVersion":79,
"aliases":["alarmlogsRead",
"globallogs"],
"configName":"van_configs"},
"mapDataLogs":{
"pullReplicas":"0",
"replicationFactor":"1",
"shards":{"shard1":{
"range":"80000000-7fffffff",
"state":"active",
"replicas":{"core_node8":{
"core":"mapDataLogs_shard1_replica_n7",
"node_name":"10.128.128.181:8983_solr",
"base_url":"http://10.128.128.181:8983/solr",
"state":"down",
"type":"NRT",
"force_set_state":"false",
"leader":"true"}},
"health":"RED"}},
"router":{"name":"compositeId"},
"maxShardsPerNode":"1",
"autoAddReplicas":"false",
"nrtReplicas":"1",
"tlogReplicas":"0",
"health":"RED",
"znodeVersion":31,
"configName":"van_configs"}},
"aliases":{
"alarmlogsRead":"alarmlogs,searchlogs",
"globallogs":"alarmlogs,searchlogs"},
"live_nodes":["10.128.128.181:8983_solr"]}}
Fix:
Step 1: Perform a rolling restart of Solr:
sudo service versa-monit stop
sudo service solr stop
sudo service zookeeper restart
sudo service solr start
sudo service versa-monit start
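Before re-checking the cluster, confirm the services came back up (standard service status checks, using the same service names as above):
sudo service zookeeper status
sudo service solr status
sudo service versa-monit status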
If Step 1 does not resolve the issue, proceed with Step 2.
Step 2: Delete and recreate the down replicas using the commands below.
Take the shard and core_node values from the cluster_status output above (the "replicas" entries whose state is "down").
sudo /opt/versa/scripts/van-install/cluster_install.sh solr delete_replica alarmlogs <shard> <core_nodeX>
sudo /opt/versa/scripts/van-install/cluster_install.sh solr delete_replica mapDataLogs <shard> <core_nodeX>
sudo /opt/versa/scripts/van-install/cluster_install.sh solr add_replica alarmlogs <shard>
sudo /opt/versa/scripts/van-install/cluster_install.sh solr add_replica mapDataLogs <shard>
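In the example output above, the searchlogs replica is also down. If that is the case in your deployment, it can be rebuilt the same way, using the same script syntax with the searchlogs collection:
# Only needed if cluster_status shows the searchlogs replica as down
sudo /opt/versa/scripts/van-install/cluster_install.sh solr delete_replica searchlogs <shard> <core_nodeX>
sudo /opt/versa/scripts/van-install/cluster_install.sh solr add_replica searchlogs <shard>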
Re-run the cluster_status command; you should now see output similar to the one below, with each replica state showing "active".
sudo /opt/versa/scripts/van-install/cluster_install.sh solr cluster_status
Output:
{
"responseHeader":{
"status":0,
"QTime":2},
"cluster":{
"collections":{
"alarmlogs":{
"pullReplicas":"0",
"replicationFactor":"1",
"shards":{"shard1":{
"range":"80000000-7fffffff",
"state":"active",
"replicas":{"core_node8":{
"core":"alarmlogs_shard1_replica_n7",
"node_name":"10.128.128.181:8983_solr",
"base_url":"http://10.128.128.181:8983/solr",
"state":"active",
"type":"NRT",
"force_set_state":"false",
"leader":"true"}},
"health":"RED"}},
"router":{"name":"compositeId"},
"maxShardsPerNode":"1",
"autoAddReplicas":"false",
"nrtReplicas":"1",
"tlogReplicas":"0",
"health":"RED",
"znodeVersion":35,
"aliases":["alarmlogsRead",
"globallogs"],
"configName":"van_configs"},
"searchlogs":{
"pullReplicas":"0",
"replicationFactor":"1",
"shards":{"shard1":{
"range":"80000000-7fffffff",
"state":"active",
"replicas":{"core_node10":{
"core":"searchlogs_shard1_replica_n9",
"node_name":"10.128.128.181:8983_solr",
"base_url":"http://10.128.128.181:8983/solr",
"state":"active",
"type":"NRT",
"force_set_state":"false",
"leader":"true"}},
"health":"RED"}},
"router":{"name":"compositeId"},
"maxShardsPerNode":"1",
"autoAddReplicas":"false",
"nrtReplicas":"1",
"tlogReplicas":"0",
"health":"RED",
"znodeVersion":79,
"aliases":["alarmlogsRead",
"globallogs"],
"configName":"van_configs"},
"mapDataLogs":{
"pullReplicas":"0",
"replicationFactor":"1",
"shards":{"shard1":{
"range":"80000000-7fffffff",
"state":"active",
"replicas":{"core_node8":{
"core":"mapDataLogs_shard1_replica_n7",
"node_name":"10.128.128.181:8983_solr",
"base_url":"http://10.128.128.181:8983/solr",
"state":"active",
"type":"NRT",
"force_set_state":"false",
"leader":"true"}},
"health":"RED"}},
"router":{"name":"compositeId"},
"maxShardsPerNode":"1",
"autoAddReplicas":"false",
"nrtReplicas":"1",
"tlogReplicas":"0",
"health":"RED",
"znodeVersion":31,
"configName":"van_configs"}},
"aliases":{
"alarmlogsRead":"alarmlogs,searchlogs",
"globallogs":"alarmlogs,searchlogs"},
"live_nodes":["10.128.128.181:8983_solr"]}}