You might encounter a problem with the search node in Analytics where the replicas show as down. To find the root cause, check /var/log/versa/solr/solr.log.


Caveat: Make sure the system time on all nodes, including the search and analytics nodes, is in sync.


Error seen under "vsh dbstatus"


2024-08-10 08:00:38,572 - versa-analytics-monitor - ERROR - The coll/shard/replica status is alarmlogs/shard1/alarmlogs_shard1_replica_n7 down
2024-08-10 08:00:38,572 - versa-analytics-monitor - ERROR - The coll/shard/replica status is searchlogs/shard1/searchlogs_shard1_replica_n9 down
2024-08-10 08:00:38,572 - versa-analytics-monitor - ERROR - The coll/shard/replica status is mapDataLogs/shard1/mapDataLogs_shard1_replica_n7 down


Check the cluster_status output from the search node, which will look like the example below.

Look at the state of each log collection, e.g. alarmlogs, searchlogs, and mapDataLogs.


shell

sudo /opt/versa/scripts/van-install/cluster_install.sh solr cluster_status


Output:

{

  "responseHeader":{

    "status":0,

    "QTime":2},

  "cluster":{

    "collections":{

      "alarmlogs":{

        "pullReplicas":"0",

        "replicationFactor":"1",

        "shards":{"shard1":{

            "range":"80000000-7fffffff",

            "state":"active",

            "replicas":{"core_node8":{

                "core":"alarmlogs_shard1_replica_n7",

                "node_name":"10.128.128.181:8983_solr",

                "base_url":"http://10.128.128.181:8983/solr",

                "state":"down",

                "type":"NRT",

                "force_set_state":"false",

                "leader":"true"}},

            "health":"RED"}},

        "router":{"name":"compositeId"},

        "maxShardsPerNode":"1",

        "autoAddReplicas":"false",

        "nrtReplicas":"1",

        "tlogReplicas":"0",

        "health":"RED",

        "znodeVersion":35,

        "aliases":["alarmlogsRead",

          "globallogs"],

        "configName":"van_configs"},

      "searchlogs":{

        "pullReplicas":"0",

        "replicationFactor":"1",

        "shards":{"shard1":{

            "range":"80000000-7fffffff",

            "state":"active",

            "replicas":{"core_node10":{

                "core":"searchlogs_shard1_replica_n9",

                "node_name":"10.128.128.181:8983_solr",

                "base_url":"http://10.128.128.181:8983/solr",

                "state":"down",

                "type":"NRT",

                "force_set_state":"false",

                "leader":"true"}},

            "health":"RED"}},

        "router":{"name":"compositeId"},

        "maxShardsPerNode":"1",

        "autoAddReplicas":"false",

        "nrtReplicas":"1",

        "tlogReplicas":"0",

        "health":"RED",

        "znodeVersion":79,

        "aliases":["alarmlogsRead",

          "globallogs"],

        "configName":"van_configs"},

      "mapDataLogs":{

        "pullReplicas":"0",

        "replicationFactor":"1",

        "shards":{"shard1":{

            "range":"80000000-7fffffff",

            "state":"active",

            "replicas":{"core_node8":{

                "core":"mapDataLogs_shard1_replica_n7",

                "node_name":"10.128.128.181:8983_solr",

                "base_url":"http://10.128.128.181:8983/solr",

                "state":"down",

                "type":"NRT",

                "force_set_state":"false",

                "leader":"true"}},

            "health":"RED"}},

        "router":{"name":"compositeId"},

        "maxShardsPerNode":"1",

        "autoAddReplicas":"false",

        "nrtReplicas":"1",

        "tlogReplicas":"0",

        "health":"RED",

        "znodeVersion":31,

        "configName":"van_configs"}},

    "aliases":{

      "alarmlogsRead":"alarmlogs,searchlogs",

      "globallogs":"alarmlogs,searchlogs"},

    "live_nodes":["10.128.128.181:8983_solr"]}}




Fix:


Step 1: Perform a rolling restart of Solr


Solr restart

sudo service versa-monit stop
sudo service solr stop
sudo service zookeeper restart
sudo service solr start
sudo service versa-monit start


If step 1 doesn't fix the issue, proceed with step 2.


Step 2: Delete and recreate the replicas using the below steps


Delete the problematic replicas:


You can get the shard and core node names (e.g. shard1 and core_node8) from the cluster_status output shown above.


sudo /opt/versa/scripts/van-install/cluster_install.sh solr delete_replica searchlogs <shard> <core_nodeX>
sudo /opt/versa/scripts/van-install/cluster_install.sh solr delete_replica alarmlogs <shard> <core_nodeX>
sudo /opt/versa/scripts/van-install/cluster_install.sh solr delete_replica mapDataLogs <shard> <core_nodeX>

Re-add the problematic replicas to fix the issue:
sudo /opt/versa/scripts/van-install/cluster_install.sh solr add_replica searchlogs <shard>
sudo /opt/versa/scripts/van-install/cluster_install.sh solr add_replica alarmlogs <shard>
sudo /opt/versa/scripts/van-install/cluster_install.sh solr add_replica mapDataLogs <shard>



Re-run the cluster_status command; you should see output like the following, with each replica's state shown as active.


sudo /opt/versa/scripts/van-install/cluster_install.sh solr cluster_status


Output:

{

  "responseHeader":{

    "status":0,

    "QTime":2},

  "cluster":{

    "collections":{

      "alarmlogs":{

        "pullReplicas":"0",

        "replicationFactor":"1",

        "shards":{"shard1":{

            "range":"80000000-7fffffff",

            "state":"active",

            "replicas":{"core_node8":{

                "core":"alarmlogs_shard1_replica_n7",

                "node_name":"10.128.128.181:8983_solr",

                "base_url":"http://10.128.128.181:8983/solr",

                "state":"active",

                "type":"NRT",

                "force_set_state":"false",

                "leader":"true"}},

            "health":"RED"}},

        "router":{"name":"compositeId"},

        "maxShardsPerNode":"1",

        "autoAddReplicas":"false",

        "nrtReplicas":"1",

        "tlogReplicas":"0",

        "health":"RED",

        "znodeVersion":35,

        "aliases":["alarmlogsRead",

          "globallogs"],

        "configName":"van_configs"},

      "searchlogs":{

        "pullReplicas":"0",

        "replicationFactor":"1",

        "shards":{"shard1":{

            "range":"80000000-7fffffff",

            "state":"active",

            "replicas":{"core_node10":{

                "core":"searchlogs_shard1_replica_n9",

                "node_name":"10.128.128.181:8983_solr",

                "base_url":"http://10.128.128.181:8983/solr",

                "state":"active",

                "type":"NRT",

                "force_set_state":"false",

                "leader":"true"}},

            "health":"RED"}},

        "router":{"name":"compositeId"},

        "maxShardsPerNode":"1",

        "autoAddReplicas":"false",

        "nrtReplicas":"1",

        "tlogReplicas":"0",

        "health":"RED",

        "znodeVersion":79,

        "aliases":["alarmlogsRead",

          "globallogs"],

        "configName":"van_configs"},

      "mapDataLogs":{

        "pullReplicas":"0",

        "replicationFactor":"1",

        "shards":{"shard1":{

            "range":"80000000-7fffffff",

            "state":"active",

            "replicas":{"core_node8":{

                "core":"mapDataLogs_shard1_replica_n7",

                "node_name":"10.128.128.181:8983_solr",

                "base_url":"http://10.128.128.181:8983/solr",

                "state":"active",

                "type":"NRT",

                "force_set_state":"false",

                "leader":"true"}},

            "health":"RED"}},

        "router":{"name":"compositeId"},

        "maxShardsPerNode":"1",

        "autoAddReplicas":"false",

        "nrtReplicas":"1",

        "tlogReplicas":"0",

        "health":"RED",

        "znodeVersion":31,

        "configName":"van_configs"}},

    "aliases":{

      "alarmlogsRead":"alarmlogs,searchlogs",

      "globallogs":"alarmlogs,searchlogs"},

    "live_nodes":["10.128.128.181:8983_solr"]}}