Project

General

Profile

Actions

Idea #11836

closed

[Nodemanager] Improve status.json for monitoring

Added by Peter Amstutz almost 7 years ago. Updated almost 6 years ago.

Status:
Rejected
Priority:
Normal
Assigned To:
Category:
-
Target version:
-
Start date:
05/23/2018
Due date:
Story points:
0.5

Description

11836-nodemanager-status-json

Example status.json

Lists node counts, information about node sizes, and individual node details.

{
  "nodes_down": 0,
  "status": "OK",
  "node_compute-ah091aeky2404it-zzzzz": null,
  "timestamp": "2017-06-08T14:26:19Z",
  "nodes_idle": 0,
  "nodes_wish": 4,
  "nodes_shutdown": 0,
  "nodes_booting": 0,
  "size_Standard_D3": {
    "nodes_down": 0,
    "disk": 200,
    "name": "Standard_D3",
    "nodes_unpaired": 2,
    "nodes_idle": 0,
    "ram": 3325,
    "price": 0.56,
    "bandwidth": 0,
    "nodes_shutdown": 0,
    "nodes_booting": 0,
    "nodes_busy": 0,
    "id": "Standard_D3" 
  },
  "size_Standard_D4": {
    "nodes_down": 0,
    "disk": 400,
    "name": "Standard_D4",
    "nodes_unpaired": 0,
    "nodes_idle": 0,
    "ram": 6650,
    "price": 1.12,
    "bandwidth": 0,
    "nodes_shutdown": 0,
    "nodes_booting": 0,
    "nodes_busy": 0,
    "id": "Standard_D4" 
  },
  "node_compute-sl0mohfw1i58uyx-zzzzz": {
    "state": [
      "unpaired",
      "closed",
      "boot wait",
      "idle exceeded" 
    ],
    "arvados": null,
    "id": "compute-sl0mohfw1i58uyx-zzzzz",
    "size": "Standard_D3" 
  },
  "servicetype": "arvados_nodemanager",
  "nodes_unpaired": 2,
  "nodes_max": 8,
  "hostname": "9294ee7bb1cf",
  "node_compute-a2x3cjc8fjzk8qz-zzzzz": null,
  "node_compute-jozp0dj520ovozp-zzzzz": {
    "state": [
      "idle",
      "closed",
      "boot wait",
      "idle exceeded" 
    ],
    "arvados": {
      "status": "running",
      "first_ping_at": "2017-06-08T14:26:14.962205000Z",
      "modified_by_client_uuid": "zzzzz-ozdt8-obw7foaks3qjyej",
      "domain": false,
      "owner_uuid": "zzzzz-tpzed-d9tiejq69daie8f",
      "etag": "1d7e4jeyvufmpnr6vr4inin5l",
      "slot_number": 1,
      "crunch_worker_state": "idle",
      "ip_address": null,
      "properties": {
        "cloud_node": {
          "price": 0.56,
          "size": "Standard_D3" 
        }
      },
      "info": {
        "ec2_instance_id": "compute-jozp0dj520ovozp-zzzzz" 
      },
      "kind": "arvados#node",
      "uuid": "zzzzz-7ekkf-jozp0dj520ovozp",
      "modified_by_user_uuid": "zzzzz-tpzed-d9tiejq69daie8f",
      "nameservers": [
        "192.168.1.1" 
      ],
      "created_at": "2017-06-08T14:26:14.926534000Z",
      "hostname": "compute1",
      "modified_at": "2017-06-08T14:26:15.021194000Z",
      "href": "/nodes/zzzzz-7ekkf-jozp0dj520ovozp",
      "last_ping_at": "2017-06-08T14:26:14.962205000Z",
      "job_uuid": null
    },
    "id": "compute-jozp0dj520ovozp-zzzzz",
    "size": "Standard_D3" 
  },
  "nodes_quota": 3,
  "version": "0.1.20170608142355",
  "node_compute-9qqojgiar0ezj2j-zzzzz": {
    "state": [
      "idle",
      "closed",
      "boot wait",
      "idle exceeded" 
    ],
    "arvados": {
      "status": "running",
      "first_ping_at": "2017-06-08T14:26:14.993218000Z",
      "modified_by_client_uuid": "zzzzz-ozdt8-obw7foaks3qjyej",
      "domain": false,
      "owner_uuid": "zzzzz-tpzed-d9tiejq69daie8f",
      "etag": "5qi8mjmofuq5f3bjj8h7kh20x",
      "slot_number": 2,
      "crunch_worker_state": "idle",
      "ip_address": "127.0.0.1",
      "properties": {
        "cloud_node": {
          "price": 0.56,
          "size": "Standard_D3" 
        }
      },
      "info": {
        "ec2_instance_id": "compute-9qqojgiar0ezj2j-zzzzz" 
      },
      "kind": "arvados#node",
      "uuid": "zzzzz-7ekkf-9qqojgiar0ezj2j",
      "modified_by_user_uuid": "zzzzz-tpzed-d9tiejq69daie8f",
      "nameservers": [
        "192.168.1.1" 
      ],
      "created_at": "2017-06-08T14:26:14.966885000Z",
      "hostname": "compute2",
      "modified_at": "2017-06-08T14:26:15.062881000Z",
      "href": "/nodes/zzzzz-7ekkf-9qqojgiar0ezj2j",
      "last_ping_at": "2017-06-08T14:26:14.993218000Z",
      "job_uuid": null
    },
    "id": "compute-9qqojgiar0ezj2j-zzzzz",
    "size": "Standard_D3" 
  },
  "nodes_busy": 0
}

Subtasks: 1 (0 open, 1 closed)

Task #11845: Review 11836-nodemanager-status-json | Closed | Nico César | 05/23/2018 | Actions

Related issues

Related to Arvados - Idea #11349: [Node Manager] Add status URL for node manager | Resolved | Tom Clegg | 04/10/2017 | Actions
Related to Arvados - Idea #12085: Add monitoring/alarm for failed/slow job dispatch & excess idle nodes | Resolved | Lucas Di Pentima | 08/08/2017 | Actions
Actions

Also available in: Atom PDF