autoendpoint/routes/
health.rs

1//! Health and Dockerflow routes
2use std::collections::HashMap;
3use std::fs::read_to_string;
4use std::thread;
5
6use actix_web::{
7    HttpResponse,
8    web::{Data, Json},
9};
10use reqwest::StatusCode;
11use serde_json::json;
12
13use crate::error::{ApiErrorKind, ApiResult};
14use crate::server::AppState;
15use autopush_common::db::error::DbResult;
16#[cfg(feature = "reliable_report")]
17use autopush_common::errors::ApcError;
18#[cfg(feature = "reliable_report")]
19use autopush_common::metric_name::MetricName;
20#[cfg(feature = "reliable_report")]
21use autopush_common::metrics::StatsdClientExt;
22#[cfg(feature = "reliable_report")]
23use autopush_common::util::b64_encode_url;
24
/// Get the local memory usage as a percentage of the configured limit
/// (presumes running under kubernetes).
///
/// Reads `memory.max` and `memory.current` from the directory `memory_path`.
///
/// Returns `None` when:
/// * either file cannot be read or does not parse as an unsigned integer,
/// * the limit is the literal `max` (i.e. no limit is set), or
/// * the limit is `0`, since the ratio would be infinite or NaN.
pub fn memory_usage_percentage(memory_path: &str) -> Option<f64> {
    // If we can read the limit (and there is one)...
    let mem_limit_raw = read_to_string(format!("{}/{}", memory_path, "memory.max")).ok()?;
    let mem_limit_raw = mem_limit_raw.trim();
    if mem_limit_raw == "max" {
        return None;
    }
    let mem_limit = mem_limit_raw.parse::<u64>().ok()?;
    // A zero limit cannot be expressed as a percentage; dividing by it would
    // yield `inf` (or `NaN` when the current usage is also zero).
    if mem_limit == 0 {
        return None;
    }

    // ...get the current memory usage snapshot.
    let mem_current = read_to_string(format!("{}/{}", memory_path, "memory.current"))
        .ok()?
        .trim()
        .parse::<u64>()
        .ok()?;

    // Stars have aligned, and we can return a value.
    Some((mem_current as f64 / mem_limit as f64) * 100.0)
}
41
42/// Handle the `/health` and `/__heartbeat__` routes
43pub async fn health_route(state: Data<AppState>) -> Json<serde_json::Value> {
44    let router_health = interpret_table_health(state.db.router_table_exists().await);
45    let message_health = interpret_table_health(state.db.message_table_exists().await);
46    let mut routers: HashMap<&str, bool> = HashMap::new();
47    routers.insert("apns", state.apns_router.active());
48    routers.insert("fcm", state.fcm_router.active());
49
50    let mut health = json!({
51        "status": if state
52            .db
53            .health_check()
54            .await
55            .map_err(|e| {
56                error!("Autoendpoint health error: {:?}", e);
57                e
58            })
59            .is_ok() {
60            "OK"
61        } else {
62            "ERROR"
63        },
64        "version": env!("CARGO_PKG_VERSION"),
65        "router_table": router_health,
66        "message_table": message_health,
67        "routers": routers,
68        "request_count":state.in_process_subscription_updates.load(std::sync::atomic::Ordering::Relaxed),
69    });
70
71    // if we can display memory usage, do so.
72    if let Some(mem_usage) = memory_usage_percentage(&state.settings.kubernetes_memory_path) {
73        health["memory_usage_percentage"] = json!(mem_usage);
74    }
75
76    #[cfg(feature = "reliable_report")]
77    {
78        let reliability_health: Result<String, ApcError> = state
79            .reliability
80            .health_check()
81            .await
82            .map(|_| {
83                let keys: Vec<String> = state
84                    .settings
85                    .tracking_keys()
86                    .unwrap_or_default()
87                    .iter()
88                    .map(|k|
89                        // Hint the key values
90                        b64_encode_url(k)[..8].to_string())
91                    .collect();
92                if keys.is_empty() {
93                    Ok("NO_TRACKING_KEYS".to_owned())
94                } else {
95                    Ok(format!("OK: {}", keys.join(",")))
96                }
97            })
98            .unwrap_or_else(|e| {
99                // Record that Redis is down.
100                state
101                    .metrics
102                    .incr_with_tags(MetricName::ReliabilityErrorRedisUnavailable)
103                    .with_tag("application", "autoendpoint")
104                    .send();
105                error!("🔍🟥 Reliability reporting down: {:?}", e);
106                Ok("STORE_ERROR".to_owned())
107            });
108        health["reliability"] = json!(reliability_health);
109    }
110    Json(health)
111}
112
113/// Convert the result of a DB health check to JSON
114fn interpret_table_health(health: DbResult<bool>) -> serde_json::Value {
115    match health {
116        Ok(true) => json!({
117            "status": "OK"
118        }),
119        Ok(false) => json!({
120            "status": "NOT OK",
121            "cause": "Nonexistent table"
122        }),
123        Err(e) => {
124            error!("Autoendpoint health error: {:?}", e);
125            json!({
126                "status": "NOT OK",
127                "cause": e.to_string()
128            })
129        }
130    }
131}
132
133/// Handle the `/status` route
134pub async fn status_route() -> ApiResult<Json<serde_json::Value>> {
135    Ok(Json(json!({
136        "status": "OK",
137        "version": env!("CARGO_PKG_VERSION"),
138    })))
139}
140
141/// Handle the `/__lbheartbeat__` route
142pub async fn lb_heartbeat_route() -> HttpResponse {
143    // Used by the load balancers, just return OK.
144    HttpResponse::Ok().finish()
145}
146
147/// Handle the `/__version__` route
148pub async fn version_route() -> HttpResponse {
149    // Return the contents of the version.json file created by circleci
150    // and stored in the docker root
151    HttpResponse::Ok()
152        .content_type("application/json")
153        .body(include_str!("../../../version.json"))
154}
155
156/// Handle the `/v1/err` route
157pub async fn log_check() -> ApiResult<String> {
158    error!(
159        "Test Critical Message";
160        "status_code" => StatusCode::IM_A_TEAPOT.as_u16(),
161        "errno" => 999,
162    );
163
164    thread::spawn(|| {
165        panic!("LogCheck");
166    });
167
168    Err(ApiErrorKind::LogCheck.into())
169}