1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
//! # Merino Settings
//!
//! The top level settings struct is [Settings]. If you are looking for
//! documentation about the settings that can be set, start there.
//!
//! Configuration is specified in several ways, with later methods overriding earlier ones.
//!
//! 1. A base configuration checked into the repository, in `config/base.yaml`.
//!    This provides the default values for most settings.
//! 2. Per-environment configuration files in the `config` directory. The
//!    environment is selected using the environment variable `MERINO__ENV`. The
//!    settings for that environment are then loaded from `config/${env}.yaml`, if
//!    it exists. The default environment is "development". A "production"
//!    environment is also provided.
//! 3. A local configuration file not checked into the repository, at
//!    `config/local.yaml`. This file is in `.gitignore` and is safe to use for
//!    local configuration and secrets if desired.
//! 4. Environment variables that begin with `MERINO` and use `__` as a level
//!    separator. For example, `Settings::http::workers` can be controlled from the
//!    environment variable `MERINO__HTTP__WORKERS`.
//!
//! Tests should use `Settings::load_for_tests` which only reads from
//! `config/base.yaml`, `config/test.yaml`, and `config/local_test.yaml` (if it
//! exists). It does not read from environment variables.
//!
//! Configuration files are canonically YAML files. However, any format supported
//! by the [config] crate can be used, including JSON and TOML. To choose another
//! format, simply use a different extension for your file, like
//! `config/local.toml`.

mod logging;
pub mod providers;
mod redis;

pub use logging::{LogFormat, LoggingSettings};

use anyhow::{Context, Result};
use config::{Config, Environment, File};
use http::Uri;
use sentry::types::Dsn;
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr, DurationSeconds};
use std::{collections::HashMap, net::SocketAddr, path::PathBuf, str::FromStr, time::Duration};

pub use crate::providers::SuggestionProviderConfig;
use crate::providers::SuggestionProviderSettings;

/// Top level settings object for Merino.
///
/// Every field without `#[serde(default)]` or an `Option` type must be
/// present in the merged configuration for deserialization to succeed.
#[serde_as]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Settings {
    /// The environment Merino is running in. Should only be set with the
    /// `MERINO__ENV` environment variable.
    pub env: String,

    /// Enable additional features to debug the application. This should not be
    /// set to true in production environments.
    pub debug: bool,

    /// Settings for the HTTP server.
    pub http: HttpSettings,

    /// Settings that control where the suggestion provider configuration is
    /// loaded from.
    pub provider_settings: ProviderSettings,

    /// Providers to use to generate suggestions, keyed by name.
    ///
    /// Defaults to an empty map when absent from the configuration. Both
    /// [`Settings::load`] and [`Settings::load_for_tests`] overwrite this
    /// field after deserialization with the providers they load.
    #[serde(default)]
    pub suggestion_providers: HashMap<String, SuggestionProviderConfig>,

    /// Logging settings.
    pub logging: LoggingSettings,

    /// Metrics settings.
    pub metrics: MetricsSettings,

    /// Settings for error reporting via Sentry.
    pub sentry: SentrySettings,

    /// URL to redirect curious users to, that explains what this service is.
    /// Preferably a public wiki page. Optional.
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub public_documentation: Option<Uri>,

    /// Settings for connecting to Redis.
    pub redis: RedisSettings,

    /// Settings for connecting to Remote Settings.
    pub remote_settings: RemoteSettingsGlobalSettings,

    /// Settings to use when determining the location associated with requests.
    pub location: LocationSettings,

    /// If on, log the entire suggestion request object as a part of the
    /// tracing log, including the search query. When the setting is
    /// off, the suggest request object should be logged, but the
    /// search query should be blank.
    pub log_full_request: bool,

    /// Settings for connecting to Elasticsearch.
    pub elasticsearch: ElasticsearchSettings,
}

/// Settings for the HTTP server.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct HttpSettings {
    /// The host and port to listen on, such as "127.0.0.1:8080" or "0.0.0.0:80".
    /// Deserialized as a [`SocketAddr`], so both the address and the port
    /// must be given.
    pub listen: SocketAddr,

    /// The number of workers to use. Optional. If no value is provided, the
    /// number of logical cores will be used.
    pub workers: Option<usize>,
}

/// Which cache, if any, a provider should use.
///
/// Variants are written in snake case in configuration: `none`, `redis`,
/// or `memory`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CacheType {
    /// Do not use a cache.
    None,
    /// Use the Redis cache.
    Redis,
    /// Use the memory cache.
    Memory,
}

/// Settings for a provider that syncs its data from Remote Settings.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AdmRsSettings {
    /// Whether this provider should be active.
    pub enabled: bool,

    /// The path, relative or absolute, to where to store Remote Settings data.
    pub storage_path: PathBuf,

    /// The server to sync from. If no value is provided, a default is provided
    /// by the remote settings client.
    pub server: Option<String>,

    /// The collection to sync from.
    pub collection: String,

    /// Which cache, if any, to use with this provider.
    pub cache: CacheType,
}

/// Settings for the WikiFruit provider.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct WikiFruitSettings {
    /// Whether this provider should be active.
    pub enabled: bool,

    /// Which cache, if any, to use with this provider.
    pub cache: CacheType,
}

/// Settings for connecting to Redis.
#[serde_as]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RedisSettings {
    /// The URL to connect to Redis at. Example: `redis://127.0.0.1/db`
    ///
    /// Converted to and from its configuration representation by
    /// `crate::redis::AsConnectionInfo`.
    #[serde_as(as = "crate::redis::AsConnectionInfo")]
    pub url: ::redis::ConnectionInfo,
}

impl RedisSettings {
    /// Render the Redis connection URL with any password replaced by the
    /// literal text `<PASSWORD>`, so that it is safe to log.
    ///
    /// The username, address, and database are included as configured. The
    /// `user[:password]@` section is omitted entirely when neither a username
    /// nor a password is set.
    #[must_use]
    pub fn redacted_url(&self) -> String {
        let info = &self.url;

        // Build the credentials part of the URL (including the trailing `@`)
        // separately, so the rest of the URL is only formatted once.
        let credentials = match (&info.username, &info.passwd) {
            (None, None) => String::new(),
            (None, Some(_)) => String::from(":<PASSWORD>@"),
            (Some(user), None) => format!("{}@", user),
            (Some(user), Some(_)) => format!("{}:<PASSWORD>@", user),
        };

        format!("redis://{}{}/{}", credentials, info.addr, info.db)
    }
}

/// Global settings for connecting to Remote Settings.
///
/// The duration fields are read from configuration as a number of seconds,
/// under the `*_sec` keys given by their `rename` attributes.
#[serde_as]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RemoteSettingsGlobalSettings {
    /// The server to sync from, including the protocol and port, but not including a trailing slash.
    ///
    /// ## Examples
    /// - `http://127.0.0.1`
    /// - `https://firefox.settings.services.mozilla.com`
    pub server: String,

    /// The bucket to use for Remote Settings providers, if not overridden by
    /// the provider.
    pub default_bucket: String,

    /// The collection to use for Remote Settings providers, if not overridden
    /// by the provider.
    pub default_collection: String,

    /// The HTTP request timeout (in seconds) for the underlying client of the Remote Settings client:
    /// [`ReqwestClient`](merino-adm::reqwest_client::ReqwestClient).
    ///
    /// Configuration key: `http_request_timeout_sec`.
    #[serde_as(as = "DurationSeconds")]
    #[serde(rename = "http_request_timeout_sec")]
    pub http_request_timeout: Duration,

    /// The HTTP connect timeout (in seconds) for the underlying client of the Remote Settings client:
    /// [`ReqwestClient`](merino-adm::reqwest_client::ReqwestClient).
    ///
    /// Configuration key: `http_connect_timeout_sec`.
    #[serde_as(as = "DurationSeconds")]
    #[serde(rename = "http_connect_timeout_sec")]
    pub http_connect_timeout: Duration,

    /// The interval (in seconds) of the Remote Settings cron job. This should
    /// be set smaller than `RemoteSettingsConfig::resync_interval`.
    ///
    /// Configuration key: `cron_interval_sec`.
    #[serde_as(as = "DurationSeconds")]
    #[serde(rename = "cron_interval_sec")]
    pub cron_interval: Duration,

    /// Only used for integration tests.
    /// This field populates the mock returned remote settings collection
    /// from inside the `merino_test_macro!(|settings| {...})` call.
    // Note: `#[cfg(test)]` will cause a compile error as the
    // `merino_test` proc_macro will not be able to find this
    // field.
    pub test_changes: Option<Vec<String>>,
}

/// Settings to use when determining the location associated with requests.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LocationSettings {
    /// The location of the maxmind database to use to determine IP location. If
    /// not specified, location information will not be calculated.
    pub maxmind_database: Option<PathBuf>,
}

/// Settings for where metrics are sent and how many may be queued.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetricsSettings {
    /// The host/IP address to send metrics to, such as "127.0.0.1" or "metrics.local".
    pub sink_host: String,

    /// The port to send metrics to.
    pub sink_port: u16,

    /// Maximum size in kilobytes that the metrics queue can grow to before
    /// locale metrics start to be dropped.
    pub max_queue_size_kb: usize,
}

/// Settings for the error and event reporting system Sentry.
///
/// Uses an enum to maintain invariants. In yaml or environment variable
/// configs, set using one of these patterns:
///
/// * mode=release, dsn=https://..., env=stage
/// * mode=server_debug, dsn=https://..., who=your_name
/// * mode=local_debug
/// * mode=disabled
///
/// In local_debug mode, events will be logged, but the DSN setting will be
/// ignored.  It will be set to a testing value as recommended by Sentry's docs.
///
/// In server_debug mode, you should configure the DSN to point to a testing
/// project on a real Sentry instance.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum SentrySettings {
    /// `mode=release`: uses the configured `dsn`; `env` is the name reported
    /// by [`SentrySettings::env`]. Not a debug mode.
    Release { dsn: Dsn, env: String },
    /// `mode=server_debug`: uses the configured `dsn`; `who` is the name
    /// reported by [`SentrySettings::env`]. A debug mode.
    ServerDebug { dsn: Dsn, who: String },
    /// `mode=local_debug`: no DSN is configured; a fixed testing DSN is used
    /// instead. A debug mode.
    LocalDebug,
    /// `mode=disabled`: there is no DSN.
    Disabled,
}

impl SentrySettings {
    /// Return the DSN that Sentry should be initialized with, if any.
    ///
    /// `Release` and `ServerDebug` yield a copy of their configured DSN,
    /// `LocalDebug` yields a fixed placeholder DSN, and `Disabled` yields
    /// `None`.
    ///
    /// # Panics
    /// Only if the hard-coded placeholder DSN fails to parse, which would be
    /// a bug in this function.
    pub fn dsn(&self) -> Option<Dsn> {
        let dsn = match self {
            Self::Disabled => return None,
            Self::LocalDebug => {
                Dsn::from_str("https://public@example.com/1").expect("Bug: debug DSN is not valid")
            }
            Self::Release { dsn, .. } | Self::ServerDebug { dsn, .. } => dsn.clone(),
        };
        Some(dsn)
    }

    /// Report whether this is one of the debug modes (`ServerDebug` or
    /// `LocalDebug`).
    pub fn debug(&self) -> bool {
        // Listed exhaustively so that adding a variant forces a decision here.
        match self {
            Self::ServerDebug { .. } | Self::LocalDebug => true,
            Self::Release { .. } | Self::Disabled => false,
        }
    }

    /// Return the environment name for this mode: the configured `env` for
    /// `Release`, the configured `who` for `ServerDebug`, and the fixed
    /// strings `"debug"` and `"disabled"` for the remaining modes.
    pub fn env(&self) -> &str {
        match self {
            Self::Disabled => "disabled",
            Self::LocalDebug => "debug",
            Self::ServerDebug { who, .. } => who.as_str(),
            Self::Release { env, .. } => env.as_str(),
        }
    }
}

/// Top-level settings for suggestion providers.
///
/// This configuration controls how & where Merino loads the suggestion
/// providers. The variant is selected by the `type` key in configuration
/// (`local` or `remote`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ProviderSettings {
    /// A local source, located in `path`. This is used for non-production
    /// environments.
    Local { path: String },

    /// A remote source behind an HTTP endpoint. This is used for production.
    Remote { uri: String },
}

/// Settings for connecting to Elasticsearch.
// NOTE(review): the `tag`/`rename_all` attributes below match the ones on the
// enums in this file; confirm they are intentional on a single-field struct
// before changing them, since they may affect the serialized form.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub struct ElasticsearchSettings {
    /// How to reach Elasticsearch, if at all.
    pub connection: ElasticsearchConnection,
}

/// The way Merino connects to Elasticsearch.
///
/// The variant is selected by the `type` key in configuration (`single`,
/// `cloud`, or `none`).
#[serde_as]
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ElasticsearchConnection {
    /// Connect using a single URL (presumably one node or endpoint — confirm
    /// against the code that consumes this setting).
    Single {
        /// The URL to connect to, parsed from its string form.
        #[serde_as(as = "DisplayFromStr")]
        url: Uri,
    },
    /// Connect using a cloud identifier (presumably an Elastic Cloud ID —
    /// confirm against the code that consumes this setting).
    Cloud {
        /// The cloud identifier, kept as an uninterpreted string here.
        cloud_id: String,
    },
    /// No Elasticsearch connection is configured.
    None,
}

impl Settings {
    /// Build the settings from configuration files and environment variables.
    ///
    /// Sources are layered in this order, later ones overriding earlier ones:
    /// `./config/base` (required), `config/<env>` (optional), `config/local`
    /// (optional), then `MERINO`-prefixed environment variables using `__` as
    /// the level separator. `<env>` comes from `MERINO__ENV` and falls back to
    /// `"development"`; it is also forced into the `env` setting.
    ///
    /// After deserialization the suggestion providers are loaded according to
    /// `provider_settings` and stored in `suggestion_providers`.
    ///
    /// # Errors
    /// If any of the configured values are invalid, if any of the required
    /// configuration files are missing, or if the provider settings cannot be
    /// loaded.
    pub async fn load() -> Result<Self> {
        let merino_env = match std::env::var("MERINO__ENV") {
            Ok(name) => name,
            Err(_) => "development".to_string(),
        };
        let env_config_name = format!("config/{}", merino_env);

        let builder = Config::builder()
            // The checked-in defaults; this is the only required file.
            .add_source(File::with_name("./config/base"))
            // Overrides for the selected environment, when such a file exists.
            .add_source(File::with_name(&env_config_name).required(false))
            // Developer-local overrides; the file is `.gitignore`ed.
            .add_source(File::with_name("config/local").required(false))
            // `MERINO__HTTP__LISTEN` maps to `Settings::http::listen`, and so on.
            .add_source(Environment::with_prefix("MERINO").separator("__"))
            .set_override("env", merino_env.as_str())
            .context("loading merino environment name")?;
        let config = builder.build().context("loading merino settings")?;

        // `serde_path_to_error` is used so a failure names the offending key.
        let mut settings: Settings =
            serde_path_to_error::deserialize(config).context("Deserializing settings")?;

        let loaded_providers = SuggestionProviderSettings::load(&settings.provider_settings)
            .await
            .context("loading provider settings")?;
        settings.suggestion_providers = loaded_providers.0;

        Ok(settings)
    }

    /// Build the settings used by tests.
    ///
    /// Reads `../config/base` and `../config/test` (both required) and
    /// `../config/local_test` (optional), forces `env` to `"test"`, and does
    /// not consult environment variables. The suggestion providers come from
    /// `SuggestionProviderSettings::load_for_tests`.
    ///
    /// # Panics
    /// If the configuration cannot be built or deserialized.
    pub fn load_for_tests() -> Self {
        let builder = Config::builder()
            // The checked-in defaults.
            .add_source(File::with_name("../config/base"))
            // The test environment's overrides.
            .add_source(File::with_name("../config/test"))
            // Optional developer-local overrides for tests.
            .add_source(File::with_name("../config/local_test").required(false))
            .set_override("env", "test")
            .expect("Could not set env for tests");
        let config = builder
            .build()
            .expect("Could not load settings for tests");

        let mut settings: Settings = config
            .try_deserialize()
            .expect("Could not convert settings");

        let test_providers = SuggestionProviderSettings::load_for_tests();
        settings.suggestion_providers = test_providers.0;

        settings
    }
}

#[cfg(test)]
mod tests {
    use crate::Settings;
    use config::{builder::DefaultState, Config, ConfigBuilder, File};
    use parameterized::parameterized;

    /// Create a config builder with each of `files` added, in order, as a
    /// required source.
    fn load_config_files(files: &[&str]) -> ConfigBuilder<DefaultState> {
        files.iter().fold(Config::builder(), |builder, name| {
            builder.add_source(File::with_name(name))
        })
    }

    /// Each checked-in environment config, layered over the base config, must
    /// deserialize into `Settings`.
    #[parameterized(config_name = {"ci", "development", "production", "test"})]
    fn config_loads(config_name: &str) {
        let env_file = format!("../config/{}", config_name);
        let from_files = load_config_files(&["../config/base", &env_file]);

        // `env` is a required field that should never be set in the provided files.
        let with_env = from_files
            .set_override("env", config_name)
            .expect("Could not set value");

        // Special case: prod needs sentry manually configured.
        let builder = if config_name == "production" {
            with_env
                .set_override("sentry.dsn", "https://public@example.com/1")
                .expect("Could not set value")
                .set_override("sentry.env", "test")
                .expect("Could not set value")
        } else {
            with_env
        };

        let config = builder.build().expect("Could not build Config");
        let settings = config.try_deserialize::<Settings>();

        // Print the failure before asserting so the cause shows up in test output.
        if let Err(err) = &settings {
            println!("Problem while testing {} config: {}", config_name, err);
        }
        assert!(settings.is_ok());
    }
}