diff --git a/main.go b/main.go
index dacb0ab..bff90cc 100644
--- a/main.go
+++ b/main.go
@@ -52,6 +52,10 @@ func main() {
 		controller.GetMoon(res, req, &cache.MoonCache, &vars)
 	})
 
+	http.HandleFunc("/stats/", func(res http.ResponseWriter, req *http.Request) {
+		controller.GetStatistics(res, req, statDB)
+	})
+
 	listenAddr := fmt.Sprintf(":%s", port)
 	log.Printf("Server listening on %s", listenAddr)
 	http.ListenAndServe(listenAddr, nil)
diff --git a/model/statisticsModel.go b/model/statisticsModel.go
index f33387c..466ee67 100644
--- a/model/statisticsModel.go
+++ b/model/statisticsModel.go
@@ -2,7 +2,10 @@ package model
 
 import (
 	"errors"
+	"slices"
+	"strconv"
 
+	"github.com/ceticamarco/zephyr/statistics"
 	"github.com/ceticamarco/zephyr/types"
 )
 
@@ -11,7 +14,52 @@ func GetStatistics(cityName string, statDB *types.StatDB) (types.StatResult, err
 	if statDB.IsKeyInvalid(cityName) {
 		return types.StatResult{}, errors.New("Insufficient or outdated data to perform statistical analysis")
 	}
 
-	// TODO: we have enough data, do the math!
-	return types.StatResult{}, nil
+	extractTemps := func(weatherArr []types.Weather) ([]float64, error) {
+		temps := make([]float64, 0, len(weatherArr))
+
+		for _, weather := range weatherArr {
+			temperature, err := strconv.ParseFloat(weather.Temperature, 64)
+			if err != nil {
+				return nil, err
+			}
+			temps = append(temps, temperature)
+		}
+
+		return temps, nil
+	}
+
+	// Extract records from the database
+	stats := statDB.GetCityStatistics(cityName)
+
+	// Extract temperatures from weather statistics
+	temps, err := extractTemps(stats)
+	if err != nil {
+		return types.StatResult{}, err
+	}
+
+	// slices.Min and slices.Max panic on an empty slice: stop here if no
+	// record was found, whatever IsKeyInvalid reported.
+	if len(temps) == 0 {
+		return types.StatResult{}, errors.New("Insufficient or outdated data to perform statistical analysis")
+	}
+
+	// Detect anomalies. An empty result is reset to nil so that it is encoded
+	// as JSON null rather than as an empty array.
+	anomalies := statistics.DetectAnomalies(stats)
+	if len(anomalies) == 0 {
+		anomalies = nil
+	}
+
+	// Compute statistics
+	return types.StatResult{
+		Min:     slices.Min(temps),
+		Max:     slices.Max(temps),
+		Count:   len(stats),
+		Mean:    statistics.Mean(temps),
+		StdDev:  statistics.StdDev(temps),
+		Median:  statistics.Median(temps),
+		Mode:    statistics.Mode(temps),
+		Anomaly: &anomalies,
+	}, nil
 }
diff --git a/statistics/primitives.go b/statistics/primitives.go
new file mode 100644
index 0000000..5c2209b
--- /dev/null
+++ b/statistics/primitives.go
@@ -0,0 +1,172 @@
+// Package statistics implements descriptive statistics and anomaly detection
+// over series of temperatures.
+package statistics
+
+import (
+	"math"
+	"slices"
+	"strconv"
+
+	"github.com/ceticamarco/zephyr/types"
+)
+
+// Mean returns the arithmetic mean of temperatures, or 0 for an empty slice.
+func Mean(temperatures []float64) float64 {
+	if len(temperatures) == 0 {
+		return 0
+	}
+
+	var sum float64
+	for _, val := range temperatures {
+		sum += val
+	}
+
+	return sum / float64(len(temperatures))
+}
+
+// StdDev returns the population standard deviation of temperatures (the squared
+// deviations are averaged over N, not N-1), or 0 for an empty slice.
+func StdDev(temperatures []float64) float64 {
+	if len(temperatures) == 0 {
+		return 0
+	}
+
+	mean := Mean(temperatures)
+
+	var variance float64
+	for _, val := range temperatures {
+		variance += math.Pow((val - mean), 2)
+	}
+	variance /= float64(len(temperatures))
+
+	return math.Sqrt(variance)
+}
+
+// Median returns the median of temperatures, or 0 for an empty slice.
+//
+// The input is left untouched: the values are sorted on a private copy, so
+// callers that address values by index (as RobustZScore does) keep their order.
+func Median(temperatures []float64) float64 {
+	if len(temperatures) == 0 {
+		return 0
+	}
+
+	sorted := slices.Clone(temperatures)
+	slices.Sort(sorted)
+	mid := len(sorted) / 2
+
+	if len(sorted)%2 == 0 {
+		return (sorted[mid-1] + sorted[mid]) / 2
+	}
+	return sorted[mid]
+}
+
+// Mode returns the most frequent value of temperatures, or 0 for an empty slice.
+// On a multi-modal dataset it always returns the largest mode.
+func Mode(temperatures []float64) float64 {
+	if len(temperatures) == 0 {
+		return 0
+	}
+
+	// Counting does not need sorted input, so the caller's slice is not reordered.
+	frequencies := make(map[float64]int)
+	for _, val := range temperatures {
+		frequencies[val]++
+	}
+
+	var mode float64
+	var maxFreq int
+	for val, freq := range frequencies {
+		// Ties go to the larger value, which keeps the result independent of
+		// the unspecified map iteration order.
+		if freq > maxFreq || (freq == maxFreq && val > mode) {
+			mode = val
+			maxFreq = freq
+		}
+	}
+
+	return mode
+}
+
+// RobustZScore detects statistical anomalies using the Robust Z-Score algorithm.
+//
+// This method is based on the median and the Median Absolute Deviation (MAD),
+// making it more robust to anomalies than the standard z-score, which uses the
+// arithmetic mean and the standard deviation.
+//
+// A value is considered an anomaly if its modified z-score exceeds a fixed
+// threshold (4.5) and its absolute deviation from the median is at least another
+// fixed parameter (8 degrees). These constants have been fine-tuned to work well
+// with the weather data of a wide range of climates and to ignore daily
+// temperature fluctuations while still detecting anomalies.
+//
+// The scaling constant Φ⁻¹(0.75) ≈ 0.6745 adjusts the MAD to be comparable to the
+// standard deviation under the assumption of normal distribution (i.e. half of
+// the values lie within ~0.6745 standard deviations of the median).
+//
+// Daily temperatures collected over a short time window (1/2 month) *should* be
+// normally distributed. This algorithm only works under this assumption.
+//
+// Each result holds the index of an anomalous value in temperatures and the
+// value itself; the result is nil when the MAD is (almost) zero.
+func RobustZScore(temperatures []float64) []struct {
+	Idx   int
+	Value float64
+} {
+	const threshold = 4.5    // threshold for MAD ZScore algorithms
+	const scale = 0.6745     // Φ⁻¹(3/4) ≈ 0.6745
+	const minDeviation = 8.0 // outliers must deviate at least 8°C from the median
+	const epsilon = 1e-10    // a MAD below this is treated as zero
+
+	// Idx must refer to positions in temperatures exactly as the caller passed
+	// it, so the slice must never be reordered here: Median only sees a copy.
+	med := Median(slices.Clone(temperatures))
+	absDevs := make([]float64, len(temperatures))
+	for idx, val := range temperatures {
+		absDevs[idx] = math.Abs(val - med)
+	}
+
+	madAbsDev := Median(absDevs) // absDevs is scratch space; reordering it is harmless
+	if madAbsDev < epsilon {
+		return nil
+	}
+
+	var anomalies []struct {
+		Idx   int
+		Value float64
+	}
+	for idx, val := range temperatures {
+		z := scale * (val - med) / madAbsDev
+		if math.Abs(z) > threshold && math.Abs(val-med) >= minDeviation {
+			anomalies = append(anomalies, struct {
+				Idx   int
+				Value float64
+			}{Idx: idx, Value: val})
+		}
+	}
+	return anomalies
+}
+
+// DetectAnomalies reports the records of weatherArr that RobustZScore flags.
+// Records with an unparsable temperature are skipped, not analysed as a 0.
+func DetectAnomalies(weatherArr []types.Weather) []types.WeatherAnomaly {
+	temps := make([]float64, 0, len(weatherArr))
+	origin := make([]int, 0, len(weatherArr)) // origin[i] is the weatherArr index of temps[i]
+	for idx, weather := range weatherArr {
+		if temp, err := strconv.ParseFloat(weather.Temperature, 64); err == nil {
+			temps = append(temps, temp)
+			origin = append(origin, idx)
+		}
+	}
+
+	// Apply the Robust/MAD Z-Score anomaly detection algorithm
+	anomalies := RobustZScore(temps)
+	result := make([]types.WeatherAnomaly, 0, len(anomalies))
+	for _, anomaly := range anomalies {
+		result = append(result, types.WeatherAnomaly{
+			Date: weatherArr[origin[anomaly.Idx]].Date,
+			Temp: anomaly.Value,
+		})
+	}
+	return result
+}
diff --git a/types/statDB.go b/types/statDB.go
index 9342cf1..29d843c 100644
--- a/types/statDB.go
+++ b/types/statDB.go
@@ -50,3 +50,22 @@ func (statDB *StatDB) IsKeyInvalid(key string) bool {
 
 	return true
 }
+
+// GetCityStatistics returns every record of the database whose key ends with
+// cityName; the result is empty, never nil, when nothing matches.
+//
+// NOTE(review): db is ranged over with string keys, so it is presumably a map,
+// and Go leaves map iteration order unspecified: do not rely on the order of
+// the returned records. The key layout is not visible here either; if one city
+// name can be the suffix of another, both match. Confirm against the writer.
+func (statDB *StatDB) GetCityStatistics(cityName string) []Weather {
+	result := make([]Weather, 0)
+
+	for key, record := range statDB.db {
+		if strings.HasSuffix(key, cityName) {
+			result = append(result, record)
+		}
+	}
+
+	return result
+}
diff --git a/types/statistics.go b/types/statistics.go
index 1ceb974..b0a4adf 100644
--- a/types/statistics.go
+++ b/types/statistics.go
@@ -10,12 +10,12 @@ type WeatherAnomaly struct {
 // The StatResult data type, representing weather statistics
 // of past meteorological events
 type StatResult struct {
-	Min     float64        `json:"min"`
-	Max     float64        `json:"max"`
-	Count   int            `json:"count"`
-	Mean    float64        `json:"mean"`
-	StdDev  float64        `json:"stdDev"`
-	Median  float64        `json:"median"`
-	Mode    float64        `json:"mode"`
-	Anomaly WeatherAnomaly `json:"anomaly"`
+	Min     float64           `json:"min"`     // lowest temperature of the analysed records
+	Max     float64           `json:"max"`     // highest temperature of the analysed records
+	Count   int               `json:"count"`   // number of analysed records
+	Mean    float64           `json:"mean"`    // arithmetic mean of the temperatures
+	StdDev  float64           `json:"stdDev"`  // population standard deviation of the temperatures
+	Median  float64           `json:"median"`  // median of the temperatures
+	Mode    float64           `json:"mode"`    // most frequent temperature (the largest one on ties)
+	Anomaly *[]WeatherAnomaly `json:"anomaly"` // detected anomalies; a nil slice behind the pointer encodes as JSON null
 }