Added stat methods and anomaly detection algorithm

This commit is contained in:
2025-06-19 12:09:23 +02:00
parent 2db2c57ce2
commit ca4b75af7a
5 changed files with 230 additions and 10 deletions

View File

@@ -52,6 +52,10 @@ func main() {
controller.GetMoon(res, req, &cache.MoonCache, &vars) controller.GetMoon(res, req, &cache.MoonCache, &vars)
}) })
http.HandleFunc("/stats/", func(res http.ResponseWriter, req *http.Request) {
controller.GetStatistics(res, req, statDB)
})
listenAddr := fmt.Sprintf(":%s", port) listenAddr := fmt.Sprintf(":%s", port)
log.Printf("Server listening on %s", listenAddr) log.Printf("Server listening on %s", listenAddr)
http.ListenAndServe(listenAddr, nil) http.ListenAndServe(listenAddr, nil)

View File

@@ -2,7 +2,10 @@ package model
import ( import (
"errors" "errors"
"slices"
"strconv"
"github.com/ceticamarco/zephyr/statistics"
"github.com/ceticamarco/zephyr/types" "github.com/ceticamarco/zephyr/types"
) )
@@ -11,7 +14,45 @@ func GetStatistics(cityName string, statDB *types.StatDB) (types.StatResult, err
if statDB.IsKeyInvalid(cityName) { if statDB.IsKeyInvalid(cityName) {
return types.StatResult{}, errors.New("Insufficient or outdated data to perform statistical analysis") return types.StatResult{}, errors.New("Insufficient or outdated data to perform statistical analysis")
} }
// TODO: we have enough data, do the math!
return types.StatResult{}, nil extractTemps := func(weatherArr []types.Weather) ([]float64, error) {
temps := make([]float64, 0, len(weatherArr))
for _, weather := range weatherArr {
temperature, err := strconv.ParseFloat(weather.Temperature, 64)
if err != nil {
return nil, err
}
temps = append(temps, temperature)
}
return temps, nil
}
// Extract records from the database
stats := statDB.GetCityStatistics(cityName)
// Extract temperatures from weather statistics
temps, err := extractTemps(stats)
if err != nil {
return types.StatResult{}, err
}
// Detect anomalies
anomalies := statistics.DetectAnomalies(stats)
if len(anomalies) == 0 {
anomalies = nil
}
// Compute statistics
return types.StatResult{
Min: slices.Min(temps),
Max: slices.Max(temps),
Count: len(stats),
Mean: statistics.Mean(temps),
StdDev: statistics.StdDev(temps),
Median: statistics.Median(temps),
Mode: statistics.Mode(temps),
Anomaly: &anomalies,
}, nil
} }

163
statistics/primitives.go Normal file
View File

@@ -0,0 +1,163 @@
package statistics
import (
"math"
"slices"
"strconv"
"github.com/ceticamarco/zephyr/types"
)
func Mean(temperatures []float64) float64 {
if len(temperatures) == 0 {
return 0
}
var sum float64
for _, val := range temperatures {
sum += val
}
return sum / float64(len(temperatures))
}
func StdDev(temperatures []float64) float64 {
if len(temperatures) == 0 {
return 0
}
mean := Mean(temperatures)
var variance float64
for _, val := range temperatures {
variance += math.Pow((val - mean), 2)
}
variance /= float64(len(temperatures))
return math.Sqrt(variance)
}
func Median(temperatures []float64) float64 {
if len(temperatures) == 0 {
return 0
}
slices.Sort(temperatures)
length := len(temperatures)
midValue := length / 2
if length%2 == 0 {
return (temperatures[midValue-1] + temperatures[midValue]) / 2
} else {
return temperatures[midValue]
}
}
// This method will always returns the largest mode
// on a multi-modal dataset
func Mode(temperatures []float64) float64 {
if len(temperatures) == 0 {
return 0
}
slices.Sort(temperatures)
frequencies := make(map[float64]int)
for _, val := range temperatures {
frequencies[val]++
}
var mode float64 = 0
var maxFreq int = 0
for val, freq := range frequencies {
if freq > maxFreq || (freq == maxFreq && val > mode) {
mode = val
maxFreq = freq
}
}
return mode
}
// Detects statistical anomalies using the Robust Z-Score algorithm
//
// This method is based on the median and the Median Absolute Deviation(MAD),
// making it more robust to anomalies than the standard z-score which uses the arithmetical mean
// and standard deviation
//
// A value is considered an anomaly if its modified z-score exceeds a fixed threshold(4.5)
// and whether the absolute deviation surpasses another fixed parameter(8 degrees).
// These constants have been fine-tuned to work well with the weather data of a wide range of climates
// and to ignore daily temperature fluctuations while still detecting anomalies.
//
// The scaling constant Φ⁻¹(0.75) ≈ 0.6745 adjusts the MAD to be comparable to the standard deviation
// under the assumption of normal distribution (i.e. 75% of values lie within ~0.6745 standard deviations
// of the median).
//
// Daily temperatures collected over a short time window(1/2 month) *should* be normally distributed.
// This algorithm only work under this assumption.
func RobustZScore(temperatures []float64) []struct {
Idx int
Value float64
} {
const threshold = 4.5 // threshold for MAD ZScore algorithms
const scale = 0.6745 // Φ⁻¹(3/4) ≈ 0.6745
const minDeviation = 8.0 // outliers must deviate at least 8°C from the median
const epsilon = 1e-10
med := Median(temperatures)
absDevs := make([]float64, len(temperatures))
for idx, val := range temperatures {
absDevs[idx] = math.Abs(val - med)
}
madAbsDev := Median(absDevs)
if madAbsDev < epsilon {
return nil
}
var anomalies []struct {
Idx int
Value float64
}
for idx, val := range temperatures {
z := scale * (val - med) / madAbsDev
if math.Abs(z) > threshold && math.Abs(val-med) >= minDeviation {
anomalies = append(anomalies, struct {
Idx int
Value float64
}{
Idx: idx,
Value: val,
})
}
}
return anomalies
}
func DetectAnomalies(weatherArr []types.Weather) []types.WeatherAnomaly {
temps := make([]float64, len(weatherArr))
for idx, weather := range weatherArr {
temp, _ := strconv.ParseFloat(weather.Temperature, 64)
temps[idx] = temp
}
// Apply the Robust/MAD Z-Score anomaly detection algorithm
anomalies := RobustZScore(temps)
result := make([]types.WeatherAnomaly, 0, len(anomalies))
for _, anomaly := range anomalies {
result = append(result, types.WeatherAnomaly{
Date: weatherArr[anomaly.Idx].Date,
Temp: anomaly.Value,
})
}
return result
}

View File

@@ -50,3 +50,15 @@ func (statDB *StatDB) IsKeyInvalid(key string) bool {
return true return true
} }
func (statDB *StatDB) GetCityStatistics(cityName string) []Weather {
result := make([]Weather, 0)
for key, record := range statDB.db {
if strings.HasSuffix(key, cityName) {
result = append(result, record)
}
}
return result
}

View File

@@ -17,5 +17,5 @@ type StatResult struct {
StdDev float64 `json:"stdDev"` StdDev float64 `json:"stdDev"`
Median float64 `json:"median"` Median float64 `json:"median"`
Mode float64 `json:"mode"` Mode float64 `json:"mode"`
Anomaly WeatherAnomaly `json:"anomaly"` Anomaly *[]WeatherAnomaly `json:"anomaly"`
} }