Added stat methods and anomaly detection algorithm
This commit is contained in:
163
statistics/primitives.go
Normal file
163
statistics/primitives.go
Normal file
@@ -0,0 +1,163 @@
|
||||
package statistics
|
||||
|
||||
import (
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
|
||||
"github.com/ceticamarco/zephyr/types"
|
||||
)
|
||||
|
||||
func Mean(temperatures []float64) float64 {
|
||||
if len(temperatures) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var sum float64
|
||||
|
||||
for _, val := range temperatures {
|
||||
sum += val
|
||||
}
|
||||
|
||||
return sum / float64(len(temperatures))
|
||||
}
|
||||
|
||||
func StdDev(temperatures []float64) float64 {
|
||||
if len(temperatures) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
mean := Mean(temperatures)
|
||||
|
||||
var variance float64
|
||||
|
||||
for _, val := range temperatures {
|
||||
variance += math.Pow((val - mean), 2)
|
||||
}
|
||||
|
||||
variance /= float64(len(temperatures))
|
||||
|
||||
return math.Sqrt(variance)
|
||||
}
|
||||
|
||||
func Median(temperatures []float64) float64 {
|
||||
if len(temperatures) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
slices.Sort(temperatures)
|
||||
length := len(temperatures)
|
||||
midValue := length / 2
|
||||
|
||||
if length%2 == 0 {
|
||||
return (temperatures[midValue-1] + temperatures[midValue]) / 2
|
||||
} else {
|
||||
return temperatures[midValue]
|
||||
}
|
||||
}
|
||||
|
||||
// This method will always returns the largest mode
|
||||
// on a multi-modal dataset
|
||||
func Mode(temperatures []float64) float64 {
|
||||
if len(temperatures) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
slices.Sort(temperatures)
|
||||
|
||||
frequencies := make(map[float64]int)
|
||||
for _, val := range temperatures {
|
||||
frequencies[val]++
|
||||
}
|
||||
|
||||
var mode float64 = 0
|
||||
var maxFreq int = 0
|
||||
|
||||
for val, freq := range frequencies {
|
||||
if freq > maxFreq || (freq == maxFreq && val > mode) {
|
||||
mode = val
|
||||
maxFreq = freq
|
||||
}
|
||||
}
|
||||
|
||||
return mode
|
||||
}
|
||||
|
||||
// Detects statistical anomalies using the Robust Z-Score algorithm
|
||||
//
|
||||
// This method is based on the median and the Median Absolute Deviation(MAD),
|
||||
// making it more robust to anomalies than the standard z-score which uses the arithmetical mean
|
||||
// and standard deviation
|
||||
//
|
||||
// A value is considered an anomaly if its modified z-score exceeds a fixed threshold(4.5)
|
||||
// and whether the absolute deviation surpasses another fixed parameter(8 degrees).
|
||||
// These constants have been fine-tuned to work well with the weather data of a wide range of climates
|
||||
// and to ignore daily temperature fluctuations while still detecting anomalies.
|
||||
//
|
||||
// The scaling constant Φ⁻¹(0.75) ≈ 0.6745 adjusts the MAD to be comparable to the standard deviation
|
||||
// under the assumption of normal distribution (i.e. 75% of values lie within ~0.6745 standard deviations
|
||||
// of the median).
|
||||
//
|
||||
// Daily temperatures collected over a short time window(1/2 month) *should* be normally distributed.
|
||||
// This algorithm only work under this assumption.
|
||||
func RobustZScore(temperatures []float64) []struct {
|
||||
Idx int
|
||||
Value float64
|
||||
} {
|
||||
const threshold = 4.5 // threshold for MAD ZScore algorithms
|
||||
const scale = 0.6745 // Φ⁻¹(3/4) ≈ 0.6745
|
||||
const minDeviation = 8.0 // outliers must deviate at least 8°C from the median
|
||||
const epsilon = 1e-10
|
||||
|
||||
med := Median(temperatures)
|
||||
absDevs := make([]float64, len(temperatures))
|
||||
for idx, val := range temperatures {
|
||||
absDevs[idx] = math.Abs(val - med)
|
||||
}
|
||||
|
||||
madAbsDev := Median(absDevs)
|
||||
if madAbsDev < epsilon {
|
||||
return nil
|
||||
}
|
||||
|
||||
var anomalies []struct {
|
||||
Idx int
|
||||
Value float64
|
||||
}
|
||||
for idx, val := range temperatures {
|
||||
z := scale * (val - med) / madAbsDev
|
||||
|
||||
if math.Abs(z) > threshold && math.Abs(val-med) >= minDeviation {
|
||||
anomalies = append(anomalies, struct {
|
||||
Idx int
|
||||
Value float64
|
||||
}{
|
||||
Idx: idx,
|
||||
Value: val,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return anomalies
|
||||
}
|
||||
|
||||
func DetectAnomalies(weatherArr []types.Weather) []types.WeatherAnomaly {
|
||||
temps := make([]float64, len(weatherArr))
|
||||
|
||||
for idx, weather := range weatherArr {
|
||||
temp, _ := strconv.ParseFloat(weather.Temperature, 64)
|
||||
temps[idx] = temp
|
||||
}
|
||||
|
||||
// Apply the Robust/MAD Z-Score anomaly detection algorithm
|
||||
anomalies := RobustZScore(temps)
|
||||
result := make([]types.WeatherAnomaly, 0, len(anomalies))
|
||||
for _, anomaly := range anomalies {
|
||||
result = append(result, types.WeatherAnomaly{
|
||||
Date: weatherArr[anomaly.Idx].Date,
|
||||
Temp: anomaly.Value,
|
||||
})
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
Reference in New Issue
Block a user