Added stat methods and anomaly detection algorithm
This commit is contained in:
4
main.go
4
main.go
@@ -52,6 +52,10 @@ func main() {
|
|||||||
controller.GetMoon(res, req, &cache.MoonCache, &vars)
|
controller.GetMoon(res, req, &cache.MoonCache, &vars)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
http.HandleFunc("/stats/", func(res http.ResponseWriter, req *http.Request) {
|
||||||
|
controller.GetStatistics(res, req, statDB)
|
||||||
|
})
|
||||||
|
|
||||||
listenAddr := fmt.Sprintf(":%s", port)
|
listenAddr := fmt.Sprintf(":%s", port)
|
||||||
log.Printf("Server listening on %s", listenAddr)
|
log.Printf("Server listening on %s", listenAddr)
|
||||||
http.ListenAndServe(listenAddr, nil)
|
http.ListenAndServe(listenAddr, nil)
|
||||||
|
|||||||
@@ -2,7 +2,10 @@ package model
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
|
"slices"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/ceticamarco/zephyr/statistics"
|
||||||
"github.com/ceticamarco/zephyr/types"
|
"github.com/ceticamarco/zephyr/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -11,7 +14,45 @@ func GetStatistics(cityName string, statDB *types.StatDB) (types.StatResult, err
|
|||||||
if statDB.IsKeyInvalid(cityName) {
|
if statDB.IsKeyInvalid(cityName) {
|
||||||
return types.StatResult{}, errors.New("Insufficient or outdated data to perform statistical analysis")
|
return types.StatResult{}, errors.New("Insufficient or outdated data to perform statistical analysis")
|
||||||
}
|
}
|
||||||
// TODO: we have enough data, do the math!
|
|
||||||
|
|
||||||
return types.StatResult{}, nil
|
extractTemps := func(weatherArr []types.Weather) ([]float64, error) {
|
||||||
|
temps := make([]float64, 0, len(weatherArr))
|
||||||
|
|
||||||
|
for _, weather := range weatherArr {
|
||||||
|
temperature, err := strconv.ParseFloat(weather.Temperature, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
temps = append(temps, temperature)
|
||||||
|
}
|
||||||
|
|
||||||
|
return temps, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract records from the database
|
||||||
|
stats := statDB.GetCityStatistics(cityName)
|
||||||
|
|
||||||
|
// Extract temperatures from weather statistics
|
||||||
|
temps, err := extractTemps(stats)
|
||||||
|
if err != nil {
|
||||||
|
return types.StatResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect anomalies
|
||||||
|
anomalies := statistics.DetectAnomalies(stats)
|
||||||
|
if len(anomalies) == 0 {
|
||||||
|
anomalies = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute statistics
|
||||||
|
return types.StatResult{
|
||||||
|
Min: slices.Min(temps),
|
||||||
|
Max: slices.Max(temps),
|
||||||
|
Count: len(stats),
|
||||||
|
Mean: statistics.Mean(temps),
|
||||||
|
StdDev: statistics.StdDev(temps),
|
||||||
|
Median: statistics.Median(temps),
|
||||||
|
Mode: statistics.Mode(temps),
|
||||||
|
Anomaly: &anomalies,
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
163
statistics/primitives.go
Normal file
163
statistics/primitives.go
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
package statistics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"slices"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/ceticamarco/zephyr/types"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Mean returns the arithmetic mean of temperatures.
//
// An empty (or nil) slice yields 0 instead of NaN, so callers never have to
// guard against a division by zero.
func Mean(temperatures []float64) float64 {
	count := len(temperatures)
	if count == 0 {
		return 0
	}

	// Accumulate left to right so the floating-point result does not depend
	// on anything but the input order.
	total := 0.0
	for i := 0; i < count; i++ {
		total += temperatures[i]
	}

	return total / float64(count)
}
|
||||||
|
|
||||||
|
func StdDev(temperatures []float64) float64 {
|
||||||
|
if len(temperatures) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
mean := Mean(temperatures)
|
||||||
|
|
||||||
|
var variance float64
|
||||||
|
|
||||||
|
for _, val := range temperatures {
|
||||||
|
variance += math.Pow((val - mean), 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
variance /= float64(len(temperatures))
|
||||||
|
|
||||||
|
return math.Sqrt(variance)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Median returns the median of temperatures: the middle value of the sorted
// data for an odd number of samples, or the average of the two middle values
// for an even number.
//
// An empty (or nil) slice yields 0. The input slice is never modified.
func Median(temperatures []float64) float64 {
	if len(temperatures) == 0 {
		return 0
	}

	// Sort a private copy. Sorting the caller's slice in place would silently
	// reorder it, which breaks any caller that relies on element positions
	// afterwards (e.g. an index-based mapping back to the source records).
	sorted := slices.Clone(temperatures)
	slices.Sort(sorted)

	mid := len(sorted) / 2
	if len(sorted)%2 == 0 {
		return (sorted[mid-1] + sorted[mid]) / 2
	}

	return sorted[mid]
}
|
||||||
|
|
||||||
|
// Mode returns the most frequent value in temperatures.
//
// On a multi-modal dataset (several values sharing the highest frequency) the
// largest of those values is returned, which keeps the result deterministic
// despite Go's randomized map iteration order.
//
// An empty (or nil) slice yields 0. The input slice is never modified.
func Mode(temperatures []float64) float64 {
	if len(temperatures) == 0 {
		return 0
	}

	// Count occurrences of each distinct value. No sorting is needed: the
	// tie-break below already selects the largest mode.
	frequencies := make(map[float64]int, len(temperatures))
	for _, val := range temperatures {
		frequencies[val]++
	}

	var (
		mode    float64
		maxFreq int
	)

	for val, freq := range frequencies {
		// Every stored frequency is >= 1, so the first entry visited always
		// replaces the zero-valued seed; afterwards ties go to the larger value.
		if freq > maxFreq || (freq == maxFreq && val > mode) {
			mode, maxFreq = val, freq
		}
	}

	return mode
}
|
||||||
|
|
||||||
|
// Detects statistical anomalies using the Robust Z-Score algorithm
|
||||||
|
//
|
||||||
|
// This method is based on the median and the Median Absolute Deviation(MAD),
|
||||||
|
// making it more robust to anomalies than the standard z-score which uses the arithmetical mean
|
||||||
|
// and standard deviation
|
||||||
|
//
|
||||||
|
// A value is considered an anomaly if its modified z-score exceeds a fixed threshold(4.5)
|
||||||
|
// and whether the absolute deviation surpasses another fixed parameter(8 degrees).
|
||||||
|
// These constants have been fine-tuned to work well with the weather data of a wide range of climates
|
||||||
|
// and to ignore daily temperature fluctuations while still detecting anomalies.
|
||||||
|
//
|
||||||
|
// The scaling constant Φ⁻¹(0.75) ≈ 0.6745 adjusts the MAD to be comparable to the standard deviation
|
||||||
|
// under the assumption of normal distribution (i.e. 75% of values lie within ~0.6745 standard deviations
|
||||||
|
// of the median).
|
||||||
|
//
|
||||||
|
// Daily temperatures collected over a short time window(1/2 month) *should* be normally distributed.
|
||||||
|
// This algorithm only work under this assumption.
|
||||||
|
func RobustZScore(temperatures []float64) []struct {
|
||||||
|
Idx int
|
||||||
|
Value float64
|
||||||
|
} {
|
||||||
|
const threshold = 4.5 // threshold for MAD ZScore algorithms
|
||||||
|
const scale = 0.6745 // Φ⁻¹(3/4) ≈ 0.6745
|
||||||
|
const minDeviation = 8.0 // outliers must deviate at least 8°C from the median
|
||||||
|
const epsilon = 1e-10
|
||||||
|
|
||||||
|
med := Median(temperatures)
|
||||||
|
absDevs := make([]float64, len(temperatures))
|
||||||
|
for idx, val := range temperatures {
|
||||||
|
absDevs[idx] = math.Abs(val - med)
|
||||||
|
}
|
||||||
|
|
||||||
|
madAbsDev := Median(absDevs)
|
||||||
|
if madAbsDev < epsilon {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var anomalies []struct {
|
||||||
|
Idx int
|
||||||
|
Value float64
|
||||||
|
}
|
||||||
|
for idx, val := range temperatures {
|
||||||
|
z := scale * (val - med) / madAbsDev
|
||||||
|
|
||||||
|
if math.Abs(z) > threshold && math.Abs(val-med) >= minDeviation {
|
||||||
|
anomalies = append(anomalies, struct {
|
||||||
|
Idx int
|
||||||
|
Value float64
|
||||||
|
}{
|
||||||
|
Idx: idx,
|
||||||
|
Value: val,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return anomalies
|
||||||
|
}
|
||||||
|
|
||||||
|
func DetectAnomalies(weatherArr []types.Weather) []types.WeatherAnomaly {
|
||||||
|
temps := make([]float64, len(weatherArr))
|
||||||
|
|
||||||
|
for idx, weather := range weatherArr {
|
||||||
|
temp, _ := strconv.ParseFloat(weather.Temperature, 64)
|
||||||
|
temps[idx] = temp
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply the Robust/MAD Z-Score anomaly detection algorithm
|
||||||
|
anomalies := RobustZScore(temps)
|
||||||
|
result := make([]types.WeatherAnomaly, 0, len(anomalies))
|
||||||
|
for _, anomaly := range anomalies {
|
||||||
|
result = append(result, types.WeatherAnomaly{
|
||||||
|
Date: weatherArr[anomaly.Idx].Date,
|
||||||
|
Temp: anomaly.Value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
@@ -50,3 +50,15 @@ func (statDB *StatDB) IsKeyInvalid(key string) bool {
|
|||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (statDB *StatDB) GetCityStatistics(cityName string) []Weather {
|
||||||
|
result := make([]Weather, 0)
|
||||||
|
|
||||||
|
for key, record := range statDB.db {
|
||||||
|
if strings.HasSuffix(key, cityName) {
|
||||||
|
result = append(result, record)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,5 +17,5 @@ type StatResult struct {
|
|||||||
StdDev float64 `json:"stdDev"`
|
StdDev float64 `json:"stdDev"`
|
||||||
Median float64 `json:"median"`
|
Median float64 `json:"median"`
|
||||||
Mode float64 `json:"mode"`
|
Mode float64 `json:"mode"`
|
||||||
Anomaly WeatherAnomaly `json:"anomaly"`
|
Anomaly *[]WeatherAnomaly `json:"anomaly"`
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user