From 722b012cf9a69275357b10f8c5f2658c35f69d47 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 10:17:08 +0100 Subject: [PATCH 01/75] setup domain specific module --- .github/workflows/ci.yml | 3 + build.mill | 4 + vecxt_re/Readme.md | 4 + vecxt_re/package.mill | 70 +++++++ vecxt_re/resources/timeline.vl.json | 171 ++++++++++++++++++ vecxt_re/src-js-native/.keep | 0 {vecxtensions => vecxt_re}/src-js/rpt.scala | 6 +- vecxt_re/src-jvm-native/.keep | 0 .../src-jvm/SplitLosses.scala | 3 +- vecxt_re/src-jvm/plots.scala | 6 + {vecxtensions => vecxt_re}/src-jvm/rpt.scala | 7 +- .../src-native/rpt.scala | 6 +- .../src/DeductibleType.scala | 2 +- {vecxtensions => vecxt_re}/src/Layer.scala | 2 +- {vecxtensions => vecxt_re}/src/LossCalc.scala | 2 +- .../src/Retention_Limit.scala | 4 +- {vecxtensions => vecxt_re}/src/Tower.scala | 7 +- .../src/groupCumSum.scala | 4 +- .../src/groupDiff.scala | 2 +- .../src/groupSums.scala | 4 +- vecxt_re/test/src-js/.keep | 0 vecxt_re/test/src-jvm/.keep | 0 vecxt_re/test/src-native/.keep | 0 .../test/src/aggByItr.test.scala | 4 +- .../test/src/cross.test.scala | 8 +- .../test/src/groupCumul.test.scala | 2 +- .../test/src/groupDiff.test.scala | 2 +- .../test/src/layer.test.scala | 5 +- .../test/src/maketower.test.scala | 8 +- .../test/src/rpt.test.scala | 8 +- .../test/src/tower.test.scala | 6 +- .../test/src/vecEquals.scala | 2 +- 32 files changed, 299 insertions(+), 53 deletions(-) create mode 100644 vecxt_re/Readme.md create mode 100644 vecxt_re/package.mill create mode 100644 vecxt_re/resources/timeline.vl.json create mode 100644 vecxt_re/src-js-native/.keep rename {vecxtensions => vecxt_re}/src-js/rpt.scala (97%) create mode 100644 vecxt_re/src-jvm-native/.keep rename {vecxtensions => vecxt_re}/src-jvm/SplitLosses.scala (99%) create mode 100644 vecxt_re/src-jvm/plots.scala rename {vecxtensions => vecxt_re}/src-jvm/rpt.scala (98%) rename {vecxtensions => vecxt_re}/src-native/rpt.scala (97%) rename {vecxtensions => 
vecxt_re}/src/DeductibleType.scala (76%) rename {vecxtensions => vecxt_re}/src/Layer.scala (99%) rename {vecxtensions => vecxt_re}/src/LossCalc.scala (63%) rename {vecxtensions => vecxt_re}/src/Retention_Limit.scala (90%) rename {vecxtensions => vecxt_re}/src/Tower.scala (96%) rename {vecxtensions => vecxt_re}/src/groupCumSum.scala (94%) rename {vecxtensions => vecxt_re}/src/groupDiff.scala (98%) rename {vecxtensions => vecxt_re}/src/groupSums.scala (96%) create mode 100644 vecxt_re/test/src-js/.keep create mode 100644 vecxt_re/test/src-jvm/.keep create mode 100644 vecxt_re/test/src-native/.keep rename {vecxtensions => vecxt_re}/test/src/aggByItr.test.scala (96%) rename {vecxtensions => vecxt_re}/test/src/cross.test.scala (83%) rename {vecxtensions => vecxt_re}/test/src/groupCumul.test.scala (99%) rename {vecxtensions => vecxt_re}/test/src/groupDiff.test.scala (99%) rename {vecxtensions => vecxt_re}/test/src/layer.test.scala (99%) rename {vecxtensions => vecxt_re}/test/src/maketower.test.scala (96%) rename {vecxtensions => vecxt_re}/test/src/rpt.test.scala (98%) rename {vecxtensions => vecxt_re}/test/src/tower.test.scala (98%) rename {vecxtensions => vecxt_re}/test/src/vecEquals.scala (94%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10d444a1..b1398a51 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,6 +68,9 @@ jobs: - name: Test run: ./mill vecxtensions.${{ matrix.project }}.test + - name: Test + run: ./mill vecxt_re.${{ matrix.project }}.test + - name: Laws Test if: matrix.project == 'jvm' run: ./mill laws.${{ matrix.project }}.test diff --git a/build.mill b/build.mill index 21a2e083..3f3e477d 100644 --- a/build.mill +++ b/build.mill @@ -77,3 +77,7 @@ trait CommonNative extends ScalaNativeModule with VecxtPublishModule: ) def scalaNativeVersion: Simple[String] = "0.5.9" end CommonNative + +trait ShareCompileResources extends ScalaModule { + override def compileResources = super.compileResources() ++ 
resources() +} diff --git a/vecxt_re/Readme.md b/vecxt_re/Readme.md new file mode 100644 index 00000000..62122f20 --- /dev/null +++ b/vecxt_re/Readme.md @@ -0,0 +1,4 @@ +# Vecxt Re + +A very domain spefic set of experiments in computational reinsurance. + diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill new file mode 100644 index 00000000..6e9a2b6d --- /dev/null +++ b/vecxt_re/package.mill @@ -0,0 +1,70 @@ +package build.vecxt_re + +import mill.*, scalalib.*, scalajslib.*, publish.* +import mill.scalajslib.api.ModuleKind +import mill.api.Task.Simple + +object `package` extends Module: + trait VexctReModule extends PlatformScalaModule with build.VecxtPublishModule with build.ShareCompileResources: + def mvnDeps = super.mvnDeps() ++ Seq() + + trait VexctReTest extends ScalaTests, TestModule.Munit: + def mvnDeps = super.mvnDeps() ++ Seq( + mvn"org.scalameta::munit::${build.V.munitVersion}" + ) + override def forkArgs: Simple[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag + end VexctReTest + end VexctReModule + + private def jsNativeSharedSources = Task.Sources { + os.sub / "src-js-native" + } + + private def jvmNativeSharedSources = Task.Sources { + os.sub / "src-jvm-native" + } + + object jvm extends VexctReModule: + def moduleDeps = Seq(build.vecxt.jvm) + override def scalaVersion = build.V.scalaVersion + override def forkArgs = super.forkArgs() ++ build.vecIncubatorFlag + // Ensure macro resource lookups (e.g. 
VegaPlot.fromResource) can see this module's resources during compilation + def sources = Task(super.sources() ++ jvmNativeSharedSources()) + + override def mvnDeps = super.mvnDeps() ++ Seq( + mvn"io.github.quafadas::scautable:0.0.35", + mvn"io.github.quafadas::dedav4s:0.10.3" + ) + + object test extends VexctReTest, ScalaTests: + def moduleDeps = Seq(jvm) + override def forkArgs: Simple[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag + end test + end jvm + + object js extends VexctReModule with build.CommonJS: + def moduleDeps = Seq(build.vecxt.js) + override def mvnDeps = super.mvnDeps() + def sources = Task(super.sources() ++ jsNativeSharedSources()) + def moduleKind = ModuleKind.ESModule + def enableBsp = false + + object test extends VexctReTest, ScalaJSTests: + def moduleDeps = Seq(js) + def moduleKind = ModuleKind.CommonJSModule + override def enableBsp = false + end test + end js + + object native extends VexctReModule with build.CommonNative: + def moduleDeps = Seq(build.vecxt.native) + override def mvnDeps = super.mvnDeps() + def sources = Task(super.sources() ++ jsNativeSharedSources() ++ jvmNativeSharedSources()) + override def enableBsp = false + + object test extends ScalaNativeTests, VexctReTest: + override def moduleDeps = Seq(native) + override def enableBsp = false + end test + end native +end `package` diff --git a/vecxt_re/resources/timeline.vl.json b/vecxt_re/resources/timeline.vl.json new file mode 100644 index 00000000..61b1736a --- /dev/null +++ b/vecxt_re/resources/timeline.vl.json @@ -0,0 +1,171 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A timeline visualization of contracts going on and off risk.", + "width": 500, + "height": 80, + "padding": 5, + "data": [ + { + "name": "sections", + "values": [ + { + "SectionID": 382, + "SectionName": "Something", + "RiskInceptionDate": 1333576800000, + "RiskExpiryDate": 1459893600000, + "ScheduledMaturityDate": 1460498400000, + "ExtensionPeriod": null + 
}, + { + "SectionID": 3, + "SectionName": "else", + "RiskInceptionDate": 1333576800000, + "RiskExpiryDate": 1459893600000, + "ScheduledMaturityDate": 1460498400000, + "ExtensionPeriod": null + } + ], + "format": { + "parse": { + "RiskInceptionDate": "date", + "RiskExpiryDate": "date" + } + }, + "transform": [ + { + "type": "collect", + "sort": { + "field": "RiskInceptionDate" + } + } + ] + } + ], + "scales": [ + { + "name": "yscale", + "type": "band", + "range": [ + 0, + { + "signal": "height" + } + ], + "domain": { + "data": "sections", + "field": "SectionName" + } + }, + { + "name": "xscale2", + "type": "time", + "range": "width", + "domain": { + "fields": [ + { + "data": "sections", + "field": "RiskInceptionDate" + }, + { + "data": "sections", + "field": "RiskExpiryDate" + } + ] + } + }, + { + "name": "xscale", + "type": "time", + "range": "width", + "domain": { + "data": "sections", + "fields": [ + "RiskInceptionDate", + "RiskExpiryDate" + ] + } + }, + { + "name": "color", + "type": "ordinal", + "range": { + "scheme": "tableau20" + }, + "domain": { + "data": "sections", + "field": "cedent" + } + } + ], + "axes": [ + { + "orient": "bottom", + "scale": "xscale" + }, + { + "orient": "top", + "scale": "xscale", + "offset": 20 + } + ], + "marks": [ + { + "type": "text", + "from": { + "data": "sections" + }, + "encode": { + "enter": { + "x": { + "scale": "xscale", + "field": "RiskInceptionDate" + }, + "y": { + "scale": "yscale", + "field": "SectionName", + "offset": 17 + }, + "fill": { + "value": "#000" + }, + "text": { + "field": "SectionName" + }, + "fontSize": { + "value": 20 + } + } + } + }, + { + "type": "rect", + "from": { + "data": "sections" + }, + "encode": { + "enter": { + "x": { + "scale": "xscale", + "field": "RiskInceptionDate" + }, + "x2": { + "scale": "xscale", + "field": "RiskExpiryDate" + }, + "y": { + "scale": "yscale", + "field": "SectionName", + "offset": 20 + }, + "height": { + "value": 4 + }, + "fill": { + "scale": "color", + "field": "cedent" + 
} + } + } + } + ] +} \ No newline at end of file diff --git a/vecxt_re/src-js-native/.keep b/vecxt_re/src-js-native/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxtensions/src-js/rpt.scala b/vecxt_re/src-js/rpt.scala similarity index 97% rename from vecxtensions/src-js/rpt.scala rename to vecxt_re/src-js/rpt.scala index 26213a55..976f5fc9 100644 --- a/vecxtensions/src-js/rpt.scala +++ b/vecxt_re/src-js/rpt.scala @@ -1,7 +1,7 @@ -package vecxt.reinsurance +package vecxt_re -import vecxt.reinsurance.Limits.Limit -import vecxt.reinsurance.Retentions.Retention +import vecxt_re.Limits.Limit +import vecxt_re.Retentions.Retention /* diff --git a/vecxt_re/src-jvm-native/.keep b/vecxt_re/src-jvm-native/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxtensions/src-jvm/SplitLosses.scala b/vecxt_re/src-jvm/SplitLosses.scala similarity index 99% rename from vecxtensions/src-jvm/SplitLosses.scala rename to vecxt_re/src-jvm/SplitLosses.scala index 9182619f..0734c6bd 100644 --- a/vecxtensions/src-jvm/SplitLosses.scala +++ b/vecxt_re/src-jvm/SplitLosses.scala @@ -1,10 +1,9 @@ -package vecxt.reinsurance +package vecxt_re import java.util.concurrent.Executors import jdk.incubator.vector.{DoubleVector, VectorOperators, VectorSpecies} import vecxt.BoundsCheck.BoundsCheck -import scala.annotation.nowarn object SplitLosses: extension (tower: Tower) diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala new file mode 100644 index 00000000..6c4d54fd --- /dev/null +++ b/vecxt_re/src-jvm/plots.scala @@ -0,0 +1,6 @@ +package vecxt_re + +import io.github.quafadas.plots.SetupVega.{*, given} + +object Plots: + lazy val timeline = VegaPlot.fromResource("timeline.vl.json") \ No newline at end of file diff --git a/vecxtensions/src-jvm/rpt.scala b/vecxt_re/src-jvm/rpt.scala similarity index 98% rename from vecxtensions/src-jvm/rpt.scala rename to vecxt_re/src-jvm/rpt.scala index 44fc0c5c..6ae24f75 100644 --- a/vecxtensions/src-jvm/rpt.scala 
+++ b/vecxt_re/src-jvm/rpt.scala @@ -1,8 +1,7 @@ -package vecxt.reinsurance -import vecxt.reinsurance.Limits.* -import vecxt.reinsurance.Retentions.* +package vecxt_re +import vecxt_re.Limits.* +import vecxt_re.Retentions.* import vecxt.all.* -import vecxt.all.given import jdk.incubator.vector.DoubleVector import jdk.incubator.vector.VectorSpecies diff --git a/vecxtensions/src-native/rpt.scala b/vecxt_re/src-native/rpt.scala similarity index 97% rename from vecxtensions/src-native/rpt.scala rename to vecxt_re/src-native/rpt.scala index 9815837a..ec06e90e 100644 --- a/vecxtensions/src-native/rpt.scala +++ b/vecxt_re/src-native/rpt.scala @@ -1,6 +1,6 @@ -package vecxt.reinsurance -import vecxt.reinsurance.Limits.Limit -import vecxt.reinsurance.Retentions.Retention +package vecxt_re +import vecxt_re.Limits.Limit +import vecxt_re.Retentions.Retention import vecxt.all.* import vecxt.all.given diff --git a/vecxtensions/src/DeductibleType.scala b/vecxt_re/src/DeductibleType.scala similarity index 76% rename from vecxtensions/src/DeductibleType.scala rename to vecxt_re/src/DeductibleType.scala index 4b4d2400..f282b043 100644 --- a/vecxtensions/src/DeductibleType.scala +++ b/vecxt_re/src/DeductibleType.scala @@ -1,4 +1,4 @@ -package vecxt.reinsurance +package vecxt_re enum DeductibleType: case Retention, Franchise, ReverseFranchise diff --git a/vecxtensions/src/Layer.scala b/vecxt_re/src/Layer.scala similarity index 99% rename from vecxtensions/src/Layer.scala rename to vecxt_re/src/Layer.scala index 092235cb..45bd0752 100644 --- a/vecxtensions/src/Layer.scala +++ b/vecxt_re/src/Layer.scala @@ -1,4 +1,4 @@ -package vecxt.reinsurance +package vecxt_re object Layer: inline def apply(limit: Double, ret: Double): Layer = diff --git a/vecxtensions/src/LossCalc.scala b/vecxt_re/src/LossCalc.scala similarity index 63% rename from vecxtensions/src/LossCalc.scala rename to vecxt_re/src/LossCalc.scala index 8b7d3dbe..a02c3737 100644 --- a/vecxtensions/src/LossCalc.scala +++ 
b/vecxt_re/src/LossCalc.scala @@ -1,4 +1,4 @@ -package vecxt.reinsurance +package vecxt_re enum LossCalc: case Agg, Occ diff --git a/vecxtensions/src/Retention_Limit.scala b/vecxt_re/src/Retention_Limit.scala similarity index 90% rename from vecxtensions/src/Retention_Limit.scala rename to vecxt_re/src/Retention_Limit.scala index bbdac5db..5bdd31e8 100644 --- a/vecxtensions/src/Retention_Limit.scala +++ b/vecxt_re/src/Retention_Limit.scala @@ -1,6 +1,6 @@ -package vecxt.reinsurance +package vecxt_re -import vecxt.reinsurance.Retentions.Retention +import vecxt_re.Retentions.Retention object Retentions: opaque type Retention = Double diff --git a/vecxtensions/src/Tower.scala b/vecxt_re/src/Tower.scala similarity index 96% rename from vecxtensions/src/Tower.scala rename to vecxt_re/src/Tower.scala index 54b5b0af..ca184e37 100644 --- a/vecxtensions/src/Tower.scala +++ b/vecxt_re/src/Tower.scala @@ -1,10 +1,5 @@ -package vecxt.reinsurance +package vecxt_re -import java.util.UUID -import vecxtensions.{groupCumSum, groupDiff} -import vecxt.reinsurance.Limits.Limit -import vecxt.reinsurance.Retentions.Retention -import vecxt.reinsurance.rpt.* import vecxt.all.* object Tower: diff --git a/vecxtensions/src/groupCumSum.scala b/vecxt_re/src/groupCumSum.scala similarity index 94% rename from vecxtensions/src/groupCumSum.scala rename to vecxt_re/src/groupCumSum.scala index 7a1bac38..341c0209 100644 --- a/vecxtensions/src/groupCumSum.scala +++ b/vecxt_re/src/groupCumSum.scala @@ -1,6 +1,6 @@ -package vecxtensions +package vecxt_re -import vecxt.reinsurance.Layer +import vecxt_re.Layer def aggregateByItr( years: Array[Int], diff --git a/vecxtensions/src/groupDiff.scala b/vecxt_re/src/groupDiff.scala similarity index 98% rename from vecxtensions/src/groupDiff.scala rename to vecxt_re/src/groupDiff.scala index 3e474e01..b86b4bbe 100644 --- a/vecxtensions/src/groupDiff.scala +++ b/vecxt_re/src/groupDiff.scala @@ -1,4 +1,4 @@ -package vecxtensions +package vecxt_re /** - You have a 
sorted groups array. * - Each group has a small number of values. diff --git a/vecxtensions/src/groupSums.scala b/vecxt_re/src/groupSums.scala similarity index 96% rename from vecxtensions/src/groupSums.scala rename to vecxt_re/src/groupSums.scala index 27aab85d..49aca8a1 100644 --- a/vecxtensions/src/groupSums.scala +++ b/vecxt_re/src/groupSums.scala @@ -1,6 +1,4 @@ -package vecxtensions - -import vecxt.reinsurance.Layer +package vecxt_re /** - You have a sorted groups array. * - Each group has a small number of values. diff --git a/vecxt_re/test/src-js/.keep b/vecxt_re/test/src-js/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxt_re/test/src-jvm/.keep b/vecxt_re/test/src-jvm/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxt_re/test/src-native/.keep b/vecxt_re/test/src-native/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxtensions/test/src/aggByItr.test.scala b/vecxt_re/test/src/aggByItr.test.scala similarity index 96% rename from vecxtensions/test/src/aggByItr.test.scala rename to vecxt_re/test/src/aggByItr.test.scala index f9a3aec5..83102f2f 100644 --- a/vecxtensions/test/src/aggByItr.test.scala +++ b/vecxt_re/test/src/aggByItr.test.scala @@ -1,7 +1,7 @@ -package vecxtensions +package vecxt_re import munit.FunSuite -import vecxt.reinsurance.Layer +import vecxt_re.Layer class AggregateByItrSpec extends FunSuite: diff --git a/vecxtensions/test/src/cross.test.scala b/vecxt_re/test/src/cross.test.scala similarity index 83% rename from vecxtensions/test/src/cross.test.scala rename to vecxt_re/test/src/cross.test.scala index 3c4f6d32..a4c18015 100644 --- a/vecxtensions/test/src/cross.test.scala +++ b/vecxt_re/test/src/cross.test.scala @@ -1,8 +1,8 @@ -package vecxt.reinsurance +package vecxt_re -import Limits.Limit -import Retentions.Retention -import rpt.* +import vecxt_re.Limits.Limit +import vecxt_re.Retentions.Retention +import vecxt_re.rpt.* import scala.util.chaining.* diff --git 
a/vecxtensions/test/src/groupCumul.test.scala b/vecxt_re/test/src/groupCumul.test.scala similarity index 99% rename from vecxtensions/test/src/groupCumul.test.scala rename to vecxt_re/test/src/groupCumul.test.scala index 08d1c215..837cf585 100644 --- a/vecxtensions/test/src/groupCumul.test.scala +++ b/vecxt_re/test/src/groupCumul.test.scala @@ -1,4 +1,4 @@ -package vecxtensions +package vecxt_re import munit.FunSuite diff --git a/vecxtensions/test/src/groupDiff.test.scala b/vecxt_re/test/src/groupDiff.test.scala similarity index 99% rename from vecxtensions/test/src/groupDiff.test.scala rename to vecxt_re/test/src/groupDiff.test.scala index 72adfb6e..8e4cf499 100644 --- a/vecxtensions/test/src/groupDiff.test.scala +++ b/vecxt_re/test/src/groupDiff.test.scala @@ -1,4 +1,4 @@ -package vecxtensions +package vecxt_re import munit.FunSuite diff --git a/vecxtensions/test/src/layer.test.scala b/vecxt_re/test/src/layer.test.scala similarity index 99% rename from vecxtensions/test/src/layer.test.scala rename to vecxt_re/test/src/layer.test.scala index dc27fb6a..9ef7c5f3 100644 --- a/vecxtensions/test/src/layer.test.scala +++ b/vecxt_re/test/src/layer.test.scala @@ -1,7 +1,6 @@ -package vecxt.reinsurance +package vecxt_re -import java.util.UUID -import vecxtensions.assertVecEquals +import vecxt_re.assertVecEquals class ScenarioRISuite extends munit.FunSuite: diff --git a/vecxtensions/test/src/maketower.test.scala b/vecxt_re/test/src/maketower.test.scala similarity index 96% rename from vecxtensions/test/src/maketower.test.scala rename to vecxt_re/test/src/maketower.test.scala index 19e923c7..b236dd4a 100644 --- a/vecxtensions/test/src/maketower.test.scala +++ b/vecxt_re/test/src/maketower.test.scala @@ -1,9 +1,7 @@ -package vecxt.reinsurance +package vecxt_re -import java.util.UUID -import vecxt.all.* -class TowerSuite extends munit.FunSuite: +class MakeTowerSuite extends munit.FunSuite: val sampleLayer = Layer( layerName = Some("Primary Layer"), @@ -103,4 +101,4 @@ class 
TowerSuite extends munit.FunSuite: assertEquals(scaledLayer.occLimit, originalLayer.occLimit) assertEquals(scaledLayer.occRetention, originalLayer.occRetention) -end TowerSuite +end MakeTowerSuite diff --git a/vecxtensions/test/src/rpt.test.scala b/vecxt_re/test/src/rpt.test.scala similarity index 98% rename from vecxtensions/test/src/rpt.test.scala rename to vecxt_re/test/src/rpt.test.scala index 5a670475..56918d89 100644 --- a/vecxtensions/test/src/rpt.test.scala +++ b/vecxt_re/test/src/rpt.test.scala @@ -1,8 +1,8 @@ -package vecxt.reinsurance +package vecxt_re -import vecxt.reinsurance.rpt.* -import Retentions.Retention -import Limits.Limit +import vecxt_re.rpt.* +import vecxt_re.Retentions.Retention +import vecxt_re.Limits.Limit class ReinsurancePricingSuite extends munit.FunSuite: diff --git a/vecxtensions/test/src/tower.test.scala b/vecxt_re/test/src/tower.test.scala similarity index 98% rename from vecxtensions/test/src/tower.test.scala rename to vecxt_re/test/src/tower.test.scala index 9ce46c9b..f38a19a4 100644 --- a/vecxtensions/test/src/tower.test.scala +++ b/vecxt_re/test/src/tower.test.scala @@ -1,9 +1,9 @@ -package vecxtensions +package vecxt_re -import vecxt.reinsurance.* +import vecxt_re.* import vecxt.all.* import vecxt.all.given -import vecxt.reinsurance.SplitLosses.* +import vecxt_re.SplitLosses.* class TowerSuite extends munit.FunSuite: diff --git a/vecxtensions/test/src/vecEquals.scala b/vecxt_re/test/src/vecEquals.scala similarity index 94% rename from vecxtensions/test/src/vecEquals.scala rename to vecxt_re/test/src/vecEquals.scala index 7fb3d3c9..f0aa42d8 100644 --- a/vecxtensions/test/src/vecEquals.scala +++ b/vecxt_re/test/src/vecEquals.scala @@ -1,4 +1,4 @@ -package vecxtensions +package vecxt_re import munit.Assertions.assertEqualsDouble From 34cd2bb27d9427e83246645ef77947f366727279 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 11:02:06 +0100 Subject: [PATCH 02/75] . 
--- build.mill | 4 +- vecxt_re/package.mill | 6 +- vecxt_re/resources/seasonality.vg.json | 111 +++++++++++++++++++++++++ vecxt_re/src-jvm/plots.scala | 4 +- vecxt_re/test/src/maketower.test.scala | 1 - 5 files changed, 119 insertions(+), 7 deletions(-) create mode 100644 vecxt_re/resources/seasonality.vg.json diff --git a/build.mill b/build.mill index 3f3e477d..eb207e47 100644 --- a/build.mill +++ b/build.mill @@ -78,6 +78,6 @@ trait CommonNative extends ScalaNativeModule with VecxtPublishModule: def scalaNativeVersion: Simple[String] = "0.5.9" end CommonNative -trait ShareCompileResources extends ScalaModule { +trait ShareCompileResources extends ScalaModule: override def compileResources = super.compileResources() ++ resources() -} +end ShareCompileResources diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill index 6e9a2b6d..6c51e449 100644 --- a/vecxt_re/package.mill +++ b/vecxt_re/package.mill @@ -28,12 +28,12 @@ object `package` extends Module: def moduleDeps = Seq(build.vecxt.jvm) override def scalaVersion = build.V.scalaVersion override def forkArgs = super.forkArgs() ++ build.vecIncubatorFlag - // Ensure macro resource lookups (e.g. VegaPlot.fromResource) can see this module's resources during compilation + // Ensure macro resource lookups (e.g. 
VegaPlot.fromResource) can see this module's resources during compilation def sources = Task(super.sources() ++ jvmNativeSharedSources()) override def mvnDeps = super.mvnDeps() ++ Seq( - mvn"io.github.quafadas::scautable:0.0.35", - mvn"io.github.quafadas::dedav4s:0.10.3" + mvn"io.github.quafadas::scautable:0.0.35", + mvn"io.github.quafadas::dedav4s:0.10.3" ) object test extends VexctReTest, ScalaTests: diff --git a/vecxt_re/resources/seasonality.vg.json b/vecxt_re/resources/seasonality.vg.json new file mode 100644 index 00000000..89f0e0bd --- /dev/null +++ b/vecxt_re/resources/seasonality.vg.json @@ -0,0 +1,111 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A basic bar chart example, with value labels shown upon pointer hover.", + "padding": 5, + "data": [ + { + "name": "table", + "values": [ + {"category": "2022-12-31", "amount": 0, "color": true} + ] + } + ], + "signals": [ + { + "name": "tooltip", + "value": {}, + "on": [ + {"events": "rect:pointerover", "update": "datum"}, + {"events": "rect:pointerout", "update": "{}"} + ] + }, + { + "name": "height", + "init": "isFinite(containerSize()[1]) ? containerSize()[1] : 200", + "on": [ + { + "update": "isFinite(containerSize()[1]) ? containerSize()[1] : 200", + "events": "window:resize" + } + ] + }, + { + "name": "width", + "init": "isFinite(containerSize()[0]) ? containerSize()[0] : 200", + "on": [ + { + "update": "isFinite(containerSize()[0]) ? 
containerSize()[0] : 200", + "events": "window:resize" + } + ] + } + ], + "scales": [ + { + "name": "xscale", + "type": "band", + "domain": {"data": "table", "field": "category"}, + "range": "width", + "padding": 0.05, + "round": true + }, + { + "name": "yscale", + "domain": {"data": "table", "field": "amount"}, + "nice": true, + "range": "height" + }, + { + "name": "color", + "type": "ordinal", + "domain": {"data": "table", "field": "color"}, + "range": ["green", "steelblue"] + } + ], + "axes": [ + { + "orient": "bottom", + "scale": "xscale", + "labelAngle": -90, + "labelPadding": 30 + }, + {"orient": "left", "scale": "yscale"} + ], + "marks": [ + { + "type": "rect", + "from": {"data": "table"}, + "encode": { + "enter": { + "x": {"scale": "xscale", "field": "category"}, + "width": {"scale": "xscale", "band": 1}, + "y": {"scale": "yscale", "field": "amount"}, + "y2": {"scale": "yscale", "value": 0}, + "tooltip": {"signal": "datum"} + }, + "update": {"fill": {"scale": "color", "field": "color"}}, + "hover": {"fill": {"value": "red"}} + } + }, + { + "type": "text", + "encode": { + "enter": { + "align": {"value": "center"}, + "baseline": {"value": "bottom"}, + "fill": {"value": "#333"} + }, + "update": { + "x": {"scale": "xscale", "signal": "tooltip.category", "band": 0.5}, + "y": {"scale": "yscale", "signal": "tooltip.amount", "offset": -2}, + "text": {"signal": "tooltip.amount"}, + "fillOpacity": [ + {"test": "datum === tooltip", "value": 0}, + {"value": 1} + ] + } + } + } + ], + "autosize": {"type": "fit", "resize": true, "contains": "padding"} +} \ No newline at end of file diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 6c4d54fd..345fb40e 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -3,4 +3,6 @@ package vecxt_re import io.github.quafadas.plots.SetupVega.{*, given} object Plots: - lazy val timeline = VegaPlot.fromResource("timeline.vl.json") \ No newline at end of file + lazy val timeline = 
VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate + lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount +end Plots diff --git a/vecxt_re/test/src/maketower.test.scala b/vecxt_re/test/src/maketower.test.scala index b236dd4a..1abbccd7 100644 --- a/vecxt_re/test/src/maketower.test.scala +++ b/vecxt_re/test/src/maketower.test.scala @@ -1,6 +1,5 @@ package vecxt_re - class MakeTowerSuite extends munit.FunSuite: val sampleLayer = Layer( From f758c3db30ffc3062e69321494abeb0d27e58556 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 11:09:35 +0100 Subject: [PATCH 03/75] . --- vecxt_re/resources/distDensity.vg.json | 256 ++++++++++++++++ vecxt_re/resources/seasonality.vg.json | 277 +++++++++++------- vecxt_re/src-js-native/.keep | 0 .../src-js-native/SplitLosses.scala | 0 vecxt_re/src-jvm/plots.scala | 2 +- 5 files changed, 433 insertions(+), 102 deletions(-) create mode 100644 vecxt_re/resources/distDensity.vg.json delete mode 100644 vecxt_re/src-js-native/.keep rename {vecxtensions => vecxt_re}/src-js-native/SplitLosses.scala (100%) diff --git a/vecxt_re/resources/distDensity.vg.json b/vecxt_re/resources/distDensity.vg.json new file mode 100644 index 00000000..2b2f2658 --- /dev/null +++ b/vecxt_re/resources/distDensity.vg.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "Based on https://vega.github.io/vega/examples/probability-density.vg.json", + "data": [ + { + "name": "points", + "values": [ + { + "tret": 1.1, + "probability": 0.1, + "c": "first", + "offset": 0.5 + }, + { + "tret": 1.01, + "probability": 0.2, + "c": "first", + "offset": 0.5 + }, + { + "tret": 0.99, + "probability": 0.3, + "c": "first", + "offset": 0.5 + }, + { + "tret": 0.5, + "probability": 0.5, + "c": "first", + "offset": 0.5 + }, + { + "tret": 0.1, + "probability": 0.99, + "c": "first", + "offset": 0.5 + } + ] + }, + { + "name": "summary", + "source": "points", + 
"transform": [ + { + "type": "aggregate", + "fields": [ + "tret", + "offset" + ], + "groupby": [ + "c" + ], + "ops": [ + "mean", + "mean" + ], + "as": [ + "mean", + "meanOffset" + ] + } + ] + } + ], + "scales": [ + { + "name": "color", + "type": "ordinal", + "domain": { + "data": "points", + "field": "c" + }, + "range": { + "scheme": "category20" + } + }, + { + "name": "xscale", + "range": "width", + "domain": { + "data": "points", + "field": "probability" + }, + "type": "log" + }, + { + "name": "yscale", + "type": "linear", + "range": "height", + "domain": { + "fields": [ + { + "data": "points", + "field": "tret" + } + ] + } + } + ], + "legends": [ + { + "orient": "top-right", + "fill": "color", + "offset": 0, + "zindex": 1 + } + ], + "marks": [ + { + "type": "group", + "from": { + "facet": { + "name": "series", + "data": "points", + "groupby": "c" + } + }, + "marks": [ + { + "type": "line", + "from": { + "data": "series" + }, + "encode": { + "enter": { + "x": { + "scale": "xscale", + "field": "probability" + }, + "y": { + "scale": "yscale", + "field": "tret" + }, + "stroke": { + "scale": "color", + "field": "c" + }, + "tooltip": { + "signal": "{tret : format(datum.tret*100, \".3f\")+\"%\" , probTretSmaller: format(datum.probability*100,\".3f\")+\"%\" }" + } + }, + "update": { + "interpolate": "monotone", + "strokeOpacity": { + "value": 1 + } + }, + "hover": { + "strokeOpacity": { + "value": 0.5 + } + } + } + } + ] + }, + { + "type": "rect", + "from": { + "data": "points" + }, + "interactive": true, + "encode": { + "enter": { + "y": { + "scale": "yscale", + "field": "tret" + }, + "height": { + "value": 2 + }, + "x": { + "value": 25, + "offset": { + "signal": "width" + }, + "mult": { + "field": "offset" + } + }, + "tooltip": { + "signal": "{tret : format(datum.tret*100, \".3f\")+\"%\" , probTretSmaller: format(datum.probability*100,\".3f\")+\"%\" }" + }, + "width": { + "value": 5 + }, + "fill": { + "scale": "color", + "field": "c" + }, + "fillOpacity": { + "value": 
0.4 + } + } + } + }, + { + "type": "rect", + "from": { + "data": "summary" + }, + "interactive": true, + "zindex": 1, + "encode": { + "enter": { + "y": { + "scale": "yscale", + "field": "mean" + }, + "height": { + "value": 2 + }, + "x": { + "value": 25, + "offset": { + "signal": "width" + }, + "mult": { + "field": "meanOffset" + } + }, + "fill": { + "value": "black" + }, + "width": { + "value": 5 + }, + "fillOpacity": { + "value": 1 + } + } + } + } + ], + "axes": [ + { + "orient": "bottom", + "scale": "xscale", + "zindex": 0, + "grid": true, + "title": "CDF", + "titleAnchor": "middle", + "gridOpacity": 0.5 + }, + { + "orient": "left", + "scale": "yscale", + "zindex": 0, + "grid": true, + "title": "TRET", + "titleAnchor": "middle", + "gridOpacity": 0.5, + "domain": "false" + } + ] +} \ No newline at end of file diff --git a/vecxt_re/resources/seasonality.vg.json b/vecxt_re/resources/seasonality.vg.json index 89f0e0bd..5c3200ea 100644 --- a/vecxt_re/resources/seasonality.vg.json +++ b/vecxt_re/resources/seasonality.vg.json @@ -1,111 +1,186 @@ { - "$schema": "https://vega.github.io/schema/vega/v5.json", - "description": "A basic bar chart example, with value labels shown upon pointer hover.", - "padding": 5, - "data": [ - { - "name": "table", - "values": [ - {"category": "2022-12-31", "amount": 0, "color": true} - ] - } - ], - "signals": [ - { - "name": "tooltip", - "value": {}, - "on": [ - {"events": "rect:pointerover", "update": "datum"}, - {"events": "rect:pointerout", "update": "{}"} - ] - }, - { - "name": "height", - "init": "isFinite(containerSize()[1]) ? containerSize()[1] : 200", - "on": [ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A basic bar chart example, with value labels shown upon pointer hover.", + "padding": 5, + "data": [ { - "update": "isFinite(containerSize()[1]) ? 
containerSize()[1] : 200", - "events": "window:resize" + "name": "table", + "values": [ + { + "category": "2022-12-31", + "amount": 0, + "color": true + } + ] } - ] - }, - { - "name": "width", - "init": "isFinite(containerSize()[0]) ? containerSize()[0] : 200", - "on": [ + ], + "signals": [ + { + "name": "tooltip", + "value": {}, + "on": [ + { + "events": "rect:pointerover", + "update": "datum" + }, + { + "events": "rect:pointerout", + "update": "{}" + } + ] + }, + { + "name": "height", + "init": "isFinite(containerSize()[1]) ? containerSize()[1] : 200", + "on": [ + { + "update": "isFinite(containerSize()[1]) ? containerSize()[1] : 200", + "events": "window:resize" + } + ] + }, { - "update": "isFinite(containerSize()[0]) ? containerSize()[0] : 200", - "events": "window:resize" + "name": "width", + "init": "isFinite(containerSize()[0]) ? containerSize()[0] : 200", + "on": [ + { + "update": "isFinite(containerSize()[0]) ? containerSize()[0] : 200", + "events": "window:resize" + } + ] } - ] - } - ], - "scales": [ - { - "name": "xscale", - "type": "band", - "domain": {"data": "table", "field": "category"}, - "range": "width", - "padding": 0.05, - "round": true - }, - { - "name": "yscale", - "domain": {"data": "table", "field": "amount"}, - "nice": true, - "range": "height" - }, - { - "name": "color", - "type": "ordinal", - "domain": {"data": "table", "field": "color"}, - "range": ["green", "steelblue"] - } - ], - "axes": [ - { - "orient": "bottom", - "scale": "xscale", - "labelAngle": -90, - "labelPadding": 30 - }, - {"orient": "left", "scale": "yscale"} - ], - "marks": [ - { - "type": "rect", - "from": {"data": "table"}, - "encode": { - "enter": { - "x": {"scale": "xscale", "field": "category"}, - "width": {"scale": "xscale", "band": 1}, - "y": {"scale": "yscale", "field": "amount"}, - "y2": {"scale": "yscale", "value": 0}, - "tooltip": {"signal": "datum"} + ], + "scales": [ + { + "name": "xscale", + "type": "band", + "domain": { + "data": "table", + "field": 
"category" + }, + "range": "width", + "padding": 0.05, + "round": true + }, + { + "name": "yscale", + "domain": { + "data": "table", + "field": "amount" + }, + "nice": true, + "range": "height" }, - "update": {"fill": {"scale": "color", "field": "color"}}, - "hover": {"fill": {"value": "red"}} - } - }, - { - "type": "text", - "encode": { - "enter": { - "align": {"value": "center"}, - "baseline": {"value": "bottom"}, - "fill": {"value": "#333"} + { + "name": "color", + "type": "ordinal", + "domain": { + "data": "table", + "field": "color" + }, + "range": [ + "green", + "steelblue" + ] + } + ], + "axes": [ + { + "orient": "bottom", + "scale": "xscale", + "labelAngle": -90, + "labelPadding": 30 }, - "update": { - "x": {"scale": "xscale", "signal": "tooltip.category", "band": 0.5}, - "y": {"scale": "yscale", "signal": "tooltip.amount", "offset": -2}, - "text": {"signal": "tooltip.amount"}, - "fillOpacity": [ - {"test": "datum === tooltip", "value": 0}, - {"value": 1} - ] + { + "orient": "left", + "scale": "yscale" + } + ], + "marks": [ + { + "type": "rect", + "from": { + "data": "table" + }, + "encode": { + "enter": { + "x": { + "scale": "xscale", + "field": "category" + }, + "width": { + "scale": "xscale", + "band": 1 + }, + "y": { + "scale": "yscale", + "field": "amount" + }, + "y2": { + "scale": "yscale", + "value": 0 + }, + "tooltip": { + "signal": "datum" + } + }, + "update": { + "fill": { + "scale": "color", + "field": "color" + } + }, + "hover": { + "fill": { + "value": "red" + } + } + } + }, + { + "type": "text", + "encode": { + "enter": { + "align": { + "value": "center" + }, + "baseline": { + "value": "bottom" + }, + "fill": { + "value": "#333" + } + }, + "update": { + "x": { + "scale": "xscale", + "signal": "tooltip.category", + "band": 0.5 + }, + "y": { + "scale": "yscale", + "signal": "tooltip.amount", + "offset": -2 + }, + "text": { + "signal": "tooltip.amount" + }, + "fillOpacity": [ + { + "test": "datum === tooltip", + "value": 0 + }, + { + "value": 1 + 
} + ] + } + } } - } + ], + "autosize": { + "type": "fit", + "resize": true, + "contains": "padding" } - ], - "autosize": {"type": "fit", "resize": true, "contains": "padding"} } \ No newline at end of file diff --git a/vecxt_re/src-js-native/.keep b/vecxt_re/src-js-native/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/vecxtensions/src-js-native/SplitLosses.scala b/vecxt_re/src-js-native/SplitLosses.scala similarity index 100% rename from vecxtensions/src-js-native/SplitLosses.scala rename to vecxt_re/src-js-native/SplitLosses.scala diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 345fb40e..2cf6c52b 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -1,6 +1,6 @@ package vecxt_re -import io.github.quafadas.plots.SetupVega.{*, given} +import io.github.quafadas.plots.SetupVega.* object Plots: lazy val timeline = VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate From 87724b54f4e5940589dc6f7e2366b36f68976dc7 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 11:53:37 +0100 Subject: [PATCH 04/75] . 
--- vecxt_re/src-js-native/SplitLosses.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vecxt_re/src-js-native/SplitLosses.scala b/vecxt_re/src-js-native/SplitLosses.scala index d1508a8a..745538ed 100644 --- a/vecxt_re/src-js-native/SplitLosses.scala +++ b/vecxt_re/src-js-native/SplitLosses.scala @@ -1,4 +1,4 @@ -package vecxt.reinsurance +package vecxt_re import vecxt.BoundsCheck.BoundsCheck From d031b48c5bf6e9ba15d0b960b7e4060d210c9f7e Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 12:24:15 +0100 Subject: [PATCH 05/75] fix boolean suite --- vecxt/test/src/booleanarray.test.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vecxt/test/src/booleanarray.test.scala b/vecxt/test/src/booleanarray.test.scala index 8d67e734..aa43d639 100644 --- a/vecxt/test/src/booleanarray.test.scala +++ b/vecxt/test/src/booleanarray.test.scala @@ -2,7 +2,7 @@ package vecxt import vecxt.all.* -class BooleaArrayExtensionSuite extends munit.FunSuite: +class BooleanArrayExtensionSuite extends munit.FunSuite: test("all") { val v1 = Array[Boolean](true, true, true) @@ -154,4 +154,4 @@ class BooleaArrayExtensionSuite extends munit.FunSuite: } } -end BooleaArrayExtensionSuite +end BooleanArrayExtensionSuite From 188ad73e27f0a87fefb1f208cfb482526bbb8c81 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 12:25:09 +0100 Subject: [PATCH 06/75] int array mean and var --- vecxt/src-js/array.scala | 30 ++++++++++++++ vecxt/src-jvm/arrays.scala | 63 ++++++++++++++++++++++++++++++ vecxt/src-native/array.scala | 33 ++++++++++++++++ vecxt/test/src/intarray.test.scala | 34 ++++++++++++++++ 4 files changed, 160 insertions(+) diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index 8469e5bb..a56f3355 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -79,6 +79,36 @@ object arrays: end for newVec end apply + + inline def mean: Double = + var sum = 0.0 + var i = 0 + while i < vec.length do + sum += 
vec(i) + i += 1 + end while + sum / vec.length + end mean + + inline def variance: Double = + vec.meanAndVariance.variance + end variance + + inline def meanAndVariance: (mean: Double, variance: Double) = + val μ = vec.mean + var acc = 0.0 + var i = 0 + while i < vec.length do + val diff = vec(i) - μ + acc += diff * diff + i += 1 + end while + (μ, acc / vec.length) + end meanAndVariance + + inline def std: Double = Math.sqrt(vec.variance) + + inline def stdDev: Double = vec.std end extension extension (vec: Array[Double]) diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index 3d8410e8..d81df3c0 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -250,6 +250,69 @@ object arrays: temp end sumSIMD + inline def mean: Double = + var i = 0 + var acc = DoubleVector.zero(spd) + val tmp = new Array[Double](spdl) + + while i < spd.loopBound(vec.length) do + var lane = 0 + while lane < spdl do + tmp(lane) = vec(i + lane).toDouble + lane += 1 + end while + + acc = acc.add(DoubleVector.fromArray(spd, tmp, 0)) + i += spdl + end while + + var sum = acc.reduceLanes(VectorOperators.ADD) + while i < vec.length do + sum += vec(i) + i += 1 + end while + + sum / vec.length + end mean + + inline def variance: Double = meanAndVariance.variance + + inline def meanAndVariance: (mean: Double, variance: Double) = + val μ = vec.mean + val μVec = DoubleVector.broadcast(spd, μ) + + var i = 0 + var acc = DoubleVector.zero(spd) + val tmp = new Array[Double](spdl) + + while i < spd.loopBound(vec.length) do + var lane = 0 + while lane < spdl do + tmp(lane) = vec(i + lane).toDouble + lane += 1 + end while + + val v = DoubleVector.fromArray(spd, tmp, 0) + val diff = v.sub(μVec) + acc = diff.fma(diff, acc) + i += spdl + end while + + var sumSqDiff = acc.reduceLanes(VectorOperators.ADD) + + while i < vec.length do + val diff = vec(i).toDouble - μ + sumSqDiff = Math.fma(diff, diff, sumSqDiff) + i += 1 + end while + + (μ, sumSqDiff / vec.length) + end 
meanAndVariance + + inline def std: Double = Math.sqrt(vec.variance) + + inline def stdDev: Double = vec.std + inline def dot(vec2: Array[Int])(using inline boundsCheck: BoundsCheck): Int = dimCheck(vec, vec2) val newVec = Array.ofDim[Int](vec.length) diff --git a/vecxt/src-native/array.scala b/vecxt/src-native/array.scala index 867e81a8..c35dca03 100644 --- a/vecxt/src-native/array.scala +++ b/vecxt/src-native/array.scala @@ -46,6 +46,39 @@ object arrays: // end copy end extension + extension (vec: Array[Int]) + + inline def mean: Double = + var sum = 0.0 + var i = 0 + while i < vec.length do + sum += vec(i) + i += 1 + end while + sum / vec.length + end mean + + inline def variance: Double = + vec.meanAndVariance.variance + end variance + + inline def meanAndVariance: (mean: Double, variance: Double) = + val μ = vec.mean + var acc = 0.0 + var i = 0 + while i < vec.length do + val diff = vec(i) - μ + acc = Math.fma(diff, diff, acc) + i += 1 + end while + (μ, acc / vec.length) + end meanAndVariance + + inline def std: Double = Math.sqrt(vec.variance) + + inline def stdDev: Double = vec.std + end extension + extension [A: ClassTag](vec: Array[A]) def apply(index: Array[Boolean]): Array[A] = diff --git a/vecxt/test/src/intarray.test.scala b/vecxt/test/src/intarray.test.scala index d594a297..84fca641 100644 --- a/vecxt/test/src/intarray.test.scala +++ b/vecxt/test/src/intarray.test.scala @@ -65,4 +65,38 @@ class IntArrayExtensionSuite extends munit.FunSuite: assert(!v2.contiguous) } + test("mean arithmetic progression") { + val v = Array.tabulate[Int](10)(identity) + assertEqualsDouble(math.abs(v.mean - 4.5d), 0.0, 1e-12) + } + + test("variance/std zero spread") { + val v = Array.fill[Int](6)(7) + assertEqualsDouble(math.abs(v.mean - 7d), 0.0, 1e-12) + assertEqualsDouble(math.abs(v.variance), 0.0, 1e-12) + assertEqualsDouble(math.abs(v.std), 0.0, 1e-12) + } + + test("variance/std arithmetic progression") { + val v = Array.tabulate[Int](10)(identity) + val expectedVar = 
8.25d + assertEqualsDouble(math.abs(v.variance - expectedVar), 0.0, 1e-9) + assertEqualsDouble(math.abs(v.std - math.sqrt(expectedVar)), 0.0, 1e-9) + } + + test("meanAndVariance zero spread") { + val v = Array.fill[Int](6)(7) + val stats = v.meanAndVariance + assertEqualsDouble(math.abs(stats.mean - 7d), 0.0, 1e-12) + assertEqualsDouble(math.abs(stats.variance), 0.0, 1e-12) + } + + test("meanAndVariance arithmetic progression") { + val v = Array.tabulate[Int](10)(identity) + val stats = v.meanAndVariance + val expectedVar = 8.25d + assertEqualsDouble(math.abs(stats.mean - 4.5d), 0.0, 1e-12) + assertEqualsDouble(math.abs(stats.variance - expectedVar), 0.0, 1e-9) + } + end IntArrayExtensionSuite From 16abc282718a2e63495b7d6f81b5b69cdc3eb96d Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 15:10:19 +0100 Subject: [PATCH 07/75] update variance to accept different modes --- vecxt/src-js/array.scala | 65 +++++++++++++++++++++++------- vecxt/src-jvm/arrays.scala | 52 ++++++++++++++++++------ vecxt/src-native/array.scala | 66 ++++++++++++++++++++++++------- vecxt/src/all.scala | 1 + vecxt/src/variance.scala | 11 ++++++ vecxt/test/src/array.test.scala | 2 +- vecxt/test/src/simple.stats.scala | 4 +- 7 files changed, 155 insertions(+), 46 deletions(-) create mode 100644 vecxt/src/variance.scala diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index a56f3355..7e58708d 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -90,11 +90,16 @@ object arrays: sum / vec.length end mean - inline def variance: Double = - vec.meanAndVariance.variance + inline def variance: Double = variance(VarianceMode.Population) + + inline def variance(mode: VarianceMode): Double = + vec.meanAndVariance(mode).variance end variance inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = val μ = vec.mean var acc = 0.0 var 
i = 0 @@ -103,12 +108,21 @@ object arrays: acc += diff * diff i += 1 end while - (μ, acc / vec.length) + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, acc / denom) end meanAndVariance - inline def std: Double = Math.sqrt(vec.variance) + inline def std: Double = std(VarianceMode.Population) - inline def stdDev: Double = vec.std + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) end extension extension (vec: Array[Double]) @@ -138,12 +152,14 @@ object arrays: out end increments - inline def stdDev: Double = - // https://www.cuemath.com/data/standard-deviation/ - val mu = vec.mean - val diffs_2 = vec.map(num => (num - mu) * (num - mu)) - Math.sqrt(diffs_2.sum / (vec.length - 1)) - end stdDev + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) inline def mean: Double = vec.sumSIMD / vec.length @@ -167,12 +183,31 @@ object arrays: sum end product - def variance: Double = - // https://www.cuemath.com/sample-variance-formula/ - val μ = vec.mean - vec.map(i => (i - μ) * (i - μ)).sum / (vec.length - 1) + inline def variance: Double = variance(VarianceMode.Population) + + def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance end variance + inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + val μ = vec.mean + var acc = 0.0 + var i = 0 + while i < vec.length do + val diff = vec(i) - μ + acc += diff * diff + i += 1 + end while + + val denom = mode match + case 
VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, acc / denom) + inline def unary_- : Array[Double] = val newVec = Array.ofDim[Double](vec.length) var i = 0 diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index d81df3c0..e3b0ec25 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -275,9 +275,15 @@ object arrays: sum / vec.length end mean - inline def variance: Double = meanAndVariance.variance + inline def variance: Double = variance(VarianceMode.Population) + + inline def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = val μ = vec.mean val μVec = DoubleVector.broadcast(spd, μ) @@ -306,12 +312,21 @@ object arrays: i += 1 end while - (μ, sumSqDiff / vec.length) + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, sumSqDiff / denom) end meanAndVariance - inline def std: Double = Math.sqrt(vec.variance) + inline def std: Double = std(VarianceMode.Population) - inline def stdDev: Double = vec.std + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) inline def dot(vec2: Array[Int])(using inline boundsCheck: BoundsCheck): Int = dimCheck(vec, vec2) @@ -780,18 +795,25 @@ object arrays: Matrix(out, (n, m))(using BoundsCheck.DoBoundsCheck.no) end outer - def variance: Double = - meanAndVariance.variance + inline def variance: Double = variance(VarianceMode.Population) + + def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance end variance - inline def stdDev: Double = - // 
https://www.cuemath.com/data/standard-deviation/ - val mu = vec.mean - val diffs_2 = vec.map(num => Math.pow(num - mu, 2)) - Math.sqrt(diffs_2.sumSIMD / (vec.length - 1)) - end stdDev + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = val μ = vec.mean val l = spd.length() var tmp = DoubleVector.zero(spd) @@ -813,7 +835,11 @@ object arrays: i += 1 end while - (μ, sumSqDiff * (1.0 / (vec.length - 1))) + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, sumSqDiff / denom) end meanAndVariance diff --git a/vecxt/src-native/array.scala b/vecxt/src-native/array.scala index c35dca03..53543ccc 100644 --- a/vecxt/src-native/array.scala +++ b/vecxt/src-native/array.scala @@ -58,11 +58,16 @@ object arrays: sum / vec.length end mean - inline def variance: Double = - vec.meanAndVariance.variance + inline def variance: Double = variance(VarianceMode.Population) + + inline def variance(mode: VarianceMode): Double = + vec.meanAndVariance(mode).variance end variance inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = val μ = vec.mean var acc = 0.0 var i = 0 @@ -71,12 +76,21 @@ object arrays: acc = Math.fma(diff, diff, acc) i += 1 end while - (μ, acc / vec.length) + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, acc / denom) end meanAndVariance - inline def std: 
Double = Math.sqrt(vec.variance) + inline def std: Double = std(VarianceMode.Population) - inline def stdDev: Double = vec.std + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) end extension extension [A: ClassTag](vec: Array[A]) @@ -200,18 +214,40 @@ object arrays: ranks end elementRanks - inline def variance: Double = - // https://www.cuemath.com/sample-variance-formula/ - val μ = vec.mean - vec.map(i => (i - μ) * (i - μ)).sum / (vec.length - 1) + inline def variance: Double = variance(VarianceMode.Population) + + inline def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance end variance - inline def stdDev: Double = - // https://www.cuemath.com/data/standard-deviation/ - val mu = vec.mean - val diffs_2 = vec.map(num => Math.pow(num - mu, 2)) - Math.sqrt(diffs_2.sum / (vec.length - 1)) - end stdDev + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + val μ = vec.mean + var acc = 0.0 + var i = 0 + while i < vec.length do + val diff = vec(i) - μ + acc = Math.fma(diff, diff, acc) + i += 1 + end while + + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, acc / denom) + end meanAndVariance inline def mean: Double = vec.sum / vec.length diff --git a/vecxt/src/all.scala b/vecxt/src/all.scala index 1de60478..b340baf9 100644 --- a/vecxt/src/all.scala +++ b/vecxt/src/all.scala @@ -8,6 +8,7 @@ object all: 
export vecxt.arrayUtil.* export vecxt.arrays.* export vecxt.DoubleArrays.* + export vecxt.VarianceMode // export vecxt.JsNativeDoubleArrays.* export vecxt.BooleanArrays.* diff --git a/vecxt/src/variance.scala b/vecxt/src/variance.scala new file mode 100644 index 00000000..5a7c358b --- /dev/null +++ b/vecxt/src/variance.scala @@ -0,0 +1,11 @@ +package vecxt + +enum VarianceMode: + case Population + case Sample + +object VarianceMode: + inline def denominator(length: Int, mode: VarianceMode): Double = + mode match + case VarianceMode.Population => length.toDouble + case VarianceMode.Sample => (length - 1).toDouble diff --git a/vecxt/test/src/array.test.scala b/vecxt/test/src/array.test.scala index d44542a2..59077134 100644 --- a/vecxt/test/src/array.test.scala +++ b/vecxt/test/src/array.test.scala @@ -409,7 +409,7 @@ class ArrayExtensionSuite extends munit.FunSuite: // https://www.storyofmathematics.com/sample-variance/#:~:text=7.%20Divide%20the%20number%20you%20get%20in%20step%206%20by example 3 val ages = Array[Double](26.0, 48.0, 67.0, 39.0, 25.0, 25.0, 36.0, 44.0, 44.0, 47.0, 53.0, 52.0, 52.0, 51.0, 52.0, 40.0, 77.0, 44.0, 40.0, 45.0, 48.0, 49.0, 19.0, 54.0, 82.0) - val variance = ages.variance + val variance = ages.variance(VarianceMode.Sample) assertEqualsDouble(variance, 216.82, 0.01) } diff --git a/vecxt/test/src/simple.stats.scala b/vecxt/test/src/simple.stats.scala index 2922c56c..27455fd7 100644 --- a/vecxt/test/src/simple.stats.scala +++ b/vecxt/test/src/simple.stats.scala @@ -26,8 +26,8 @@ class StatsSuite extends munit.FunSuite: test("sample variance and std") { val v = Array[Double](2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0) - assertEqualsDouble(v.variance, 4.571429, 0.00001) - assertEqualsDouble(v.stdDev, 2.13809, 0.00001) + assertEqualsDouble(v.variance(VarianceMode.Sample), 4.571429, 0.00001) + assertEqualsDouble(v.stdDev(VarianceMode.Sample), 2.13809, 0.00001) } test("elementRanks") { From 8cf8faee33d19a45acb5ec993c8ffa11b895cc06 Mon Sep 17 00:00:00 
2001 From: partens Date: Mon, 19 Jan 2026 15:18:42 +0100 Subject: [PATCH 08/75] . --- vecxt/src-js/array.scala | 1 + vecxt/src/variance.scala | 2 ++ 2 files changed, 3 insertions(+) diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index 7e58708d..d7621bd5 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -207,6 +207,7 @@ object arrays: case VarianceMode.Sample => (vec.length - 1).toDouble (μ, acc / denom) + end meanAndVariance inline def unary_- : Array[Double] = val newVec = Array.ofDim[Double](vec.length) diff --git a/vecxt/src/variance.scala b/vecxt/src/variance.scala index 5a7c358b..be7dc2b1 100644 --- a/vecxt/src/variance.scala +++ b/vecxt/src/variance.scala @@ -3,9 +3,11 @@ package vecxt enum VarianceMode: case Population case Sample +end VarianceMode object VarianceMode: inline def denominator(length: Int, mode: VarianceMode): Double = mode match case VarianceMode.Population => length.toDouble case VarianceMode.Sample => (length - 1).toDouble +end VarianceMode From 9dfce4aba73e7299728c51d0f5711c94b547dec8 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 15:41:13 +0100 Subject: [PATCH 09/75] DRY a little --- vecxt/src-jvm/arrays.scala | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index e3b0ec25..307fcb7a 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -250,29 +250,8 @@ object arrays: temp end sumSIMD - inline def mean: Double = - var i = 0 - var acc = DoubleVector.zero(spd) - val tmp = new Array[Double](spdl) - - while i < spd.loopBound(vec.length) do - var lane = 0 - while lane < spdl do - tmp(lane) = vec(i + lane).toDouble - lane += 1 - end while - - acc = acc.add(DoubleVector.fromArray(spd, tmp, 0)) - i += spdl - end while - - var sum = acc.reduceLanes(VectorOperators.ADD) - while i < vec.length do - sum += vec(i) - i += 1 - end while - - sum / vec.length + inline def mean: 
Double = + sumSIMD / vec.length end mean inline def variance: Double = variance(VarianceMode.Population) From 9d827ee7bfec88684b8a03834e7adee3bf8b72ec Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 17:19:30 +0100 Subject: [PATCH 10/75] scenarios --- .vscode/launch.json | 2 +- vecxt/src-jvm/arrays.scala | 4 +- vecxt/src/doublearray.scala | 12 ++ vecxt/src/intarray.scala | 12 ++ vecxt/test/src/array.test.scala | 7 ++ vecxt/test/src/intarray.test.scala | 39 ++++-- vecxt_re/package.mill | 3 +- vecxt_re/resources/distDensity.vg.json | 2 +- vecxt_re/src-jvm/imposeClustering.scala | 88 ++++++++++++++ vecxt_re/src/groupSums.scala | 23 ++++ vecxt_re/src/scenario.scala | 132 +++++++++++++++++++++ vecxt_re/src/scenarr.scala | 130 ++++++++++++++++++++ vecxt_re/test/src/groupSumCount.test.scala | 52 ++++++++ vecxt_re/test/src/scenario.test.scala | 45 +++++++ vecxt_re/test/src/vecEquals.scala | 10 ++ 15 files changed, 546 insertions(+), 15 deletions(-) create mode 100644 vecxt_re/src-jvm/imposeClustering.scala create mode 100644 vecxt_re/src/scenario.scala create mode 100644 vecxt_re/src/scenarr.scala create mode 100644 vecxt_re/test/src/groupSumCount.test.scala create mode 100644 vecxt_re/test/src/scenario.test.scala diff --git a/.vscode/launch.json b/.vscode/launch.json index 3e8bca59..501631e9 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -9,7 +9,7 @@ "request": "launch", "name": "test Suite", "buildTarget": "vecxt.jvm.test", - "testClass": "vecxt.LongArraysSuite", + "testClass": "vecxt.IntArrayExtensionSuite", "jvmOptions": [ "--add-modules=jdk.incubator.vector" ], diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index 307fcb7a..f45c7df1 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -250,8 +250,8 @@ object arrays: temp end sumSIMD - inline def mean: Double = - sumSIMD / vec.length + inline def mean: Double = + sumSIMD / vec.length.toDouble end mean inline def variance: Double = 
variance(VarianceMode.Population) diff --git a/vecxt/src/doublearray.scala b/vecxt/src/doublearray.scala index 70f60d8d..cc4835b0 100644 --- a/vecxt/src/doublearray.scala +++ b/vecxt/src/doublearray.scala @@ -2,6 +2,18 @@ package vecxt object DoubleArrays: extension (vec: Array[Double]) + // TODO bnenchmark. + inline def select(indicies: Array[Int]): Array[Double] = + val len = indicies.length + val out = Array.ofDim[Double](len) + var i = 0 + while i < len do + out(i) = vec(indicies(i)) + i += 1 + end while + out + end select + inline def unique: Array[Double] = if vec.size == 0 then Array.empty[Double] else diff --git a/vecxt/src/intarray.scala b/vecxt/src/intarray.scala index a99eaf01..6dfe27be 100644 --- a/vecxt/src/intarray.scala +++ b/vecxt/src/intarray.scala @@ -4,6 +4,17 @@ import scala.util.control.Breaks.* object IntArrays: extension (arr: Array[Int]) + inline def select(indicies: Array[Int]): Array[Int] = + val len = indicies.length + val out = Array.ofDim[Int](len) + var i = 0 + while i < len do + out(i) = arr(indicies(i)) + i += 1 + end while + out + end select + inline def contiguous: Boolean = var i = 1 var out = true @@ -17,6 +28,7 @@ object IntArrays: end while } out + end contiguous end extension end IntArrays diff --git a/vecxt/test/src/array.test.scala b/vecxt/test/src/array.test.scala index 59077134..313cf417 100644 --- a/vecxt/test/src/array.test.scala +++ b/vecxt/test/src/array.test.scala @@ -432,6 +432,13 @@ class ArrayExtensionSuite extends munit.FunSuite: // assertEqualsDouble(v1.qdep(0.95, v3), 0.8, 0.0001) } + test("select picks elements by index order") { + val v = Array[Double](10.0, 20.0, 30.0, 40.0) + val idx = Array(3, 1, 0) + val out = v.select(idx) + assertEquals(out.toSeq, Seq(40.0, 20.0, 10.0)) + } + test("tvar index") { import vecxt.reinsurance.tVarIdx val v1 = Array.tabulate[Double](100)(_.toDouble) diff --git a/vecxt/test/src/intarray.test.scala b/vecxt/test/src/intarray.test.scala index 84fca641..23d060a4 100644 --- 
a/vecxt/test/src/intarray.test.scala +++ b/vecxt/test/src/intarray.test.scala @@ -67,36 +67,55 @@ class IntArrayExtensionSuite extends munit.FunSuite: test("mean arithmetic progression") { val v = Array.tabulate[Int](10)(identity) - assertEqualsDouble(math.abs(v.mean - 4.5d), 0.0, 1e-12) + println(v.printArr) + assertEqualsDouble(v.mean, 4.5d, 1e-12) } test("variance/std zero spread") { val v = Array.fill[Int](6)(7) - assertEqualsDouble(math.abs(v.mean - 7d), 0.0, 1e-12) - assertEqualsDouble(math.abs(v.variance), 0.0, 1e-12) - assertEqualsDouble(math.abs(v.std), 0.0, 1e-12) + assertEqualsDouble(v.mean, 7d, 0.0, 1e-12) + assertEqualsDouble(v.variance, 0.0, 1e-12) + assertEqualsDouble(v.std, 0.0, 1e-12) } test("variance/std arithmetic progression") { val v = Array.tabulate[Int](10)(identity) val expectedVar = 8.25d - assertEqualsDouble(math.abs(v.variance - expectedVar), 0.0, 1e-9) - assertEqualsDouble(math.abs(v.std - math.sqrt(expectedVar)), 0.0, 1e-9) + assertEqualsDouble(v.variance, expectedVar, 1e-9) + assertEqualsDouble(v.std, math.sqrt(expectedVar), 1e-9) } test("meanAndVariance zero spread") { val v = Array.fill[Int](6)(7) val stats = v.meanAndVariance - assertEqualsDouble(math.abs(stats.mean - 7d), 0.0, 1e-12) - assertEqualsDouble(math.abs(stats.variance), 0.0, 1e-12) + assertEqualsDouble(stats.mean, 7d, 1e-12) + assertEqualsDouble(stats.variance, 0.0, 1e-12) } test("meanAndVariance arithmetic progression") { val v = Array.tabulate[Int](10)(identity) val stats = v.meanAndVariance val expectedVar = 8.25d - assertEqualsDouble(math.abs(stats.mean - 4.5d), 0.0, 1e-12) - assertEqualsDouble(math.abs(stats.variance - expectedVar), 0.0, 1e-9) + assertEqualsDouble(stats.mean, 4.5d, 1e-12) + assertEqualsDouble(stats.variance, expectedVar, 1e-9) + } + + test("select picks indices in order") { + val base = Array.tabulate[Int](10)(identity) + val idx = Array(0, 3, 5, 9) + assertVecEquals(base.select(idx), Array(0, 3, 5, 9)) + } + + test("select handles duplicates and 
unsorted indices") { + val base = Array.tabulate[Int](6)(identity) + val idx = Array(5, 2, 5, 0) + assertVecEquals(base.select(idx), Array(5, 2, 5, 0)) + } + + test("select with empty index array") { + val base = Array.tabulate[Int](4)(identity) + val idx = Array.emptyIntArray + assertVecEquals(base.select(idx), Array.emptyIntArray) } end IntArrayExtensionSuite diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill index 6c51e449..23182543 100644 --- a/vecxt_re/package.mill +++ b/vecxt_re/package.mill @@ -33,7 +33,8 @@ object `package` extends Module: override def mvnDeps = super.mvnDeps() ++ Seq( mvn"io.github.quafadas::scautable:0.0.35", - mvn"io.github.quafadas::dedav4s:0.10.3" + mvn"io.github.quafadas::dedav4s:0.10.3", + mvn"org.apache.commons:commons-math4-core:4.0-beta1" ) object test extends VexctReTest, ScalaTests: diff --git a/vecxt_re/resources/distDensity.vg.json b/vecxt_re/resources/distDensity.vg.json index 2b2f2658..c7dff452 100644 --- a/vecxt_re/resources/distDensity.vg.json +++ b/vecxt_re/resources/distDensity.vg.json @@ -250,7 +250,7 @@ "title": "TRET", "titleAnchor": "middle", "gridOpacity": 0.5, - "domain": "false" + "domain": false } ] } \ No newline at end of file diff --git a/vecxt_re/src-jvm/imposeClustering.scala b/vecxt_re/src-jvm/imposeClustering.scala new file mode 100644 index 00000000..81b5fef4 --- /dev/null +++ b/vecxt_re/src-jvm/imposeClustering.scala @@ -0,0 +1,88 @@ +// package vecxt_re + +// import vecxt.all.* + +// extension (scenario: Scenario) +// def imposeClustering(newCoeff: Double): Scenario = { +// // expectation and variance of new scenario +// val numItrs = scenario.numberIterations +// val frequency = scenario.freq +// val e = frequency.mean +// val v = newCoeff * Math.pow(e, 2) + e + +// // in (r,p) form +// val p = e / v +// val r = e * p / (1 - p) + +// val newDist: DiscreteDistr[Int] with Product = if (newCoeff > 0) { +// breeze.stats.distributions.NegativeBinomial(r, 1 - p) // different parameterisation to 
matlab +// } else { +// breeze.stats.distributions.Poisson(e) +// } + +// var newFreq: IndexedSeq[Int] = newDist.sample(numberIterations) +// val maxSteps = 10 +// val sumEvents = scenario.events.length +// var step = 0 + +// def matchMean( +// inFreq: IndexedSeq[Int], +// sumEvents: Int, +// newCoeff: Double +// ): IndexedSeq[Int] = { +// val delta = sumEvents - inFreq.sum; +// val anz = math.min(numItrs, Math.abs(delta)) +// val asVector: Array[Int] = Array(inFreq: _*) // for slicing... +// val asVectorDouble = convert(asVector, Double) + +// delta match { +// case n if (n < 0) => { + +// val d = breeze.numerics.abs(asVectorDouble - Math.max(Math.ceil(mean(asVectorDouble)), 1)) +// val temp: Matrix[Double] = Matrix(d.toArray.toScalaVector.zipWithIndex.map { case (x, y) => (x, y.toDouble) }: _*) +// val sorted = sortrows(temp, Vector(0)) +// sorted(::, 0) +// val idx = convert(sorted(::, 1), Int) +// val changeThese = idx(0 until anz) +// asVector(changeThese.toScalaVector) -= 1 +// val check = (asVector <:< 0).activeKeysIterator.toVector +// asVector(check) += 1 +// asVector.toScalaVector +// } +// case n if (n > 0) => { +// val d = breeze.numerics.abs(asVectorDouble - Math.floor(mean(asVectorDouble))) +// val temp: Matrix[Double] = Matrix(d.toArray.toScalaVector.zipWithIndex.map { case (x, y) => (x, y.toDouble) }: _*) +// val sorted = sortrows(temp, Vector(0)) +// sorted(::, 0) +// val idx = convert(sorted(::, 1), Int) +// val changeThese = idx(0 until anz) +// asVector(changeThese.toScalaVector) += 1 +// asVector.toScalaVector +// } +// } +// } +// while (newFreq.sum != sumEvents && step <= maxSteps) { +// newFreq = matchMean(newFreq, sumEvents, newCoeff) +// step = step + 1 +// } +// val frequencyC = convert(Array(newFreq: _*), Double) +// val meanFreqC = mean(frequencyC) +// (variance(frequencyC) - meanFreqC) / Math.pow(meanFreqC, 2) + +// if (step == maxSteps) { +// throw new Exception("Max steps reached, this probably didn't work") +// } +// val builder = 
Vector.newBuilder[Int] +// for ((numEvents, itr) <- newFreq.zipWithIndex) { +// // decumcount +// val etend = for (_ <- 1 to numEvents if numEvents > 0) yield (itr + 1) +// builder ++= etend +// } +// val decumcount = builder.result() + +// val zipTogether = decumcount.zip(events) +// val permute = zipTogether.map { case (itr, event) => event.copy(iteration = itr) } + +// scenario.copy(events = permute) + +// } diff --git a/vecxt_re/src/groupSums.scala b/vecxt_re/src/groupSums.scala index 49aca8a1..60c30cd7 100644 --- a/vecxt_re/src/groupSums.scala +++ b/vecxt_re/src/groupSums.scala @@ -51,3 +51,26 @@ inline def groupSum(groups: Array[Int], values: Array[Double], nitr: Int): Array result end groupSum + +/** - count by group index + * - Each group has a small number of values. + * - Each the groups are keyed by their index. + * - assumes groups are already sorted + */ +inline def groupCount(groups: Array[Int], nitr: Int): Array[Int] = + val result = Array.fill(nitr)(0) + val l = groups.length + var i = 0 + while i < l do + val g = groups(i) + var groupSum = 0 + // Process block of same group, computing cumulative sum + while i < l && groups(i) == g do + groupSum += 1 + i += 1 + end while + result(g - 1) = groupSum + end while + + result +end groupCount diff --git a/vecxt_re/src/scenario.scala b/vecxt_re/src/scenario.scala new file mode 100644 index 00000000..abcda8fd --- /dev/null +++ b/vecxt_re/src/scenario.scala @@ -0,0 +1,132 @@ +package vecxt_re + +import vecxt.all.* + +import java.time.LocalDate +import java.time.temporal.ChronoUnit + +case class Event(eventId: Long = scala.util.Random.nextLong(), iteration: Int = 0, day: Int = 0, loss: Double = 0): + def multiplyBy(scale: Double): Event = this.copy(loss = loss * scale) +end Event + +object Event: + inline def random(maxAmount: Double = 20, maxIter: Int = 10) = + Event(iteration = scala.util.Random.nextInt(maxIter), loss = scala.util.Random.nextDouble() * maxAmount) + inline def apply(iter: Int, amount: 
Double): Event = Event( + iteration = iter, + loss = amount + ) +end Event + +// case class IterationFrequency(itr: Int, freq: Int) + +// case class IterationAmount(itr: Int, amnt: Double) + +case class Scenario( + events: IndexedSeq[Event] = Vector(), + numberIterations: Int = 0, + threshold: Double = 0d, + day1: LocalDate = LocalDate.of(2019, 1, 1), + name: String = "", + id: Long = scala.util.Random.nextLong() +): + + lazy val eventsSorted: Array[Event] = Array.from(events.sortBy(event => (event.iteration, event.day))) + + lazy val freq: Array[Int] = groupCount(iterations, numberIterations) + + lazy val meanFreq: Double = freq.mean + + lazy val agg: Array[Double] = groupSum(iterations, amounts, numberIterations) + + lazy val claimDates: Array[LocalDate] = eventsSorted.map(d => ChronoUnit.DAYS.addTo(this.day1, d.day)) + + /** Interpretation: + * + * - Excess variance over Poisson, scaled by m^2: Var(X) = E[X] for Poisson, so (v - m) is the extra variance; + * dividing by m^2 scales it. + * - Method-of-moments estimate of 1/k for Negative Binomial: Var(X) = μ + μ^2 / k ⇒ (Var(X) - μ) / μ^2 = 1 / k. + * Thus, clusterCoeff estimates 1 / k. Smaller k (larger clusterCoeff) ⇒ more clustering/overdispersion. + * - Relation to Index of Dispersion (VMR = v / m): clusterCoeff = (v - m) / m^2 = (VMR - 1) / m. It is a + * mean-scaled excess dispersion; under NB, it targets 1 / k. + */ + lazy val clusterCoeff: Double = + val (m, v) = freq.meanAndVariance(VarianceMode.Sample) + (v - m) / Math.pow(m, 2) + end clusterCoeff + + /** Computes the variance-to-mean ratio (dispersion) based on the frequency data. This metric is calculated by + * dividing the variance by the mean, using values from `freq.meanAndVariance`. + * + * 1 = poisson distributed > 1 => overdispersed... but careful with sample size. 
+ */ + lazy val varianceMeanRatio = + val (m, v) = freq.meanAndVariance(VarianceMode.Sample) + v / m + end varianceMeanRatio + + lazy val hasOccurence: Boolean = events.nonEmpty + + lazy val numSeasons: Int = math.ceil(days.maxSIMD / 365).toInt // doesnt deal so well with leap years. + + lazy val meanLoss: Double = amounts.sum / numberIterations + + lazy val days: Array[Int] = eventsSorted.map(_.day) + + lazy val iterations: Array[Int] = eventsSorted.map(_.iteration) + + lazy val amounts: Array[Double] = eventsSorted.map(_.loss) + + lazy val itrDayAmount: (itr: Array[Int], days: Array[Int], amounts: Array[Double]) = + (itr = iterations, days = days, amounts = amounts) + + lazy val period: (firstLoss: LocalDate, lastLoss: LocalDate) = + (day1.plusDays((days.minSIMD - 1).toLong), day1.plusDays((days.maxSIMD - 1).toLong)) + +end Scenario + +extension (scenario: Scenario) + inline def scaleAmntBy(scale: Double): Scenario = Scenario( + scenario.eventsSorted.map(_.multiplyBy(scale)), + scenario.numberIterations, + scenario.threshold * scale, + scenario.day1, + scenario.name + ) + + def shiftDay1To(date: LocalDate): Scenario = + scenario.period.firstLoss.plusYears(1).minusDays(1) +// val ndays = ChronoUnit.DAYS.between( period._1, seasonEnd) + 1 Let sjust ssume this is 365 ... there is a theoretical problem with air assuming 365 days. Leap years anyone? 
+ val betweenStartDates = ChronoUnit.DAYS.between(scenario.day1, date).toInt + val newEvents = + scenario.eventsSorted.map(x => + Event(x.eventId, x.iteration, Math.floorMod(x.day - betweenStartDates - 1, 365) + 1, x.loss) + ) + Scenario(newEvents, scenario.numberIterations, scenario.threshold, date, scenario.name) + end shiftDay1To + + inline def removeClaimsAfter(date: LocalDate): Scenario = + val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) <= 0) + Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + end removeClaimsAfter + + inline def removeClaimsBefore(date: LocalDate): Scenario = + val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) >= 0) + Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + end removeClaimsBefore + + inline def applyThreshold(newThresh: Double): Scenario = + if !(newThresh > scenario.threshold) then + throw new Exception( + "Threshold may only be increased. 
Attempt to change it from " + scenario.threshold + " to " + newThresh + " is illegal" + ) + end if + Scenario( + scenario.eventsSorted.filter(_.loss > newThresh), + scenario.numberIterations, + newThresh, + scenario.day1, + scenario.name + ) + end applyThreshold +end extension diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala new file mode 100644 index 00000000..f1ebf2db --- /dev/null +++ b/vecxt_re/src/scenarr.scala @@ -0,0 +1,130 @@ +package vecxt_re + +import vecxt.all.* + +import java.time.LocalDate +import java.time.temporal.ChronoUnit + +case class Scenarr( + iterations: Array[Int], + days: Array[Int], + amounts: Array[Double], + numberIterations: Int = 0, + threshold: Double = 0d, + day1: LocalDate = LocalDate.of(2019, 1, 1), + name: String = "", + id: Long = scala.util.Random.nextLong(), + isSorted: Boolean = false +): + assert(iterations.length == days.length && days.length == amounts.length) + + lazy val freq: Array[Int] = groupCount(iterations, numberIterations) + + lazy val meanFreq: Double = freq.mean + + lazy val agg: Array[Double] = groupSum(iterations, amounts, numberIterations) + + lazy val claimDates: Array[LocalDate] = days.map(d => ChronoUnit.DAYS.addTo(this.day1, d)) + + /** Interpretation: + * + * - Excess variance over Poisson, scaled by m^2: Var(X) = E[X] for Poisson, so (v - m) is the extra variance; + * dividing by m^2 scales it. + * - Method-of-moments estimate of 1/k for Negative Binomial: Var(X) = μ + μ^2 / k ⇒ (Var(X) - μ) / μ^2 = 1 / k. + * Thus, clusterCoeff estimates 1 / k. Smaller k (larger clusterCoeff) ⇒ more clustering/overdispersion. + * - Relation to Index of Dispersion (VMR = v / m): clusterCoeff = (v - m) / m^2 = (VMR - 1) / m. It is a + * mean-scaled excess dispersion; under NB, it targets 1 / k. 
+ */ + lazy val clusterCoeff: Double = + val (m, v) = freq.meanAndVariance(VarianceMode.Sample) + (v - m) / Math.pow(m, 2) + end clusterCoeff + + /** Computes the variance-to-mean ratio (dispersion) based on the frequency data. This metric is calculated by + * dividing the variance by the mean, using values from `freq.meanAndVariance`. + * + * 1 = poisson distributed > 1 => overdispersed... but careful with sample size. + */ + lazy val varianceMeanRatio = + val (m, v) = freq.meanAndVariance(VarianceMode.Sample) + v / m + end varianceMeanRatio + + lazy val hasOccurence: Boolean = amounts.nonEmpty + + lazy val numSeasons: Int = math.ceil(days.maxSIMD / 365).toInt // doesnt deal so well with leap years. + + lazy val meanLoss: Double = amounts.sum / numberIterations + + lazy val itrDayAmount: (itr: Array[Int], days: Array[Int], amounts: Array[Double]) = + (itr = iterations, days = days, amounts = amounts) + + lazy val period: (firstLoss: LocalDate, lastLoss: LocalDate) = + (day1.plusDays((days.minSIMD - 1).toLong), day1.plusDays((days.maxSIMD - 1).toLong)) + +end Scenarr + +object Scenarr: + extension (scenario: Scenarr) + inline def sorted: Scenarr = + val indicies = scenario.iterations.zipWithIndex + .zip(scenario.days) + .map { case ((iter, idx), day) => + (index = idx, iter = iter, day = day) + } + .sortBy(r => (r.iter, r.day)) + .map(_.index) + + Scenarr( + scenario.iterations.select(indicies), + scenario.days.select(indicies), + scenario.amounts.select(indicies), + scenario.numberIterations, + scenario.threshold, + scenario.day1, + scenario.name, + scenario.id, + isSorted = true + ) + end sorted + + inline def scaleAmntBy(scale: Double): Scenarr = + scenario.copy(amounts = scenario.amounts * scale, threshold = scenario.threshold * scale) + end extension + + // def shiftDay1To(date: LocalDate): Scenarr = + // scenario.period.firstLoss.plusYears(1).minusDays(1) + // // val ndays = ChronoUnit.DAYS.between( period._1, seasonEnd) + 1 Let sjust ssume this is 365 ... 
there is a theoretical problem with air assuming 365 days. Leap years anyone? + // val betweenStartDates = ChronoUnit.DAYS.between(scenario.day1, date).toInt + // val newEvents = + // scenario.eventsSorted.map(x => + // Event(x.eventId, x.iteration, Math.floorMod(x.day - betweenStartDates - 1, 365) + 1, x.loss) + // ) + // Scenario(newEvents, scenario.numberIterations, scenario.threshold, date, scenario.name) + // end shiftDay1To + + // inline def removeClaimsAfter(date: LocalDate): Scenarr = + // val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) <= 0) + // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + // end removeClaimsAfter + + // inline def removeClaimsBefore(date: LocalDate): Scenarr = + // val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) >= 0) + // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + // end removeClaimsBefore + + // inline def applyThreshold(newThresh: Double): Scenarr = + // if !(newThresh > scenario.threshold) then + // throw new Exception( + // "Threshold may only be increased. 
Attempt to change it from " + scenario.threshold + " to " + newThresh + " is illegal" + // ) + // end if + // Scenarr( + // scenario.eventsSorted.filter(_.loss > newThresh), + // scenario.numberIterations, + // newThresh, + // scenario.day1, + // scenario.name + // ) + // end applyThreshold +end Scenarr diff --git a/vecxt_re/test/src/groupSumCount.test.scala b/vecxt_re/test/src/groupSumCount.test.scala new file mode 100644 index 00000000..cf80a2b8 --- /dev/null +++ b/vecxt_re/test/src/groupSumCount.test.scala @@ -0,0 +1,52 @@ +package vecxt_re + +import munit.FunSuite + +class GroupSumCountSuite extends FunSuite: + + test("groupSum aggregates per 1-based group index with gaps") { + val groups = Array(1, 1, 2, 4, 4) + val values = Array(2.0, 3.0, 5, 10, 20) + + val result = groupSum(groups, values, nitr = 4) + + assertEquals(result.length, 4) + assertVecEquals(result, Array(5.0, 5, 0, 30)) + } + + test("groupCount counts occurrences per group index") { + val groups = Array(1, 1, 2, 4, 4) + + val result = groupCount(groups, nitr = 4) + + assertEquals(result.length, 4) + assertVecEquals(result, Array(2, 1, 0, 2)) + } + + test("handles empty input by returning zeroed buckets") { + val groups = Array.empty[Int] + val values = Array.empty[Double] + + val sumResult = groupSum(groups, values, nitr = 3) + val countResult = groupCount(groups, nitr = 3) + + assertEquals(sumResult.length, 3) + assertEquals(countResult.length, 3) + assertVecEquals(sumResult, Array(0.0, 0, 0)) + assertVecEquals(countResult, Array(0, 0, 0)) + } + + test("single group spanning all entries") { + val groups = Array(3, 3, 3) + val values = Array(1.5, 2.5, -4) + + val sumResult = groupSum(groups, values, nitr = 4) + val countResult = groupCount(groups, nitr = 4) + + val expectedSum = Array(0, 0, values.sum, 0) + val expectedCount = Array(0, 0, 3, 0) + + assertVecEquals(sumResult, expectedSum) + assertVecEquals(countResult, expectedCount) + } +end GroupSumCountSuite diff --git 
a/vecxt_re/test/src/scenario.test.scala b/vecxt_re/test/src/scenario.test.scala new file mode 100644 index 00000000..15ee620c --- /dev/null +++ b/vecxt_re/test/src/scenario.test.scala @@ -0,0 +1,45 @@ +package vecxt_re + +class ScenarioSuite extends munit.FunSuite: + + test("Events") { + + val event = Event.random + + } + + test("Random Scenario") { + val numItr = 10 + val s = Scenario( + Vector.fill(10)(Event.random(maxIter = numItr)), + numItr + ) + + assertEquals(s.iterations.length, 10) + assertEquals(s.amounts.length, 10) + + assert(s.hasOccurence) + } + + test("Some scenario stats") { + val e1 = Event(1, 15.0) + val e2 = Event(4, 25.0) + val e3 = Event(4, 1.0) + val e4 = Event(4, 1.0) + val e5 = Event(4, 1.0) + val numItr = 5 + + val s = Scenario( + Vector(e2, e3, e4, e5, e1), + numItr + ) + + assertVecEquals(s.freq, Array(1, 0, 0, 4, 0)) + assertVecEquals(s.agg, Array(15.0, 0, 0, 28.0, 0)) + assertEqualsDouble(s.meanFreq, (1 + 4) / 5.0, 0.00000001) + assertEqualsDouble(s.clusterCoeff, 2.0, 0.000001) + assertEqualsDouble(s.varianceMeanRatio, 3, 0.00001) + + } + +end ScenarioSuite diff --git a/vecxt_re/test/src/vecEquals.scala b/vecxt_re/test/src/vecEquals.scala index f0aa42d8..166df902 100644 --- a/vecxt_re/test/src/vecEquals.scala +++ b/vecxt_re/test/src/vecEquals.scala @@ -1,6 +1,7 @@ package vecxt_re import munit.Assertions.assertEqualsDouble +import munit.Assertions.assertEquals def assertVecEquals(v1: Array[Double], v2: Array[Double])(implicit loc: munit.Location): Unit = assert(v1.length == v2.length) @@ -10,3 +11,12 @@ def assertVecEquals(v1: Array[Double], v2: Array[Double])(implicit loc: munit.Lo i += 1 end while end assertVecEquals + +def assertVecEquals(v1: Array[Int], v2: Array[Int])(implicit loc: munit.Location): Unit = + assert(v1.length == v2.length) + var i: Int = 0; + while i < v1.length do + assertEquals(v1(i), v2(i), clue = s"at index $i") + i += 1 + end while +end assertVecEquals From 5225bfe845bc2fee68d6d330ca6a7010fd41a47d Mon Sep 17 
00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 17:30:19 +0100 Subject: [PATCH 11/75] fmt --- vecxt/src-js/array.scala | 48 +++++++++++++++++++ vecxt/src-native/array.scala | 48 +++++++++++++++++++ .../{src-jvm => src}/IntArrays.test.scala | 0 vecxt_re/src/scenarr.scala | 17 +++++-- 4 files changed, 108 insertions(+), 5 deletions(-) rename vecxt/test/{src-jvm => src}/IntArrays.test.scala (100%) diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index d7621bd5..574ef9ce 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -123,6 +123,54 @@ object arrays: inline def stdDev: Double = stdDev(VarianceMode.Population) inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def minSIMD: Int = + var i = 0 + var acc = Int.MaxValue + while i < vec.length do + val v = vec(i) + if v < acc then acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Int = + var i = 0 + var acc = Int.MinValue + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD + + inline def minSIMD: Double = + var i = 0 + var acc = Double.PositiveInfinity + while i < vec.length do + val v = vec(i) + if v < acc then acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Double = + var i = 0 + var acc = Double.NegativeInfinity + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD end extension extension (vec: Array[Double]) diff --git a/vecxt/src-native/array.scala b/vecxt/src-native/array.scala index 53543ccc..a3594743 100644 --- a/vecxt/src-native/array.scala +++ b/vecxt/src-native/array.scala @@ -91,6 +91,54 @@ object arrays: inline def stdDev: Double = stdDev(VarianceMode.Population) inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def minSIMD: Double = + var i = 0 + var acc = Double.PositiveInfinity + while i < vec.length do + val v = vec(i) + if v < acc then 
acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Double = + var i = 0 + var acc = Double.NegativeInfinity + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD + + inline def minSIMD: Int = + var i = 0 + var acc = Int.MaxValue + while i < vec.length do + val v = vec(i) + if v < acc then acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Int = + var i = 0 + var acc = Int.MinValue + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD end extension extension [A: ClassTag](vec: Array[A]) diff --git a/vecxt/test/src-jvm/IntArrays.test.scala b/vecxt/test/src/IntArrays.test.scala similarity index 100% rename from vecxt/test/src-jvm/IntArrays.test.scala rename to vecxt/test/src/IntArrays.test.scala diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index f1ebf2db..291d3e7c 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -18,11 +18,18 @@ case class Scenarr( ): assert(iterations.length == days.length && days.length == amounts.length) - lazy val freq: Array[Int] = groupCount(iterations, numberIterations) - - lazy val meanFreq: Double = freq.mean - - lazy val agg: Array[Double] = groupSum(iterations, amounts, numberIterations) + lazy val freq: Array[Int] = + assert(isSorted) + groupCount(iterations, numberIterations) + end freq + + lazy val meanFreq: Double = + freq.mean + + lazy val agg: Array[Double] = + assert(isSorted) + groupSum(iterations, amounts, numberIterations) + end agg lazy val claimDates: Array[LocalDate] = days.map(d => ChronoUnit.DAYS.addTo(this.day1, d)) From f4cab704263be1fc62e074a074cabf867c3834da Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 19 Jan 2026 17:42:31 +0100 Subject: [PATCH 12/75] . 
--- vecxt/src-js/array.scala | 33 ++++++++++++------------ vecxt/src-native/array.scala | 49 ++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index 574ef9ce..3b6eb3f2 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -148,6 +148,23 @@ object arrays: acc end maxSIMD + extension (vec: Array[Double]) + + inline def apply(index: Array[Boolean])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = + dimCheck(vec, index) + val trues = index.trues + val newVec = Array.ofDim[Double](trues) + var j = 0 + for i <- 0 until index.length do + // println(s"i: $i || j: $j || ${index(i)} ${vec(i)} ") + if index(i) then + newVec(j) = vec(i) + j = 1 + j + end for + newVec + end apply + + inline def minSIMD: Double = var i = 0 var acc = Double.PositiveInfinity @@ -173,22 +190,6 @@ object arrays: end maxSIMD end extension - extension (vec: Array[Double]) - - inline def apply(index: Array[Boolean])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = - dimCheck(vec, index) - val trues = index.trues - val newVec = Array.ofDim[Double](trues) - var j = 0 - for i <- 0 until index.length do - // println(s"i: $i || j: $j || ${index(i)} ${vec(i)} ") - if index(i) then - newVec(j) = vec(i) - j = 1 + j - end for - newVec - end apply - def increments: Array[Double] = val out = Array.ofDim[Double](vec.length) out(0) = vec(0) diff --git a/vecxt/src-native/array.scala b/vecxt/src-native/array.scala index a3594743..a80b07c1 100644 --- a/vecxt/src-native/array.scala +++ b/vecxt/src-native/array.scala @@ -92,30 +92,6 @@ object arrays: inline def stdDev(mode: VarianceMode): Double = std(mode) - inline def minSIMD: Double = - var i = 0 - var acc = Double.PositiveInfinity - while i < vec.length do - val v = vec(i) - if v < acc then acc = v - end if - i += 1 - end while - acc - end minSIMD - - inline def maxSIMD: Double = - var i = 0 - var acc = 
Double.NegativeInfinity - while i < vec.length do - val v = vec(i) - if v > acc then acc = v - end if - i += 1 - end while - acc - end maxSIMD - inline def minSIMD: Int = var i = 0 var acc = Int.MaxValue @@ -172,6 +148,31 @@ object arrays: newVec end apply + + inline def minSIMD: Double = + var i = 0 + var acc = Double.PositiveInfinity + while i < vec.length do + val v = vec(i) + if v < acc then acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Double = + var i = 0 + var acc = Double.NegativeInfinity + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD + inline def product: Double = var sum = 1.0 var i = 0; From e76140f97b76015c61ae0cb99cc583f730999dce Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Mon, 19 Jan 2026 22:44:04 +0100 Subject: [PATCH 13/75] . --- vecxt_re/resources/distDensity.vg.json | 24 ++++++++++++------------ vecxt_re/src-jvm/plots.scala | 1 + 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/vecxt_re/resources/distDensity.vg.json b/vecxt_re/resources/distDensity.vg.json index c7dff452..1af950a0 100644 --- a/vecxt_re/resources/distDensity.vg.json +++ b/vecxt_re/resources/distDensity.vg.json @@ -6,31 +6,31 @@ "name": "points", "values": [ { - "tret": 1.1, + "amount": 1.1, "probability": 0.1, "c": "first", "offset": 0.5 }, { - "tret": 1.01, + "amount": 1.01, "probability": 0.2, "c": "first", "offset": 0.5 }, { - "tret": 0.99, + "amount": 0.99, "probability": 0.3, "c": "first", "offset": 0.5 }, { - "tret": 0.5, + "amount": 0.5, "probability": 0.5, "c": "first", "offset": 0.5 }, { - "tret": 0.1, + "amount": 0.1, "probability": 0.99, "c": "first", "offset": 0.5 @@ -44,7 +44,7 @@ { "type": "aggregate", "fields": [ - "tret", + "amount", "offset" ], "groupby": [ @@ -91,7 +91,7 @@ "fields": [ { "data": "points", - "field": "tret" + "field": "amount" } ] } @@ -129,14 +129,14 @@ }, "y": { "scale": "yscale", - "field": "tret" + "field": "amount" 
}, "stroke": { "scale": "color", "field": "c" }, "tooltip": { - "signal": "{tret : format(datum.tret*100, \".3f\")+\"%\" , probTretSmaller: format(datum.probability*100,\".3f\")+\"%\" }" + "signal": "{amount : format(datum.amount*100, \".3f\")+\"%\" , probAmountSmaller: format(datum.probability*100,\".3f\")+\"%\" }" } }, "update": { @@ -164,7 +164,7 @@ "enter": { "y": { "scale": "yscale", - "field": "tret" + "field": "amount" }, "height": { "value": 2 @@ -179,7 +179,7 @@ } }, "tooltip": { - "signal": "{tret : format(datum.tret*100, \".3f\")+\"%\" , probTretSmaller: format(datum.probability*100,\".3f\")+\"%\" }" + "signal": "{amount : format(datum.amount*100, \".3f\")+\"%\" , probAmountSmaller: format(datum.probability*100,\".3f\")+\"%\" }" }, "width": { "value": 5 @@ -247,7 +247,7 @@ "scale": "yscale", "zindex": 0, "grid": true, - "title": "TRET", + "title": "AMOUNT", "titleAnchor": "middle", "gridOpacity": 0.5, "domain": false diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 2cf6c52b..c638b93c 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -5,4 +5,5 @@ import io.github.quafadas.plots.SetupVega.* object Plots: lazy val timeline = VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount + lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density end Plots From a013054f3ab5c859a35c08a10d6b12fedc99cc64 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Mon, 19 Jan 2026 23:15:28 +0100 Subject: [PATCH 14/75] . 
--- .github/copilot-instructions.md | 11 ++++ .vscode/launch.json | 4 +- vecxt/src-js/array.scala | 14 ++++- vecxt/src-jvm/arrays.scala | 23 ++++++++ vecxt/src-native/array.scala | 14 ++++- vecxt/test/src/intScalar.test.scala | 33 +++++++++++ vecxt_re/src/scenarr.scala | 35 ++++++----- vecxt_re/test/src/scenario.test.scala | 84 +++++++++++++++++++++++++++ 8 files changed, 198 insertions(+), 20 deletions(-) create mode 100644 vecxt/test/src/intScalar.test.scala diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index ab628718..c298d672 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -45,6 +45,17 @@ vecxt/ │ ├── src-jvm/ # JVM-specific tests │ ├── src-js/ # Js-specific tests │ └── src-native/ # Scala Native-specific tests +├── vecxt_re/ # Domain specific library for reinsurance calculations +│ ├── src/ # Cross-platform shared source code +│ ├── src-jvm/ # JVM-specific implementations (SIMD Vector API) +│ ├── src-js/ # JavaScript-specific implementations +│ ├── src-js-native/ # JavaScript / native shared (DRY) implementations +│ ├── src-native/ # Scala Native-specific implementations +│ └── test/ # Cross-platform test suite (munit) +│ ├── src/ # Shared test source files +│ ├── src-jvm/ # JVM-specific tests +│ ├── src-js/ # Js-specific tests +│ └── src-native/ # Scala Native-specific tests ├── vecxt/ # Main source directory and core published module │ ├── src/ # Cross-platform shared source code │ ├── src-jvm/ # JVM-specific implementations (SIMD Vector API) diff --git a/.vscode/launch.json b/.vscode/launch.json index 501631e9..4ad93f65 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,8 +8,8 @@ "type": "scala", "request": "launch", "name": "test Suite", - "buildTarget": "vecxt.jvm.test", - "testClass": "vecxt.IntArrayExtensionSuite", + "buildTarget": "vecxt_re.jvm.test", + "testClass": "vecxt_re.ScenarioSuite", "jvmOptions": [ "--add-modules=jdk.incubator.vector" ], diff --git 
a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index 3b6eb3f2..dca707d3 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -148,6 +148,18 @@ object arrays: acc end maxSIMD + inline def -=(scalar: Int): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) - scalar + i += 1 + end while + end -= + + inline def -(scalar: Int): Array[Int] = + vec.clone().tap(_ -= scalar) + end - + extension (vec: Array[Double]) inline def apply(index: Array[Boolean])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = @@ -164,7 +176,7 @@ object arrays: newVec end apply - + inline def minSIMD: Double = var i = 0 var acc = Double.PositiveInfinity diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index f45c7df1..d18db49e 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -335,6 +335,29 @@ object arrays: vec.clone.tap(_ -= vec2) end - + inline def -=(scalar: Int): Unit = + + var i = 0 + + while i < spi.loopBound(vec.length) do + IntVector + .fromArray(spi, vec, i) + .sub(scalar) + .intoArray(vec, i) + i += spil + end while + + while i < vec.length do + vec(i) = vec(i) - scalar + i += 1 + end while + + end -= + + inline def -(scalar: Int): Array[Int] = + vec.clone().tap(_ -= scalar) + end - + inline def -=(vec2: Array[Int])(using inline boundsCheck: BoundsCheck): Unit = dimCheck(vec, vec2) var i = 0 diff --git a/vecxt/src-native/array.scala b/vecxt/src-native/array.scala index a80b07c1..5593e8b5 100644 --- a/vecxt/src-native/array.scala +++ b/vecxt/src-native/array.scala @@ -115,6 +115,18 @@ object arrays: end while acc end maxSIMD + + inline def -=(scalar: Int): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) - scalar + i += 1 + end while + end -= + + inline def -(scalar: Int): Array[Int] = + vec.clone().tap(_ -= scalar) + end - end extension extension [A: ClassTag](vec: Array[A]) @@ -148,7 +160,7 @@ object arrays: newVec end apply - + inline def minSIMD: Double = var i = 0 var 
acc = Double.PositiveInfinity diff --git a/vecxt/test/src/intScalar.test.scala b/vecxt/test/src/intScalar.test.scala new file mode 100644 index 00000000..ff61a31b --- /dev/null +++ b/vecxt/test/src/intScalar.test.scala @@ -0,0 +1,33 @@ +package vecxt + +import all.* + +class IntScalarOpsSuite extends munit.FunSuite: + + test("in-place subtraction -= scalar works and mutates array"): + val arr = Array(5, 3, 8) + arr -= 2 + assertEquals(arr.toSeq, Seq(3, 1, 6)) + + test("non-mutating - scalar returns new array and leaves original unchanged"): + val orig = Array(10, 0, -5) + val out = orig - 3 + assertEquals(out.toSeq, Seq(7, -3, -8)) + assertEquals(orig.toSeq, Seq(10, 0, -5)) + + test("subtracting zero does nothing"): + val a = Array(1, 2, 3) + val b = a.clone() + a -= 0 + assertEquals(a.toSeq, b.toSeq) + val c = b - 0 + assertEquals(c.toSeq, b.toSeq) + + test("works on empty arrays"): + val e = Array.empty[Int] + e -= 5 + assertEquals(e.toSeq, Seq()) + val e2 = e - 5 + assertEquals(e2.toSeq, Seq()) + +end IntScalarOpsSuite diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index 291d3e7c..9b5fc615 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -31,7 +31,7 @@ case class Scenarr( groupSum(iterations, amounts, numberIterations) end agg - lazy val claimDates: Array[LocalDate] = days.map(d => ChronoUnit.DAYS.addTo(this.day1, d)) + lazy val claimDates: Array[LocalDate] = (days - 1).map(d => ChronoUnit.DAYS.addTo(this.day1, d)) /** Interpretation: * @@ -97,7 +97,7 @@ object Scenarr: inline def scaleAmntBy(scale: Double): Scenarr = scenario.copy(amounts = scenario.amounts * scale, threshold = scenario.threshold * scale) - end extension + // def shiftDay1To(date: LocalDate): Scenarr = // scenario.period.firstLoss.plusYears(1).minusDays(1) @@ -120,18 +120,21 @@ object Scenarr: // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) // end removeClaimsBefore - // inline def 
applyThreshold(newThresh: Double): Scenarr = - // if !(newThresh > scenario.threshold) then - // throw new Exception( - // "Threshold may only be increased. Attempt to change it from " + scenario.threshold + " to " + newThresh + " is illegal" - // ) - // end if - // Scenarr( - // scenario.eventsSorted.filter(_.loss > newThresh), - // scenario.numberIterations, - // newThresh, - // scenario.day1, - // scenario.name - // ) - // end applyThreshold + inline def applyThreshold(newThresh: Double): Scenarr = + if !(newThresh > scenario.threshold) then + throw new Exception( + "Threshold may only be increased. Attempt to change it from " + scenario.threshold + " to " + newThresh + " is illegal" + ) + end if + val idx = scenario.amounts > newThresh + Scenarr( + scenario.iterations(idx)(using false), + scenario.days(idx)(using false), + scenario.amounts(idx)(using false), + scenario.numberIterations, + newThresh, + scenario.day1, + scenario.name + ) + end applyThreshold end Scenarr diff --git a/vecxt_re/test/src/scenario.test.scala b/vecxt_re/test/src/scenario.test.scala index 15ee620c..6a974580 100644 --- a/vecxt_re/test/src/scenario.test.scala +++ b/vecxt_re/test/src/scenario.test.scala @@ -1,5 +1,7 @@ package vecxt_re +import java.time.LocalDate + class ScenarioSuite extends munit.FunSuite: test("Events") { @@ -42,4 +44,86 @@ class ScenarioSuite extends munit.FunSuite: } + test("scaleAmntBy doubles amounts and threshold, preserves other fields"): + val base = Scenarr( + iterations = Array(1, 1, 2), + days = Array(1, 2, 3), + amounts = Array(100.0, 200.0, 300.0), + numberIterations = 2, + threshold = 50.0 + ) + + val scaled = base.scaleAmntBy(2.0) + + assertEquals(scaled.amounts.toSeq, Seq(200.0, 400.0, 600.0)) + assertEquals(scaled.threshold, 100.0) + // other fields unchanged + assertEquals(scaled.iterations.toSeq, base.iterations.toSeq) + assertEquals(scaled.days.toSeq, base.days.toSeq) + assertEquals(scaled.numberIterations, base.numberIterations) + 
assertEquals(scaled.name, base.name) + assertEquals(scaled.isSorted, base.isSorted) + + test("scaleAmntBy with zero scale results in zero amounts and zero threshold"): + val base = Scenarr(Array(1), Array(1), Array(123.0), numberIterations = 1, threshold = 7.5) + val scaled0 = base.scaleAmntBy(0.0) + assertEquals(scaled0.amounts.toSeq, Seq(0.0)) + assertEquals(scaled0.threshold, 0.0) + + test("scaleAmntBy supports negative scaling and does not mutate original"): + val originalAmounts = Array(10.0, 20.0, 30.0) + val base = Scenarr(Array(1, 1, 1), Array(1, 2, 3), originalAmounts.clone(), numberIterations = 1, threshold = 5.0) + val scaled = base.scaleAmntBy(-1.5) + assertEquals(scaled.amounts.toSeq, Seq(-15.0, -30.0, -45.0)) + assertEquals(scaled.threshold, -7.5) + // original remains unchanged + assertEquals(base.amounts.toSeq, originalAmounts.toSeq) + assertEquals(base.threshold, 5.0) + + test("applyThreshold increases threshold and filters claims"): + val base = Scenarr( + iterations = Array(1, 2, 3), + days = Array(10, 20, 30), + amounts = Array(10.0, 20.0, 30.0), + numberIterations = 3, + threshold = 5.0 + ) + + val applied = base.applyThreshold(15.0) + + assertEquals(applied.amounts.toSeq, Seq(20.0, 30.0)) + assertEquals(applied.iterations.toSeq, Seq(2, 3)) + assertEquals(applied.days.toSeq, Seq(20, 30)) + assertEquals(applied.threshold, 15.0) + // original remains unchanged + assertEquals(base.amounts.toSeq, Seq(10.0, 20.0, 30.0)) + assertEquals(base.threshold, 5.0) + + test("applyThreshold throws if newThresh is not greater than current threshold"): + val base2 = Scenarr(Array(1), Array(1), Array(100.0), numberIterations = 1, threshold = 50.0) + val ex = intercept[Exception](base2.applyThreshold(50.0)) + assert(ex.getMessage.contains("Threshold may only be increased")) + + test("applyThreshold may result in no claims"): + val base3 = Scenarr(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 5.0) + val appliedEmpty = 
base3.applyThreshold(100.0) + assertEquals(appliedEmpty.amounts.toSeq, Seq()) + assertEquals(appliedEmpty.iterations.toSeq, Seq()) + assertEquals(appliedEmpty.days.toSeq, Seq()) + assertEquals(appliedEmpty.threshold, 100.0) + + test("claimDates maps day 1 to day1 property"): + val base = Scenarr( + iterations = Array(1, 2), + days = Array(1, 100), + amounts = Array(10.0, 20.0), + numberIterations = 2, + threshold = 1.0, + day1 = LocalDate.of(2019, 1, 1), + name = "claim-date-test" + ) + + // base claimDates: first should be day1 + assertEquals(base.claimDates.head, base.day1) + end ScenarioSuite From 08a606d8df89ddc0c22a36a9ec299f5c2a265988 Mon Sep 17 00:00:00 2001 From: partens Date: Wed, 21 Jan 2026 16:53:34 +0100 Subject: [PATCH 15/75] . --- experiments/src/rep_setup.scala | 10 ++ vecxt_re/resources/seasonality.vg.json | 221 +++++-------------------- vecxt_re/src-jvm/plots.scala | 24 ++- vecxt_re/src/all.scala | 6 + vecxt_re/src/scenarr.scala | 3 + 5 files changed, 79 insertions(+), 185 deletions(-) create mode 100644 experiments/src/rep_setup.scala create mode 100644 vecxt_re/src/all.scala diff --git a/experiments/src/rep_setup.scala b/experiments/src/rep_setup.scala new file mode 100644 index 00000000..08153ee6 --- /dev/null +++ b/experiments/src/rep_setup.scala @@ -0,0 +1,10 @@ +package experiments + +object RPT: + export vecxt.all.{*, given} + export io.github.quafadas.table.{*, given} + export io.github.quafadas.plots.SetupVega.{*, given} + export viz.PlotTargets.desktopBrowser + export vecxt_re.Scenario + export vecxt_re.Scenarr + export vecxt_re.Plots.* diff --git a/vecxt_re/resources/seasonality.vg.json b/vecxt_re/resources/seasonality.vg.json index 5c3200ea..57a52acf 100644 --- a/vecxt_re/resources/seasonality.vg.json +++ b/vecxt_re/resources/seasonality.vg.json @@ -1,186 +1,39 @@ + { - "$schema": "https://vega.github.io/schema/vega/v5.json", - "description": "A basic bar chart example, with value labels shown upon pointer hover.", - "padding": 5, - 
"data": [ - { - "name": "table", - "values": [ - { - "category": "2022-12-31", - "amount": 0, - "color": true - } - ] - } - ], - "signals": [ - { - "name": "tooltip", - "value": {}, - "on": [ - { - "events": "rect:pointerover", - "update": "datum" - }, - { - "events": "rect:pointerout", - "update": "{}" - } - ] - }, - { - "name": "height", - "init": "isFinite(containerSize()[1]) ? containerSize()[1] : 200", - "on": [ - { - "update": "isFinite(containerSize()[1]) ? containerSize()[1] : 200", - "events": "window:resize" - } - ] - }, - { - "name": "width", - "init": "isFinite(containerSize()[0]) ? containerSize()[0] : 200", - "on": [ - { - "update": "isFinite(containerSize()[0]) ? containerSize()[0] : 200", - "events": "window:resize" - } - ] - } - ], - "scales": [ - { - "name": "xscale", - "type": "band", - "domain": { - "data": "table", - "field": "category" - }, - "range": "width", - "padding": 0.05, - "round": true - }, - { - "name": "yscale", - "domain": { - "data": "table", - "field": "amount" - }, - "nice": true, - "range": "height" - }, - { - "name": "color", - "type": "ordinal", - "domain": { - "data": "table", - "field": "color" - }, - "range": [ - "green", - "steelblue" - ] - } - ], - "axes": [ - { - "orient": "bottom", - "scale": "xscale", - "labelAngle": -90, - "labelPadding": 30 - }, - { - "orient": "left", - "scale": "yscale" - } - ], - "marks": [ - { - "type": "rect", - "from": { - "data": "table" - }, - "encode": { - "enter": { - "x": { - "scale": "xscale", - "field": "category" - }, - "width": { - "scale": "xscale", - "band": 1 - }, - "y": { - "scale": "yscale", - "field": "amount" - }, - "y2": { - "scale": "yscale", - "value": 0 - }, - "tooltip": { - "signal": "datum" - } - }, - "update": { - "fill": { - "scale": "color", - "field": "color" - } - }, - "hover": { - "fill": { - "value": "red" - } - } - } - }, - { - "type": "text", - "encode": { - "enter": { - "align": { - "value": "center" - }, - "baseline": { - "value": "bottom" - }, - "fill": { - 
"value": "#333" - } - }, - "update": { - "x": { - "scale": "xscale", - "signal": "tooltip.category", - "band": 0.5 - }, - "y": { - "scale": "yscale", - "signal": "tooltip.amount", - "offset": -2 - }, - "text": { - "signal": "tooltip.amount" - }, - "fillOpacity": [ - { - "test": "datum === tooltip", - "value": 0 - }, - { - "value": 1 - } - ] - } - } - } - ], - "autosize": { - "type": "fit", - "resize": true, - "contains": "padding" + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "title": "Seasonality", + "autosize": {"type": "fit", "contains": "padding", "resize": true}, + "width":"container", + "height":"container", + "data": { + "values": [ + {"category": "Jan 2025", "amount": 100, "color": false}, + {"category": "Feb 2025", "amount": 200, "color": false} + ] + } + , + + "mark": {"type": "bar", "tooltip": true}, + + "encoding": { + "x": { + "field": "category", + "type": "temporal", + "timeUnit": "yearmonth", + "bandPosition": 0, + "axis": {"labelAngle": -45, "grid": false, "tickCount": "month"} + + }, + "y": { + "aggregate": "sum", + "field": "amount", + "type": "quantitative" + }, + "color": { + "field": "color", + "type": "nominal", + "scale": {"range": ["steelblue", "green"]}, + "legend": null } -} \ No newline at end of file + } +} diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index c638b93c..323bb58f 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -1,9 +1,31 @@ package vecxt_re -import io.github.quafadas.plots.SetupVega.* +import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} object Plots: lazy val timeline = VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density + + extension (scenario: Scenarr) + inline def plotSeasonality(highlight: Option[(year: Int, 
month: Int)] = None)(using tgt: viz.LowPriorityPlotTarget) = + val calc = scenario.monthYear.zip(scenario.amounts).groupMapReduce(_._1)(_._2)(_ + _).toVector + val normaliseBy = calc.map(_._2).sum // total of all claims + val sorted = calc + .sortBy(row => (row._1.year, row._1.month)) + .map(row => + ( + category = + s"${row._1.month.getDisplayName(java.time.format.TextStyle.SHORT, java.util.Locale.getDefault())} ${row._1.year}", + amount = row._2 / normaliseBy, + color = highlight.exists(h => h.year == row._1.year && h.month == row._1.month.getValue) + ) + ) + + seasonality.plot( + _.title("Seasonality " + scenario.name), + _.data.values := sorted.asJson + ) + end extension end Plots diff --git a/vecxt_re/src/all.scala b/vecxt_re/src/all.scala new file mode 100644 index 00000000..d06b5847 --- /dev/null +++ b/vecxt_re/src/all.scala @@ -0,0 +1,6 @@ +package vecxt_re + +object all: + export vecxt_re.Scenario + export vecxt_re.Scenarr + export vecxt_re.Plots.* \ No newline at end of file diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index 9b5fc615..577b9304 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -4,6 +4,7 @@ import vecxt.all.* import java.time.LocalDate import java.time.temporal.ChronoUnit +import java.time.Month case class Scenarr( iterations: Array[Int], @@ -33,6 +34,8 @@ case class Scenarr( lazy val claimDates: Array[LocalDate] = (days - 1).map(d => ChronoUnit.DAYS.addTo(this.day1, d)) + lazy val monthYear: Array[(month: Month, year: Int)] = claimDates.map(d => (d.getMonth, d.getYear)) + /** Interpretation: * * - Excess variance over Poisson, scaled by m^2: Var(X) = E[X] for Poisson, so (v - m) is the extra variance; From a194ac37c00b27f3d15178f95eeeeb67c63643f1 Mon Sep 17 00:00:00 2001 From: partens Date: Wed, 21 Jan 2026 18:27:36 +0100 Subject: [PATCH 16/75] start reporting --- experiments/package.mill | 2 +- experiments/src/rep_setup.scala | 8 ++++- vecxt_re/src-js-native/SplitLosses.scala | 2 +- 
vecxt_re/src-jvm/SplitLosses.scala | 2 +- vecxt_re/src/Layer.scala | 2 ++ vecxt_re/src/LossCalc.scala | 12 ++++++++ vecxt_re/src/ReReporting.scala | 28 +++++++++++++++++ vecxt_re/src/SplitScenario.scala | 20 +++++++++++++ vecxt_re/src/all.scala | 4 ++- vecxt_re/test/src/layer.test.scala | 36 ++++++++++++++++++++++ vecxt_re/test/src/losscalc.test.scala | 38 ++++++++++++++++++++++++ 11 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 vecxt_re/src/ReReporting.scala create mode 100644 vecxt_re/src/SplitScenario.scala create mode 100644 vecxt_re/test/src/losscalc.test.scala diff --git a/experiments/package.mill b/experiments/package.mill index eb1d7fcd..5c288e12 100644 --- a/experiments/package.mill +++ b/experiments/package.mill @@ -15,7 +15,7 @@ object `package` extends ScalaModule: override def forkArgs = super.forkArgs() ++ build.vecIncubatorFlag // override def mainClass = Some("mnist") - override def moduleDeps = Seq(build.vecxt.jvm, build.vecxtensions.jvm) + override def moduleDeps = Seq(build.vecxt.jvm, build.vecxtensions.jvm, build.vecxt_re.jvm) override def mvnDeps = super.mvnDeps() ++ Seq( mvn"com.lihaoyi::os-lib::0.10.4", mvn"io.github.quafadas::scautable::0.0.35", diff --git a/experiments/src/rep_setup.scala b/experiments/src/rep_setup.scala index 08153ee6..47d2663c 100644 --- a/experiments/src/rep_setup.scala +++ b/experiments/src/rep_setup.scala @@ -5,6 +5,12 @@ object RPT: export io.github.quafadas.table.{*, given} export io.github.quafadas.plots.SetupVega.{*, given} export viz.PlotTargets.desktopBrowser + export vecxt_re.Plots.* + export vecxt_re.rpt.* + export vecxt_re.SplitLosses.* + export vecxt_re.SplitScenario.* export vecxt_re.Scenario export vecxt_re.Scenarr - export vecxt_re.Plots.* + export vecxt_re.Tower + export vecxt_re.Tower.* + diff --git a/vecxt_re/src-js-native/SplitLosses.scala b/vecxt_re/src-js-native/SplitLosses.scala index 745538ed..8d62dc77 100644 --- a/vecxt_re/src-js-native/SplitLosses.scala +++ 
b/vecxt_re/src-js-native/SplitLosses.scala @@ -15,7 +15,7 @@ object SplitLosses: */ inline def splitAmntFast(years: Array[Int], losses: Array[Double])(using inline bc: BoundsCheck - ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(Layer, Array[Double])]) = + ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])]) = inline if bc then assert(years.length == losses.length) end if if losses.isEmpty then (Array.empty[Double], Array.empty[Double], tower.layers.map(_ -> Array.empty[Double])) diff --git a/vecxt_re/src-jvm/SplitLosses.scala b/vecxt_re/src-jvm/SplitLosses.scala index 0734c6bd..a4993079 100644 --- a/vecxt_re/src-jvm/SplitLosses.scala +++ b/vecxt_re/src-jvm/SplitLosses.scala @@ -18,7 +18,7 @@ object SplitLosses: */ inline def splitAmntFast(years: Array[Int], losses: Array[Double])(using inline bc: BoundsCheck - ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(Layer, Array[Double])]) = + ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])]) = inline if bc then assert(years.length == losses.length) end if if losses.isEmpty then (Array.empty[Double], Array.empty[Double], tower.layers.map(_ -> Array.empty[Double])) diff --git a/vecxt_re/src/Layer.scala b/vecxt_re/src/Layer.scala index 45bd0752..326cd2e2 100644 --- a/vecxt_re/src/Layer.scala +++ b/vecxt_re/src/Layer.scala @@ -57,6 +57,8 @@ case class Layer( lazy val occLayer = Sublayer(occLimit, occRetention, LossCalc.Occ, occType) lazy val aggLayer = Sublayer(aggLimit, aggRetention, LossCalc.Agg, aggType) + lazy val firstLimit = occLimit.orElse(aggLimit).getOrElse(Double.PositiveInfinity) + /** The smallest claim which exhausts the first limit of this layer */ lazy val cap = occLimit match case Some(occLimit) => diff --git a/vecxt_re/src/LossCalc.scala b/vecxt_re/src/LossCalc.scala index a02c3737..146f25d4 100644 --- a/vecxt_re/src/LossCalc.scala +++ 
b/vecxt_re/src/LossCalc.scala @@ -3,3 +3,15 @@ package vecxt_re enum LossCalc: case Agg, Occ end LossCalc + +enum ReportDenominator: + case FirstLimit + case AggLimit + case Custom(denominator: Double) + def fromlayer(layer: Layer) = + this match + case FirstLimit => layer.firstLimit + case AggLimit => layer.aggLimit.getOrElse(Double.PositiveInfinity) + case Custom(denominator) => denominator + +end ReportDenominator \ No newline at end of file diff --git a/vecxt_re/src/ReReporting.scala b/vecxt_re/src/ReReporting.scala new file mode 100644 index 00000000..f3b47f24 --- /dev/null +++ b/vecxt_re/src/ReReporting.scala @@ -0,0 +1,28 @@ +package vecxt_re + +import vecxt.all.* + +object ReReporting: + extension(calcd: (layer: Layer, cededToLayer: Array[Double])) + + inline def attachmentProbability(numIterations: Int) = (calcd.cededToLayer > 0).trues / numIterations.toDouble + + inline def exhaustionProbability(numIterations: Int) = + val exhaust = calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) - 0.01 + (calcd.cededToLayer > exhaust).trues / numIterations.toDouble + + inline def expectedLoss(numIterations: Int) = calcd.cededToLayer.sum / numIterations + + inline def std(numIterations: Int, years: Array[Int]) = groupSum(years, calcd.cededToLayer, numIterations).stdDev + + inline def expectedLossAggLimit(numIterations: Int) = calcd.cededToLayer.sum / (calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) * numIterations) + + inline def lossReport(numIterations: Int, limit: ReportDenominator ) = + ( + reportLimit = limit(layer) + attachmentProbability = attachmentProbability() + ) + + + + diff --git a/vecxt_re/src/SplitScenario.scala b/vecxt_re/src/SplitScenario.scala new file mode 100644 index 00000000..98a13f41 --- /dev/null +++ b/vecxt_re/src/SplitScenario.scala @@ -0,0 +1,20 @@ +package vecxt_re + +import vecxt.BoundsCheck.BoundsCheck +import vecxt_re.SplitLosses.splitAmntFast + +object SplitScenario: + extension (tower: Tower) + inline def 
splitScenarioAmounts(scenario: Scenarr)(using + inline bc: BoundsCheck + ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])]) = + val tmp = + if bc then scenario.sorted + else scenario + + tower.splitAmntFast( + tmp.iterations, + tmp.amounts + ) + end extension +end SplitScenario diff --git a/vecxt_re/src/all.scala b/vecxt_re/src/all.scala index d06b5847..133f1e00 100644 --- a/vecxt_re/src/all.scala +++ b/vecxt_re/src/all.scala @@ -3,4 +3,6 @@ package vecxt_re object all: export vecxt_re.Scenario export vecxt_re.Scenarr - export vecxt_re.Plots.* \ No newline at end of file + export vecxt_re.Plots.* + export vecxt_re.SplitLosses.* + export vecxt_re.SplitScenario.* \ No newline at end of file diff --git a/vecxt_re/test/src/layer.test.scala b/vecxt_re/test/src/layer.test.scala index 9ef7c5f3..63892071 100644 --- a/vecxt_re/test/src/layer.test.scala +++ b/vecxt_re/test/src/layer.test.scala @@ -4,6 +4,42 @@ import vecxt_re.assertVecEquals class ScenarioRISuite extends munit.FunSuite: + test("firstLimit prefers occLimit when both present") { + val layer = Layer( + occLimit = Some(10.0), + occRetention = Some(1.0), + aggLimit = Some(20.0) + ) + assertEqualsDouble(layer.firstLimit, 10.0, 0.0) + } + + test("firstLimit is occLimit when only occLimit is present") { + val layer = Layer( + occLimit = Some(15.0), + occRetention = Some(2.0), + aggLimit = None + ) + assertEqualsDouble(layer.firstLimit, 15.0, 0.0) + } + + test("firstLimit falls back to aggLimit when occLimit is absent") { + val layer = Layer( + occLimit = None, + occRetention = None, + aggLimit = Some(25.0) + ) + assertEqualsDouble(layer.firstLimit, 25.0, 0.0) + } + + test("firstLimit is PositiveInfinity when no limits are present") { + val layer = Layer( + occLimit = None, + occRetention = None, + aggLimit = None + ) + assertEqualsDouble(layer.firstLimit, Double.PositiveInfinity, 0.0) + } + test("Layer default construction") { val layer = Layer() diff 
--git a/vecxt_re/test/src/losscalc.test.scala b/vecxt_re/test/src/losscalc.test.scala new file mode 100644 index 00000000..a9190632 --- /dev/null +++ b/vecxt_re/test/src/losscalc.test.scala @@ -0,0 +1,38 @@ +package vecxt_re + +package vecxt_re + +import munit.FunSuite + +class LossCalcSuite extends FunSuite: + + test("ReportDenominator.FirstLimit uses occLimit when present") { + val layer = Layer(occLimit = Some(10.0), aggLimit = Some(20.0)) + assertEqualsDouble(ReportDenominator.FirstLimit.fromlayer(layer), 10.0, 0.0) + } + + test("ReportDenominator.FirstLimit falls back to aggLimit when occLimit missing") { + val layer = Layer(occLimit = None, aggLimit = Some(30.0)) + assertEqualsDouble(ReportDenominator.FirstLimit.fromlayer(layer), 30.0, 0.0) + } + + test("ReportDenominator.FirstLimit returns PositiveInfinity when no limits") { + val layer = Layer() + assertEqualsDouble(ReportDenominator.FirstLimit.fromlayer(layer), Double.PositiveInfinity, 0.0) + } + + test("ReportDenominator.AggLimit returns aggLimit when present") { + val layer = Layer(aggLimit = Some(40.0)) + assertEqualsDouble(ReportDenominator.AggLimit.fromlayer(layer), 40.0, 0.0) + } + + test("ReportDenominator.AggLimit returns PositiveInfinity when aggLimit missing") { + val layer = Layer(aggLimit = None) + assertEqualsDouble(ReportDenominator.AggLimit.fromlayer(layer), Double.PositiveInfinity, 0.0) + } + + test("ReportDenominator.Custom returns provided denominator") { + val layer = Layer() + assertEqualsDouble(ReportDenominator.Custom(55.5).fromlayer(layer), 55.5, 0.0) + } +end LossCalcSuite \ No newline at end of file From c75e5fe602d58a7dd3c9304bb2f7aee7bf7061c7 Mon Sep 17 00:00:00 2001 From: partens Date: Wed, 21 Jan 2026 18:42:04 +0100 Subject: [PATCH 17/75] . 
--- vecxt_re/src/ReReporting.scala | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/vecxt_re/src/ReReporting.scala b/vecxt_re/src/ReReporting.scala index f3b47f24..99a7a946 100644 --- a/vecxt_re/src/ReReporting.scala +++ b/vecxt_re/src/ReReporting.scala @@ -5,23 +5,28 @@ import vecxt.all.* object ReReporting: extension(calcd: (layer: Layer, cededToLayer: Array[Double])) - inline def attachmentProbability(numIterations: Int) = (calcd.cededToLayer > 0).trues / numIterations.toDouble + inline def attachmentProbability(numIterations: Int): Double = (calcd.cededToLayer > 0).trues / numIterations.toDouble - inline def exhaustionProbability(numIterations: Int) = + inline def exhaustionProbability(numIterations: Int, years: Array[Int]): Double = val exhaust = calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) - 0.01 - (calcd.cededToLayer > exhaust).trues / numIterations.toDouble + (groupSum(years, calcd.cededToLayer, numIterations) > exhaust).trues / numIterations.toDouble - inline def expectedLoss(numIterations: Int) = calcd.cededToLayer.sum / numIterations + inline def expectedLoss(numIterations: Int): Double = calcd.cededToLayer.sum / numIterations - inline def std(numIterations: Int, years: Array[Int]) = groupSum(years, calcd.cededToLayer, numIterations).stdDev + inline def std(numIterations: Int, years: Array[Int]): Double = groupSum(years, calcd.cededToLayer, numIterations).stdDev - inline def expectedLossAggLimit(numIterations: Int) = calcd.cededToLayer.sum / (calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) * numIterations) - - inline def lossReport(numIterations: Int, limit: ReportDenominator ) = - ( - reportLimit = limit(layer) - attachmentProbability = attachmentProbability() - ) + inline def expectedLossAggLimit(numIterations: Int): Double = calcd.cededToLayer.sum / (calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) * numIterations) + + inline def lossReport(numIterations: Int, years: 
Array[Int], limit: ReportDenominator ) : (limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + ( + limit = limit.fromlayer(calcd.layer), + el = expectedLoss(numIterations), + stdDev = std(numIterations, years), + attachProb = attachmentProbability(numIterations), + exhaustProb = exhaustionProbability(numIterations, years) + ) + + //TODO formatting From 75f41cadfde7d0f19fb8d20f39e983c77c2f6db1 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 22 Jan 2026 10:40:25 +0100 Subject: [PATCH 18/75] fixy --- .vscode/mcp.json | 4 +- experiments/src/rep_setup.scala | 26 +- vecxt/src-js/array.scala | 566 +++++++++++------------ vecxt/src-native/array.scala | 1 - vecxt/test/src-jvm/lu.test.scala | 63 +-- vecxt_re/resources/digraph.vg.json | 103 +++++ vecxt_re/src-js-native/SplitLosses.scala | 6 +- vecxt_re/src-jvm/SplitLosses.scala | 6 +- vecxt_re/src-jvm/plots.scala | 4 +- vecxt_re/src/LossCalc.scala | 14 +- vecxt_re/src/ReReporting.scala | 41 +- vecxt_re/src/Retention_Limit.scala | 3 +- vecxt_re/src/SplitScenario.scala | 6 +- vecxt_re/src/all.scala | 11 +- vecxt_re/src/scenarr.scala | 42 +- vecxt_re/test/src/aggByItr.test.scala | 1 - vecxt_re/test/src/cross.test.scala | 7 +- vecxt_re/test/src/layer.test.scala | 2 - vecxt_re/test/src/losscalc.test.scala | 2 +- vecxt_re/test/src/rpt.test.scala | 10 +- vecxt_re/test/src/tower.test.scala | 38 +- 21 files changed, 543 insertions(+), 413 deletions(-) create mode 100644 vecxt_re/resources/digraph.vg.json diff --git a/.vscode/mcp.json b/.vscode/mcp.json index 10703d90..9d2ba94f 100644 --- a/.vscode/mcp.json +++ b/.vscode/mcp.json @@ -1,8 +1,8 @@ { "servers": { "vecxt-metals": { - "url": "http://localhost:51891/sse", - "type": "sse" + "url": "http://localhost:51891/mcp", + "type": "http" } } } \ No newline at end of file diff --git a/experiments/src/rep_setup.scala b/experiments/src/rep_setup.scala index 47d2663c..f3654b53 100644 --- a/experiments/src/rep_setup.scala +++ 
b/experiments/src/rep_setup.scala @@ -1,16 +1,16 @@ package experiments object RPT: - export vecxt.all.{*, given} - export io.github.quafadas.table.{*, given} - export io.github.quafadas.plots.SetupVega.{*, given} - export viz.PlotTargets.desktopBrowser - export vecxt_re.Plots.* - export vecxt_re.rpt.* - export vecxt_re.SplitLosses.* - export vecxt_re.SplitScenario.* - export vecxt_re.Scenario - export vecxt_re.Scenarr - export vecxt_re.Tower - export vecxt_re.Tower.* - + export vecxt.all.{*, given} + export io.github.quafadas.table.{*, given} + export io.github.quafadas.plots.SetupVega.{*, given} + export viz.PlotTargets.desktopBrowser + export vecxt_re.Plots.* + export vecxt_re.rpt.* + export vecxt_re.SplitLosses.* + export vecxt_re.SplitScenario.* + export vecxt_re.Scenario + export vecxt_re.Scenarr + export vecxt_re.Tower + export vecxt_re.Tower.* +end RPT diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index dca707d3..23662889 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -159,6 +159,7 @@ object arrays: inline def -(scalar: Int): Array[Int] = vec.clone().tap(_ -= scalar) end - + end extension extension (vec: Array[Double]) @@ -176,7 +177,6 @@ object arrays: newVec end apply - inline def minSIMD: Double = var i = 0 var acc = Double.PositiveInfinity @@ -202,288 +202,288 @@ object arrays: end maxSIMD end extension - def increments: Array[Double] = - val out = Array.ofDim[Double](vec.length) - out(0) = vec(0) - var i = 1 - while i < vec.length do - out(i) = vec(i) - vec(i - 1) - i = i + 1 - end while - out - end increments - - inline def stdDev: Double = stdDev(VarianceMode.Population) - - inline def stdDev(mode: VarianceMode): Double = std(mode) - - inline def std: Double = std(VarianceMode.Population) - - inline def std(mode: VarianceMode): Double = - Math.sqrt(vec.variance(mode)) - - inline def mean: Double = vec.sumSIMD / vec.length - - inline def sum: Double = - var sum = 0.0 - var i = 0; - while i < vec.length do - 
sum = sum + vec(i) - i = i + 1 - end while - sum - end sum - - inline def product: Double = - var sum = 1.0 - var i = 0; - while i < vec.length do - sum *= vec(i) - i = i + 1 - end while - sum - end product - - inline def variance: Double = variance(VarianceMode.Population) - - def variance(mode: VarianceMode): Double = - meanAndVariance(mode).variance - end variance - - inline def meanAndVariance: (mean: Double, variance: Double) = - meanAndVariance(VarianceMode.Population) - - inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = - val μ = vec.mean - var acc = 0.0 - var i = 0 - while i < vec.length do - val diff = vec(i) - μ - acc += diff * diff - i += 1 - end while - - val denom = mode match - case VarianceMode.Population => vec.length.toDouble - case VarianceMode.Sample => (vec.length - 1).toDouble - - (μ, acc / denom) - end meanAndVariance - - inline def unary_- : Array[Double] = - val newVec = Array.ofDim[Double](vec.length) - var i = 0 - while i < vec.length do - newVec(i) = -vec(i) - i += 1 - end while - newVec - end unary_- - - inline def pearsonCorrelationCoefficient(thatVector: Array[Double])(using - inline boundsCheck: BoundsCheck.BoundsCheck - ): Double = - dimCheck(vec, thatVector) - val n = vec.length - var i = 0 - - var sum_x = 0.0 - var sum_y = 0.0 - var sum_xy = 0.0 - var sum_x2 = 0.0 - var sum_y2 = 0.0 - - while i < n do - sum_x = sum_x + vec(i) - sum_y = sum_y + thatVector(i) - sum_xy = sum_xy + vec(i) * thatVector(i) - sum_x2 = sum_x2 + vec(i) * vec(i) - sum_y2 = sum_y2 + thatVector(i) * thatVector(i) - i = i + 1 - end while - (n * sum_xy - (sum_x * sum_y)) / Math.sqrt( - (sum_x2 * n - sum_x * sum_x) * (sum_y2 * n - sum_y * sum_y) - ) - end pearsonCorrelationCoefficient - - inline def spearmansRankCorrelation(thatVector: Array[Double])(using - inline boundsCheck: BoundsCheck.BoundsCheck - ): Double = - dimCheck(vec, thatVector) - val theseRanks = vec.elementRanks - val thoseRanks = thatVector.elementRanks - 
theseRanks.pearsonCorrelationCoefficient(thoseRanks) - end spearmansRankCorrelation - - // An alias - pearson is the most commonly requested type of correlation - inline def corr(thatVector: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Double = - pearsonCorrelationCoefficient(thatVector) - - def elementRanks: Array[Double] = - val indexed1 = vec.zipWithIndex - val indexed = indexed1.toArray.sorted(using Ordering.by(_._1)) - - val ranks: Array[Double] = new Array(vec.length) // faster than zeros. - ranks(indexed.last._2) = vec.length - var currentValue: Double = indexed(0)._1 - var r0: Int = 0 - var rank: Int = 1 - while rank < vec.length do - val temp: Double = indexed(rank)._1 - val end: Int = - if temp != currentValue then rank - else if rank == vec.length - 1 then rank + 1 - else -1 - if end > -1 then - val avg: Double = (1.0 + (end + r0)) / 2.0 - var i: Int = r0; - while i < end do - ranks(indexed(i)._2) = avg - i += 1 - end while - r0 = rank - currentValue = temp - end if - rank += 1 - end while - ranks - end elementRanks - - inline def `cumsum!` = - var i = 1 - while i < vec.length do - vec(i) = vec(i - 1) + vec(i) - i = i + 1 - end while - end `cumsum!` - - inline def cumsum: Array[Double] = - val out = vec.clone() - out.`cumsum!` - out - end cumsum - - inline def dot(v1: Array[Double])(using inline boundsCheck: BoundsCheck): Double = - dimCheck(vec, v1) - - var product = 0.0 - var i = 0; - while i < vec.length do - product = product + vec(i) * v1(i) - i = i + 1 - end while - product - end dot - - inline def norm: Double = - Math.sqrt(vec.dot(vec)(using vecxt.BoundsCheck.DoBoundsCheck.no)) - end norm - - inline def +(d: Double): Array[Double] = - vec.clone().tap(_ += d) - - inline def +=(d: Double): Unit = - var i = 0 - while i < vec.length do - vec(i) = vec(i) + d - i = i + 1 - end while - end += - - inline def -(d: Double): Array[Double] = - vec.clone().tap(_ -= d) - end - - - inline def -=(d: Double): Unit = - var i = 0 - while i < 
vec.length do - vec(i) = vec(i) - d - i = i + 1 - end while - end -= - - inline def -(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = - dimCheck(vec, vec2) - vec.clone().tap(_ -= vec2) - end - - - inline def -=(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Unit = - dimCheck(vec, vec2) - var i = 0 - while i < vec.length do - vec(i) = vec(i) - vec2(i) - i = i + 1 - end while - end -= - - inline def +(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = - dimCheck(vec, vec2) - vec.clone().tap(_ += vec2) - end + - - inline def +:+(d: Double) = - vec.clone().tap(_ +:+= d) - end +:+ - - inline def +:+=(d: Double): Unit = - var i = 0 - while i < vec.length do - vec(i) = vec(i) + d - i = i + 1 - end while - end +:+= - - inline def +=(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Unit = - dimCheck(vec, vec2) - var i = 0 - while i < vec.length do - vec(i) = vec(i) + vec2(i) - i = i + 1 - end while - end += - - inline def add(d: Array[Double])(using inline boundsCheck: BoundsCheck): Array[Double] = vec + d - inline def multInPlace(d: Double): Unit = vec *= d - - inline def *=(d: Double): Unit = - var i = 0 - while i < vec.length do - vec(i) = vec(i) * d - i = i + 1 - end while - end *= - - inline def *(d: Double): Array[Double] = - vec.clone().tap(_ *= d) - end * - - inline def /=(d: Double): Array[Double] = - var i = 0 - while i < vec.length do - vec(i) = vec(i) / d - i = i + 1 - end while - vec - end /= - - inline def /(d: Double): Array[Double] = - vec.clone().tap(_ /= d) - end / - - def covariance(thatVector: Array[Double]): Double = - val μThis = vec.mean - val μThat = thatVector.mean - var cv: Double = 0 - var i: Int = 0; - while i < vec.length do - cv += (vec(i) - μThis) * (thatVector(i) - μThat) - i += 1 - end while - cv / (vec.length - 1) - end covariance - - def maxElement: Double = vec.max - // val t = js.Math.max( vec.toArray: _* ) + 
def increments: Array[Double] = + val out = Array.ofDim[Double](vec.length) + out(0) = vec(0) + var i = 1 + while i < vec.length do + out(i) = vec(i) - vec(i - 1) + i = i + 1 + end while + out + end increments + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def mean: Double = vec.sumSIMD / vec.length + + inline def sum: Double = + var sum = 0.0 + var i = 0; + while i < vec.length do + sum = sum + vec(i) + i = i + 1 + end while + sum + end sum + + inline def product: Double = + var sum = 1.0 + var i = 0; + while i < vec.length do + sum *= vec(i) + i = i + 1 + end while + sum + end product + + inline def variance: Double = variance(VarianceMode.Population) + + def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance + end variance + + inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + val μ = vec.mean + var acc = 0.0 + var i = 0 + while i < vec.length do + val diff = vec(i) - μ + acc += diff * diff + i += 1 + end while + + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, acc / denom) + end meanAndVariance + + inline def unary_- : Array[Double] = + val newVec = Array.ofDim[Double](vec.length) + var i = 0 + while i < vec.length do + newVec(i) = -vec(i) + i += 1 + end while + newVec + end unary_- + + inline def pearsonCorrelationCoefficient(thatVector: Array[Double])(using + inline boundsCheck: BoundsCheck.BoundsCheck + ): Double = + dimCheck(vec, thatVector) + val n = vec.length + var i = 0 + + var sum_x = 0.0 + var sum_y = 0.0 + var sum_xy = 0.0 + var sum_x2 = 0.0 + var sum_y2 = 0.0 + + while i 
< n do + sum_x = sum_x + vec(i) + sum_y = sum_y + thatVector(i) + sum_xy = sum_xy + vec(i) * thatVector(i) + sum_x2 = sum_x2 + vec(i) * vec(i) + sum_y2 = sum_y2 + thatVector(i) * thatVector(i) + i = i + 1 + end while + (n * sum_xy - (sum_x * sum_y)) / Math.sqrt( + (sum_x2 * n - sum_x * sum_x) * (sum_y2 * n - sum_y * sum_y) + ) + end pearsonCorrelationCoefficient + + inline def spearmansRankCorrelation(thatVector: Array[Double])(using + inline boundsCheck: BoundsCheck.BoundsCheck + ): Double = + dimCheck(vec, thatVector) + val theseRanks = vec.elementRanks + val thoseRanks = thatVector.elementRanks + theseRanks.pearsonCorrelationCoefficient(thoseRanks) + end spearmansRankCorrelation + + // An alias - pearson is the most commonly requested type of correlation + inline def corr(thatVector: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Double = + pearsonCorrelationCoefficient(thatVector) + + def elementRanks: Array[Double] = + val indexed1 = vec.zipWithIndex + val indexed = indexed1.toArray.sorted(using Ordering.by(_._1)) + + val ranks: Array[Double] = new Array(vec.length) // faster than zeros. 
+ ranks(indexed.last._2) = vec.length + var currentValue: Double = indexed(0)._1 + var r0: Int = 0 + var rank: Int = 1 + while rank < vec.length do + val temp: Double = indexed(rank)._1 + val end: Int = + if temp != currentValue then rank + else if rank == vec.length - 1 then rank + 1 + else -1 + if end > -1 then + val avg: Double = (1.0 + (end + r0)) / 2.0 + var i: Int = r0; + while i < end do + ranks(indexed(i)._2) = avg + i += 1 + end while + r0 = rank + currentValue = temp + end if + rank += 1 + end while + ranks + end elementRanks + + inline def `cumsum!` = + var i = 1 + while i < vec.length do + vec(i) = vec(i - 1) + vec(i) + i = i + 1 + end while + end `cumsum!` + + inline def cumsum: Array[Double] = + val out = vec.clone() + out.`cumsum!` + out + end cumsum + + inline def dot(v1: Array[Double])(using inline boundsCheck: BoundsCheck): Double = + dimCheck(vec, v1) + + var product = 0.0 + var i = 0; + while i < vec.length do + product = product + vec(i) * v1(i) + i = i + 1 + end while + product + end dot + + inline def norm: Double = + Math.sqrt(vec.dot(vec)(using vecxt.BoundsCheck.DoBoundsCheck.no)) + end norm + + inline def +(d: Double): Array[Double] = + vec.clone().tap(_ += d) + + inline def +=(d: Double): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) + d + i = i + 1 + end while + end += + + inline def -(d: Double): Array[Double] = + vec.clone().tap(_ -= d) + end - + + inline def -=(d: Double): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) - d + i = i + 1 + end while + end -= + + inline def -(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = + dimCheck(vec, vec2) + vec.clone().tap(_ -= vec2) + end - + + inline def -=(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Unit = + dimCheck(vec, vec2) + var i = 0 + while i < vec.length do + vec(i) = vec(i) - vec2(i) + i = i + 1 + end while + end -= + + inline def +(vec2: Array[Double])(using inline boundsCheck: 
BoundsCheck.BoundsCheck): Array[Double] = + dimCheck(vec, vec2) + vec.clone().tap(_ += vec2) + end + + + inline def +:+(d: Double) = + vec.clone().tap(_ +:+= d) + end +:+ + + inline def +:+=(d: Double): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) + d + i = i + 1 + end while + end +:+= + + inline def +=(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Unit = + dimCheck(vec, vec2) + var i = 0 + while i < vec.length do + vec(i) = vec(i) + vec2(i) + i = i + 1 + end while + end += + + inline def add(d: Array[Double])(using inline boundsCheck: BoundsCheck): Array[Double] = vec + d + inline def multInPlace(d: Double): Unit = vec *= d + + inline def *=(d: Double): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) * d + i = i + 1 + end while + end *= + + inline def *(d: Double): Array[Double] = + vec.clone().tap(_ *= d) + end * + + inline def /=(d: Double): Array[Double] = + var i = 0 + while i < vec.length do + vec(i) = vec(i) / d + i = i + 1 + end while + vec + end /= + + inline def /(d: Double): Array[Double] = + vec.clone().tap(_ /= d) + end / + + def covariance(thatVector: Array[Double]): Double = + val μThis = vec.mean + val μThat = thatVector.mean + var cv: Double = 0 + var i: Int = 0; + while i < vec.length do + cv += (vec(i) - μThis) * (thatVector(i) - μThat) + i += 1 + end while + cv / (vec.length - 1) + end covariance + + def maxElement: Double = vec.max + // val t = js.Math.max( vec.toArray: _* ) end extension extension (vec: Array[Array[Double]]) diff --git a/vecxt/src-native/array.scala b/vecxt/src-native/array.scala index 5593e8b5..beb59880 100644 --- a/vecxt/src-native/array.scala +++ b/vecxt/src-native/array.scala @@ -160,7 +160,6 @@ object arrays: newVec end apply - inline def minSIMD: Double = var i = 0 var acc = Double.PositiveInfinity diff --git a/vecxt/test/src-jvm/lu.test.scala b/vecxt/test/src-jvm/lu.test.scala index 29631625..20faa7a1 100644 --- a/vecxt/test/src-jvm/lu.test.scala +++ 
b/vecxt/test/src-jvm/lu.test.scala @@ -3,6 +3,8 @@ package vecxt import munit.FunSuite import all.* import BoundsCheck.DoBoundsCheck.yes +import scala.util.boundary +import scala.util.boundary.break class LUSuite extends FunSuite: @@ -37,49 +39,58 @@ class LUSuite extends FunSuite: def isLowerUnitTriangular(m: Matrix[Double], tol: Double = epsilon): Boolean = if m.rows < m.cols then return false end if - - for i <- 0 until m.rows do - for j <- 0 until m.cols do - if i < j then - // Above diagonal should be zero - if math.abs(m(i, j)) > tol then return false - else if i == j then - // Diagonal should be one - if math.abs(m(i, j) - 1.0) > tol then return false - end if + var lt = true + boundary { + for i <- 0 until m.rows do + for j <- 0 until m.cols do + if i < j then + // Above diagonal should be zero + if math.abs(m(i, j)) > tol then lt = false; break() + else if i == j then + // Diagonal should be one + if math.abs(m(i, j) - 1.0) > tol then lt = false; break() + end if + end for end for - end for - true + } + lt end isLowerUnitTriangular /** Helper to verify U is upper triangular */ def isUpperTriangular(m: Matrix[Double], tol: Double = epsilon): Boolean = if m.rows > m.cols then return false end if + var ut = true + boundary { + for i <- 0 until m.rows do + for j <- 0 until m.cols do + + if i > j then + // Below diagonal should be zero + if math.abs(m(i, j)) > tol then ut = false; break() + end if + end for - for i <- 0 until m.rows do - for j <- 0 until m.cols do - if i > j then - // Below diagonal should be zero - if math.abs(m(i, j)) > tol then return false - end if end for - end for - true + } + ut end isUpperTriangular /** Helper to check if two matrices are approximately equal */ def matricesEqual(a: Matrix[Double], b: Matrix[Double], tol: Double = epsilon): Boolean = if a.rows != b.rows || a.cols != b.cols then return false end if + var me = true + boundary { + for i <- 0 until a.rows do + for j <- 0 until a.cols do + if math.abs(a(i, j) - b(i, j)) > tol 
then me = false; break() - for i <- 0 until a.rows do - for j <- 0 until a.cols do - if math.abs(a(i, j) - b(i, j)) > tol then return false - end if + end for end for - end for - true + + } + me end matricesEqual test("LU decomposition of identity matrix") { diff --git a/vecxt_re/resources/digraph.vg.json b/vecxt_re/resources/digraph.vg.json new file mode 100644 index 00000000..12089701 --- /dev/null +++ b/vecxt_re/resources/digraph.vg.json @@ -0,0 +1,103 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v6.json", + "description": "A node-link diagram with force-directed layout, depicting character co-occurrence in the novel Les Misérables.", + "width": 700, + "height": 500, + "padding": 0, + "autosize": "none", + + "signals": [ + { "name": "cx", "update": "width / 2" }, + { "name": "cy", "update": "height / 2" }, + { "name": "nodeRadius", "value": 20 } + ], + + "data": [ + { + "name": "node-data", + "values": [ + { "id": "A", "x": 100, "y": 100 }, + { "id": "B", "x": 300, "y": 120 }, + { "id": "C", "x": 200, "y": 300 } + ] + }, + { + "name": "link-data", + "values": [ + { "source": "A", "target": "B" }, + { "source": "B", "target": "C" }, + { "source": "A", "target": "C" } + ], + "transform": [ + { + "type": "lookup", + "from": "node-data", + "key": "id", + "fields": ["source"], + "as": ["sourceNode"] + }, + { + "type": "lookup", + "from": "node-data", + "key": "id", + "fields": ["target"], + "as": ["targetNode"] + } + ] + } + ], + + "scales": [ + { + "name": "color", + "type": "ordinal", + "domain": { "data": "node-data", "field": "group" }, + "range": { "scheme": "category20c" } + } + ], + + "marks": [ + { + "name": "nodes", + "type": "symbol", + "zindex": 1, + + "from": { "data": "node-data" }, + + "encode": { + "enter": { + "x": { "field": "x" }, + "y": { "field": "y" }, + "fill": { "scale": "color", "field": "group" }, + "stroke": { "value": "white" } + }, + "update": { + "size": { "signal": "2 * nodeRadius * nodeRadius" }, + "cursor": { "value": 
"pointer" } + } + } + }, + { + "type": "path", + "from": { "data": "link-data" }, + "interactive": false, + "encode": { + "update": { + "stroke": { "value": "#ccc" }, + "strokeWidth": { "value": 0.5 } + } + }, + "transform": [ + { + "type": "linkpath", + "shape": "diagonal", + "orient": "vertical", + "sourceX": "datum.sourceNode.x", + "sourceY": "datum.sourceNode.y", + "targetX": "datum.targetNode.x", + "targetY": "datum.targetNode.y" + } + ] + } + ] +} diff --git a/vecxt_re/src-js-native/SplitLosses.scala b/vecxt_re/src-js-native/SplitLosses.scala index 8d62dc77..61631fb9 100644 --- a/vecxt_re/src-js-native/SplitLosses.scala +++ b/vecxt_re/src-js-native/SplitLosses.scala @@ -15,7 +15,11 @@ object SplitLosses: */ inline def splitAmntFast(years: Array[Int], losses: Array[Double])(using inline bc: BoundsCheck - ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])]) = + ): ( + ceded: Array[Double], + retained: Array[Double], + splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])] + ) = inline if bc then assert(years.length == losses.length) end if if losses.isEmpty then (Array.empty[Double], Array.empty[Double], tower.layers.map(_ -> Array.empty[Double])) diff --git a/vecxt_re/src-jvm/SplitLosses.scala b/vecxt_re/src-jvm/SplitLosses.scala index a4993079..5903629d 100644 --- a/vecxt_re/src-jvm/SplitLosses.scala +++ b/vecxt_re/src-jvm/SplitLosses.scala @@ -18,7 +18,11 @@ object SplitLosses: */ inline def splitAmntFast(years: Array[Int], losses: Array[Double])(using inline bc: BoundsCheck - ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])]) = + ): ( + ceded: Array[Double], + retained: Array[Double], + splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])] + ) = inline if bc then assert(years.length == losses.length) end if if losses.isEmpty then (Array.empty[Double], Array.empty[Double], tower.layers.map(_ -> 
Array.empty[Double])) diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 323bb58f..669b2c92 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -9,7 +9,9 @@ object Plots: lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density extension (scenario: Scenarr) - inline def plotSeasonality(highlight: Option[(year: Int, month: Int)] = None)(using tgt: viz.LowPriorityPlotTarget) = + inline def plotSeasonality(highlight: Option[(year: Int, month: Int)] = None)(using + tgt: viz.LowPriorityPlotTarget + ) = val calc = scenario.monthYear.zip(scenario.amounts).groupMapReduce(_._1)(_._2)(_ + _).toVector val normaliseBy = calc.map(_._2).sum // total of all claims val sorted = calc diff --git a/vecxt_re/src/LossCalc.scala b/vecxt_re/src/LossCalc.scala index 146f25d4..7bab06ce 100644 --- a/vecxt_re/src/LossCalc.scala +++ b/vecxt_re/src/LossCalc.scala @@ -8,10 +8,10 @@ enum ReportDenominator: case FirstLimit case AggLimit case Custom(denominator: Double) - def fromlayer(layer: Layer) = - this match - case FirstLimit => layer.firstLimit - case AggLimit => layer.aggLimit.getOrElse(Double.PositiveInfinity) - case Custom(denominator) => denominator - -end ReportDenominator \ No newline at end of file + def fromlayer(layer: Layer) = + this match + case FirstLimit => layer.firstLimit + case AggLimit => layer.aggLimit.getOrElse(Double.PositiveInfinity) + case Custom(denominator) => denominator + +end ReportDenominator diff --git a/vecxt_re/src/ReReporting.scala b/vecxt_re/src/ReReporting.scala index 99a7a946..91b527a8 100644 --- a/vecxt_re/src/ReReporting.scala +++ b/vecxt_re/src/ReReporting.scala @@ -2,32 +2,39 @@ package vecxt_re import vecxt.all.* -object ReReporting: - extension(calcd: (layer: Layer, cededToLayer: Array[Double])) +object ReReporting: + extension (calcd: (layer: Layer, cededToLayer: Array[Double])) - inline def attachmentProbability(numIterations: Int): Double = 
(calcd.cededToLayer > 0).trues / numIterations.toDouble + inline def attachmentProbability(numIterations: Int, years: Array[Int]): Double = + (groupSum(years, calcd.cededToLayer, numIterations) > 0).trues / numIterations.toDouble - inline def exhaustionProbability(numIterations: Int, years: Array[Int]): Double = + inline def exhaustionProbability(numIterations: Int, years: Array[Int]): Double = val exhaust = calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) - 0.01 (groupSum(years, calcd.cededToLayer, numIterations) > exhaust).trues / numIterations.toDouble + end exhaustionProbability inline def expectedLoss(numIterations: Int): Double = calcd.cededToLayer.sum / numIterations - inline def std(numIterations: Int, years: Array[Int]): Double = groupSum(years, calcd.cededToLayer, numIterations).stdDev - - inline def expectedLossAggLimit(numIterations: Int): Double = calcd.cededToLayer.sum / (calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) * numIterations) + inline def std(numIterations: Int, years: Array[Int]): Double = + groupSum(years, calcd.cededToLayer, numIterations).stdDev - inline def lossReport(numIterations: Int, years: Array[Int], limit: ReportDenominator ) : (limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + inline def expectedLossAggLimit(numIterations: Int): Double = + calcd.cededToLayer.sum / (calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) * numIterations) + + inline def lossReport( + numIterations: Int, + years: Array[Int], + limit: ReportDenominator + ): (limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + val reportLimit = limit.fromlayer(calcd.layer) ( - limit = limit.fromlayer(calcd.layer), - el = expectedLoss(numIterations), - stdDev = std(numIterations, years), - attachProb = attachmentProbability(numIterations), + limit = reportLimit, + el = expectedLoss(numIterations) / reportLimit, + stdDev = std(numIterations, years) / reportLimit, + attachProb = 
attachmentProbability(numIterations, years), exhaustProb = exhaustionProbability(numIterations, years) ) - //TODO formatting - - - - + // TODO formatting + end extension +end ReReporting diff --git a/vecxt_re/src/Retention_Limit.scala b/vecxt_re/src/Retention_Limit.scala index 5bdd31e8..c00d92a6 100644 --- a/vecxt_re/src/Retention_Limit.scala +++ b/vecxt_re/src/Retention_Limit.scala @@ -1,7 +1,5 @@ package vecxt_re -import vecxt_re.Retentions.Retention - object Retentions: opaque type Retention = Double @@ -19,6 +17,7 @@ object Retentions: end Retentions object Limits: + import Retentions.Retention opaque type Limit = Double object Limit: diff --git a/vecxt_re/src/SplitScenario.scala b/vecxt_re/src/SplitScenario.scala index 98a13f41..04abcf8c 100644 --- a/vecxt_re/src/SplitScenario.scala +++ b/vecxt_re/src/SplitScenario.scala @@ -7,7 +7,11 @@ object SplitScenario: extension (tower: Tower) inline def splitScenarioAmounts(scenario: Scenarr)(using inline bc: BoundsCheck - ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])]) = + ): ( + ceded: Array[Double], + retained: Array[Double], + splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])] + ) = val tmp = if bc then scenario.sorted else scenario diff --git a/vecxt_re/src/all.scala b/vecxt_re/src/all.scala index 133f1e00..e98e8784 100644 --- a/vecxt_re/src/all.scala +++ b/vecxt_re/src/all.scala @@ -1,8 +1,9 @@ package vecxt_re object all: - export vecxt_re.Scenario - export vecxt_re.Scenarr - export vecxt_re.Plots.* - export vecxt_re.SplitLosses.* - export vecxt_re.SplitScenario.* \ No newline at end of file + export vecxt_re.Scenario + export vecxt_re.Scenarr + export vecxt_re.Plots.* + export vecxt_re.SplitLosses.* + export vecxt_re.SplitScenario.* +end all diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index 577b9304..cd52c301 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -101,27 +101,26 @@ 
object Scenarr: inline def scaleAmntBy(scale: Double): Scenarr = scenario.copy(amounts = scenario.amounts * scale, threshold = scenario.threshold * scale) - - // def shiftDay1To(date: LocalDate): Scenarr = - // scenario.period.firstLoss.plusYears(1).minusDays(1) - // // val ndays = ChronoUnit.DAYS.between( period._1, seasonEnd) + 1 Let sjust ssume this is 365 ... there is a theoretical problem with air assuming 365 days. Leap years anyone? - // val betweenStartDates = ChronoUnit.DAYS.between(scenario.day1, date).toInt - // val newEvents = - // scenario.eventsSorted.map(x => - // Event(x.eventId, x.iteration, Math.floorMod(x.day - betweenStartDates - 1, 365) + 1, x.loss) - // ) - // Scenario(newEvents, scenario.numberIterations, scenario.threshold, date, scenario.name) - // end shiftDay1To - - // inline def removeClaimsAfter(date: LocalDate): Scenarr = - // val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) <= 0) - // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) - // end removeClaimsAfter - - // inline def removeClaimsBefore(date: LocalDate): Scenarr = - // val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) >= 0) - // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) - // end removeClaimsBefore + // def shiftDay1To(date: LocalDate): Scenarr = + // scenario.period.firstLoss.plusYears(1).minusDays(1) + // // val ndays = ChronoUnit.DAYS.between( period._1, seasonEnd) + 1 Let sjust ssume this is 365 ... there is a theoretical problem with air assuming 365 days. Leap years anyone? 
+ // val betweenStartDates = ChronoUnit.DAYS.between(scenario.day1, date).toInt + // val newEvents = + // scenario.eventsSorted.map(x => + // Event(x.eventId, x.iteration, Math.floorMod(x.day - betweenStartDates - 1, 365) + 1, x.loss) + // ) + // Scenario(newEvents, scenario.numberIterations, scenario.threshold, date, scenario.name) + // end shiftDay1To + + // inline def removeClaimsAfter(date: LocalDate): Scenarr = + // val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) <= 0) + // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + // end removeClaimsAfter + + // inline def removeClaimsBefore(date: LocalDate): Scenarr = + // val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) >= 0) + // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + // end removeClaimsBefore inline def applyThreshold(newThresh: Double): Scenarr = if !(newThresh > scenario.threshold) then @@ -140,4 +139,5 @@ object Scenarr: scenario.name ) end applyThreshold + end extension end Scenarr diff --git a/vecxt_re/test/src/aggByItr.test.scala b/vecxt_re/test/src/aggByItr.test.scala index 83102f2f..90cc70a6 100644 --- a/vecxt_re/test/src/aggByItr.test.scala +++ b/vecxt_re/test/src/aggByItr.test.scala @@ -1,7 +1,6 @@ package vecxt_re import munit.FunSuite -import vecxt_re.Layer class AggregateByItrSpec extends FunSuite: diff --git a/vecxt_re/test/src/cross.test.scala b/vecxt_re/test/src/cross.test.scala index a4c18015..c797653e 100644 --- a/vecxt_re/test/src/cross.test.scala +++ b/vecxt_re/test/src/cross.test.scala @@ -1,10 +1,9 @@ package vecxt_re -import vecxt_re.Limits.Limit -import vecxt_re.Retentions.Retention -import vecxt_re.rpt.* - import scala.util.chaining.* +import Limits.* +import Retentions.* +import rpt.reinsuranceFunction class XSuite extends munit.FunSuite: diff --git 
a/vecxt_re/test/src/layer.test.scala b/vecxt_re/test/src/layer.test.scala index 63892071..a3ef79d6 100644 --- a/vecxt_re/test/src/layer.test.scala +++ b/vecxt_re/test/src/layer.test.scala @@ -1,7 +1,5 @@ package vecxt_re -import vecxt_re.assertVecEquals - class ScenarioRISuite extends munit.FunSuite: test("firstLimit prefers occLimit when both present") { diff --git a/vecxt_re/test/src/losscalc.test.scala b/vecxt_re/test/src/losscalc.test.scala index a9190632..1f6e3a71 100644 --- a/vecxt_re/test/src/losscalc.test.scala +++ b/vecxt_re/test/src/losscalc.test.scala @@ -35,4 +35,4 @@ class LossCalcSuite extends FunSuite: val layer = Layer() assertEqualsDouble(ReportDenominator.Custom(55.5).fromlayer(layer), 55.5, 0.0) } -end LossCalcSuite \ No newline at end of file +end LossCalcSuite diff --git a/vecxt_re/test/src/rpt.test.scala b/vecxt_re/test/src/rpt.test.scala index 56918d89..13ce1cef 100644 --- a/vecxt_re/test/src/rpt.test.scala +++ b/vecxt_re/test/src/rpt.test.scala @@ -1,8 +1,8 @@ package vecxt_re -import vecxt_re.rpt.* -import vecxt_re.Retentions.Retention -import vecxt_re.Limits.Limit +import rpt.* +import Retentions.Retention +import Limits.Limit class ReinsurancePricingSuite extends munit.FunSuite: @@ -199,7 +199,7 @@ class ReinsuranceShareSuite extends munit.FunSuite: v1.reinsuranceFunction(Some(Limit(5.0)), Some(Retention(10.0))) v2.reinsuranceFunction(Some(Limit(5.0)), Some(Retention(10.0)), 1.0) - for i <- 0 until v1.length do assertEqualsDouble(v1(i), v2(i), 0.0001, s"Element $i") + for i <- 0.until(v1.length) do assertEqualsDouble(v1(i), v2(i), 0.0001, s"Element $i") end for } @@ -207,7 +207,7 @@ class ReinsuranceShareSuite extends munit.FunSuite: val v = Array[Double](8, 11, 16, 10.0) v.reinsuranceFunction(Some(Limit(5.0)), Some(Retention(10.0)), 0.0) - for i <- 0 until v.length do assertEqualsDouble(v(i), 0.0, 0.0001, s"Element $i") + for i <- 0.until(v.length) do assertEqualsDouble(v(i), 0.0, 0.0001, s"Element $i") end for } diff --git 
a/vecxt_re/test/src/tower.test.scala b/vecxt_re/test/src/tower.test.scala index f38a19a4..9daf504f 100644 --- a/vecxt_re/test/src/tower.test.scala +++ b/vecxt_re/test/src/tower.test.scala @@ -3,7 +3,7 @@ package vecxt_re import vecxt_re.* import vecxt.all.* import vecxt.all.given -import vecxt_re.SplitLosses.* +import SplitLosses.* class TowerSuite extends munit.FunSuite: @@ -11,11 +11,11 @@ class TowerSuite extends munit.FunSuite: losses: Array[Double], ceded: Array[Double], retained: Array[Double], - splits: IndexedSeq[(Layer, Array[Double])] = IndexedSeq.empty + splits: IndexedSeq[(layer:Layer, cededToLayer: Array[Double])] = IndexedSeq.empty ) = import vecxt.BoundsCheck.DoBoundsCheck.yes assertVecEquals(ceded + retained, losses) - assertVecEquals(splits.map(_._2).reduce(_ + _), ceded) + assertVecEquals(splits.map(_.cededToLayer).reduce(_ + _), ceded) end noleakage test("from retention") { @@ -44,7 +44,7 @@ class TowerSuite extends munit.FunSuite: assertEqualsDouble(ceded.head, 2.0, 0.001) assertEqualsDouble(retained.head, 10.0, 0.001) - assertEqualsDouble(splits.head._2.head, 2.0, 0.001) + assertEqualsDouble(splits.head.cededToLayer.head, 2.0, 0.001) noleakage(amounts, ceded, retained, splits) } @@ -60,7 +60,7 @@ class TowerSuite extends munit.FunSuite: assertEqualsDouble(ceded.head, 5.0, 0.001) assertEqualsDouble(retained.head, 12.0, 0.001) - assertEqualsDouble(splits.head._2.head, 5.0, 0.001) + assertEqualsDouble(splits.head.cededToLayer.head, 5.0, 0.001) noleakage(amounts, ceded, retained, splits) } @@ -76,7 +76,7 @@ class TowerSuite extends munit.FunSuite: assertEqualsDouble(ceded.head, 2.5, 0.001) assertEqualsDouble(retained.head, 14.5, 0.001) - assertEqualsDouble(splits.head._2.head, 2.5, 0.001) + assertEqualsDouble(splits.head.cededToLayer.head, 2.5, 0.001) noleakage(amounts, ceded, retained, splits) } @@ -93,7 +93,7 @@ class TowerSuite extends munit.FunSuite: val cededExpected = Array(2.0, 1.0) // (14 -10) * 0.5, (12 - 10) * 0.5 assertVecEquals(ceded, 
cededExpected) assertVecEquals(retained, amounts - cededExpected) - assertVecEquals(splits.head._2, cededExpected) + assertVecEquals(splits.head.cededToLayer, cededExpected) noleakage(amounts, ceded, retained, splits) } @@ -110,7 +110,7 @@ class TowerSuite extends munit.FunSuite: val cededExpected = Array(0.0, 2.0, 1.0, 0.0, 2.5) assertVecEquals(ceded, cededExpected) assertVecEquals(retained, amounts - cededExpected) - assertVecEquals(splits.head._2, cededExpected) + assertVecEquals(splits.head.cededToLayer, cededExpected) noleakage(amounts, ceded, retained, splits) } @@ -127,7 +127,7 @@ class TowerSuite extends munit.FunSuite: val cededExpected = Array(0.0, 7.0, 6.0, 0.0, 7.5) assertVecEquals(ceded, cededExpected) assertVecEquals(retained, amounts - cededExpected) - assertVecEquals(splits.head._2, cededExpected) + assertVecEquals(splits.head.cededToLayer, cededExpected) noleakage(amounts, ceded, retained, splits) } @@ -144,7 +144,7 @@ class TowerSuite extends munit.FunSuite: val cededExpected = Array(0.5, 1.5, 1.5, 0.0, 0.5) assertVecEquals(ceded, cededExpected) assertVecEquals(retained, amounts - cededExpected) - assertVecEquals(splits.head._2, cededExpected) + assertVecEquals(splits.head.cededToLayer, cededExpected) noleakage(amounts, ceded, retained, splits) } @@ -174,9 +174,9 @@ class TowerSuite extends munit.FunSuite: val (ceded, retained, splits) = tower.splitAmntFast(iterations, amounts) - val l1 = splits.head._2 - val l2 = splits(1)._2 - val l3 = splits.last._2 + val l1 = splits.head.cededToLayer + val l2 = splits(1).cededToLayer + val l3 = splits.last.cededToLayer assertVecEquals(ceded, l1 + l2 + l3) noleakage(amounts, ceded, retained, splits) @@ -193,9 +193,9 @@ class TowerSuite extends munit.FunSuite: val (ceded, retained, splits) = tower.splitAmntFast(iterations, amounts) - val l1 = splits.head._2 - val l2 = splits(1)._2 - val l3 = splits.last._2 + val l1 = splits.head.cededToLayer + val l2 = splits(1).cededToLayer + val l3 = splits.last.cededToLayer 
assertVecEquals(ceded, l1 + l2 + l3) noleakage(amounts, ceded, retained, splits) @@ -212,9 +212,9 @@ class TowerSuite extends munit.FunSuite: val (ceded, retained, splits) = tower.splitAmntFast(iterations, amounts) - val l1 = splits.head._2 - val l2 = splits(1)._2 - val l3 = splits.last._2 + val l1 = splits.head.cededToLayer + val l2 = splits(1).cededToLayer + val l3 = splits.last.cededToLayer assertVecEquals(ceded, l1 + l2 + l3) noleakage(amounts, ceded, retained, splits) From 913d4acec2db65ca92936d7c7ce01374f31fc5fc Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 22 Jan 2026 11:52:58 +0100 Subject: [PATCH 19/75] . --- experiments/src/rep_setup.scala | 3 + vecxt_re/src/Layer.scala | 1 + vecxt_re/src/Patchwork.scala | 146 +++++++++++++++++++++++++++ vecxt_re/src/ReReporting.scala | 3 +- vecxt_re/src/scenarr.scala | 23 ++++- vecxt_re/test/src/scenarr.test.scala | 136 +++++++++++++++++++++++++ 6 files changed, 308 insertions(+), 4 deletions(-) create mode 100644 vecxt_re/src/Patchwork.scala create mode 100644 vecxt_re/test/src/scenarr.test.scala diff --git a/experiments/src/rep_setup.scala b/experiments/src/rep_setup.scala index f3654b53..f8199b0a 100644 --- a/experiments/src/rep_setup.scala +++ b/experiments/src/rep_setup.scala @@ -13,4 +13,7 @@ object RPT: export vecxt_re.Scenarr export vecxt_re.Tower export vecxt_re.Tower.* + export vecxt_re.ReReporting.* + export vecxt_re.ReportDenominator + end RPT diff --git a/vecxt_re/src/Layer.scala b/vecxt_re/src/Layer.scala index 326cd2e2..88673cf8 100644 --- a/vecxt_re/src/Layer.scala +++ b/vecxt_re/src/Layer.scala @@ -59,6 +59,7 @@ case class Layer( lazy val firstLimit = occLimit.orElse(aggLimit).getOrElse(Double.PositiveInfinity) + /** The smallest claim which exhausts the first limit of this layer */ lazy val cap = occLimit match case Some(occLimit) => diff --git a/vecxt_re/src/Patchwork.scala b/vecxt_re/src/Patchwork.scala new file mode 100644 index 00000000..167e5fd4 --- /dev/null +++ 
b/vecxt_re/src/Patchwork.scala @@ -0,0 +1,146 @@ +package vecxt_re + +import vecxt.all.* + +object Patchwork: + inline def fromRetention(ret: Double, limits: IndexedSeq[Double]): Tower = + val retentions = Array((ret +: limits.dropRight(1))*).cumsum.toArray + + val layers = retentions.zip(limits).map((retention, limit) => Layer(limit, retention)) + Tower(layers) + end fromRetention + + inline def singleShot(ret: Double, limits: IndexedSeq[Double]) = + val retentions = Array((ret +: limits.dropRight(1))*).cumsum.toArray + + val layers = retentions.zip(limits).map { (retention, limit) => + Layer( + aggLimit = Some(limit), + occRetention = Some(retention) + ) + } + Tower(layers) + end singleShot + + inline def oneAt100(ret: Double, limits: IndexedSeq[Double]): Tower = + + val retentions = Array((ret +: limits.dropRight(1))*).cumsum.toArray + + val layers = retentions + .zip(limits) + .map((retention, limit) => + Layer( + occLimit = Some(limit), + occRetention = Some(retention), + aggLimit = Some(limit * 2), + reinstatement = Some(Array(1.0)) + ) + ) + Tower(layers) + end oneAt100 + +end Patchwork + +/** + * The key difference between a Patchwork and a Tower is that in a Patchwork the layers are independent of each other. Therefore, + * it's _retention_ is not a valid concept. Be wary of this - a patchwork ought to be for exploratory analysis only, it is unlikely + * to be a valid part of a reinsurance program. + * + * @param layers + * @param id + * @param name + * @param subjPremium + */ +case class Patchwork( + layers: IndexedSeq[Layer], + id: Long = scala.util.Random.nextLong(), + name: Option[String] = None, + subjPremium: Option[Double] = None +): + def applyScale(scale: Double): Patchwork = + Patchwork( + layers = layers.map(_.applyScale(scale)), + id = scala.util.Random.nextLong(), + name = name, + subjPremium = subjPremium.map(_ * scale) + ) + end applyScale + + /** A human friendly printout of this reinsurance patchwork. 
Skips any property which is "None" across all layers. Prints + * a console friendly table, with consistent spacing per column. + */ + def show: String = + if layers.isEmpty then return s"${name.getOrElse("Patchwork")}: no layers" + end if + + inline def formatDouble(value: Double): String = + BigDecimal(value).bigDecimal.stripTrailingZeros().toPlainString + + inline def optionalColumn(label: String, f: Layer => Option[String]): Option[(String, IndexedSeq[String])] = + val values = layers.map(f) + if values.exists(_.isDefined) then Some(label -> values.map(_.getOrElse("-"))) else None + end if + end optionalColumn + + inline def requiredColumn(label: String, f: Layer => String): (String, IndexedSeq[String]) = + label -> layers.map(f) + + val indexColumn = "Layer" -> layers.indices.map(i => (i + 1).toString) + + val columns = scala.collection.immutable + .Vector( + Some(indexColumn), + optionalColumn("Name", _.layerName), + optionalColumn("Occ Ret", l => l.occRetention.map(formatDouble)), + optionalColumn("Occ Lim", l => l.occLimit.map(formatDouble)), + Some(requiredColumn("Occ Type", _.occType.toString)), + optionalColumn("Agg Ret", l => l.aggRetention.map(formatDouble)), + optionalColumn("Agg Lim", l => l.aggLimit.map(formatDouble)), + Some(requiredColumn("Agg Type", _.aggType.toString)), + Some(requiredColumn("Share", l => formatDouble(l.share))), + optionalColumn("Reinst", l => l.reinstatement.map(_.map(formatDouble).mkString("[", ", ", "]"))), + optionalColumn("Currency", _.currency), + optionalColumn("Premium", l => l.basePremiumAmount.map(formatDouble)), + optionalColumn("Prem/Unit", l => l.basePremiumUnit.map(formatDouble)), + optionalColumn("Prem Desc", _.basePremiumDescription), + optionalColumn("Comm", l => l.commissionAmount.map(formatDouble)), + optionalColumn("Comm/Unit", l => l.commissionUnit.map(formatDouble)), + optionalColumn("Comm Desc", _.commissionDescription), + optionalColumn("Broker", l => l.brokerageAmount.map(formatDouble)), + 
optionalColumn("Broker/Unit", l => l.brokerageUnit.map(formatDouble)), + optionalColumn("Broker Desc", _.brokerageDescription), + optionalColumn("Tax", l => l.taxAmount.map(formatDouble)), + optionalColumn("Tax/Unit", l => l.taxUnit.map(formatDouble)), + optionalColumn("Tax Desc", _.taxDescription), + optionalColumn("Fee", l => l.feeAmount.map(formatDouble)), + optionalColumn("Fee/Unit", l => l.feeUnit.map(formatDouble)), + optionalColumn("Fee Desc", _.feeDescription) + ) + .flatten + + val widths = columns.map { case (label, rows) => + math.max(label.length, rows.map(_.length).maxOption.getOrElse(0)) + } + + inline def pad(value: String, width: Int): String = + val padding = width - value.length + if padding <= 0 then value else value + (" " * padding) + end if + end pad + + val header = columns.zip(widths).map { case ((label, _), w) => pad(label, w) }.mkString(" | ") + val separator = widths.map(w => "-" * w).mkString("-+-") + val rows = layers.indices.map { rowIdx => + columns.zip(widths).map { case ((_, vals), w) => pad(vals(rowIdx), w) }.mkString(" | ") + } + + val meta = Seq( + Some(s"${name.getOrElse("Tower")}: ${layers.length} layer(s)"), + subjPremium.map(v => s"Subject premium: ${formatDouble(v)}"), + Some(s"Id: $id") + ).flatten + + (meta ++ Seq(header, separator) ++ rows).mkString(System.lineSeparator) + end show + +end Patchwork diff --git a/vecxt_re/src/ReReporting.scala b/vecxt_re/src/ReReporting.scala index 91b527a8..da136224 100644 --- a/vecxt_re/src/ReReporting.scala +++ b/vecxt_re/src/ReReporting.scala @@ -25,9 +25,10 @@ object ReReporting: numIterations: Int, years: Array[Int], limit: ReportDenominator - ): (limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + ): (name: String, limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = val reportLimit = limit.fromlayer(calcd.layer) ( + name = calcd.layer.layerName.getOrElse(s"Layer ${calcd.layer.layerId}"), limit = reportLimit, el = 
expectedLoss(numIterations) / reportLimit, stdDev = std(numIterations, years) / reportLimit, diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index cd52c301..d30e30e7 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -20,7 +20,7 @@ case class Scenarr( assert(iterations.length == days.length && days.length == amounts.length) lazy val freq: Array[Int] = - assert(isSorted) + assert(isSorted, "Scenario must be sorted to compute frequency") groupCount(iterations, numberIterations) end freq @@ -28,7 +28,7 @@ case class Scenarr( freq.mean lazy val agg: Array[Double] = - assert(isSorted) + assert(isSorted, "Scenario must be sorted to compute aggregate amounts") groupSum(iterations, amounts, numberIterations) end agg @@ -62,7 +62,7 @@ case class Scenarr( lazy val hasOccurence: Boolean = amounts.nonEmpty - lazy val numSeasons: Int = math.ceil(days.maxSIMD / 365).toInt // doesnt deal so well with leap years. + lazy val numSeasons: Int = math.ceil(days.maxSIMD.toDouble / 365).toInt // doesnt deal so well with leap years. lazy val meanLoss: Double = amounts.sum / numberIterations @@ -101,6 +101,23 @@ object Scenarr: inline def scaleAmntBy(scale: Double): Scenarr = scenario.copy(amounts = scenario.amounts * scale, threshold = scenario.threshold * scale) + inline def iteration(num: Int) = + assert(num > 0 && num <= scenario.numberIterations ) + val idx = scenario.iterations =:= num + Scenarr( + scenario.iterations(idx)(using false), + scenario.days(idx)(using false), + scenario.amounts(idx)(using false), + scenario.numberIterations, + scenario.threshold, + scenario.day1, + scenario.name, + scenario.id, + isSorted = scenario.isSorted + ) + end iteration + + // def shiftDay1To(date: LocalDate): Scenarr = // scenario.period.firstLoss.plusYears(1).minusDays(1) // // val ndays = ChronoUnit.DAYS.between( period._1, seasonEnd) + 1 Let sjust ssume this is 365 ... there is a theoretical problem with air assuming 365 days. Leap years anyone? 
diff --git a/vecxt_re/test/src/scenarr.test.scala b/vecxt_re/test/src/scenarr.test.scala new file mode 100644 index 00000000..db49dd83 --- /dev/null +++ b/vecxt_re/test/src/scenarr.test.scala @@ -0,0 +1,136 @@ +package vecxt_re + +import vecxt.all.* +import munit.FunSuite +import java.time.{LocalDate, Month} +import scala.util.Random + +class ScenarrSuite extends FunSuite: + + test("constructor should enforce array length equality") { + intercept[AssertionError] { + Scenarr(Array(1), Array(1, 2), Array(1.0)) + } + } + + test("freq, meanFreq, agg computed correctly for sorted scenario") { + val iterations = Array(1, 1, 1, 2, 3) + val days = Array(1, 2, 3, 4, 5) + val amounts = Array(10.0, 20.0, 30.0, 40.0, 50.0) + val sc = Scenarr(iterations, days, amounts, numberIterations = 3, isSorted = true) + + // Expected counts per iteration 1..3 => [3,1,1] + val expectedFreq = Array(3, 1, 1) + assertEquals(sc.freq.toList, expectedFreq.toList) + + val expectedMean = expectedFreq.sum.toDouble / expectedFreq.length + assert(math.abs(sc.meanFreq - expectedMean) < 1e-12) + + // Agg: sum amounts per iteration: iter1 -> 10+20+30 = 60, iter2 -> 40, iter3 -> 50 + val expectedAgg = Array(60.0, 40.0, 50.0) + assertEquals(sc.agg.toList, expectedAgg.toList) + + // meanLoss = amounts.sum / numberIterations = 150 / 3 = 50 + assert(math.abs(sc.meanLoss - 50.0) < 1e-12) + } + + test("clusterCoeff and varianceMeanRatio compute from sample variance".only) { + val iterations = Array(1, 2, 1, 3, 1) + val days = Array(1, 2, 3, 4, 5) + val amounts = Array(10.0, 20.0, 30.0, 40.0, 50.0) + val sc = Scenarr(iterations, days, amounts, numberIterations = 3) + + val sortedScen = sc.sorted + + // freq = [3,1,1] + val freqArr = sortedScen.freq + val (m, v) = freqArr.meanAndVariance(VarianceMode.Sample) + val expectedCluster = (v - m) / (m * m) + val expectedVMR = v / m + + assertEqualsDouble(sortedScen.clusterCoeff, expectedCluster, 1e-6) + assertEqualsDouble(sortedScen.varianceMeanRatio, expectedVMR, 
1e-6) + } + + test("claimDates and monthYear mapping") { + val days = Array(1, 2) + val sc = Scenarr(Array(1, 1), days, Array(10.0, 20.0), numberIterations = 1) + val claimDates = sc.claimDates + assertEquals(claimDates(0), LocalDate.of(2019, 1, 1)) + assertEquals(claimDates(1), LocalDate.of(2019, 1, 2)) + + val my = sc.monthYear + assertEquals(my(0).month, Month.JANUARY) + assertEquals(my(0).year, 2019) + } + + test("numSeasons accounts for days spanning multiple years") { + val sc = Scenarr(Array(1, 1), Array(1, 400), Array(1.0, 2.0), numberIterations = 1) + println(sc.numSeasons) + assertEquals(sc.numSeasons, 2) + } + + test("itrDayAmount and period produce expected tuples") { + val days = Array(10, 100, 365, 366) + val sc = Scenarr(Array(1, 1, 1, 1), days, Array(5.0, 6.0, 7.0, 8.0), numberIterations = 1) + val (itr, d, a) = sc.itrDayAmount + assertEquals(itr.toList, Array(1, 1, 1, 1).toList) + assertEquals(d.toList, days.toList) + assertEquals(a.toList, Array(5.0, 6.0, 7.0, 8.0).toList) + + val (firstLoss, lastLoss) = sc.period + assertEquals(firstLoss, LocalDate.of(2019, 1, 10)) + assertEquals(lastLoss, LocalDate.of(2020, 1, 1)) // day 366 -> Jan 1 2020 from 2019-01-01 + } + + test("hasOccurence false for empty amounts") { + val sc = Scenarr(Array.emptyIntArray, Array.emptyIntArray, Array.emptyDoubleArray, numberIterations = 0) + assertEquals(sc.hasOccurence, false) + } + + test("sorted extension reorders by iteration then day and sets isSorted") { + val iter = Array(2, 1, 2) + val days = Array(10, 5, 8) + val amts = Array(20.0, 10.0, 15.0) + val sc = Scenarr(iter, days, amts, numberIterations = 2, isSorted = false) + + val ssorted = sc.sorted + assertEquals(ssorted.isSorted, true) + assertEquals(ssorted.iterations.toList, Array(1, 2, 2).toList) + assertEquals(ssorted.days.toList, Array(5, 8, 10).toList) + assertEquals(ssorted.amounts.toList, Array(10.0, 15.0, 20.0).toList) + } + + test("scaleAmntBy multiplies amounts and threshold") { + val sc = 
Scenarr(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 100.0) + val scaled = sc.scaleAmntBy(2.0) + assertEquals(scaled.threshold, 200.0) + assertEquals(scaled.amounts.toList, Array(20.0, 40.0).toList) + } + + test("iteration selects events for given iteration number") { + val iters = Array(2, 1, 2, 1) + val days = Array(1, 2, 3, 4) + val amts = Array(10.0, 11.0, 12.0, 13.0) + val sc = Scenarr(iters, days, amts, numberIterations = 2) + val only2 = sc.iteration(2) + assert(only2.iterations.forall(_ == 2)) + assertEquals(only2.amounts.toList, Array(10.0, 12.0).toList) + } + + test("applyThreshold filters amounts and only allows increasing threshold") { + val sc = Scenarr(Array(1, 1, 1), Array(1, 2, 3), Array(10.0, 50.0, 200.0), numberIterations = 1, threshold = 0.0) + val filtered = sc.applyThreshold(49.0) + // keep > 49 => 50 and 200 + assertEquals(filtered.amounts.toList, Array(50.0, 200.0).toList) + assertEquals(filtered.threshold, 49.0) + + intercept[Exception] { + sc.applyThreshold(0.0) // not strictly greater + } + intercept[Exception] { + sc.applyThreshold(-1.0) // decreasing + } + } + +end ScenarrSuite From 8c577913e4c34ab3cd7c2bb0d91cf663c260434a Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 22 Jan 2026 16:56:57 +0100 Subject: [PATCH 20/75] . 
--- vecxt_re/test/src/scenarr.test.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vecxt_re/test/src/scenarr.test.scala b/vecxt_re/test/src/scenarr.test.scala index db49dd83..8b68205d 100644 --- a/vecxt_re/test/src/scenarr.test.scala +++ b/vecxt_re/test/src/scenarr.test.scala @@ -34,7 +34,7 @@ class ScenarrSuite extends FunSuite: assert(math.abs(sc.meanLoss - 50.0) < 1e-12) } - test("clusterCoeff and varianceMeanRatio compute from sample variance".only) { + test("clusterCoeff and varianceMeanRatio compute from sample variance") { val iterations = Array(1, 2, 1, 3, 1) val days = Array(1, 2, 3, 4, 5) val amounts = Array(10.0, 20.0, 30.0, 40.0, 50.0) From fd397b42e249d32d2c76504b90f1ad9c34ddb7b5 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 11:22:14 +0100 Subject: [PATCH 21/75] . --- .github/copilot-instructions.md | 6 +- .vscode/launch.json | 4 +- build.mill | 1 + experiments/src/pricing_fun.scala | 35 ++ site/docs/cheatsheet.md | 2 +- vecxt/src-js-native/array.scala | 22 + vecxt/src-js/array.scala | 565 +++++++++--------- vecxt/src-js/dimCheck.scala | 5 +- vecxt/src-jvm/arrays.scala | 17 - vecxt/src-jvm/dimCheck.scala | 6 + vecxt/src/intarray.scala | 19 + vecxt/test/src/array.test.scala | 9 +- vecxt/test/src/booleanarray.test.scala | 14 + vecxt/test/src/intarray.test.scala | 15 + vecxt_re/package.mill | 8 +- .../src-js-native/PlatformReporting.scala | 96 +++ vecxt_re/src-js-native/plots.scala | 5 + vecxt_re/src-jvm/PlatformReporting.scala | 98 +++ vecxt_re/src/Layer.scala | 1 - vecxt_re/src/Patchwork.scala | 11 +- vecxt_re/src/ReReporting.scala | 35 +- vecxt_re/src/groupSums.scala | 35 +- vecxt_re/src/scenarr.scala | 16 +- vecxt_re/test/src/attachment.test.scala | 0 vecxt_re/test/src/losscalc.test.scala | 220 ++++++- vecxt_re/test/src/scenarr.test.scala | 1 - vecxt_re/test/src/tower.test.scala | 2 +- 27 files changed, 898 insertions(+), 350 deletions(-) create mode 100644 experiments/src/pricing_fun.scala create mode 
100644 vecxt_re/src-js-native/PlatformReporting.scala create mode 100644 vecxt_re/src-js-native/plots.scala create mode 100644 vecxt_re/src-jvm/PlatformReporting.scala create mode 100644 vecxt_re/test/src/attachment.test.scala diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index c298d672..461f5779 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -92,4 +92,8 @@ Follow styleguide.md for coding conventions Use inline methods where possible to avoid dispatch overhead where possible. ## GitHub Actions CI -The project uses GitHub Actions for CI/CD \ No newline at end of file +The project uses GitHub Actions for CI/CD + +## Vecxt Re + +Contains a bunch of domain specific code for reinsurance calculations, structures, and various reinsurance contract types. It will often rely on Vecxt. You should view the principles as the same - correctness above all else - performance matters. It also aims to eexpose a consistent cross platform API. 
\ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index 4ad93f65..5c255353 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,8 +8,8 @@ "type": "scala", "request": "launch", "name": "test Suite", - "buildTarget": "vecxt_re.jvm.test", - "testClass": "vecxt_re.ScenarioSuite", + "buildTarget": "vecxt.jvm.test", + "testClass": "vecxt.BooleanArrayExtensionSuite", "jvmOptions": [ "--add-modules=jdk.incubator.vector" ], diff --git a/build.mill b/build.mill index eb207e47..9840f95e 100644 --- a/build.mill +++ b/build.mill @@ -37,6 +37,7 @@ object V: val munitVersion = "1.1.1" val blas: Dep = mvn"dev.ludovic.netlib:blas:3.0.4" val lapack: Dep = mvn"dev.ludovic.netlib:lapack:3.0.4" + val scalaJavaTime: Dep = mvn"io.github.cquiroz::scala-java-time::2.6.0" end V trait VecxtPublishModule extends PublishModule, ScalaModule, ScalafixModule: diff --git a/experiments/src/pricing_fun.scala b/experiments/src/pricing_fun.scala new file mode 100644 index 00000000..28266ec8 --- /dev/null +++ b/experiments/src/pricing_fun.scala @@ -0,0 +1,35 @@ +package experiments + +import RPT.* +import cats.syntax.all.* + +@main def pricingFun = + + val iterations = Array(1,1,2,3,1,2,3,4,5,10,10,10,10,10).sorted + val days = Array(1,2,3,4,5,6,7,8,9,10,11,12,13,14) + val amounts = Array(20.0,0,0,0,Int.MaxValue,0,0,0,0,0,25,30,0,0) + + println(iterations.printArr) + println(amounts.printArr) + + + val scen = Scenarr( + iterations = iterations, + days = days, + amounts = amounts, + numberIterations = 10, + threshold = 0.0 + ) + + val tower = Tower.singleShot(15, Array(10, 10, 10)) + + val iter10 = scen.iteration(10) + + val (ceded, retained, splits) = tower.splitScenarioAmounts(scen)(using true) + val (ceded10, retained10, splits10) = tower.splitScenarioAmounts(iter10)(using true) + + println(ceded10.printArr) + + splits10.map(_.cededToLayer).foreach(arr => println(arr.printArr)) + + splits.map(s => s.lossReport(scen.numberIterations, scen.iterations, 
ReportDenominator.FirstLimit)).ptbln diff --git a/site/docs/cheatsheet.md b/site/docs/cheatsheet.md index 0fdd3393..8f12966e 100644 --- a/site/docs/cheatsheet.md +++ b/site/docs/cheatsheet.md @@ -146,7 +146,7 @@ Is not supported in an "implicit" fashion. Look at the methods; | Element-wise equality | `a =:= b` | `a == b` | `a == b` | | Element-wise inequality | `a !:= b` | `a != b` | `a ~= b` | | Find indices where true | `idx.logicalIdx(...)` | `np.nonzero(a > 0.5)` | `find(a > 0.5)` | -| Boolean indexing | `a(a > 2.0)` | `a[a > 0.5]` | `a(a > 0.5)` | +| Boolean indexing | `a.mask(a > 2.0)` | `a[a > 0.5]` | `a(a > 0.5)` | | Count true values | `(a > 2.0).trues` | `np.sum(a > 0.5)` | `sum(a > 0.5)` | ## Array / Matrix Manipulation diff --git a/vecxt/src-js-native/array.scala b/vecxt/src-js-native/array.scala index 1820a772..a771ec6e 100644 --- a/vecxt/src-js-native/array.scala +++ b/vecxt/src-js-native/array.scala @@ -560,6 +560,12 @@ object JsNativeDoubleArrays: sum end dot + inline def =:=(nums: Array[Int]): Array[Boolean] = + logicalIdxArr(nums, (a, b) => a == b) + + inline def =:=(num: Int): Array[Boolean] = + logicalIdx((a, b) => a == b, num) + inline def <(num: Int): Array[Boolean] = logicalIdx((a, b) => a < b, num) @@ -587,6 +593,22 @@ object JsNativeDoubleArrays: end while idx end logicalIdx + + inline def logicalIdxArr( + compare: Array[Int], + inline op: (Int, Int) => Boolean + ): Array[Boolean] = + val n = vec.length + val idx = Array.fill(n)(false) + + var i = 0 + while i < n do + if op(vec(i), compare(i)) then idx(i) = true + end if + i = i + 1 + end while + idx + end logicalIdxArr end extension // extension [@specialized(Double, Int) A: Numeric](vec: Array[A]) diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index 23662889..40777593 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -200,290 +200,289 @@ object arrays: end while acc end maxSIMD - end extension - def increments: Array[Double] = - val out = 
Array.ofDim[Double](vec.length) - out(0) = vec(0) - var i = 1 - while i < vec.length do - out(i) = vec(i) - vec(i - 1) - i = i + 1 - end while - out - end increments - - inline def stdDev: Double = stdDev(VarianceMode.Population) - - inline def stdDev(mode: VarianceMode): Double = std(mode) - - inline def std: Double = std(VarianceMode.Population) - - inline def std(mode: VarianceMode): Double = - Math.sqrt(vec.variance(mode)) - - inline def mean: Double = vec.sumSIMD / vec.length - - inline def sum: Double = - var sum = 0.0 - var i = 0; - while i < vec.length do - sum = sum + vec(i) - i = i + 1 - end while - sum - end sum - - inline def product: Double = - var sum = 1.0 - var i = 0; - while i < vec.length do - sum *= vec(i) - i = i + 1 - end while - sum - end product - - inline def variance: Double = variance(VarianceMode.Population) - - def variance(mode: VarianceMode): Double = - meanAndVariance(mode).variance - end variance - - inline def meanAndVariance: (mean: Double, variance: Double) = - meanAndVariance(VarianceMode.Population) - - inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = - val μ = vec.mean - var acc = 0.0 - var i = 0 - while i < vec.length do - val diff = vec(i) - μ - acc += diff * diff - i += 1 - end while - - val denom = mode match - case VarianceMode.Population => vec.length.toDouble - case VarianceMode.Sample => (vec.length - 1).toDouble - - (μ, acc / denom) - end meanAndVariance - - inline def unary_- : Array[Double] = - val newVec = Array.ofDim[Double](vec.length) - var i = 0 - while i < vec.length do - newVec(i) = -vec(i) - i += 1 - end while - newVec - end unary_- - - inline def pearsonCorrelationCoefficient(thatVector: Array[Double])(using - inline boundsCheck: BoundsCheck.BoundsCheck - ): Double = - dimCheck(vec, thatVector) - val n = vec.length - var i = 0 - - var sum_x = 0.0 - var sum_y = 0.0 - var sum_xy = 0.0 - var sum_x2 = 0.0 - var sum_y2 = 0.0 - - while i < n do - sum_x = sum_x + vec(i) - sum_y = 
sum_y + thatVector(i) - sum_xy = sum_xy + vec(i) * thatVector(i) - sum_x2 = sum_x2 + vec(i) * vec(i) - sum_y2 = sum_y2 + thatVector(i) * thatVector(i) - i = i + 1 - end while - (n * sum_xy - (sum_x * sum_y)) / Math.sqrt( - (sum_x2 * n - sum_x * sum_x) * (sum_y2 * n - sum_y * sum_y) - ) - end pearsonCorrelationCoefficient - - inline def spearmansRankCorrelation(thatVector: Array[Double])(using - inline boundsCheck: BoundsCheck.BoundsCheck - ): Double = - dimCheck(vec, thatVector) - val theseRanks = vec.elementRanks - val thoseRanks = thatVector.elementRanks - theseRanks.pearsonCorrelationCoefficient(thoseRanks) - end spearmansRankCorrelation - - // An alias - pearson is the most commonly requested type of correlation - inline def corr(thatVector: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Double = - pearsonCorrelationCoefficient(thatVector) - - def elementRanks: Array[Double] = - val indexed1 = vec.zipWithIndex - val indexed = indexed1.toArray.sorted(using Ordering.by(_._1)) - - val ranks: Array[Double] = new Array(vec.length) // faster than zeros. 
- ranks(indexed.last._2) = vec.length - var currentValue: Double = indexed(0)._1 - var r0: Int = 0 - var rank: Int = 1 - while rank < vec.length do - val temp: Double = indexed(rank)._1 - val end: Int = - if temp != currentValue then rank - else if rank == vec.length - 1 then rank + 1 - else -1 - if end > -1 then - val avg: Double = (1.0 + (end + r0)) / 2.0 - var i: Int = r0; - while i < end do - ranks(indexed(i)._2) = avg - i += 1 - end while - r0 = rank - currentValue = temp - end if - rank += 1 - end while - ranks - end elementRanks - - inline def `cumsum!` = - var i = 1 - while i < vec.length do - vec(i) = vec(i - 1) + vec(i) - i = i + 1 - end while - end `cumsum!` - - inline def cumsum: Array[Double] = - val out = vec.clone() - out.`cumsum!` - out - end cumsum - - inline def dot(v1: Array[Double])(using inline boundsCheck: BoundsCheck): Double = - dimCheck(vec, v1) - - var product = 0.0 - var i = 0; - while i < vec.length do - product = product + vec(i) * v1(i) - i = i + 1 - end while - product - end dot - - inline def norm: Double = - Math.sqrt(vec.dot(vec)(using vecxt.BoundsCheck.DoBoundsCheck.no)) - end norm - - inline def +(d: Double): Array[Double] = - vec.clone().tap(_ += d) - - inline def +=(d: Double): Unit = - var i = 0 - while i < vec.length do - vec(i) = vec(i) + d - i = i + 1 - end while - end += - - inline def -(d: Double): Array[Double] = - vec.clone().tap(_ -= d) - end - - - inline def -=(d: Double): Unit = - var i = 0 - while i < vec.length do - vec(i) = vec(i) - d - i = i + 1 - end while - end -= - - inline def -(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = - dimCheck(vec, vec2) - vec.clone().tap(_ -= vec2) - end - - - inline def -=(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Unit = - dimCheck(vec, vec2) - var i = 0 - while i < vec.length do - vec(i) = vec(i) - vec2(i) - i = i + 1 - end while - end -= - - inline def +(vec2: Array[Double])(using inline boundsCheck: 
BoundsCheck.BoundsCheck): Array[Double] = - dimCheck(vec, vec2) - vec.clone().tap(_ += vec2) - end + - - inline def +:+(d: Double) = - vec.clone().tap(_ +:+= d) - end +:+ - - inline def +:+=(d: Double): Unit = - var i = 0 - while i < vec.length do - vec(i) = vec(i) + d - i = i + 1 - end while - end +:+= - - inline def +=(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Unit = - dimCheck(vec, vec2) - var i = 0 - while i < vec.length do - vec(i) = vec(i) + vec2(i) - i = i + 1 - end while - end += - - inline def add(d: Array[Double])(using inline boundsCheck: BoundsCheck): Array[Double] = vec + d - inline def multInPlace(d: Double): Unit = vec *= d - - inline def *=(d: Double): Unit = - var i = 0 - while i < vec.length do - vec(i) = vec(i) * d - i = i + 1 - end while - end *= - - inline def *(d: Double): Array[Double] = - vec.clone().tap(_ *= d) - end * - - inline def /=(d: Double): Array[Double] = - var i = 0 - while i < vec.length do - vec(i) = vec(i) / d - i = i + 1 - end while - vec - end /= - - inline def /(d: Double): Array[Double] = - vec.clone().tap(_ /= d) - end / - - def covariance(thatVector: Array[Double]): Double = - val μThis = vec.mean - val μThat = thatVector.mean - var cv: Double = 0 - var i: Int = 0; - while i < vec.length do - cv += (vec(i) - μThis) * (thatVector(i) - μThat) - i += 1 - end while - cv / (vec.length - 1) - end covariance - - def maxElement: Double = vec.max - // val t = js.Math.max( vec.toArray: _* ) + def increments: Array[Double] = + val out = Array.ofDim[Double](vec.length) + out(0) = vec(0) + var i = 1 + while i < vec.length do + out(i) = vec(i) - vec(i - 1) + i = i + 1 + end while + out + end increments + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def mean: Double = vec.sumSIMD / 
vec.length + + inline def sum: Double = + var sum = 0.0 + var i = 0; + while i < vec.length do + sum = sum + vec(i) + i = i + 1 + end while + sum + end sum + + inline def product: Double = + var sum = 1.0 + var i = 0; + while i < vec.length do + sum *= vec(i) + i = i + 1 + end while + sum + end product + + inline def variance: Double = variance(VarianceMode.Population) + + def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance + end variance + + inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + val μ = vec.mean + var acc = 0.0 + var i = 0 + while i < vec.length do + val diff = vec(i) - μ + acc += diff * diff + i += 1 + end while + + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, acc / denom) + end meanAndVariance + + inline def unary_- : Array[Double] = + val newVec = Array.ofDim[Double](vec.length) + var i = 0 + while i < vec.length do + newVec(i) = -vec(i) + i += 1 + end while + newVec + end unary_- + + inline def pearsonCorrelationCoefficient(thatVector: Array[Double])(using + inline boundsCheck: BoundsCheck.BoundsCheck + ): Double = + dimCheck(vec, thatVector) + val n = vec.length + var i = 0 + + var sum_x = 0.0 + var sum_y = 0.0 + var sum_xy = 0.0 + var sum_x2 = 0.0 + var sum_y2 = 0.0 + + while i < n do + sum_x = sum_x + vec(i) + sum_y = sum_y + thatVector(i) + sum_xy = sum_xy + vec(i) * thatVector(i) + sum_x2 = sum_x2 + vec(i) * vec(i) + sum_y2 = sum_y2 + thatVector(i) * thatVector(i) + i = i + 1 + end while + (n * sum_xy - (sum_x * sum_y)) / Math.sqrt( + (sum_x2 * n - sum_x * sum_x) * (sum_y2 * n - sum_y * sum_y) + ) + end pearsonCorrelationCoefficient + + inline def spearmansRankCorrelation(thatVector: Array[Double])(using + inline boundsCheck: BoundsCheck.BoundsCheck + ): Double = + dimCheck(vec, 
thatVector) + val theseRanks = vec.elementRanks + val thoseRanks = thatVector.elementRanks + theseRanks.pearsonCorrelationCoefficient(thoseRanks) + end spearmansRankCorrelation + + // An alias - pearson is the most commonly requested type of correlation + inline def corr(thatVector: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Double = + pearsonCorrelationCoefficient(thatVector) + + def elementRanks: Array[Double] = + val indexed1 = vec.zipWithIndex + val indexed = indexed1.toArray.sorted(using Ordering.by(_._1)) + + val ranks: Array[Double] = new Array(vec.length) // faster than zeros. + ranks(indexed.last._2) = vec.length + var currentValue: Double = indexed(0)._1 + var r0: Int = 0 + var rank: Int = 1 + while rank < vec.length do + val temp: Double = indexed(rank)._1 + val end: Int = + if temp != currentValue then rank + else if rank == vec.length - 1 then rank + 1 + else -1 + if end > -1 then + val avg: Double = (1.0 + (end + r0)) / 2.0 + var i: Int = r0; + while i < end do + ranks(indexed(i)._2) = avg + i += 1 + end while + r0 = rank + currentValue = temp + end if + rank += 1 + end while + ranks + end elementRanks + + inline def `cumsum!` = + var i = 1 + while i < vec.length do + vec(i) = vec(i - 1) + vec(i) + i = i + 1 + end while + end `cumsum!` + + inline def cumsum: Array[Double] = + val out = vec.clone() + out.`cumsum!` + out + end cumsum + + inline def dot(v1: Array[Double])(using inline boundsCheck: BoundsCheck): Double = + dimCheck(vec, v1) + + var product = 0.0 + var i = 0; + while i < vec.length do + product = product + vec(i) * v1(i) + i = i + 1 + end while + product + end dot + + inline def norm: Double = + Math.sqrt(vec.dot(vec)(using vecxt.BoundsCheck.DoBoundsCheck.no)) + end norm + + inline def +(d: Double): Array[Double] = + vec.clone().tap(_ += d) + + inline def +=(d: Double): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) + d + i = i + 1 + end while + end += + + inline def -(d: Double): Array[Double] = + 
vec.clone().tap(_ -= d) + end - + + inline def -=(d: Double): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) - d + i = i + 1 + end while + end -= + + inline def -(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = + dimCheck(vec, vec2) + vec.clone().tap(_ -= vec2) + end - + + inline def -=(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Unit = + dimCheck(vec, vec2) + var i = 0 + while i < vec.length do + vec(i) = vec(i) - vec2(i) + i = i + 1 + end while + end -= + + inline def +(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Array[Double] = + dimCheck(vec, vec2) + vec.clone().tap(_ += vec2) + end + + + inline def +:+(d: Double) = + vec.clone().tap(_ +:+= d) + end +:+ + + inline def +:+=(d: Double): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) + d + i = i + 1 + end while + end +:+= + + inline def +=(vec2: Array[Double])(using inline boundsCheck: BoundsCheck.BoundsCheck): Unit = + dimCheck(vec, vec2) + var i = 0 + while i < vec.length do + vec(i) = vec(i) + vec2(i) + i = i + 1 + end while + end += + + inline def add(d: Array[Double])(using inline boundsCheck: BoundsCheck): Array[Double] = vec + d + inline def multInPlace(d: Double): Unit = vec *= d + + inline def *=(d: Double): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) * d + i = i + 1 + end while + end *= + + inline def *(d: Double): Array[Double] = + vec.clone().tap(_ *= d) + end * + + inline def /=(d: Double): Array[Double] = + var i = 0 + while i < vec.length do + vec(i) = vec(i) / d + i = i + 1 + end while + vec + end /= + + inline def /(d: Double): Array[Double] = + vec.clone().tap(_ /= d) + end / + + def covariance(thatVector: Array[Double]): Double = + val μThis = vec.mean + val μThat = thatVector.mean + var cv: Double = 0 + var i: Int = 0; + while i < vec.length do + cv += (vec(i) - μThis) * (thatVector(i) - μThat) + i += 1 + end while + cv / (vec.length - 1) + end 
covariance + + def maxElement: Double = vec.max + // val t = js.Math.max( vec.toArray: _* ) end extension extension (vec: Array[Array[Double]]) diff --git a/vecxt/src-js/dimCheck.scala b/vecxt/src-js/dimCheck.scala index 4bf2a8b2..7520572f 100644 --- a/vecxt/src-js/dimCheck.scala +++ b/vecxt/src-js/dimCheck.scala @@ -1,7 +1,5 @@ package vecxt -import scala.scalajs.js.typedarray.Float64Array - import vecxt.BoundsCheck.BoundsCheck case class VectorDimensionMismatch(givenDimension: Int, requiredDimension: Int) @@ -25,6 +23,9 @@ protected[vecxt] object dimCheck: inline def apply[A](a: Array[Double], b: scala.scalajs.js.Array[A])(using inline doCheck: BoundsCheck) = inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + inline def apply[A](a: Array[A], b: Array[Boolean])(using inline doCheck: BoundsCheck) = + inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + inline def apply(a: Array[Double], b: Array[Double])(using inline doCheck: BoundsCheck) = inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index d18db49e..907c901a 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -3,7 +3,6 @@ package vecxt import scala.reflect.ClassTag import scala.util.chaining.* -import vecxt.BooleanArrays.trues import vecxt.BoundsCheck.BoundsCheck import vecxt.matrix.Matrix @@ -437,22 +436,6 @@ object arrays: end extension - extension [@specialized(Double, Int) A](vec: Array[A])(using ClassTag[A]) - inline def apply(index: Array[Boolean])(using inline boundsCheck: BoundsCheck) = - dimCheck(vec, index) - val trues = index.trues - val newVec: Array[A] = new Array[A](trues) - var j = 0 - for i <- 0 until index.length do - // println(s"i: $i || j: $j || ${index(i)} ${vec(i)} ") - if index(i) then - newVec(j) = vec(i) - j = 1 + j - end for - newVec - end 
apply - end extension - extension (d: Double) inline def /(arr: Array[Double]) = val out = new Array[Double](arr.length) diff --git a/vecxt/src-jvm/dimCheck.scala b/vecxt/src-jvm/dimCheck.scala index dabb8ba1..f39c3847 100644 --- a/vecxt/src-jvm/dimCheck.scala +++ b/vecxt/src-jvm/dimCheck.scala @@ -11,6 +11,12 @@ protected[vecxt] object dimCheck: inline def apply[A, B](a: Array[A], b: Array[B])(using inline doCheck: BoundsCheck) = inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + inline def apply[A](a: Array[A], b: Array[Boolean])(using inline doCheck: BoundsCheck) = + inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + + inline def apply[A](a: Array[A], b: Array[A])(using inline doCheck: BoundsCheck) = + inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + inline def apply(a: Array[Double], b: Array[Double])(using inline doCheck: BoundsCheck) = inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) end dimCheck diff --git a/vecxt/src/intarray.scala b/vecxt/src/intarray.scala index 6dfe27be..6eb0bbe8 100644 --- a/vecxt/src/intarray.scala +++ b/vecxt/src/intarray.scala @@ -1,8 +1,27 @@ package vecxt import scala.util.control.Breaks.* +import scala.reflect.ClassTag +import vecxt.BoundsCheck.BoundsCheck +import vecxt.BooleanArrays.trues object IntArrays: + + extension [A](vec: Array[A]) + inline def mask(index: Array[Boolean])(using inline boundsCheck: BoundsCheck, ct: ClassTag[A]) = + dimCheck(vec, index) + val trues = index.trues + val newVec: Array[A] = new Array[A](trues) + var j = 0 + for i <- 0 until index.length do + // println(s"i: $i || j: $j || ${index(i)} ${vec(i)} ") + if index(i) then + newVec(j) = vec(i) + j = 1 + j + end for + newVec + end mask + end extension extension (arr: Array[Int]) inline def select(indicies: Array[Int]): Array[Int] = val len = 
indicies.length diff --git a/vecxt/test/src/array.test.scala b/vecxt/test/src/array.test.scala index 313cf417..76a0c054 100644 --- a/vecxt/test/src/array.test.scala +++ b/vecxt/test/src/array.test.scala @@ -147,15 +147,16 @@ class ArrayExtensionSuite extends munit.FunSuite: // val afterIndex = v1(vIdx) // assertEqualsDouble(afterIndex(0), 1.0, 0.0001) // assertEqualsDouble(afterIndex(1), 3.0, 0.0001) + import vecxt.BoundsCheck.DoBoundsCheck.yes val v2 = Array[Double](1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0) val vIdx2 = Array[Boolean](true, false, true, true, false, true, false, true, false) - val afterIndex2 = v2(vIdx2) + val afterIndex2 = v2.mask(vIdx2) assertEqualsDouble(afterIndex2(4), 8.0, 0.0001) val v3 = Array[Int](1, 2, 3, 4, 5, 6, 7, 8, 9) val vIdx3 = Array[Boolean](true, false, true, true, false, true, false, true, false) - val afterIndex3 = v3(vIdx3) + val afterIndex3 = v3.mask(vIdx3) assertEquals(afterIndex3(4), 8) } @@ -398,7 +399,7 @@ class ArrayExtensionSuite extends munit.FunSuite: test("Array indexing") { val v1 = Array[Double](1.0, 2.0, 3.0) val vIdx = Array[Boolean](true, false, true) - val afterIndex = v1(vIdx) + val afterIndex = v1.mask(vIdx)(using true) assertEquals(afterIndex.length, 2) assertEqualsDouble(afterIndex.head, 1, 0.0001) @@ -538,7 +539,7 @@ class ArrayExtensionSuite extends munit.FunSuite: assert(tvar(9)) assert(tvar(6)) - val v4 = v1(tvar) + val v4 = v1.mask(tvar) assertEquals(v4.length, 2) assertEquals(v4(0), 2.0) assertEquals(v4(1), 1.0) diff --git a/vecxt/test/src/booleanarray.test.scala b/vecxt/test/src/booleanarray.test.scala index aa43d639..d8107111 100644 --- a/vecxt/test/src/booleanarray.test.scala +++ b/vecxt/test/src/booleanarray.test.scala @@ -154,4 +154,18 @@ class BooleanArrayExtensionSuite extends munit.FunSuite: } } + test("Indexing into via select".only) { + val v1 = Array[Boolean](true, false, true, false, true) + + val v2 = Array[Int](0, 1, 2, 3, 4) + val indexed = v2.mask(v1)(using true) + + 
assertEquals(indexed.length, v1.trues) + + assertEquals(indexed(0), 0) + assertEquals(indexed(1), 2) + assertEquals(indexed(2), 4) + + } + end BooleanArrayExtensionSuite diff --git a/vecxt/test/src/intarray.test.scala b/vecxt/test/src/intarray.test.scala index 23d060a4..99616db6 100644 --- a/vecxt/test/src/intarray.test.scala +++ b/vecxt/test/src/intarray.test.scala @@ -31,6 +31,21 @@ class IntArrayExtensionSuite extends munit.FunSuite: assertEquals(v1.sum, 45) } + test("array eq") { + val v1 = Array(1, 2, 3, 4, 5) + + val compared = v1 =:= v1.reverse + + val compared2 = v1 =:= 2 + + assertEquals(compared.trues, 1) + assert(compared(2)) + + assertEquals(compared.trues, 1) + assert(compared(2)) + + } + test("increments") { val v1 = Array[Int](1, 2, 3, 4, 5, 6, 7, 8, 9, 10) diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill index 23182543..c81b2763 100644 --- a/vecxt_re/package.mill +++ b/vecxt_re/package.mill @@ -45,7 +45,9 @@ object `package` extends Module: object js extends VexctReModule with build.CommonJS: def moduleDeps = Seq(build.vecxt.js) - override def mvnDeps = super.mvnDeps() + override def mvnDeps = super.mvnDeps() ++ Seq( + build.V.scalaJavaTime + ) def sources = Task(super.sources() ++ jsNativeSharedSources()) def moduleKind = ModuleKind.ESModule def enableBsp = false @@ -59,7 +61,9 @@ object `package` extends Module: object native extends VexctReModule with build.CommonNative: def moduleDeps = Seq(build.vecxt.native) - override def mvnDeps = super.mvnDeps() + override def mvnDeps = super.mvnDeps() ++ Seq( + build.V.scalaJavaTime + ) def sources = Task(super.sources() ++ jsNativeSharedSources() ++ jvmNativeSharedSources()) override def enableBsp = false diff --git a/vecxt_re/src-js-native/PlatformReporting.scala b/vecxt_re/src-js-native/PlatformReporting.scala new file mode 100644 index 00000000..0e22a5e2 --- /dev/null +++ b/vecxt_re/src-js-native/PlatformReporting.scala @@ -0,0 +1,96 @@ +package vecxt_re + +/** Platform-specific reporting 
implementations for JS and Native. + * + * Uses a streaming single-pass algorithm to compute all loss metrics efficiently. + */ +object PlatformReporting: + + /** Computes loss report metrics in a single pass using Welford's online algorithm. + * + * Instead of calling groupSum multiple times (for attachment, exhaustion, std, and EL), this method iterates + * through the grouped sums once, accumulating all intermediate results: + * - Sum for expected loss (EL) + * - Count of attached iterations (groupSum > 0) + * - Count of exhausted iterations (groupSum >= exhaust threshold) + * - Running mean and M2 for Welford's variance algorithm + * + * @param calcd + * Tuple of (layer, cededToLayer array) + * @param numIterations + * Number of iterations/years + * @param years + * Sorted array of 1-based iteration indices + * @param limit + * Report denominator for normalizing results + * @return + * Named tuple with all loss report metrics + */ + inline def lossReportFast( + calcd: (layer: Layer, cededToLayer: Array[Double]), + numIterations: Int, + years: Array[Int], + limit: ReportDenominator + ): (name: String, limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + val reportLimit = limit.fromlayer(calcd.layer) + val exhaust = calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) - 0.01 + val values = calcd.cededToLayer + val l = years.length + + // Accumulators + var totalSum = 0.0 // For expected loss + var attachCount = 0 // Count of iterations with loss > 0 + var exhaustCount = 0 // Count of iterations at exhaustion + // Welford's online algorithm accumulators + var mean = 0.0 + var m2 = 0.0 + var n = 0 // Count for Welford (should equal numIterations at end) + + // Single pass through groups (similar to groupSum but computing all metrics) + var i = 0 + var currentGroup = 1 + while currentGroup <= numIterations do + var groupSum = 0.0 + + // Sum all values in this group + while i < l && years(i) == currentGroup do + groupSum += 
values(i) + i += 1 + end while + + // Update total sum for EL + totalSum += groupSum + + // Update attachment count (any positive loss) + if groupSum > 0 then attachCount += 1 + + // Update exhaustion count + if groupSum > exhaust then exhaustCount += 1 + + // Welford's online algorithm for variance + n += 1 + val delta = groupSum - mean + mean += delta / n + val delta2 = groupSum - mean + m2 += delta * delta2 + + currentGroup += 1 + end while + + // Compute final statistics + val el = totalSum / numIterations + val variance = if n > 0 then m2 / n else 0.0 + val stdDev = Math.sqrt(variance) + val attachProb = attachCount.toDouble / numIterations + val exhaustProb = exhaustCount.toDouble / numIterations + + ( + name = calcd.layer.layerName.getOrElse(s"Layer ${calcd.layer.layerId}"), + limit = reportLimit, + el = el / reportLimit, + stdDev = stdDev / reportLimit, + attachProb = attachProb, + exhaustProb = exhaustProb + ) + end lossReportFast +end PlatformReporting \ No newline at end of file diff --git a/vecxt_re/src-js-native/plots.scala b/vecxt_re/src-js-native/plots.scala new file mode 100644 index 00000000..971a5222 --- /dev/null +++ b/vecxt_re/src-js-native/plots.scala @@ -0,0 +1,5 @@ +package vecxt_re + +object Plots: + +end Plots diff --git a/vecxt_re/src-jvm/PlatformReporting.scala b/vecxt_re/src-jvm/PlatformReporting.scala new file mode 100644 index 00000000..55806a53 --- /dev/null +++ b/vecxt_re/src-jvm/PlatformReporting.scala @@ -0,0 +1,98 @@ +package vecxt_re + +/** Platform-specific reporting implementations for JVM. + * + * Uses a streaming single-pass algorithm to compute all loss metrics efficiently. The algorithm processes groups + * inline (avoiding the allocation of an intermediate array) and uses Welford's online algorithm for numerically + * stable variance computation. + */ +object PlatformReporting: + + /** Computes loss report metrics in a single pass using Welford's online algorithm. 
+ * + * Instead of calling groupSum multiple times (for attachment, exhaustion, std, and EL), this method iterates + * through the grouped sums once, accumulating all intermediate results: + * - Sum for expected loss (EL) + * - Count of attached iterations (groupSum > 0) + * - Count of exhausted iterations (groupSum >= exhaust threshold) + * - Running mean and M2 for Welford's variance algorithm + * + * @param calcd + * Tuple of (layer, cededToLayer array) + * @param numIterations + * Number of iterations/years + * @param years + * Sorted array of 1-based iteration indices + * @param limit + * Report denominator for normalizing results + * @return + * Named tuple with all loss report metrics + */ + inline def lossReportFast( + calcd: (layer: Layer, cededToLayer: Array[Double]), + numIterations: Int, + years: Array[Int], + limit: ReportDenominator + ): (name: String, limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + val reportLimit = limit.fromlayer(calcd.layer) + val exhaust = calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) - 0.01 + val values = calcd.cededToLayer + val l = years.length + + // Accumulators + var totalSum = 0.0 // For expected loss + var attachCount = 0 // Count of iterations with loss > 0 + var exhaustCount = 0 // Count of iterations at exhaustion + // Welford's online algorithm accumulators + var mean = 0.0 + var m2 = 0.0 + var n = 0 // Count for Welford (should equal numIterations at end) + + // Single pass through groups (similar to groupSum but computing all metrics inline) + var i = 0 + var currentGroup = 1 + while currentGroup <= numIterations do + var groupSum = 0.0 + + // Sum all values in this group - use scalar loop since group sizes are typically small + while i < l && years(i) == currentGroup do + groupSum += values(i) + i += 1 + end while + + // Update total sum for EL + totalSum += groupSum + + // Update attachment count (any positive loss) + if groupSum > 0 then attachCount += 1 + + // 
Update exhaustion count + if groupSum > exhaust then exhaustCount += 1 + + // Welford's online algorithm for variance + n += 1 + val delta = groupSum - mean + mean += delta / n + val delta2 = groupSum - mean + m2 += delta * delta2 + + currentGroup += 1 + end while + + // Compute final statistics + val el = totalSum / numIterations + val variance = if n > 0 then m2 / n else 0.0 + val stdDev = Math.sqrt(variance) + val attachProb = attachCount.toDouble / numIterations + val exhaustProb = exhaustCount.toDouble / numIterations + + ( + name = calcd.layer.layerName.getOrElse(s"Layer ${calcd.layer.layerId}"), + limit = reportLimit, + el = el / reportLimit, + stdDev = stdDev / reportLimit, + attachProb = attachProb, + exhaustProb = exhaustProb + ) + end lossReportFast +end PlatformReporting \ No newline at end of file diff --git a/vecxt_re/src/Layer.scala b/vecxt_re/src/Layer.scala index 88673cf8..326cd2e2 100644 --- a/vecxt_re/src/Layer.scala +++ b/vecxt_re/src/Layer.scala @@ -59,7 +59,6 @@ case class Layer( lazy val firstLimit = occLimit.orElse(aggLimit).getOrElse(Double.PositiveInfinity) - /** The smallest claim which exhausts the first limit of this layer */ lazy val cap = occLimit match case Some(occLimit) => diff --git a/vecxt_re/src/Patchwork.scala b/vecxt_re/src/Patchwork.scala index 167e5fd4..7b51963a 100644 --- a/vecxt_re/src/Patchwork.scala +++ b/vecxt_re/src/Patchwork.scala @@ -41,10 +41,9 @@ object Patchwork: end Patchwork -/** - * The key difference between a Patchwork and a Tower is that in a Patchwork the layers are independent of each other. Therefore, - * it's _retention_ is not a valid concept. Be wary of this - a patchwork ought to be for exploratory analysis only, it is unlikely - * to be a valid part of a reinsurance program. +/** The key difference between a Patchwork and a Tower is that in a Patchwork the layers are independent of each other. + * Therefore, it's _retention_ is not a valid concept. 
Be wary of this - a patchwork ought to be for exploratory + * analysis only, it is unlikely to be a valid part of a reinsurance program. * * @param layers * @param id @@ -66,8 +65,8 @@ case class Patchwork( ) end applyScale - /** A human friendly printout of this reinsurance patchwork. Skips any property which is "None" across all layers. Prints - * a console friendly table, with consistent spacing per column. + /** A human friendly printout of this reinsurance patchwork. Skips any property which is "None" across all layers. + * Prints a console friendly table, with consistent spacing per column. */ def show: String = if layers.isEmpty then return s"${name.getOrElse("Patchwork")}: no layers" diff --git a/vecxt_re/src/ReReporting.scala b/vecxt_re/src/ReReporting.scala index da136224..d5bfbf22 100644 --- a/vecxt_re/src/ReReporting.scala +++ b/vecxt_re/src/ReReporting.scala @@ -13,29 +13,34 @@ object ReReporting: (groupSum(years, calcd.cededToLayer, numIterations) > exhaust).trues / numIterations.toDouble end exhaustionProbability - inline def expectedLoss(numIterations: Int): Double = calcd.cededToLayer.sum / numIterations + inline def expectedLoss(numIterations: Int): Double = calcd.cededToLayer.sumSIMD / numIterations inline def std(numIterations: Int, years: Array[Int]): Double = groupSum(years, calcd.cededToLayer, numIterations).stdDev - inline def expectedLossAggLimit(numIterations: Int): Double = - calcd.cededToLayer.sum / (calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) * numIterations) - + /** Efficient single-pass loss report computation. + * + * This method computes all loss metrics (EL, std, attachment probability, exhaustion probability) in a single pass + * through the data, using Welford's online algorithm for numerically stable variance computation. + * + * This is significantly more efficient than calling the individual metric methods separately, as it avoids + * multiple iterations through the grouped sums. 
+ * + * @param numIterations + * Number of simulation iterations + * @param years + * Sorted array of 1-based iteration indices + * @param limit + * Report denominator for normalizing EL and std + * @return + * Named tuple with (name, limit, el, stdDev, attachProb, exhaustProb) + */ inline def lossReport( numIterations: Int, years: Array[Int], limit: ReportDenominator ): (name: String, limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = - val reportLimit = limit.fromlayer(calcd.layer) - ( - name = calcd.layer.layerName.getOrElse(s"Layer ${calcd.layer.layerId}"), - limit = reportLimit, - el = expectedLoss(numIterations) / reportLimit, - stdDev = std(numIterations, years) / reportLimit, - attachProb = attachmentProbability(numIterations, years), - exhaustProb = exhaustionProbability(numIterations, years) - ) - - // TODO formatting + PlatformReporting.lossReportFast(calcd, numIterations, years, limit) + end extension end ReReporting diff --git a/vecxt_re/src/groupSums.scala b/vecxt_re/src/groupSums.scala index 60c30cd7..442530ac 100644 --- a/vecxt_re/src/groupSums.scala +++ b/vecxt_re/src/groupSums.scala @@ -29,10 +29,37 @@ inline def groupCumSum(groups: Array[Int], values: Array[Double]): Array[Double] end if end groupCumSum -/** - sum by group index - * - Each group has a small number of values. - * - Each the groups are keyed by their index. - * - assumes groups are already sorted +/** Compute the sum of values for each group identified by an integer index. + * + * The function expects `groups` to be sorted in non-decreasing order and that `groups` and `values` have the same + * length. Group indices are 1-based and must be in the range 1..nitr. The returned array has length `nitr`; element at + * position `i` (0-based) contains the sum of values for group index `i+1`. Groups with no entries produce a zero in + * the corresponding slot. 
+ * + * Preconditions: + * - groups.length == values.length + * - groups is sorted (runs of identical indices are contiguous) + * - every g in groups satisfies 1 <= g <= nitr + * + * Complexity: O(groups.length) time, O(nitr) extra space. + * + * This method is unsafe and performs no checks that these conditions are satisfied. It is the responsibility of the + * caller. + * + * @param groups + * sorted array of 1-based group indices (length L) + * @param values + * array of values corresponding to each group index (length L) + * @param nitr + * number of groups (size of the returned array) + * @return + * an Array[Double] of length `nitr` where each element is the sum for that group + * @throws ArrayIndexOutOfBoundsException + * if a group index is outside 1..nitr + * @throws IllegalArgumentException + * if groups.length != values.length + * + * Example: groups = Array(1, 1, 3), values = Array(1.0, 2.0, 4.0), nitr = 4 result = Array(3.0, 0.0, 4.0, 0.0) */ inline def groupSum(groups: Array[Int], values: Array[Double], nitr: Int): Array[Double] = val result = Array.fill(nitr)(0.0) diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index d30e30e7..0f56c005 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -102,12 +102,13 @@ object Scenarr: scenario.copy(amounts = scenario.amounts * scale, threshold = scenario.threshold * scale) inline def iteration(num: Int) = - assert(num > 0 && num <= scenario.numberIterations ) + assert(num > 0 && num <= scenario.numberIterations) val idx = scenario.iterations =:= num + import vecxt.BoundsCheck.DoBoundsCheck.yes Scenarr( - scenario.iterations(idx)(using false), - scenario.days(idx)(using false), - scenario.amounts(idx)(using false), + scenario.iterations.mask(idx), + scenario.days.mask(idx), + scenario.amounts.mask(idx), scenario.numberIterations, scenario.threshold, scenario.day1, @@ -117,7 +118,6 @@ object Scenarr: ) end iteration - // def shiftDay1To(date: LocalDate): Scenarr = // 
scenario.period.firstLoss.plusYears(1).minusDays(1) // // val ndays = ChronoUnit.DAYS.between( period._1, seasonEnd) + 1 Let sjust ssume this is 365 ... there is a theoretical problem with air assuming 365 days. Leap years anyone? @@ -147,9 +147,9 @@ object Scenarr: end if val idx = scenario.amounts > newThresh Scenarr( - scenario.iterations(idx)(using false), - scenario.days(idx)(using false), - scenario.amounts(idx)(using false), + scenario.iterations.mask(idx)(using false), + scenario.days.mask(idx)(using false), + scenario.amounts.mask(idx)(using false), scenario.numberIterations, newThresh, scenario.day1, diff --git a/vecxt_re/test/src/attachment.test.scala b/vecxt_re/test/src/attachment.test.scala new file mode 100644 index 00000000..e69de29b diff --git a/vecxt_re/test/src/losscalc.test.scala b/vecxt_re/test/src/losscalc.test.scala index 1f6e3a71..59d2245d 100644 --- a/vecxt_re/test/src/losscalc.test.scala +++ b/vecxt_re/test/src/losscalc.test.scala @@ -1,8 +1,7 @@ package vecxt_re -package vecxt_re - import munit.FunSuite +import ReReporting.* class LossCalcSuite extends FunSuite: @@ -36,3 +35,220 @@ class LossCalcSuite extends FunSuite: assertEqualsDouble(ReportDenominator.Custom(55.5).fromlayer(layer), 55.5, 0.0) } end LossCalcSuite + +class LossReportSuite extends FunSuite: + + test("lossReport computes EL correctly") { + // 5 iterations, total loss = 10 + 0 + 20 + 5 + 15 = 50 + // EL = 50 / 5 = 10 + val layer = Layer(occLimit = Some(100.0)) + val years = Array(1, 1, 3, 4, 5, 5) + val cededToLayer = Array(5.0, 5.0, 20.0, 5.0, 10.0, 5.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + // Total = 10 + 20 + 5 + 15 = 50, EL = 50/5 = 10, normalized = 10/100 = 0.1 + assertEqualsDouble(report.el, 0.1, 0.0001, "EL should be 10/100 = 0.1") + // Compare against single metric 
calculation + val singleMetricEL = calcd.expectedLoss(numIterations) / reportLimit + assertEqualsDouble(report.el, singleMetricEL, 0.0001, "lossReport EL should match expectedLoss") + } + + test("lossReport computes attachment probability correctly") { + // 5 iterations: iter 1 has loss, iter 2 has 0 loss, iter 3 has loss, iter 4 has loss, iter 5 has loss + // Attachment = 4/5 = 0.8 + val layer = Layer(occLimit = Some(100.0)) + val years = Array(1, 1, 3, 4, 5, 5) + val cededToLayer = Array(5.0, 5.0, 20.0, 5.0, 10.0, 5.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + // Iterations 1,3,4,5 have losses, iteration 2 has 0 + assertEqualsDouble(report.attachProb, 0.8, 0.0001, "Attachment probability should be 4/5 = 0.8") + // Compare against single metric calculation + val singleMetricAttach = calcd.attachmentProbability(numIterations, years) + assertEqualsDouble( + report.attachProb, + singleMetricAttach, + 0.0001, + "lossReport attachProb should match attachmentProbability" + ) + } + + test("lossReport computes exhaustion probability correctly") { + // Layer with aggLimit of 10, 5 iterations + // iter 1: 10 (exhausted), iter 2: 0, iter 3: 20 (exhausted), iter 4: 5, iter 5: 15 (exhausted) + // Exhaustion = 3/5 = 0.6 + val layer = Layer(occLimit = Some(100.0), aggLimit = Some(10.0)) + val years = Array(1, 1, 3, 4, 5, 5) + val cededToLayer = Array(5.0, 5.0, 20.0, 5.0, 10.0, 5.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + // Per-iteration sums: 1->10, 2->0, 3->20, 4->5, 5->15 + // exhaust threshold = 10 - 0.01 = 9.99 + // Iterations 1, 3, 5 exceed 9.99, so exhaustion = 3/5 = 0.6 + assertEqualsDouble(report.exhaustProb, 0.6, 0.0001, "Exhaustion probability should be 3/5 = 0.6") + // Compare against single metric calculation + 
val singleMetricExhaust = calcd.exhaustionProbability(numIterations, years) + assertEqualsDouble( + report.exhaustProb, + singleMetricExhaust, + 0.0001, + "lossReport exhaustProb should match exhaustionProbability" + ) + } + + test("lossReport computes stdDev correctly") { + // 5 iterations with per-iteration sums: 10, 0, 20, 5, 15 + // Mean = (10 + 0 + 20 + 5 + 15) / 5 = 50 / 5 = 10 + // Variance = ((10-10)^2 + (0-10)^2 + (20-10)^2 + (5-10)^2 + (15-10)^2) / 5 + // = (0 + 100 + 100 + 25 + 25) / 5 = 250 / 5 = 50 + // StdDev = sqrt(50) ≈ 7.071 + val layer = Layer(occLimit = Some(100.0)) + val years = Array(1, 1, 3, 4, 5, 5) + val cededToLayer = Array(5.0, 5.0, 20.0, 5.0, 10.0, 5.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + val expectedStdDev = Math.sqrt(50.0) / 100.0 // normalized by limit + assertEqualsDouble(report.stdDev, expectedStdDev, 0.0001, s"StdDev should be sqrt(50)/100 = $expectedStdDev") + // Compare against single metric calculation + val singleMetricStd = calcd.std(numIterations, years) / reportLimit + assertEqualsDouble(report.stdDev, singleMetricStd, 0.0001, "lossReport stdDev should match std") + } + + test("lossReport with all zero losses") { + val layer = Layer(occLimit = Some(100.0), aggLimit = Some(10.0)) + val years = Array[Int]() + val cededToLayer = Array[Double]() + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + assertEqualsDouble(report.el, 0.0, 0.0001, "EL should be 0") + assertEqualsDouble(report.stdDev, 0.0, 0.0001, "StdDev should be 0") + assertEqualsDouble(report.attachProb, 0.0, 0.0001, "Attachment probability should be 0") + 
assertEqualsDouble(report.exhaustProb, 0.0, 0.0001, "Exhaustion probability should be 0") + // Compare against single metric calculations + assertEqualsDouble( + report.el, + calcd.expectedLoss(numIterations) / reportLimit, + 0.0001, + "lossReport EL should match expectedLoss" + ) + assertEqualsDouble( + report.attachProb, + calcd.attachmentProbability(numIterations, years), + 0.0001, + "lossReport attachProb should match attachmentProbability" + ) + assertEqualsDouble( + report.exhaustProb, + calcd.exhaustionProbability(numIterations, years), + 0.0001, + "lossReport exhaustProb should match exhaustionProbability" + ) + assertEqualsDouble( + report.stdDev, + calcd.std(numIterations, years) / reportLimit, + 0.0001, + "lossReport stdDev should match std" + ) + } + + test("lossReport returns correct layer name") { + val layer = Layer(occLimit = Some(100.0), layerName = Some("Test Layer")) + val years = Array(1) + val cededToLayer = Array(10.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val report = calcd.lossReport(1, years, ReportDenominator.FirstLimit) + + assertEquals(report.name, "Test Layer") + } + + test("lossReport returns correct limit") { + val layer = Layer(occLimit = Some(100.0), aggLimit = Some(200.0)) + val years = Array(1) + val cededToLayer = Array(10.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val report = calcd.lossReport(1, years, ReportDenominator.FirstLimit) + + assertEqualsDouble(report.limit, 100.0, 0.0001) + } + + test("lossReport with single iteration") { + val layer = Layer(occLimit = Some(50.0)) + val years = Array(1, 1, 1) + val cededToLayer = Array(10.0, 15.0, 25.0) // Total = 50 + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 1 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + assertEqualsDouble(report.el, 1.0, 0.0001, "EL should be 50/50 = 1.0") + 
assertEqualsDouble(report.attachProb, 1.0, 0.0001, "Attachment should be 1.0") + assertEqualsDouble(report.stdDev, 0.0, 0.0001, "StdDev should be 0 with single iteration") + // Compare against single metric calculations + assertEqualsDouble( + report.el, + calcd.expectedLoss(numIterations) / reportLimit, + 0.0001, + "lossReport EL should match expectedLoss" + ) + assertEqualsDouble( + report.attachProb, + calcd.attachmentProbability(numIterations, years), + 0.0001, + "lossReport attachProb should match attachmentProbability" + ) + assertEqualsDouble( + report.stdDev, + calcd.std(numIterations, years) / reportLimit, + 0.0001, + "lossReport stdDev should match std" + ) + } + + test("lossReport matches all single metrics for complex scenario") { + // A more complex test case with many iterations and varied losses + val layer = Layer(occLimit = Some(50.0), aggLimit = Some(30.0), layerName = Some("Complex Layer")) + val years = Array(1, 1, 2, 3, 3, 3, 5, 7, 7, 10) + val cededToLayer = Array(10.0, 5.0, 25.0, 8.0, 12.0, 5.0, 40.0, 3.0, 7.0, 15.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 10 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + // Compare all metrics against single metric calculations + val singleMetricEL = calcd.expectedLoss(numIterations) / reportLimit + val singleMetricAttach = calcd.attachmentProbability(numIterations, years) + val singleMetricExhaust = calcd.exhaustionProbability(numIterations, years) + val singleMetricStd = calcd.std(numIterations, years) / reportLimit + + assertEqualsDouble(report.el, singleMetricEL, 0.0001, "lossReport EL should match expectedLoss") + assertEqualsDouble( + report.attachProb, + singleMetricAttach, + 0.0001, + "lossReport attachProb should match attachmentProbability" + ) + assertEqualsDouble( + report.exhaustProb, + singleMetricExhaust, + 0.0001, + "lossReport exhaustProb should 
match exhaustionProbability" + ) + assertEqualsDouble(report.stdDev, singleMetricStd, 0.0001, "lossReport stdDev should match std") + assertEquals(report.name, "Complex Layer") + assertEqualsDouble(report.limit, 50.0, 0.0001) + } + +end LossReportSuite diff --git a/vecxt_re/test/src/scenarr.test.scala b/vecxt_re/test/src/scenarr.test.scala index 8b68205d..b2061e26 100644 --- a/vecxt_re/test/src/scenarr.test.scala +++ b/vecxt_re/test/src/scenarr.test.scala @@ -3,7 +3,6 @@ package vecxt_re import vecxt.all.* import munit.FunSuite import java.time.{LocalDate, Month} -import scala.util.Random class ScenarrSuite extends FunSuite: diff --git a/vecxt_re/test/src/tower.test.scala b/vecxt_re/test/src/tower.test.scala index 9daf504f..fb89cf2a 100644 --- a/vecxt_re/test/src/tower.test.scala +++ b/vecxt_re/test/src/tower.test.scala @@ -11,7 +11,7 @@ class TowerSuite extends munit.FunSuite: losses: Array[Double], ceded: Array[Double], retained: Array[Double], - splits: IndexedSeq[(layer:Layer, cededToLayer: Array[Double])] = IndexedSeq.empty + splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])] = IndexedSeq.empty ) = import vecxt.BoundsCheck.DoBoundsCheck.yes assertVecEquals(ceded + retained, losses) From 8dbb5b686395bd38b44a3ee1ab5b82493d371ec5 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 11:25:28 +0100 Subject: [PATCH 22/75] . 
--- experiments/src/pricing_fun.scala | 8 ++++---- vecxt_re/src-js-native/PlatformReporting.scala | 8 +++++--- vecxt_re/src-jvm/PlatformReporting.scala | 12 +++++++----- vecxt_re/test/src/attachment.test.scala | 0 4 files changed, 16 insertions(+), 12 deletions(-) delete mode 100644 vecxt_re/test/src/attachment.test.scala diff --git a/experiments/src/pricing_fun.scala b/experiments/src/pricing_fun.scala index 28266ec8..9badcc0b 100644 --- a/experiments/src/pricing_fun.scala +++ b/experiments/src/pricing_fun.scala @@ -5,14 +5,13 @@ import cats.syntax.all.* @main def pricingFun = - val iterations = Array(1,1,2,3,1,2,3,4,5,10,10,10,10,10).sorted - val days = Array(1,2,3,4,5,6,7,8,9,10,11,12,13,14) - val amounts = Array(20.0,0,0,0,Int.MaxValue,0,0,0,0,0,25,30,0,0) + val iterations = Array(1, 1, 2, 3, 1, 2, 3, 4, 5, 10, 10, 10, 10, 10).sorted + val days = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) + val amounts = Array(20.0, 0, 0, 0, Int.MaxValue, 0, 0, 0, 0, 0, 25, 30, 0, 0) println(iterations.printArr) println(amounts.printArr) - val scen = Scenarr( iterations = iterations, days = days, @@ -33,3 +32,4 @@ import cats.syntax.all.* splits10.map(_.cededToLayer).foreach(arr => println(arr.printArr)) splits.map(s => s.lossReport(scen.numberIterations, scen.iterations, ReportDenominator.FirstLimit)).ptbln +end pricingFun diff --git a/vecxt_re/src-js-native/PlatformReporting.scala b/vecxt_re/src-js-native/PlatformReporting.scala index 0e22a5e2..9739d87a 100644 --- a/vecxt_re/src-js-native/PlatformReporting.scala +++ b/vecxt_re/src-js-native/PlatformReporting.scala @@ -8,8 +8,8 @@ object PlatformReporting: /** Computes loss report metrics in a single pass using Welford's online algorithm. 
* - * Instead of calling groupSum multiple times (for attachment, exhaustion, std, and EL), this method iterates - * through the grouped sums once, accumulating all intermediate results: + * Instead of calling groupSum multiple times (for attachment, exhaustion, std, and EL), this method iterates through + * the grouped sums once, accumulating all intermediate results: * - Sum for expected loss (EL) * - Count of attached iterations (groupSum > 0) * - Count of exhausted iterations (groupSum >= exhaust threshold) @@ -63,9 +63,11 @@ object PlatformReporting: // Update attachment count (any positive loss) if groupSum > 0 then attachCount += 1 + end if // Update exhaustion count if groupSum > exhaust then exhaustCount += 1 + end if // Welford's online algorithm for variance n += 1 @@ -93,4 +95,4 @@ object PlatformReporting: exhaustProb = exhaustProb ) end lossReportFast -end PlatformReporting \ No newline at end of file +end PlatformReporting diff --git a/vecxt_re/src-jvm/PlatformReporting.scala b/vecxt_re/src-jvm/PlatformReporting.scala index 55806a53..8d041cd4 100644 --- a/vecxt_re/src-jvm/PlatformReporting.scala +++ b/vecxt_re/src-jvm/PlatformReporting.scala @@ -3,15 +3,15 @@ package vecxt_re /** Platform-specific reporting implementations for JVM. * * Uses a streaming single-pass algorithm to compute all loss metrics efficiently. The algorithm processes groups - * inline (avoiding the allocation of an intermediate array) and uses Welford's online algorithm for numerically - * stable variance computation. + * inline (avoiding the allocation of an intermediate array) and uses Welford's online algorithm for numerically stable + * variance computation. */ object PlatformReporting: /** Computes loss report metrics in a single pass using Welford's online algorithm. 
* - * Instead of calling groupSum multiple times (for attachment, exhaustion, std, and EL), this method iterates - * through the grouped sums once, accumulating all intermediate results: + * Instead of calling groupSum multiple times (for attachment, exhaustion, std, and EL), this method iterates through + * the grouped sums once, accumulating all intermediate results: * - Sum for expected loss (EL) * - Count of attached iterations (groupSum > 0) * - Count of exhausted iterations (groupSum >= exhaust threshold) @@ -65,9 +65,11 @@ object PlatformReporting: // Update attachment count (any positive loss) if groupSum > 0 then attachCount += 1 + end if // Update exhaustion count if groupSum > exhaust then exhaustCount += 1 + end if // Welford's online algorithm for variance n += 1 @@ -95,4 +97,4 @@ object PlatformReporting: exhaustProb = exhaustProb ) end lossReportFast -end PlatformReporting \ No newline at end of file +end PlatformReporting diff --git a/vecxt_re/test/src/attachment.test.scala b/vecxt_re/test/src/attachment.test.scala deleted file mode 100644 index e69de29b..00000000 From 62760522cbd5d7982297cacfc0ee37d6595b7fc5 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 11:30:51 +0100 Subject: [PATCH 23/75] . --- vecxt/test/src/booleanarray.test.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vecxt/test/src/booleanarray.test.scala b/vecxt/test/src/booleanarray.test.scala index d8107111..cd561db7 100644 --- a/vecxt/test/src/booleanarray.test.scala +++ b/vecxt/test/src/booleanarray.test.scala @@ -154,7 +154,7 @@ class BooleanArrayExtensionSuite extends munit.FunSuite: } } - test("Indexing into via select".only) { + test("Indexing into via select") { val v1 = Array[Boolean](true, false, true, false, true) val v2 = Array[Int](0, 1, 2, 3, 4) From f05eb13d8bab45d2f52c0ad32b08c4c6fb77f00f Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 11:49:02 +0100 Subject: [PATCH 24/75] . 
--- site/docs/examples.md | 4 ++-- vecxt_re/src/all.scala | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/site/docs/examples.md b/site/docs/examples.md index cdd7d1a1..642ccf96 100644 --- a/site/docs/examples.md +++ b/site/docs/examples.md @@ -68,7 +68,7 @@ v1.cumsum.printArr (v1 < 2).printArr (v1 <= 2).printArr -(v1(v1 <= 2)).printArr +(v1.mask(v1 <= 2)).printArr (v1.outer(v2)).printMat @@ -121,7 +121,7 @@ v1.dot(v2) (v1 < 2).printArr (v1 <= 2).printArr -(v1(v1 <= 2)).printArr +(v1.mask(v1 <= 2)).printArr ``` diff --git a/vecxt_re/src/all.scala b/vecxt_re/src/all.scala index e98e8784..a2a00739 100644 --- a/vecxt_re/src/all.scala +++ b/vecxt_re/src/all.scala @@ -6,4 +6,6 @@ object all: export vecxt_re.Plots.* export vecxt_re.SplitLosses.* export vecxt_re.SplitScenario.* + export vecxt_re.DeductibleType.* + export vecxt_re.ReReporting.* end all From 202c187e3abbce128d3f6b4df78dad0d0c06648b Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 12:07:55 +0100 Subject: [PATCH 25/75] welfords algorithm --- vecxt/src-js/array.scala | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index 40777593..cb8021c2 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -100,19 +100,22 @@ object arrays: meanAndVariance(VarianceMode.Population) inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = - val μ = vec.mean - var acc = 0.0 + var mean = 0.0 + var m2 = 0.0 var i = 0 while i < vec.length do - val diff = vec(i) - μ - acc += diff * diff + val n = i + 1 + val delta = vec(i) - mean + mean += delta / n + val delta2 = vec(i) - mean + m2 += delta * delta2 i += 1 end while val denom = mode match case VarianceMode.Population => vec.length.toDouble case VarianceMode.Sample => (vec.length - 1).toDouble - (μ, acc / denom) + (mean, m2 / denom) end meanAndVariance inline def std: Double = std(VarianceMode.Population) 
@@ -253,12 +256,15 @@ object arrays: meanAndVariance(VarianceMode.Population) inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = - val μ = vec.mean - var acc = 0.0 + var mean = 0.0 + var m2 = 0.0 var i = 0 while i < vec.length do - val diff = vec(i) - μ - acc += diff * diff + val n = i + 1 + val delta = vec(i) - mean + mean += delta / n + val delta2 = vec(i) - mean + m2 += delta * delta2 i += 1 end while @@ -266,7 +272,7 @@ object arrays: case VarianceMode.Population => vec.length.toDouble case VarianceMode.Sample => (vec.length - 1).toDouble - (μ, acc / denom) + (mean, m2 / denom) end meanAndVariance inline def unary_- : Array[Double] = From 1b9811191f7afd1a9a50f75904a9ae6c8d03af7a Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 12:12:19 +0100 Subject: [PATCH 26/75] . --- vecxt/src-native/array.scala | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/vecxt/src-native/array.scala b/vecxt/src-native/array.scala index beb59880..019c5a1b 100644 --- a/vecxt/src-native/array.scala +++ b/vecxt/src-native/array.scala @@ -68,19 +68,22 @@ object arrays: meanAndVariance(VarianceMode.Population) inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = - val μ = vec.mean - var acc = 0.0 + var mean = 0.0 + var m2 = 0.0 var i = 0 while i < vec.length do - val diff = vec(i) - μ - acc = Math.fma(diff, diff, acc) + val n = i + 1 + val delta = vec(i) - mean + mean += delta / n + val delta2 = vec(i) - mean + m2 += delta * delta2 i += 1 end while val denom = mode match case VarianceMode.Population => vec.length.toDouble case VarianceMode.Sample => (vec.length - 1).toDouble - (μ, acc / denom) + (mean, m2 / denom) end meanAndVariance inline def std: Double = std(VarianceMode.Population) @@ -293,12 +296,15 @@ object arrays: meanAndVariance(VarianceMode.Population) inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = - val μ = vec.mean - 
var acc = 0.0 + var mean = 0.0 + var m2 = 0.0 var i = 0 while i < vec.length do - val diff = vec(i) - μ - acc = Math.fma(diff, diff, acc) + val n = i + 1 + val delta = vec(i) - mean + mean += delta / n + val delta2 = vec(i) - mean + m2 += delta * delta2 i += 1 end while @@ -306,7 +312,7 @@ object arrays: case VarianceMode.Population => vec.length.toDouble case VarianceMode.Sample => (vec.length - 1).toDouble - (μ, acc / denom) + (mean, m2 / denom) end meanAndVariance inline def mean: Double = vec.sum / vec.length From 777ebfadfc00ead2ed45cf04025ac1f68c47af63 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 14:38:03 +0100 Subject: [PATCH 27/75] . --- benchmark/package.mill | 2 +- benchmark/src/LossReportBenchmark.scala | 98 +++++++++++++++++++++++++ benchmark/src/groupOpsBenchmark.scala | 25 +++++-- benchmark/src/splitAmnt.scala | 4 +- benchmark/src/variance.scala | 41 ++++++++--- experiments/src/cheatsheet.scala | 2 +- vecxt/src-jvm/arrays.scala | 97 +++++++++++++++++++++++- 7 files changed, 245 insertions(+), 24 deletions(-) create mode 100644 benchmark/src/LossReportBenchmark.scala diff --git a/benchmark/package.mill b/benchmark/package.mill index b5570157..30db05e2 100644 --- a/benchmark/package.mill +++ b/benchmark/package.mill @@ -9,7 +9,7 @@ object `package` extends JmhModule with ScalaModule: def scalaVersion = build.vecxt.jvm.scalaVersion def jmhCoreVersion = "1.37" override def forkArgs: T[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag - override def moduleDeps: Seq[JavaModule] = Seq(build.vecxt.jvm, build.vecxtensions.jvm) + override def moduleDeps: Seq[JavaModule] = Seq(build.vecxt.jvm, build.vecxtensions.jvm, build.vecxt_re.jvm) def enableBsp = false // override def generateBenchmarkSources = T{ diff --git a/benchmark/src/LossReportBenchmark.scala b/benchmark/src/LossReportBenchmark.scala new file mode 100644 index 00000000..a812e915 --- /dev/null +++ b/benchmark/src/LossReportBenchmark.scala @@ -0,0 +1,98 @@ +package 
vecxt.benchmark + +import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.infra.Blackhole +import scala.compiletime.uninitialized +import scala.util.Random +import vecxt_re.* +import vecxt_re.ReReporting.* +import vecxt.all.* + +// ./mill benchmark.runJmh "vecxt.benchmark.LossReportBenchmark" -jvmArgs --add-modules=jdk.incubator.vector -rf json -wi 1 -i 3 -f 1 + +/** + * 231] Benchmark (numEventsStr) (numIterationsStr) Mode Cnt Score Error Units +231] LossReportBenchmark.lossReport_fast 10000 100 thrpt 3 177346.981 ± 24137.324 ops/s +231] LossReportBenchmark.lossReport_fast 10000 1000 thrpt 3 180400.504 ± 8719.687 ops/s +231] LossReportBenchmark.lossReport_fast 100000 100 thrpt 3 11731.510 ± 1945.957 ops/s +231] LossReportBenchmark.lossReport_fast 100000 1000 thrpt 3 17443.246 ± 425.030 ops/s +231] LossReportBenchmark.lossReport_separate 10000 100 thrpt 3 46850.187 ± 7232.734 ops/s +231] LossReportBenchmark.lossReport_separate 10000 1000 thrpt 3 49876.719 ± 5238.487 ops/s +231] LossReportBenchmark.lossReport_separate 100000 100 thrpt 3 3360.234 ± 326.993 ops/s +231] LossReportBenchmark.lossReport_separate 100000 1000 thrpt 3 4706.819 ± 615.832 ops/s + */ + + + +@State(Scope.Thread) +class LossReportBenchmark extends BLASBenchmark: + + @Param(Array("10000", "100000")) + var numEventsStr: String = uninitialized + + @Param(Array("100", "1000")) + var numIterationsStr: String = uninitialized + + var years: Array[Int] = uninitialized + var ceded: Array[Double] = uninitialized + var layerObj: Layer = uninitialized + + @Setup(Level.Trial) + def setup: Unit = + val rng = new Random(0) + val numEvents = numEventsStr.toInt + val numIterations = numIterationsStr.toInt + + val yrs = Array.ofDim[Int](numEvents) + var i = 0 + while i < numEvents do + yrs(i) = rng.nextInt(numIterations) + 1 // 1-based group indices + i += 1 + end while + + java.util.Arrays.sort(yrs) + + years = yrs + + ceded = Array.ofDim[Double](numEvents) + i = 0 + while i < numEvents do + // random loss 
values between 0 and 100 + ceded(i) = rng.nextDouble() * 100.0 + i += 1 + end while + + // Choose a layer with a moderate aggLimit to cause some exhaustion hits + layerObj = Layer(occLimit = Some(100.0), aggLimit = Some(50.0)) + () + end setup + + @Benchmark + def lossReport_fast(bh: Blackhole) = + val calcd = (layerObj, ceded) + val r = calcd.lossReport(numIterationsStr.toInt, years, ReportDenominator.FirstLimit) + // consume fields so JMH doesn't optimize away + bh.consume(r.el) + bh.consume(r.stdDev) + bh.consume(r.attachProb) + bh.consume(r.exhaustProb) + end lossReport_fast + + @Benchmark + def lossReport_separate(bh: Blackhole) = + val calcd = (layerObj, ceded) + val n = numIterationsStr.toInt + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layerObj) + + val el = calcd.expectedLoss(n) / reportLimit + val std = calcd.std(n, years) / reportLimit + val attach = calcd.attachmentProbability(n, years) + val exhaust = calcd.exhaustionProbability(n, years) + + bh.consume(el) + bh.consume(std) + bh.consume(attach) + bh.consume(exhaust) + end lossReport_separate + +end LossReportBenchmark diff --git a/benchmark/src/groupOpsBenchmark.scala b/benchmark/src/groupOpsBenchmark.scala index 0e3d35fb..98a9967e 100644 --- a/benchmark/src/groupOpsBenchmark.scala +++ b/benchmark/src/groupOpsBenchmark.scala @@ -9,9 +9,12 @@ import org.openjdk.jmh.annotations.* import org.openjdk.jmh.infra.Blackhole import scala.compiletime.uninitialized import vecxtensions.* +import vecxt_re.* import java.util.Random import java.util.concurrent.TimeUnit +// ./mill benchmark.runJmh "vecxt.benchmark.LossReportBenchmark" -jvmArgs --add-modules=jdk.incubator.vector -rf json -wi 1 -i 3 -f 1 + @State(Scope.Thread) @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.NANOSECONDS) @@ -91,11 +94,21 @@ class GroupOpsBenchmark: bh.consume(valuesCopy2) end benchGroupDiffInPlace - @Benchmark - def benchGroupSum(bh: Blackhole): Unit = - val (uniqueGroups, sums) = groupSum(groups, values) 
- bh.consume(uniqueGroups) - bh.consume(sums) - end benchGroupSum + // @Benchmark + // def benchGroupSum(bh: Blackhole): Unit = + // // groupSum in vecxt_re takes nitr; compute number of groups then call it + // var maxGroup = 0 + // var i = 0 + // while i < groups.length do + // if groups(i) > maxGroup then maxGroup = groups(i) + // end if + // i += 1 + // end while + // val numGroups = maxGroup + 1 + // val sums = groupSum(groups, values, numGroups) + // val uniqueGroups = Array.tabulate(numGroups)(identity) + // bh.consume(uniqueGroups) + // bh.consume(sums) + // end benchGroupSum end GroupOpsBenchmark diff --git a/benchmark/src/splitAmnt.scala b/benchmark/src/splitAmnt.scala index 207b5009..4ef551f3 100644 --- a/benchmark/src/splitAmnt.scala +++ b/benchmark/src/splitAmnt.scala @@ -9,12 +9,12 @@ import org.openjdk.jmh.infra.Blackhole import scala.compiletime.uninitialized import vecxt.all.* import vecxt.all.given -import vecxt.reinsurance.* +import vecxt_re.* import jdk.incubator.vector.VectorSpecies import jdk.incubator.vector.VectorOperators import jdk.incubator.vector.DoubleVector import java.util.Random -import vecxt.reinsurance.SplitLosses.splitAmntFast +import vecxt_re.SplitLosses.splitAmntFast // mill benchmark.runJmh vecxt.benchmark.SplitAmntBenchmark -jvmArgs --add-modules=jdk.incubator.vector -rf json @State(Scope.Thread) diff --git a/benchmark/src/variance.scala b/benchmark/src/variance.scala index c2605334..7304adf6 100644 --- a/benchmark/src/variance.scala +++ b/benchmark/src/variance.scala @@ -10,10 +10,22 @@ import jdk.incubator.vector.VectorSpecies import jdk.incubator.vector.VectorOperators import jdk.incubator.vector.DoubleVector +// ./mill benchmark.runJmh "vecxt.benchmark.VarianceBenchmark" -jvmArgs --add-modules=jdk.incubator.vector -rf json -wi 2 -i 3 -f 1 + +/** + * + * 231] Benchmark (len) Mode Cnt Score Error Units +231] VarianceBenchmark.var_simd_twopass 1000 thrpt 3 1087302.435 ± 16013.286 ops/s +231] VarianceBenchmark.var_simd_twopass 
100000 thrpt 3 9578.869 ± 334.606 ops/s +231] VarianceBenchmark.var_simd_welford 1000 thrpt 3 436244.559 ± 6158.585 ops/s +231] VarianceBenchmark.var_simd_welford 100000 thrpt 3 4187.715 ± 203.266 ops/s + */ + + @State(Scope.Thread) class VarianceBenchmark extends BLASBenchmark: - @Param(Array("3", "128", "100000")) + @Param(Array("1000", "100000")) var len: String = uninitialized; var arr: Array[Double] = uninitialized @@ -32,20 +44,29 @@ class VarianceBenchmark extends BLASBenchmark: vec.map(i => (i - μ) * (i - μ)).sumSIMD / (vec.length - 1) end extension - + // @Benchmark + // def var_naive_twopass(bh: Blackhole) = + // val r = arr.variance2 + // bh.consume(r); + // end var_naive_twopass @Benchmark - def var_loop(bh: Blackhole) = - val r = arr.variance2 + def var_simd_twopass(bh: Blackhole) = + val r = arr.meanAndVarianceTwoPass(VarianceMode.Sample).variance bh.consume(r); - end var_loop + end var_simd_twopass + // @Benchmark + // def var_simd_welford(bh: Blackhole) = + // val r = arr.meanAndVarianceWelfordSIMD(VarianceMode.Sample).variance + // bh.consume(r); + // end var_simd_welford - @Benchmark - def var_vec(bh: Blackhole) = - val r = arr.variance - bh.consume(r); - end var_vec + // @Benchmark + // def var_default(bh: Blackhole) = + // val r = arr.variance(VarianceMode.Sample) + // bh.consume(r); + // end var_default end VarianceBenchmark diff --git a/experiments/src/cheatsheet.scala b/experiments/src/cheatsheet.scala index 01ff037d..898c7247 100644 --- a/experiments/src/cheatsheet.scala +++ b/experiments/src/cheatsheet.scala @@ -254,7 +254,7 @@ object CheatsheetTest: not(boolArr2) // Boolean indexing - val filtered = a(a > 2.0) + val filtered = a.mask(a > 2.0) println(s"Filtered (>2): ${filtered.mkString(", ")}") val countTrues = boolArr.trues diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index 907c901a..a61cc123 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -262,6 +262,15 @@ object arrays: 
meanAndVariance(VarianceMode.Population) inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + meanAndVarianceTwoPass(mode) + end meanAndVariance + + /** 231] Benchmark (len) Mode Cnt Score Error Units 231] VarianceBenchmark.var_simd_twopass 1000 thrpt 3 1087302.435 + * ± 16013.286 ops/s 231] VarianceBenchmark.var_simd_twopass 100000 thrpt 3 9578.869 ± 334.606 ops/s 231] + * VarianceBenchmark.var_simd_welford 1000 thrpt 3 436244.559 ± 6158.585 ops/s 231] + * VarianceBenchmark.var_simd_welford 100000 thrpt 3 4187.715 ± 203.266 ops/s + */ + inline def meanAndVarianceTwoPass(mode: VarianceMode): (mean: Double, variance: Double) = val μ = vec.mean val μVec = DoubleVector.broadcast(spd, μ) @@ -295,7 +304,7 @@ object arrays: case VarianceMode.Sample => (vec.length - 1).toDouble (μ, sumSqDiff / denom) - end meanAndVariance + end meanAndVarianceTwoPass inline def std: Double = std(VarianceMode.Population) @@ -799,6 +808,87 @@ object arrays: meanAndVariance(VarianceMode.Population) inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + meanAndVarianceTwoPass(mode) + end meanAndVariance + + /** True SIMD-optimized Welford's algorithm for computing mean and variance. + * + * Each SIMD lane maintains independent Welford accumulators (n, mean, M2). Lanes process strided elements: lane 0 + * gets [0,4,8,...], lane 1 gets [1,5,9,...], etc. At the end, all lanes are merged using the parallel Welford + * merge formula: + * + * δ = meanB - meanA n = nA + nB mean = meanA + δ * nB / n M2 = M2A + M2B + δ² * nA * nB / n + * + * This algorithm is crushed by the simple two pass SIMD version. 
+ * + * 231] Benchmark (len) Mode Cnt Score Error Units 231] VarianceBenchmark.var_simd_twopass 1000 thrpt 3 1087302.435 + * ± 16013.286 ops/s 231] VarianceBenchmark.var_simd_twopass 100000 thrpt 3 9578.869 ± 334.606 ops/s 231] + * VarianceBenchmark.var_simd_welford 1000 thrpt 3 436244.559 ± 6158.585 ops/s 231] + * VarianceBenchmark.var_simd_welford 100000 thrpt 3 4187.715 ± 203.266 ops/s + */ + private inline def meanAndVarianceWelfordSIMD(mode: VarianceMode): (mean: Double, variance: Double) = + if vec.length == 0 then (0.0, 0.0) + else + // Per-lane accumulators + var laneMeans = DoubleVector.zero(spd) + var delta = DoubleVector.zero(spd) + var delta2 = DoubleVector.zero(spd) + var laneM2 = DoubleVector.zero(spd) + + var i = 0 + var j: Double = 1 + // All lanes will have processed J elements at the end of this loop + while i < spd.loopBound(vec.length) do + j = j + 1 + val values = DoubleVector.fromArray(spd, vec, i) + delta = values.sub(laneMeans) // Use current mean + laneMeans = laneMeans.add(delta.div(DoubleVector.broadcast(spd, j))) + delta2 = values.sub(laneMeans) // Use updated mean + laneM2 = laneM2.add(delta.mul(delta2)) + i += spdl + end while + + // val laneSumA = laneSum.toArray() + val laneMean = laneMeans.toArray() + val laneM2A = laneM2.toArray() + // Merge all lanes + var globalN = j + var globalMean = laneMean(0) + var globalM2 = laneM2A(0) + + var lane = 1 + while lane < spdl do + val delta = laneMean(lane) - globalMean + val newN = globalN + j + globalMean = globalMean + delta * j / newN + globalM2 = globalM2 + laneM2A(lane) + delta * delta * globalN * j / newN + globalN = newN + + lane += 1 + end while + + // Process tail elements + while i < vec.length do + val n = globalN + 1 + val delta = vec(i) - globalMean + globalMean += delta / n + val delta2 = vec(i) - globalMean + globalM2 += delta * delta2 + globalN = n + i += 1 + end while + + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => 
(vec.length - 1).toDouble + + (globalMean, globalM2 / denom) + end if + end meanAndVarianceWelfordSIMD + + /** Two-pass variance calculation (legacy, for comparison). First pass computes mean, second pass computes variance. + */ + inline def meanAndVarianceTwoPass(mode: VarianceMode): (mean: Double, variance: Double) = val μ = vec.mean val l = spd.length() var tmp = DoubleVector.zero(spd) @@ -807,7 +897,7 @@ object arrays: var i = 0 while i < spd.loopBound(vec.length) do val v = DoubleVector.fromArray(spd, vec, i) - val diff = v.sub(μVec) // Broadcast mean once, reuse + val diff = v.sub(μVec) tmp = diff.fma(diff, tmp) i += spdl end while @@ -825,8 +915,7 @@ object arrays: case VarianceMode.Sample => (vec.length - 1).toDouble (μ, sumSqDiff / denom) - - end meanAndVariance + end meanAndVarianceTwoPass inline def mean: Double = vec.sumSIMD / vec.length From f23672c88683fa56f0affa353abdfe7266ece700 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 17:05:35 +0100 Subject: [PATCH 28/75] . 
--- vecxt_re/src/scenarr.scala | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index 0f56c005..aab8c223 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -66,8 +66,8 @@ case class Scenarr( lazy val meanLoss: Double = amounts.sum / numberIterations - lazy val itrDayAmount: (itr: Array[Int], days: Array[Int], amounts: Array[Double]) = - (itr = iterations, days = days, amounts = amounts) + lazy val itrDayAmount: Array[(itr: Int, day: Int, amnt: Double)] = + iterations.zip(days).zip(amounts).map { case ((i, d), a) => (itr = i, day = d, amnt = a) } lazy val period: (firstLoss: LocalDate, lastLoss: LocalDate) = (day1.plusDays((days.minSIMD - 1).toLong), day1.plusDays((days.maxSIMD - 1).toLong)) @@ -98,6 +98,23 @@ object Scenarr: ) end sorted + inline def takeFirstNIterations(i: Int)= + assert(i > 0 && i <= scenario.numberIterations) + val idx = scenario.iterations <= i + import vecxt.BoundsCheck.DoBoundsCheck.yes + Scenarr( + scenario.iterations.mask(idx), + scenario.days.mask(idx), + scenario.amounts.mask(idx), + i, + scenario.threshold, + scenario.day1, + scenario.name, + scenario.id, + isSorted = scenario.isSorted + ) + end takeFirstNIterations + inline def scaleAmntBy(scale: Double): Scenarr = scenario.copy(amounts = scenario.amounts * scale, threshold = scenario.threshold * scale) From 581da5c0c4def013666be92693f4070218488cad Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 17:23:02 +0100 Subject: [PATCH 29/75] . 
--- .vscode/launch.json | 12 +- benchmark/src/LossReportBenchmark.scala | 20 ++-- benchmark/src/variance.scala | 12 +- experiments/resources/losses.csv | 11 ++ experiments/src/pricing_fun.scala | 44 ++++--- vecxt_re/src/Patchwork.scala | 145 ------------------------ vecxt_re/src/scenarr.scala | 2 +- vecxt_re/test/src/scenarr.test.scala | 10 +- 8 files changed, 60 insertions(+), 196 deletions(-) create mode 100644 experiments/resources/losses.csv delete mode 100644 vecxt_re/src/Patchwork.scala diff --git a/.vscode/launch.json b/.vscode/launch.json index 5c255353..35383fb6 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -20,19 +20,15 @@ { "type": "scala", "request": "launch", - "name": "debug test", - "buildTarget": "vecxt.jvm.test", - "testClass": "vecxt.MatrixAdditionTest", + "name": "Experiment", + "buildTarget": "experiments", + "mainClass": "experiments.pricingFun", "jvmOptions": [ "--add-modules=jdk.incubator.vector" ], "args": [ "-oD" - ], - "env": {}, - "internalConsoleOptions": "openOnSessionStart", - "preLaunchTask": "", - "postDebugTask": "" + ] }, { "type": "scala", diff --git a/benchmark/src/LossReportBenchmark.scala b/benchmark/src/LossReportBenchmark.scala index a812e915..990dd1e4 100644 --- a/benchmark/src/LossReportBenchmark.scala +++ b/benchmark/src/LossReportBenchmark.scala @@ -10,20 +10,16 @@ import vecxt.all.* // ./mill benchmark.runJmh "vecxt.benchmark.LossReportBenchmark" -jvmArgs --add-modules=jdk.incubator.vector -rf json -wi 1 -i 3 -f 1 -/** - * 231] Benchmark (numEventsStr) (numIterationsStr) Mode Cnt Score Error Units -231] LossReportBenchmark.lossReport_fast 10000 100 thrpt 3 177346.981 ± 24137.324 ops/s -231] LossReportBenchmark.lossReport_fast 10000 1000 thrpt 3 180400.504 ± 8719.687 ops/s -231] LossReportBenchmark.lossReport_fast 100000 100 thrpt 3 11731.510 ± 1945.957 ops/s -231] LossReportBenchmark.lossReport_fast 100000 1000 thrpt 3 17443.246 ± 425.030 ops/s -231] LossReportBenchmark.lossReport_separate 10000 100 thrpt 
3 46850.187 ± 7232.734 ops/s -231] LossReportBenchmark.lossReport_separate 10000 1000 thrpt 3 49876.719 ± 5238.487 ops/s -231] LossReportBenchmark.lossReport_separate 100000 100 thrpt 3 3360.234 ± 326.993 ops/s -231] LossReportBenchmark.lossReport_separate 100000 1000 thrpt 3 4706.819 ± 615.832 ops/s +/** 231] Benchmark (numEventsStr) (numIterationsStr) Mode Cnt Score Error Units 231] LossReportBenchmark.lossReport_fast + * 10000 100 thrpt 3 177346.981 ± 24137.324 ops/s 231] LossReportBenchmark.lossReport_fast 10000 1000 thrpt 3 + * 180400.504 ± 8719.687 ops/s 231] LossReportBenchmark.lossReport_fast 100000 100 thrpt 3 11731.510 ± 1945.957 ops/s + * 231] LossReportBenchmark.lossReport_fast 100000 1000 thrpt 3 17443.246 ± 425.030 ops/s 231] + * LossReportBenchmark.lossReport_separate 10000 100 thrpt 3 46850.187 ± 7232.734 ops/s 231] + * LossReportBenchmark.lossReport_separate 10000 1000 thrpt 3 49876.719 ± 5238.487 ops/s 231] + * LossReportBenchmark.lossReport_separate 100000 100 thrpt 3 3360.234 ± 326.993 ops/s 231] + * LossReportBenchmark.lossReport_separate 100000 1000 thrpt 3 4706.819 ± 615.832 ops/s */ - - @State(Scope.Thread) class LossReportBenchmark extends BLASBenchmark: diff --git a/benchmark/src/variance.scala b/benchmark/src/variance.scala index 7304adf6..9f50cd70 100644 --- a/benchmark/src/variance.scala +++ b/benchmark/src/variance.scala @@ -12,16 +12,12 @@ import jdk.incubator.vector.DoubleVector // ./mill benchmark.runJmh "vecxt.benchmark.VarianceBenchmark" -jvmArgs --add-modules=jdk.incubator.vector -rf json -wi 2 -i 3 -f 1 -/** - * - * 231] Benchmark (len) Mode Cnt Score Error Units -231] VarianceBenchmark.var_simd_twopass 1000 thrpt 3 1087302.435 ± 16013.286 ops/s -231] VarianceBenchmark.var_simd_twopass 100000 thrpt 3 9578.869 ± 334.606 ops/s -231] VarianceBenchmark.var_simd_welford 1000 thrpt 3 436244.559 ± 6158.585 ops/s -231] VarianceBenchmark.var_simd_welford 100000 thrpt 3 4187.715 ± 203.266 ops/s +/** 231] Benchmark (len) Mode Cnt Score 
Error Units 231] VarianceBenchmark.var_simd_twopass 1000 thrpt 3 1087302.435 ± + * 16013.286 ops/s 231] VarianceBenchmark.var_simd_twopass 100000 thrpt 3 9578.869 ± 334.606 ops/s 231] + * VarianceBenchmark.var_simd_welford 1000 thrpt 3 436244.559 ± 6158.585 ops/s 231] VarianceBenchmark.var_simd_welford + * 100000 thrpt 3 4187.715 ± 203.266 ops/s */ - @State(Scope.Thread) class VarianceBenchmark extends BLASBenchmark: diff --git a/experiments/resources/losses.csv b/experiments/resources/losses.csv new file mode 100644 index 00000000..db38f689 --- /dev/null +++ b/experiments/resources/losses.csv @@ -0,0 +1,11 @@ +year,day,amount +3,302,5.912378260806521E8 +2,60,9.862215041507638E7 +2,147,6.174601056303087E8 +5,49,7.371032155830323E8 +7,57,8.011450710400633E8 +8,81,7.835310794931588E8 +1,25,9.332911010018561E8 +4,139,6.391918434382262E8 +1,276,6.679874680098424E8 +2,93,7.796052636961774E8 \ No newline at end of file diff --git a/experiments/src/pricing_fun.scala b/experiments/src/pricing_fun.scala index 9badcc0b..d81202db 100644 --- a/experiments/src/pricing_fun.scala +++ b/experiments/src/pricing_fun.scala @@ -2,34 +2,44 @@ package experiments import RPT.* import cats.syntax.all.* +import io.github.quafadas.table.TypeInferrer +import vecxt.BoundsCheck.DoBoundsCheck.yes @main def pricingFun = - val iterations = Array(1, 1, 2, 3, 1, 2, 3, 4, 5, 10, 10, 10, 10, 10).sorted - val days = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) - val amounts = Array(20.0, 0, 0, 0, Int.MaxValue, 0, 0, 0, 0, 0, 25, 30, 0, 0) - - println(iterations.printArr) - println(amounts.printArr) + val data = CSV.resource("losses.csv", CsvOpts(TypeInferrer.FromAllRows, ReadAs.Columns)) val scen = Scenarr( - iterations = iterations, - days = days, - amounts = amounts, + iterations = data.year, + days = data.day, + amounts = data.amount, numberIterations = 10, threshold = 0.0 - ) + ).sorted + + // val scen1 = scen.iteration(1).copy(numberIterations = 1) + + // println(scen1) + 
scen.itrDayAmount.ptbln + + val tower = Tower.singleShot(500e6, Array(150e6, 150e6, 100e6)) + val tower2 = Tower.singleShot(900e6, Array(100e6)) + + val (ceded, retained, splits) = tower.splitScenarioAmounts(scen) + val (ceded2, retained2, splits2) = tower2.splitScenarioAmounts(scen) + + // println(ceded.printArr) + // println(retained.printArr) - val tower = Tower.singleShot(15, Array(10, 10, 10)) + // println() - val iter10 = scen.iteration(10) + (splits ++ splits2).map(s => s.lossReport(scen.numberIterations, scen.iterations, ReportDenominator.FirstLimit)).ptbln - val (ceded, retained, splits) = tower.splitScenarioAmounts(scen)(using true) - val (ceded10, retained10, splits10) = tower.splitScenarioAmounts(iter10)(using true) + // val (ceded10, retained10, splits10) = tower.splitScenarioAmounts(iter10)(using true) - println(ceded10.printArr) + // println(ceded10.printArr) - splits10.map(_.cededToLayer).foreach(arr => println(arr.printArr)) + // splits10.map(_.cededToLayer).foreach(arr => println(arr.printArr)) - splits.map(s => s.lossReport(scen.numberIterations, scen.iterations, ReportDenominator.FirstLimit)).ptbln + // splits.map(s => s.lossReport(scen.numberIterations, scen.iterations, ReportDenominator.FirstLimit)).ptbln end pricingFun diff --git a/vecxt_re/src/Patchwork.scala b/vecxt_re/src/Patchwork.scala deleted file mode 100644 index 7b51963a..00000000 --- a/vecxt_re/src/Patchwork.scala +++ /dev/null @@ -1,145 +0,0 @@ -package vecxt_re - -import vecxt.all.* - -object Patchwork: - inline def fromRetention(ret: Double, limits: IndexedSeq[Double]): Tower = - val retentions = Array((ret +: limits.dropRight(1))*).cumsum.toArray - - val layers = retentions.zip(limits).map((retention, limit) => Layer(limit, retention)) - Tower(layers) - end fromRetention - - inline def singleShot(ret: Double, limits: IndexedSeq[Double]) = - val retentions = Array((ret +: limits.dropRight(1))*).cumsum.toArray - - val layers = retentions.zip(limits).map { (retention, limit) => - 
Layer( - aggLimit = Some(limit), - occRetention = Some(retention) - ) - } - Tower(layers) - end singleShot - - inline def oneAt100(ret: Double, limits: IndexedSeq[Double]): Tower = - - val retentions = Array((ret +: limits.dropRight(1))*).cumsum.toArray - - val layers = retentions - .zip(limits) - .map((retention, limit) => - Layer( - occLimit = Some(limit), - occRetention = Some(retention), - aggLimit = Some(limit * 2), - reinstatement = Some(Array(1.0)) - ) - ) - Tower(layers) - end oneAt100 - -end Patchwork - -/** The key difference between a Patchwork and a Tower is that in a Patchwork the layers are independent of each other. - * Therefore, it's _retention_ is not a valid concept. Be wary of this - a patchwork ought to be for exploratory - * analysis only, it is unlikely to be a valid part of a reinsurance program. - * - * @param layers - * @param id - * @param name - * @param subjPremium - */ -case class Patchwork( - layers: IndexedSeq[Layer], - id: Long = scala.util.Random.nextLong(), - name: Option[String] = None, - subjPremium: Option[Double] = None -): - def applyScale(scale: Double): Patchwork = - Patchwork( - layers = layers.map(_.applyScale(scale)), - id = scala.util.Random.nextLong(), - name = name, - subjPremium = subjPremium.map(_ * scale) - ) - end applyScale - - /** A human friendly printout of this reinsurance patchwork. Skips any property which is "None" across all layers. - * Prints a console friendly table, with consistent spacing per column. 
- */ - def show: String = - if layers.isEmpty then return s"${name.getOrElse("Patchwork")}: no layers" - end if - - inline def formatDouble(value: Double): String = - BigDecimal(value).bigDecimal.stripTrailingZeros().toPlainString - - inline def optionalColumn(label: String, f: Layer => Option[String]): Option[(String, IndexedSeq[String])] = - val values = layers.map(f) - if values.exists(_.isDefined) then Some(label -> values.map(_.getOrElse("-"))) else None - end if - end optionalColumn - - inline def requiredColumn(label: String, f: Layer => String): (String, IndexedSeq[String]) = - label -> layers.map(f) - - val indexColumn = "Layer" -> layers.indices.map(i => (i + 1).toString) - - val columns = scala.collection.immutable - .Vector( - Some(indexColumn), - optionalColumn("Name", _.layerName), - optionalColumn("Occ Ret", l => l.occRetention.map(formatDouble)), - optionalColumn("Occ Lim", l => l.occLimit.map(formatDouble)), - Some(requiredColumn("Occ Type", _.occType.toString)), - optionalColumn("Agg Ret", l => l.aggRetention.map(formatDouble)), - optionalColumn("Agg Lim", l => l.aggLimit.map(formatDouble)), - Some(requiredColumn("Agg Type", _.aggType.toString)), - Some(requiredColumn("Share", l => formatDouble(l.share))), - optionalColumn("Reinst", l => l.reinstatement.map(_.map(formatDouble).mkString("[", ", ", "]"))), - optionalColumn("Currency", _.currency), - optionalColumn("Premium", l => l.basePremiumAmount.map(formatDouble)), - optionalColumn("Prem/Unit", l => l.basePremiumUnit.map(formatDouble)), - optionalColumn("Prem Desc", _.basePremiumDescription), - optionalColumn("Comm", l => l.commissionAmount.map(formatDouble)), - optionalColumn("Comm/Unit", l => l.commissionUnit.map(formatDouble)), - optionalColumn("Comm Desc", _.commissionDescription), - optionalColumn("Broker", l => l.brokerageAmount.map(formatDouble)), - optionalColumn("Broker/Unit", l => l.brokerageUnit.map(formatDouble)), - optionalColumn("Broker Desc", _.brokerageDescription), - 
optionalColumn("Tax", l => l.taxAmount.map(formatDouble)), - optionalColumn("Tax/Unit", l => l.taxUnit.map(formatDouble)), - optionalColumn("Tax Desc", _.taxDescription), - optionalColumn("Fee", l => l.feeAmount.map(formatDouble)), - optionalColumn("Fee/Unit", l => l.feeUnit.map(formatDouble)), - optionalColumn("Fee Desc", _.feeDescription) - ) - .flatten - - val widths = columns.map { case (label, rows) => - math.max(label.length, rows.map(_.length).maxOption.getOrElse(0)) - } - - inline def pad(value: String, width: Int): String = - val padding = width - value.length - if padding <= 0 then value else value + (" " * padding) - end if - end pad - - val header = columns.zip(widths).map { case ((label, _), w) => pad(label, w) }.mkString(" | ") - val separator = widths.map(w => "-" * w).mkString("-+-") - val rows = layers.indices.map { rowIdx => - columns.zip(widths).map { case ((_, vals), w) => pad(vals(rowIdx), w) }.mkString(" | ") - } - - val meta = Seq( - Some(s"${name.getOrElse("Tower")}: ${layers.length} layer(s)"), - subjPremium.map(v => s"Subject premium: ${formatDouble(v)}"), - Some(s"Id: $id") - ).flatten - - (meta ++ Seq(header, separator) ++ rows).mkString(System.lineSeparator) - end show - -end Patchwork diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index aab8c223..84efab22 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -10,7 +10,7 @@ case class Scenarr( iterations: Array[Int], days: Array[Int], amounts: Array[Double], - numberIterations: Int = 0, + numberIterations: Int, threshold: Double = 0d, day1: LocalDate = LocalDate.of(2019, 1, 1), name: String = "", diff --git a/vecxt_re/test/src/scenarr.test.scala b/vecxt_re/test/src/scenarr.test.scala index b2061e26..df2e03e8 100644 --- a/vecxt_re/test/src/scenarr.test.scala +++ b/vecxt_re/test/src/scenarr.test.scala @@ -8,7 +8,7 @@ class ScenarrSuite extends FunSuite: test("constructor should enforce array length equality") { intercept[AssertionError] { - 
Scenarr(Array(1), Array(1, 2), Array(1.0)) + Scenarr(Array(1), Array(1, 2), Array(1.0), 2) } } @@ -72,10 +72,10 @@ class ScenarrSuite extends FunSuite: test("itrDayAmount and period produce expected tuples") { val days = Array(10, 100, 365, 366) val sc = Scenarr(Array(1, 1, 1, 1), days, Array(5.0, 6.0, 7.0, 8.0), numberIterations = 1) - val (itr, d, a) = sc.itrDayAmount - assertEquals(itr.toList, Array(1, 1, 1, 1).toList) - assertEquals(d.toList, days.toList) - assertEquals(a.toList, Array(5.0, 6.0, 7.0, 8.0).toList) + val itda = sc.itrDayAmount + assertVecEquals(itda.map(_.itr), Array(1, 1, 1, 1)) + assertVecEquals(itda.map(_.day), days) + assertVecEquals(itda.map(_.amnt), Array(5.0, 6.0, 7.0, 8.0)) val (firstLoss, lastLoss) = sc.period assertEquals(firstLoss, LocalDate.of(2019, 1, 10)) From bbf5c5e7fd01e73993ba2eb4293223785964ec39 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Fri, 23 Jan 2026 17:23:33 +0100 Subject: [PATCH 30/75] . --- vecxt_re/src/scenarr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index 84efab22..bd7d42ef 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -98,7 +98,7 @@ object Scenarr: ) end sorted - inline def takeFirstNIterations(i: Int)= + inline def takeFirstNIterations(i: Int) = assert(i > 0 && i <= scenario.numberIterations) val idx = scenario.iterations <= i import vecxt.BoundsCheck.DoBoundsCheck.yes From 19fa7d0f2f7137207657880c47c1bf2022678465 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Sun, 25 Jan 2026 17:40:48 +0100 Subject: [PATCH 31/75] Add BegBin --- vecxt_re/package.mill | 6 +- vecxt_re/resources/index.vl.json | 95 +++++++++ vecxt_re/resources/negBinCumul.vl.json | 18 ++ vecxt_re/resources/negBinProb.vl.json | 18 ++ vecxt_re/src-jvm/NegativeBinomial.scala | 187 ++++++++++++++++++ vecxt_re/src/CalendarYearIndex.scala | 164 +++++++++++++++ vecxt_re/src/Dist.scala | 72 +++++++ vecxt_re/src/IndexPerPeriod.scala | 123 
++++++++++++ vecxt_re/src/Rand.scala | 98 +++++++++ vecxt_re/src/all.scala | 2 + vecxt_re/test/src-jvm/NegBin.test.scala | 128 ++++++++++++ .../test/src/calendarYearIndex.test.scala | 139 +++++++++++++ vecxt_re/test/src/indexPerPeriod.test.scala | 131 ++++++++++++ 13 files changed, 1179 insertions(+), 2 deletions(-) create mode 100644 vecxt_re/resources/index.vl.json create mode 100644 vecxt_re/resources/negBinCumul.vl.json create mode 100644 vecxt_re/resources/negBinProb.vl.json create mode 100644 vecxt_re/src-jvm/NegativeBinomial.scala create mode 100644 vecxt_re/src/CalendarYearIndex.scala create mode 100644 vecxt_re/src/Dist.scala create mode 100644 vecxt_re/src/IndexPerPeriod.scala create mode 100644 vecxt_re/src/Rand.scala create mode 100644 vecxt_re/test/src-jvm/NegBin.test.scala create mode 100644 vecxt_re/test/src/calendarYearIndex.test.scala create mode 100644 vecxt_re/test/src/indexPerPeriod.test.scala diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill index c81b2763..32fad6cd 100644 --- a/vecxt_re/package.mill +++ b/vecxt_re/package.mill @@ -33,8 +33,10 @@ object `package` extends Module: override def mvnDeps = super.mvnDeps() ++ Seq( mvn"io.github.quafadas::scautable:0.0.35", - mvn"io.github.quafadas::dedav4s:0.10.3", - mvn"org.apache.commons:commons-math4-core:4.0-beta1" + mvn"io.github.quafadas::dedav4s:0.10.3-10-07d7b3-DIRTY64264df4", + mvn"org.apache.commons:commons-math4-core:4.0-beta1", + mvn"org.apache.commons:commons-statistics-distribution:1.1", + mvn"org.apache.commons:commons-rng-simple:1.6" ) object test extends VexctReTest, ScalaTests: diff --git a/vecxt_re/resources/index.vl.json b/vecxt_re/resources/index.vl.json new file mode 100644 index 00000000..16d4e69a --- /dev/null +++ b/vecxt_re/resources/index.vl.json @@ -0,0 +1,95 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Incremental index and cumulative index over time.", + "width":"container", + "height":"container", + "data": { + 
"values": [ + {"year": 2024, "index": 1.05, "cumulative": 0.9, "threshold": 1.1, "missing": 0.9}, + {"year": 2025, "index": 1.05, "cumulative": 0.975, "threshold": 1.1, "missing": 0.975}, + {"year": 2026, "index": 1.05, "cumulative": 1.0, "threshold": 1.1, "missing": 1.0} + ] + }, + "layer": [ + { + "mark": { + "type": "area", + "color": "red", + "opacity": 0.2, + "tooltip": true + }, + "encoding": { + "x": { + "field": "year", + "type": "ordinal", + "title": "Year" + }, + "y": { + "field": "missing", + "type": "quantitative", + "scale": { "zero": false }, + "title": "Index" + }, + "y2": { + "datum": 1.0 + } + } + }, + + { + "mark": { + "type": "point", + "filled": true, + "size": 80, + "tooltip": true + }, + "encoding": { + "x": { + "field": "year", + "type": "ordinal" + }, + "y": { + "field": "index", + "type": "quantitative" + } + } + }, + { + "mark": { + "type": "rule", + "tooltip": true + }, + "encoding": { + "x": { + "field": "year", + "type": "ordinal" + }, + "y": { + "field": "index", + "type": "quantitative" + }, + "y2": { + "datum": 1.0 + } + } + }, + { + "mark": { + "type": "line", + "tooltip": true, + "strokeDash": [4, 4] + }, + "encoding": { + "x": { + "field": "year", + "type": "ordinal" + }, + "y": { + "field": "threshold", + "type": "quantitative" + } + } + } + ] + +} diff --git a/vecxt_re/resources/negBinCumul.vl.json b/vecxt_re/resources/negBinCumul.vl.json new file mode 100644 index 00000000..618b9eb9 --- /dev/null +++ b/vecxt_re/resources/negBinCumul.vl.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Negative Binomial distribution cumulative probabilities.", + "data": {"values": [ + {"value": 0, "probability": 0.0}, + {"value": 1, "probability": 0.1}, + {"value": 2, "probability": 0.18} + ]}, + "mark": { + "type": "line", "interpolate": "step-after", "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative"}, + "y": {"field": "prob", "type": "quantitative"} + 
}, + "width": "container", + "height": "container" +} \ No newline at end of file diff --git a/vecxt_re/resources/negBinProb.vl.json b/vecxt_re/resources/negBinProb.vl.json new file mode 100644 index 00000000..a2e14071 --- /dev/null +++ b/vecxt_re/resources/negBinProb.vl.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Negative Binomial distribution probabilities.", + "data": {"values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.18} + ]}, + "mark": { + "type": "bar", "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative"}, + "y": {"field": "prob", "type": "quantitative"} + }, + "width": "container", + "height": "container" +} \ No newline at end of file diff --git a/vecxt_re/src-jvm/NegativeBinomial.scala b/vecxt_re/src-jvm/NegativeBinomial.scala new file mode 100644 index 00000000..4f5e4d8e --- /dev/null +++ b/vecxt_re/src-jvm/NegativeBinomial.scala @@ -0,0 +1,187 @@ +package vecxt_re + +import org.apache.commons.numbers.gamma.LogGamma +import org.apache.commons.statistics.distribution.GammaDistribution +import org.apache.commons.statistics.distribution.PoissonDistribution +import org.apache.commons.rng.simple.RandomSource +import io.github.quafadas.plots.SetupVega.{*, given} +import io.circe.syntax.* + +/** Negative Binomial Distribution with alternative parameterization. + * + * Uses the parameterization: + * - r = a (number of successes, can be any positive real) + * - p = 1 / (1 + b) (probability of success) + * + * Which gives: + * - mean = a * b + * - variance = a * b * (1 + b) + * + * Under this parameterisation, as b -> 0, the distribution will converge to Poisson(ab). The parameter b is therefore + * a measure of overdispersion. 
+ * + * Implementation uses the gamma-Poisson mixture representation, which allows non-integer a: If λ ~ Gamma(a, b) and X | + * λ ~ Poisson(λ), then X ~ NegativeBinomial(a, b) + * + * @param a + * shape parameter (must be positive, can be non-integer) + * @param b + * scale/dispersion parameter (must be positive) + */ + +//TODO: JS, facade to Stdlib gamma, poisson etc. +case class NegativeBinomial(a: Double, b: Double) + extends DiscreteDistr[Int] + with HasMean[Double] + with HasVariance[Double]: + require(a > 0, "a must be positive") + require(b > 0, "b must be positive") + require(a.isFinite, "a must be finite") + require(b.isFinite, "b must be finite") + + private val p: Double = 1.0 / (1.0 + b) + private val logP: Double = math.log(p) + private val log1MinusP: Double = math.log1p(-p) // log(1-p) = log(b/(1+b)) + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + + // Gamma distribution with shape=a, scale=b for the mixture representation + private val gammaDistribution = GammaDistribution.of(a, b) + private val gammaSampler = gammaDistribution.createSampler(rng) + + /** Draw using gamma-Poisson mixture: λ ~ Gamma(a, b), X | λ ~ Poisson(λ) */ + def draw: Int = + val lambda = gammaSampler.sample() + if lambda <= 0 then 0 + else PoissonDistribution.of(lambda).createSampler(rng).sample() + end if + end draw + + /** PMF: P(X = k) = Γ(a + k) / (Γ(a) * k!) 
* p^a * (1-p)^k + */ + def probabilityOf(x: Int): Double = + if x < 0 then 0.0 + else math.exp(logProbabilityOf(x)) + + /** Log PMF: log P(X = k) = logΓ(a + k) - logΓ(a) - logΓ(k + 1) + a*log(p) + k*log(1-p) + */ + override def logProbabilityOf(x: Int): Double = + if x < 0 then Double.NegativeInfinity + else + LogGamma.value(a + x) - LogGamma.value(a) - LogGamma.value(x + 1) + + a * logP + x * log1MinusP + + def mean: Double = a * b + + def variance: Double = a * b * (1.0 + b) + + def plot(using viz.LowPriorityPlotTarget) = + val linePlot = VegaPlot.fromResource("negBinProb.vl.json") + val maxX = (mean + 4 * math.sqrt(variance)).toInt + val data = (0 to maxX).map { k => + (value = k, prob = probabilityOf(k)) + } + linePlot.plot( + _.data.values := data.asJson, + _ += (title = s"Negative Binomial Distribution Marginal Probabilities (a=$a, b=$b)").asJson + ) + end plot + + def plotCdf(using viz.LowPriorityPlotTarget) = + val linePlot = VegaPlot.fromResource("negBinCumul.vl.json") + val maxX = (mean + 4 * math.sqrt(variance)).toInt + var cumProb = 0.0 + val data = (0 to maxX).map { k => + cumProb += probabilityOf(k) + (value = k, prob = cumProb) + } + linePlot.plot( + _.data.values := data.asJson, + _ += (title = s"Negative Binomial Distribution Cumulative Probabilities (a=$a, b=$b)").asJson + ) + end plotCdf +end NegativeBinomial + +object NegativeBinomial: + inline def fromMeanDispersion(mu: Double, b: Double): NegativeBinomial = + NegativeBinomial(mu / b, b) + + inline def poisson(mu: Double): NegativeBinomial = + NegativeBinomial(mu / 1e-12, 1e-12) + + /** Maximum likelihood estimation for Negative Binomial parameters. + * + * Uses Newton-Raphson iteration on the profile likelihood for 'a', with method of moments as the initial estimate. 
+ * + * For parameterization p = 1/(1+b), mean = a*b, with b = mean/a: + * - Score: S(a) = Σᵢ [ψ(a + xᵢ) - ψ(a)] + n·log(a/(a + x̄)) + * - Hessian: H(a) = Σᵢ [ψ'(a + xᵢ) - ψ'(a)] + n·x̄/(a·(a + x̄)) + * + * @param observations + * array of non-negative integer observations + * @param maxIter + * maximum number of Newton-Raphson iterations + * @param tol + * convergence tolerance for parameter 'a' + * @return + * Named tuple with `dist`: the fitted NegativeBinomial distribution, and `converged`: whether the optimizer converged within maxIter + */ + def mle(observations: Array[Int], maxIter: Int = 100, tol: Double = 1e-8): (dist: NegativeBinomial, converged: Boolean) = + require(observations.nonEmpty, "observations must not be empty") + require(observations.forall(_ >= 0), "all observations must be non-negative") + + val n = observations.length.toDouble + val xbar = observations.sum / n + val variance = observations.map(x => (x - xbar) * (x - xbar)).sum / n + + require(xbar > 0, "mean must be positive for NB fitting") + + // If variance <= mean, data is underdispersed relative to Poisson + // In this case, return near-Poisson (small b) + if variance <= xbar then (NegativeBinomial(xbar / 1e-10, 1e-10), true) + else + // Method of moments initial estimates: + // b = variance/mean - 1 + // a = mean/b = mean^2 / (variance - mean) + val bMom = (variance / xbar) - 1.0 + val aMom = xbar / bMom + + // Newton-Raphson iteration on the profile score equation for 'a' + // With b = xbar/a, the profile log-likelihood score is: + // S(a) = Σᵢ [ψ(a + xᵢ) - ψ(a)] + n·log(a/(a + xbar)) + var a = aMom + var iter = 0 + var converged = false + + while iter < maxIter && !converged do + // Score: S(a) = Σᵢ [ψ(a + xᵢ) - ψ(a)] + n·log(a/(a + xbar)) + var score = n * math.log(a / (a + xbar)) + + // Hessian (negative): -H(a) = Σᵢ [ψ'(a) - ψ'(a + xᵢ)] + n·xbar/(a·(a + xbar)) + var negHessian = n * xbar / (a * (a + xbar)) + + var i = 0 + while i < observations.length do + val x = observations(i) 
+ score += org.apache.commons.numbers.gamma.Digamma.value(a + x) - + org.apache.commons.numbers.gamma.Digamma.value(a) + negHessian += org.apache.commons.numbers.gamma.Trigamma.value(a) - + org.apache.commons.numbers.gamma.Trigamma.value(a + x) + i += 1 + end while + + val delta = score / negHessian + val aNew = a + delta + + if aNew <= 0 then a = a / 2.0 + else a = aNew + + converged = math.abs(delta) < tol * math.abs(a) + iter += 1 + end while + + val bFinal = xbar / a + (NegativeBinomial(a, bFinal), converged) + end if + end mle +end NegativeBinomial diff --git a/vecxt_re/src/CalendarYearIndex.scala b/vecxt_re/src/CalendarYearIndex.scala new file mode 100644 index 00000000..ba5acb72 --- /dev/null +++ b/vecxt_re/src/CalendarYearIndex.scala @@ -0,0 +1,164 @@ +package vecxt_re + +import io.github.quafadas.plots.SetupVega.{*, given} +import viz.macros.VegaPlot +import io.circe.syntax.* + +/** A calendar year-based wrapper around IndexPerPeriod for on-leveling historical data. + * + * This class maps calendar years to index factors, allowing on-leveling of datasets where data points are labeled with + * their calendar year. 
+ * + * @param currentYear + * The current/reference year (period 0) + * @param years + * Array of years in descending order (most recent first) + * @param indices + * Array of index factors corresponding to each year + */ +case class CalendarYearIndex(currentYear: Int, years: Array[Int], indices: Array[Double]): + require(years.length == indices.length, "years and indices must have the same length") + require(years.length > 0, "must provide at least one year") + + private val yearToIdx: Map[Int, Int] = years.zipWithIndex.toMap + private val underlying: IndexPerPeriod = IndexPerPeriod(indices) + + /** Number of years covered by this index */ + inline def numYears: Int = years.length + + /** The earliest year covered */ + inline def earliestYear: Int = years.last + + /** The latest year covered (should equal currentYear if properly constructed) */ + inline def latestYear: Int = years.head + + /** Get the index factor for a specific year. + * + * @param year + * The calendar year + * @return + * The index factor for that year + * @throws NoSuchElementException + * if year is not in the index + */ + def indexAt(year: Int): Double = + val idx = yearToIdx.getOrElse(year, throw new NoSuchElementException(s"Year $year not in index")) + indices(idx) + end indexAt + + /** Calculate the cumulative on-leveling factor from a historical year to the current year. + * + * @param fromYear + * The historical year + * @return + * The cumulative factor to on-level from that year to current + */ + def cumulativeToCurrentFrom(fromYear: Int): Double = + val periodsBack = currentYear - fromYear + underlying.cumulativeToCurrentFrom(periodsBack) + end cumulativeToCurrentFrom + + /** Apply on-leveling to an array of values, given their corresponding years. 
+ * + * @param values + * The historical values to on-level + * @param dataYears + * The calendar year for each value (same length as values) + * @return + * Array of on-leveled values + */ + def onLevel(values: Array[Double], dataYears: Array[Int]): Array[Double] = + require(values.length == dataYears.length, "values and dataYears must have the same length") + val result = new Array[Double](values.length) + var i = 0 + while i < values.length do + result(i) = values(i) * cumulativeToCurrentFrom(dataYears(i)) + i += 1 + end while + result + end onLevel + + def suggestedNewThreshold(reportThreshold: Double): Double = + val periodBack = currentYear - latestYear + val factor = underlying.cumulativeToCurrentFrom(periodBack) + reportThreshold * factor + end suggestedNewThreshold + +end CalendarYearIndex + +object CalendarYearIndex: + + extension (idx: CalendarYearIndex) + def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = + val linePlot2 = VegaPlot.fromResource("index.vl.json") + val cumulative = idx.onLevel(Array.fill(idx.years.length)(1.0), idx.years) + val factors = idx.years.zip(idx.indices).zip(cumulative).map { + case ((year, index), cumulative) => + ( + year = year, + index = index, + missing = 1 / cumulative, + threshold = idx.suggestedNewThreshold(reportingThreshold) + ) + } + linePlot2.plot( + _.data.values := factors.asJson + ) + end extension + + /** Create a CalendarYearIndex from arrays of years and their corresponding indices. Years should be provided in + * descending order (most recent first). 
+ * + * @param years + * Array of calendar years in descending order + * @param indices + * Array of index factors for each year + * @return + * CalendarYearIndex with the current year set to the first (most recent) year + */ + def apply(years: Array[Int], indices: Array[Double]): CalendarYearIndex = + require(years.length > 0, "must provide at least one year") + CalendarYearIndex(years.head, years, indices) + end apply + + /** Create a CalendarYearIndex from a range of years with a constant rate change. + * + * @param fromYear + * The earliest year (inclusive) + * @param toYear + * The current/latest year (inclusive) + * @param factor + * The constant factor for each year (e.g., 1.05 for 5% per year) + * @return + * CalendarYearIndex spanning the specified years + */ + def constant(fromYear: Int, toYear: Int, factor: Double): CalendarYearIndex = + require(toYear >= fromYear, "toYear must be >= fromYear") + val numYears = toYear - fromYear + 1 + val years = Array.tabulate(numYears)(i => toYear - i) + val indices = Array.fill(numYears)(factor) + CalendarYearIndex(toYear, years, indices) + end constant + + /** Create a CalendarYearIndex from arrays of years and rate changes (as percentages). Years should be provided in + * descending order (most recent first). 
+ * + * @param years + * Array of calendar years in descending order + * @param rateChanges + * Array of percentage changes for each year (e.g., 5.0 for 5%) + * @return + * CalendarYearIndex with rate changes converted to factors + */ + def fromRateChanges(years: Array[Int], rateChanges: Array[Double]): CalendarYearIndex = + require(years.length == rateChanges.length, "years and rateChanges must have the same length") + val factors = new Array[Double](rateChanges.length) + var i = 0 + while i < rateChanges.length do + factors(i) = 1.0 + rateChanges(i) / 100.0 + i += 1 + end while + CalendarYearIndex(years, factors) + end fromRateChanges + +end CalendarYearIndex diff --git a/vecxt_re/src/Dist.scala b/vecxt_re/src/Dist.scala new file mode 100644 index 00000000..5a3ec223 --- /dev/null +++ b/vecxt_re/src/Dist.scala @@ -0,0 +1,72 @@ +package vecxt_re + +trait Density[T] { + + /** Returns the unnormalized value of the measure*/ + def apply(x: T): Double + + /** Returns the log unnormalized value of the measure*/ + def logApply(x: T): Double = math.log(apply(x)) +} + +/** + * Represents a continuous Distribution. 
+ */ +trait ContinuousDistr[T] extends Density[T] with Rand[T] { + + /** Returns the probability density function at that point.*/ + def pdf(x: T): Double = math.exp(logPdf(x)) + def logPdf(x: T): Double = unnormalizedLogPdf(x) - logNormalizer + + /** Returns the probability density function up to a constant at that point.*/ + def unnormalizedPdf(x: T): Double = math.exp(unnormalizedLogPdf(x)) + + def unnormalizedLogPdf(x: T): Double + def logNormalizer: Double + + // 1/Z where Z = exp(logNormalizer) + lazy val normalizer + : Double = math.exp(-logNormalizer) + + def apply(x: T) = unnormalizedPdf(x) + override def logApply(x: T) = unnormalizedLogPdf(x) +} + +trait HasCdf { + def probability(x: Double, y: Double): Double // Probability that P(x < X <= y) + def cdf(x: Double): Double + + // experimental plotting support + def plot(using viz.LowPriorityPlotTarget): viz.VizReturn + def plotCdf(using viz.LowPriorityPlotTarget): viz.VizReturn + +} + +trait HasInverseCdf { + def inverseCdf(p: Double): Double //Compute the quantile of p +} + +/** + * Represents a discrete Distribution + */ +trait DiscreteDistr[T] extends Density[T] with Rand[T] { + + /** Returns the probability of that draw. 
*/ + def probabilityOf(x: T): Double + def logProbabilityOf(x: T): Double = math.log(probabilityOf(x)) + + /** Returns the probability of that draw up to a constant */ + def unnormalizedProbabilityOf(x: T): Double = probabilityOf(x) + def unnormalizedLogProbabilityOf(x: T): Double = math.log(unnormalizedProbabilityOf(x)) + + def apply(x: T) = unnormalizedProbabilityOf(x) + override def logApply(x: T) = unnormalizedLogProbabilityOf(x) +} + +trait HasMean[T] { + def mean: T +} + +trait HasVariance[T] { + def variance: T +} \ No newline at end of file diff --git a/vecxt_re/src/IndexPerPeriod.scala b/vecxt_re/src/IndexPerPeriod.scala new file mode 100644 index 00000000..e3c1d18f --- /dev/null +++ b/vecxt_re/src/IndexPerPeriod.scala @@ -0,0 +1,123 @@ +package vecxt_re + +import vecxt.all.* +import vecxt.BoundsCheck.DoBoundsCheck.yes + +/** + * Aims to provide a (very) simple index mapping for period-based models. + * + * Each period is associated with a unique index, which provided from period zero 0 going backwards for some historic number of periods. + * The basic goal of this is to "on-level" some historical dataset, which has labels corresponding to the periods here. + * + * This object provides methods to: + * - Retrieve the index for a given period. + * - Retrieve the cumulative index which will "on level" some historical number, from it's "historical period" to the "current period" + * + * @param indices Array of indices where indices(0) is the current period (period 0) and indices(n) is n periods back. + * Each index typically represents a rate change factor for that period (e.g., 1.05 for 5% increase). 
+ */ +case class IndexPerPeriod(indices: Array[Double]): + + /** Precomputed cumulative factors: cumulativeFactorsAll(i) = product of indices(0) to indices(i-1) */ + private lazy val cumulativeFactorsAll: Array[Double] = + // cumulative product via exp(cumsum(log(x))) + // Prepend 1.0 to get array where result(0) = 1.0, result(1) = indices(0), result(2) = indices(0)*indices(1), etc. + val cumProd = indices.log + cumProd.`cumsum!` + cumProd.`exp!` + // Prepend 1.0 for period 0 (current period needs no adjustment) + Array.tabulate(indices.length + 1)(i => if i == 0 then 1.0 else cumProd(i - 1)) + end cumulativeFactorsAll + + /** Number of periods available in the index */ + inline def numPeriods: Int = indices.length + + /** + * Get the index value for a specific period. + * + * @param period The period number (0 = current, 1 = one period back, etc.) + * @return The index value for that period + * @throws IndexOutOfBoundsException if period is outside the available range + */ + inline def indexAt(period: Int): Double = indices(period) + + /** + * Calculate the cumulative on-leveling factor from a historical period to the current period. + * + * This multiplies all indices from period 0 up to (but not including) the specified historical period. + * The result is the factor needed to bring a value from the historical period to current levels. 
+ * + * For example, if you have rate changes of 5% (1.05) each year for 3 years: + * - indices = Array(1.05, 1.05, 1.05) + * - cumulativeToCurrentFrom(0) = 1.0 (already current) + * - cumulativeToCurrentFrom(1) = 1.05 (one period back, need to apply current period's change) + * - cumulativeToCurrentFrom(2) = 1.05 * 1.05 = 1.1025 + * - cumulativeToCurrentFrom(3) = 1.05 * 1.05 * 1.05 = 1.157625 + * + * @param fromPeriod The historical period number (0 = current, positive = periods back) + * @return The cumulative factor to on-level from that period to current + */ + inline def cumulativeToCurrentFrom(fromPeriod: Int): Double = + if fromPeriod <= 0 then 1.0 + else if fromPeriod >= cumulativeFactorsAll.length then cumulativeFactorsAll.last + else cumulativeFactorsAll(fromPeriod) + end cumulativeToCurrentFrom + + /** + * Calculate cumulative on-leveling factors for all periods up to a given period. + * + * @param upToPeriod The maximum period to calculate (exclusive) + * @return Array where result(i) is the cumulative factor from period i to current + */ + inline def cumulativeFactors(upToPeriod: Int): Array[Double] = + val n = math.min(upToPeriod, cumulativeFactorsAll.length) + if n == cumulativeFactorsAll.length then cumulativeFactorsAll.clone() + else Array.tabulate(n)(i => cumulativeFactorsAll(i)) + end cumulativeFactors + + /** + * Apply on-leveling to an array of values, given their corresponding period labels. 
+ * + * @param values The historical values to on-level + * @param periods The period label for each value (same length as values) + * @return Array of on-leveled values + */ + inline def onLevel(values: Array[Double], periods: Array[Int]): Array[Double] = + require(values.length == periods.length, "values and periods must have the same length") + // Map periods to cumulative factors, clamping to valid range + val factors: Array[Double] = Array.tabulate(periods.length) { i => + val p = periods(i) + if p <= 0 then 1.0 + else if p >= cumulativeFactorsAll.length then cumulativeFactorsAll.last + else cumulativeFactorsAll(p) + } + values * (factors: Array[Double]) + end onLevel + +end IndexPerPeriod + +object IndexPerPeriod: + + /** + * Create an IndexPerPeriod from an array of rate changes (as percentages). + * + * @param rateChanges Array of rate changes where each value is the percentage change. + * e.g., 5.0 means a 5% increase, -3.0 means a 3% decrease. + * @return IndexPerPeriod with the rate changes converted to factors + */ + inline def fromRateChanges(rateChanges: Array[Double]): IndexPerPeriod = + IndexPerPeriod((rateChanges / 100.0) + 1.0) + end fromRateChanges + + /** + * Create an IndexPerPeriod with a constant rate change for all periods. + * + * @param numPeriods Number of historical periods + * @param factor The constant factor for each period (e.g., 1.05 for 5% per period) + * @return IndexPerPeriod with constant factors + */ + inline def constant(numPeriods: Int, factor: Double): IndexPerPeriod = + IndexPerPeriod(Array.fill(numPeriods)(factor)) + end constant + +end IndexPerPeriod diff --git a/vecxt_re/src/Rand.scala b/vecxt_re/src/Rand.scala new file mode 100644 index 00000000..ecfe1102 --- /dev/null +++ b/vecxt_re/src/Rand.scala @@ -0,0 +1,98 @@ +package vecxt_re + +/** + * A trait for monadic distributions. Provides support for use in for-comprehensions + */ +trait Rand[T] { outer => + + /** + * Gets one sample from the distribution. 
Equivalent to sample + */ + def draw: T + + inline def get = draw + + /** Overridden by filter/map/flatmap for monadic invocations. Basically, rejection samplers will return None here */ + def drawOpt: Option[T] = Some(draw) + + /** + * Gets one sample from the distribution. Equivalent to get + */ + inline def sample = get + + /** + * Gets n samples from the distribution. + */ + inline def sample(n: Int): IndexedSeq[T] = IndexedSeq.fill(n)(draw) + + /** + * Gets n samples from the distribution into a specified collection type. + */ + inline def sampleTo[C](n: Int)(using factory: scala.collection.Factory[T, C]): C = { + val builder = factory.newBuilder + builder.sizeHint(n) + var i = 0 + while (i < n) { + builder += draw + i += 1 + } + builder.result() + } + + /** + * An infinitely long iterator that samples repeatedly from the Rand + * @return an iterator that repeatedly samples + */ + inline def samples: Iterator[T] = Iterator.continually(draw) + + /** + * Converts a random sampler of one type to a random sampler of another type. + * Examples: + * uniform.map(_*2) gives a Rand[Double] in the range [0,2] + * Equivalently, for(x <- uniform) yield 2*x + * + * @param f the transform to apply to the sampled value. 
+ * + */ + def map[E](f: T => E): Rand[E] = MappedRand(outer, f) + + def flatMap[E](f: T => Rand[E]): Rand[E] = FlatMappedRand(outer, f) + + def withFilter(p: T => Boolean): Rand[T] = FilteredRand(outer, p) + +} + +private final case class MappedRand[@specialized(Int, Double) T, @specialized(Int, Double) U]( + rand: Rand[T], + func: T => U) + extends Rand[U] { + override def draw: U = func(rand.draw) + override def drawOpt: Option[U] = rand.drawOpt.map(func) + override def map[E](f: U => E): Rand[E] = MappedRand(rand, (x: T) => f(func(x))) +} + +private final case class FlatMappedRand[@specialized(Int, Double) T, @specialized(Int, Double) U]( + rand: Rand[T], + func: T => Rand[U]) + extends Rand[U] { + override def draw: U = func(rand.draw).draw + override def drawOpt: Option[U] = rand.drawOpt.flatMap(x => func(x).drawOpt) + override def flatMap[E](f: U => Rand[E]): Rand[E] = FlatMappedRand(rand, (x: T) => func(x).flatMap(f)) +} + +private final case class FilteredRand[@specialized(Int, Double) T]( + rand: Rand[T], + predicate: T => Boolean) + extends Rand[T] { + override def draw: T = { + var result = rand.draw + var attempts = 0 + while (!predicate(result)) { + attempts += 1 + if (attempts > 100000) throw new RuntimeException("Rejection sampling exceeded max attempts") + result = rand.draw + } + result + } + override def drawOpt: Option[T] = rand.drawOpt.filter(predicate) +} \ No newline at end of file diff --git a/vecxt_re/src/all.scala b/vecxt_re/src/all.scala index a2a00739..f0361aaf 100644 --- a/vecxt_re/src/all.scala +++ b/vecxt_re/src/all.scala @@ -8,4 +8,6 @@ object all: export vecxt_re.SplitScenario.* export vecxt_re.DeductibleType.* export vecxt_re.ReReporting.* + export vecxt_re.IndexPerPeriod + export vecxt_re.CalendarYearIndex end all diff --git a/vecxt_re/test/src-jvm/NegBin.test.scala b/vecxt_re/test/src-jvm/NegBin.test.scala new file mode 100644 index 00000000..2ab2b8e5 --- /dev/null +++ b/vecxt_re/test/src-jvm/NegBin.test.scala @@ -0,0 +1,128 @@ 
+package vecxt_re + +import munit.FunSuite +import vecxt.all.* +import org.apache.commons.statistics.distribution.PoissonDistribution + +class NegBinTest extends FunSuite: + + inline val localTests = false + + test("pmf approximately normalizes") { + val nb = NegativeBinomial(a = 2.5, b = 1.2) + + val mu = nb.mean + val sd = math.sqrt(nb.variance) + val K = (mu + 15 * sd).toInt + + val sum = (0 to K).map(nb.probabilityOf).sum + + println(sum) + + assert(math.abs(sum - 1.0) < 1e-8) + } + + test("pmf mean and variance match theory") { + val nb = NegativeBinomial(3.0, 0.7) + + val K = 500 + val probs = (0 to K).map(k => nb.probabilityOf(k)) + + val mean = probs.zipWithIndex.map { case (p, k) => p * k }.sum + val varr = probs.zipWithIndex.map { case (p, k) => p * k * k }.sum - mean * mean + + assert(math.abs(mean - nb.mean) < 1e-6) + assert(math.abs(varr - nb.variance) < 1e-6) + } + + test("approaches Poisson as b -> 0") { + val mu = 4.0 + val b = 1e-6 + val a = mu / b + + val nb = NegativeBinomial(a, b) + val pois = PoissonDistribution.of(mu) + + assert(nb.probabilityOf(-1) == 0.0) + assert(nb.logProbabilityOf(-1).isNegInfinity) + + (0 to 20).foreach { k => + val diff = + math.abs(nb.probabilityOf(k) - pois.probability(k)) + assert(diff < 1e-6) + } + } + + test("works with small a < 1 (fractional shape)") { + val nb = NegativeBinomial(a = 0.5, b = 2.0) + + // Verify PMF normalizes + val K = 200 + val sum = (0 to K).map(nb.probabilityOf).sum + assert(math.abs(sum - 1.0) < 1e-6) + + // Verify mean and variance from PMF match theoretical values + val probs = (0 to K).map(k => nb.probabilityOf(k)) + val mean = probs.zipWithIndex.map { case (p, k) => p * k }.sum + val varr = probs.zipWithIndex.map { case (p, k) => p * k * k }.sum - mean * mean + + // a * b = 0.5 * 2.0 = 1.0 + assert(math.abs(mean - nb.mean) < 1e-5) + assert(math.abs(nb.mean - 1.0) < 1e-10) + + // a * b * (1 + b) = 0.5 * 2.0 * 3.0 = 3.0 + assert(math.abs(varr - nb.variance) < 1e-4) + 
assert(math.abs(nb.variance - 3.0) < 1e-10) + } + + // Ignored in CI as slow + test("SLOW: sampling mean and variance") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.NegBinTest.sampling mean and variance IN CI========") + val nb = NegativeBinomial(5.0, 0.8) + val n = 2_000_000 + + val xs = Array.fill(n)(nb.draw.toDouble) + + val mean = xs.sum / n + val varr = xs.map(x => (x - mean)*(x - mean)).sum / n + + assert(math.abs(mean - nb.mean) < 5e-3) + assert(math.abs(varr - nb.variance) < 5e-2) + } + + test("SLOW: sampling distribution matches pmf") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.NegBinTest.sampling distribution matches pmf IN CI========") + val nb = NegativeBinomial(2.0, 1.5) + val n = 500_000 + + val samples = Array.fill(n)(nb.draw) + val counts = samples.groupBy(identity).view.mapValues(_.size).toMap + + val K = 20 + (0 to K).foreach { k => + val expected = n * nb.probabilityOf(k) + val observed = counts.getOrElse(k, 0) + assert(math.abs(observed - expected) < 5 * math.sqrt(expected)) + } + } + + test("SLOW: MLE recovers parameters") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.MLE recovers parameters IN CI========") + + val trueNb = NegativeBinomial(4.0, 0.6) + val data = Array.fill(10_000)(trueNb.draw) + + val (fitted, converged) = NegativeBinomial.mle(data) + assert(converged) + + println(s"True parameters: a=${trueNb.a}, b=${trueNb.b}") + println(s"Fitted parameters: a=${fitted.a}, b=${fitted.b}") + + assertEqualsDouble(fitted.mean , trueNb.mean, 0.1) + assertEqualsDouble(fitted.b , trueNb.b, 0.1) + } + +end NegBinTest \ No newline at end of file diff --git a/vecxt_re/test/src/calendarYearIndex.test.scala b/vecxt_re/test/src/calendarYearIndex.test.scala new file mode 100644 index 00000000..9bb453e3 --- /dev/null 
+++ b/vecxt_re/test/src/calendarYearIndex.test.scala @@ -0,0 +1,139 @@ +package vecxt_re + +import munit.FunSuite + +class CalendarYearIndexSuite extends FunSuite: + + test("basic construction with years and indices") { + val years = Array(2024, 2023, 2022, 2021) + val indices = Array(1.05, 1.03, 1.02, 1.04) + val idx = CalendarYearIndex(years, indices) + + assertEquals(idx.currentYear, 2024) + assertEquals(idx.numYears, 4) + assertEquals(idx.latestYear, 2024) + assertEquals(idx.earliestYear, 2021) + } + + test("indexAt returns correct factor for each year") { + val years = Array(2024, 2023, 2022) + val indices = Array(1.05, 1.03, 1.02) + val idx = CalendarYearIndex(years, indices) + + assertEquals(idx.indexAt(2024), 1.05) + assertEquals(idx.indexAt(2023), 1.03) + assertEquals(idx.indexAt(2022), 1.02) + } + + test("indexAt throws for unknown year") { + val idx = CalendarYearIndex(Array(2024, 2023), Array(1.05, 1.03)) + + intercept[NoSuchElementException] { + idx.indexAt(2020) + } + } + + test("cumulativeToCurrentFrom current year returns 1.0") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(2024), 1.0) + } + + test("cumulativeToCurrentFrom one year back") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(2023), 1.05) + } + + test("cumulativeToCurrentFrom two years back") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(2022), 1.05 * 1.03, 1e-10) + } + + test("cumulativeToCurrentFrom three years back") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(2021), 1.05 * 1.03 * 1.02, 1e-10) + } + + test("onLevel applies correct cumulative factors by year") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + + val values = 
Array(100.0, 200.0, 300.0) + val dataYears = Array(2024, 2023, 2022) + + val result = idx.onLevel(values, dataYears) + + assertEquals(result(0), 100.0) // 2024: current year, factor = 1.0 + assertEqualsDouble(result(1), 200.0 * 1.05, 1e-10) // 2023: one year back + assertEqualsDouble(result(2), 300.0 * 1.05 * 1.03, 1e-10) // 2022: two years back + } + + test("onLevel with mixed year order") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.10, 1.05, 1.03)) + + val values = Array(100.0, 100.0, 100.0) + val dataYears = Array(2022, 2024, 2023) + + val result = idx.onLevel(values, dataYears) + + assertEqualsDouble(result(0), 100.0 * 1.10 * 1.05, 1e-10) // 2022 + assertEquals(result(1), 100.0) // 2024 + assertEqualsDouble(result(2), 100.0 * 1.10, 1e-10) // 2023 + } + + test("onLevel throws on mismatched array lengths") { + val idx = CalendarYearIndex(Array(2024, 2023), Array(1.05, 1.03)) + + intercept[IllegalArgumentException] { + idx.onLevel(Array(100.0, 200.0), Array(2024)) + } + } + + test("constant creates uniform factors across year range") { + val idx = CalendarYearIndex.constant(2020, 2024, 1.05) + + assertEquals(idx.currentYear, 2024) + assertEquals(idx.numYears, 5) + assertEquals(idx.earliestYear, 2020) + assertEquals(idx.latestYear, 2024) + + assertEquals(idx.indexAt(2024), 1.05) + assertEquals(idx.indexAt(2023), 1.05) + assertEquals(idx.indexAt(2020), 1.05) + } + + test("constant cumulative grows exponentially by years back") { + val idx = CalendarYearIndex.constant(2020, 2024, 1.10) + + assertEqualsDouble(idx.cumulativeToCurrentFrom(2021), Math.pow(1.10, 3), 1e-10) + } + + test("fromRateChanges creates correct factors") { + val years = Array(2024, 2023, 2022) + val rateChanges = Array(5.0, 3.0, -2.0) + val idx = CalendarYearIndex.fromRateChanges(years, rateChanges) + + assertEqualsDouble(idx.indexAt(2024), 1.05, 1e-10) + assertEqualsDouble(idx.indexAt(2023), 1.03, 1e-10) + assertEqualsDouble(idx.indexAt(2022), 0.98, 1e-10) + } + + 
test("construction fails with empty arrays") { + intercept[IllegalArgumentException] { + CalendarYearIndex(Array.empty[Int], Array.empty[Double]) + } + } + + test("construction fails with mismatched array lengths") { + intercept[IllegalArgumentException] { + CalendarYearIndex(2024, Array(2024, 2023), Array(1.05)) + } + } + + test("explicit currentYear constructor") { + val idx = CalendarYearIndex(2025, Array(2024, 2023), Array(1.05, 1.03)) + + assertEquals(idx.currentYear, 2025) + // From 2023 to 2025 is 2 years back + assertEqualsDouble(idx.cumulativeToCurrentFrom(2023), 1.05 * 1.03, 1e-10) + } + +end CalendarYearIndexSuite diff --git a/vecxt_re/test/src/indexPerPeriod.test.scala b/vecxt_re/test/src/indexPerPeriod.test.scala new file mode 100644 index 00000000..4ee9a704 --- /dev/null +++ b/vecxt_re/test/src/indexPerPeriod.test.scala @@ -0,0 +1,131 @@ +package vecxt_re + +import munit.FunSuite + +class IndexPerPeriodSuite extends FunSuite: + + test("indexAt returns correct index for each period") { + val indices = Array(1.05, 1.03, 1.02) + val idx = IndexPerPeriod(indices) + + assertEquals(idx.indexAt(0), 1.05) + assertEquals(idx.indexAt(1), 1.03) + assertEquals(idx.indexAt(2), 1.02) + } + + test("numPeriods returns correct count") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEquals(idx.numPeriods, 3) + } + + test("cumulativeToCurrentFrom period 0 returns 1.0") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(0), 1.0) + } + + test("cumulativeToCurrentFrom negative period returns 1.0") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(-1), 1.0) + } + + test("cumulativeToCurrentFrom period 1 returns first index") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(1), 1.05) + } + + test("cumulativeToCurrentFrom period 2 returns product of first two indices") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + 
assertEqualsDouble(idx.cumulativeToCurrentFrom(2), 1.05 * 1.03, 1e-10) + } + + test("cumulativeToCurrentFrom period 3 returns product of all indices") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(3), 1.05 * 1.03 * 1.02, 1e-10) + } + + test("cumulativeToCurrentFrom beyond available periods uses all indices") { + val idx = IndexPerPeriod(Array(1.05, 1.03)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(5), 1.05 * 1.03, 1e-10) + } + + test("cumulativeFactors returns correct array") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + val factors = idx.cumulativeFactors(4) + + assertEquals(factors.length, 4) + assertEquals(factors(0), 1.0) + assertEqualsDouble(factors(1), 1.05, 1e-10) + assertEqualsDouble(factors(2), 1.05 * 1.03, 1e-10) + assertEqualsDouble(factors(3), 1.05 * 1.03 * 1.02, 1e-10) + } + + test("cumulativeFactors with upToPeriod less than numPeriods") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + val factors = idx.cumulativeFactors(2) + + assertEquals(factors.length, 2) + assertEquals(factors(0), 1.0) + assertEqualsDouble(factors(1), 1.05, 1e-10) + } + + test("onLevel applies correct cumulative factors") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + + val values = Array(100.0, 200.0, 300.0) + val periods = Array(0, 1, 2) + + val result = idx.onLevel(values, periods) + + assertEquals(result.length, 3) + assertEquals(result(0), 100.0) // period 0: * 1.0 + assertEqualsDouble(result(1), 200.0 * 1.05, 1e-10) // period 1: * 1.05 + assertEqualsDouble(result(2), 300.0 * 1.05 * 1.03, 1e-10) // period 2: * (1.05 * 1.03) + } + + test("onLevel with mixed period order") { + val idx = IndexPerPeriod(Array(1.10, 1.05)) + + val values = Array(100.0, 100.0, 100.0) + val periods = Array(2, 0, 1) + + val result = idx.onLevel(values, periods) + + assertEqualsDouble(result(0), 100.0 * 1.10 * 1.05, 1e-10) // period 2 + assertEquals(result(1), 100.0) // period 0 + assertEqualsDouble(result(2), 
100.0 * 1.10, 1e-10) // period 1 + } + + test("onLevel throws on mismatched array lengths") { + val idx = IndexPerPeriod(Array(1.05)) + val values = Array(100.0, 200.0) + val periods = Array(0) + + intercept[IllegalArgumentException] { + idx.onLevel(values, periods) + } + } + + test("fromRateChanges creates correct factors") { + val idx = IndexPerPeriod.fromRateChanges(Array(5.0, 3.0, -2.0)) + + assertEqualsDouble(idx.indexAt(0), 1.05, 1e-10) + assertEqualsDouble(idx.indexAt(1), 1.03, 1e-10) + assertEqualsDouble(idx.indexAt(2), 0.98, 1e-10) + } + + test("constant creates uniform factors") { + val idx = IndexPerPeriod.constant(3, 1.05) + + assertEquals(idx.numPeriods, 3) + assertEquals(idx.indexAt(0), 1.05) + assertEquals(idx.indexAt(1), 1.05) + assertEquals(idx.indexAt(2), 1.05) + } + + test("constant cumulative grows exponentially") { + val idx = IndexPerPeriod.constant(5, 1.10) + + assertEqualsDouble(idx.cumulativeToCurrentFrom(3), Math.pow(1.10, 3), 1e-10) + } + +end IndexPerPeriodSuite From 678c60b2900ce4f14d4cb2e70b060f69da725ac4 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Sun, 25 Jan 2026 17:41:41 +0100 Subject: [PATCH 32/75] . --- vecxt_re/package.mill | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill index 32fad6cd..7c0e82ef 100644 --- a/vecxt_re/package.mill +++ b/vecxt_re/package.mill @@ -33,7 +33,7 @@ object `package` extends Module: override def mvnDeps = super.mvnDeps() ++ Seq( mvn"io.github.quafadas::scautable:0.0.35", - mvn"io.github.quafadas::dedav4s:0.10.3-10-07d7b3-DIRTY64264df4", + mvn"io.github.quafadas::dedav4s:0.10.4", mvn"org.apache.commons:commons-math4-core:4.0-beta1", mvn"org.apache.commons:commons-statistics-distribution:1.1", mvn"org.apache.commons:commons-rng-simple:1.6" From b924f8b4d356b93be0b3d1c69307269d30425678 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Mon, 26 Jan 2026 09:00:08 +0100 Subject: [PATCH 33/75] . 
--- .vscode/launch.json | 42 +------ experiments/package.mill | 2 +- experiments/resources/idx.csv | 7 ++ experiments/src/index.scala | 11 ++ vecxt_re/resources/paretoCdf.vl.json | 43 +++++++ vecxt_re/resources/paretoPdf.vl.json | 117 ++++++++++++++++++ .../src-jvm/{ => dist}/NegativeBinomial.scala | 0 vecxt_re/src-jvm/dist/Pareto.scala | 103 +++++++++++++++ vecxt_re/src/{ => dist}/Dist.scala | 0 vecxt_re/src/{ => dist}/Rand.scala | 0 10 files changed, 288 insertions(+), 37 deletions(-) create mode 100644 experiments/resources/idx.csv create mode 100644 experiments/src/index.scala create mode 100644 vecxt_re/resources/paretoCdf.vl.json create mode 100644 vecxt_re/resources/paretoPdf.vl.json rename vecxt_re/src-jvm/{ => dist}/NegativeBinomial.scala (100%) create mode 100644 vecxt_re/src-jvm/dist/Pareto.scala rename vecxt_re/src/{ => dist}/Dist.scala (100%) rename vecxt_re/src/{ => dist}/Rand.scala (100%) diff --git a/.vscode/launch.json b/.vscode/launch.json index 35383fb6..3e72c3a0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,45 +1,15 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. 
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ { "type": "scala", "request": "launch", - "name": "test Suite", - "buildTarget": "vecxt.jvm.test", - "testClass": "vecxt.BooleanArrayExtensionSuite", - "jvmOptions": [ - "--add-modules=jdk.incubator.vector" - ], - "args": [ - "-oD" - ] - }, - { - "type": "scala", - "request": "launch", - "name": "Experiment", - "buildTarget": "experiments", - "mainClass": "experiments.pricingFun", - "jvmOptions": [ - "--add-modules=jdk.incubator.vector" - ], - "args": [ - "-oD" - ] - }, - { - "type": "scala", - "request": "attach", - "name": "Attach debugger", - // name of the module that is being debugging - "buildTarget": "vecxt.jvm.test", - // Host of the jvm to connect to - "hostName": "localhost", - // Port to connect to - "port": 5005 + "name": "plotIndex", + "mainClass": "experiments.plotIndex", + "buildTarget": "file:///Users/simon/Code/vecxt/experiments", + "args": [], + "jvmOptions": [], + "env": {} } ] } \ No newline at end of file diff --git a/experiments/package.mill b/experiments/package.mill index 5c288e12..5a85357c 100644 --- a/experiments/package.mill +++ b/experiments/package.mill @@ -19,7 +19,7 @@ object `package` extends ScalaModule: override def mvnDeps = super.mvnDeps() ++ Seq( mvn"com.lihaoyi::os-lib::0.10.4", mvn"io.github.quafadas::scautable::0.0.35", - mvn"io.github.quafadas::dedav4s::0.10.3" + mvn"io.github.quafadas::dedav4s::0.10.4" ) end `package` diff --git a/experiments/resources/idx.csv b/experiments/resources/idx.csv new file mode 100644 index 00000000..f713c83b --- /dev/null +++ b/experiments/resources/idx.csv @@ -0,0 +1,7 @@ +year,idx +2020, 1.05 +2021, 1.03 +2022, 1.04 +2023, 1.02 +2024, 1.06 +2025, 1.03 diff --git a/experiments/src/index.scala b/experiments/src/index.scala new file mode 100644 index 00000000..9f4f4380 --- /dev/null +++ b/experiments/src/index.scala @@ -0,0 +1,11 @@ +package experiments + +import 
io.github.quafadas.table.{*, given} +import io.github.quafadas.plots.SetupVegaBrowser.{*, given} + +@main def plotIndex = + val idx = CSV.resource("idx.csv", CsvOpts(TypeInferrer.FromAllRows, ReadAs.Columns)) + val calYrIdx = vecxt_re.CalendarYearIndex(2025, idx.year, idx.idx) + println(calYrIdx) + calYrIdx.plotIndex(1.0) + println("finished") \ No newline at end of file diff --git a/vecxt_re/resources/paretoCdf.vl.json b/vecxt_re/resources/paretoCdf.vl.json new file mode 100644 index 00000000..312a6298 --- /dev/null +++ b/vecxt_re/resources/paretoCdf.vl.json @@ -0,0 +1,43 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "CDF of a Pareto distribution", + "width": "container", + "height": "container", + + "data": { + "sequence": { + "start": 1, + "stop": 20, + "step": 0.05 + } + }, + + "transform": [ + { + "calculate": "1 - pow(1 / datum.data, 2)", + "as": "cdf" + } + ], + + "mark": { + "type": "line", + "interpolate": "monotone", + "strokeWidth": 2, + "tooltip": true + }, + + "encoding": { + "x": { + "field": "data", + "type": "quantitative", + "title": "x", + "scale": { "zero": false } + }, + "y": { + "field": "cdf", + "type": "quantitative", + "title": "F(x)", + "scale": { "domain": [0, 1] } + } + } +} \ No newline at end of file diff --git a/vecxt_re/resources/paretoPdf.vl.json b/vecxt_re/resources/paretoPdf.vl.json new file mode 100644 index 00000000..c37a7443 --- /dev/null +++ b/vecxt_re/resources/paretoPdf.vl.json @@ -0,0 +1,117 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Pareto PDF with correctly normalized log-binned histogram and analytic overlay", + "width": 600, + "height": 300, + + "layer": [ + { + "data": { + "values": [ + { "x": 1.02 }, { "x": 1.05 }, { "x": 1.08 }, { "x": 1.10 }, + { "x": 1.15 }, { "x": 1.20 }, { "x": 1.25 }, { "x": 1.30 }, + { "x": 1.40 }, { "x": 1.50 }, { "x": 1.70 }, { "x": 1.90 }, + { "x": 2.10 }, { "x": 2.40 }, { "x": 2.80 }, { "x": 3.20 }, + { 
"x": 3.80 }, { "x": 4.50 }, { "x": 5.30 }, { "x": 6.20 }, + { "x": 7.50 }, { "x": 9.00 }, { "x": 11.0 }, { "x": 14.0 }, + { "x": 18.0 } + ] + }, + + "transform": [ + { + "calculate": "log(datum.x)", + "as": "logx" + }, + { + "bin": { "maxbins": 25 }, + "field": "logx", + "as": ["logx0", "logx1"] + }, + { + "aggregate": [ + { "op": "count", "as": "count" } + ], + "groupby": ["logx0", "logx1"] + }, + { + "joinaggregate": [ + { "op": "sum", "field": "count", "as": "N" } + ] + }, + { + "calculate": "exp(datum.logx0)", + "as": "x0" + }, + { + "calculate": "exp(datum.logx1)", + "as": "x1" + }, + { + "calculate": "datum.count / (datum.N * (datum.x1 - datum.x0))", + "as": "density" + } + ], + + "mark": { + "type": "bar", + "opacity": 0.6 + }, + + "encoding": { + "x": { + "field": "x0", + "type": "quantitative", + "scale": { "type": "log" }, + "title": "x" + }, + "x2": { + "field": "x1" + }, + "y": { + "field": "density", + "type": "quantitative", + "title": "PDF" + }, + "tooltip": [ + { "field": "count", "type": "quantitative", "title": "Count" }, + { "field": "density", "type": "quantitative", "title": "Density" } + ] + } + }, + + { + "data": { + "sequence": { + "start": 1, + "stop": 20, + "step": 0.05 + } + }, + + "transform": [ + { + "calculate": "2 * pow(datum.data, -3)", + "as": "pdf" + } + ], + + "mark": { + "type": "line", + "strokeWidth": 2 + }, + + "encoding": { + "x": { + "field": "data", + "type": "quantitative", + "scale": { "type": "log" } + }, + "y": { + "field": "pdf", + "type": "quantitative" + } + } + } + ] +} \ No newline at end of file diff --git a/vecxt_re/src-jvm/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala similarity index 100% rename from vecxt_re/src-jvm/NegativeBinomial.scala rename to vecxt_re/src-jvm/dist/NegativeBinomial.scala diff --git a/vecxt_re/src-jvm/dist/Pareto.scala b/vecxt_re/src-jvm/dist/Pareto.scala new file mode 100644 index 00000000..868cf052 --- /dev/null +++ b/vecxt_re/src-jvm/dist/Pareto.scala @@ -0,0 
+1,103 @@ +package vecxt_re + +import org.apache.commons.statistics.distribution.ParetoDistribution +import org.apache.commons.rng.simple.RandomSource +import io.github.quafadas.plots.SetupVega.{*, given} +import io.circe.syntax.* + +/** Pareto Type I Distribution. + * + * The Pareto distribution is a power-law probability distribution commonly used to model the distribution of wealth, + * insurance losses, and other phenomena where small values are common and large values are rare but possible. + * + * For scale parameter k (minimum possible value) and shape parameter α (Pareto index): + * - PDF: f(x) = α * k^α / x^(α+1) for x >= k + * - CDF: F(x) = 1 - (k/x)^α for x >= k + * - Mean: k * α / (α - 1) for α > 1, otherwise infinite + * - Variance: k² * α / ((α-1)² * (α-2)) for α > 2, otherwise infinite + * + * @param scale + * Scale parameter k (minimum possible value of X, must be positive) + * @param shape + * Shape parameter α (Pareto index, must be positive) + */ +case class Pareto(scale: Double, shape: Double) + extends ContinuousDistr[Double] + with HasMean[Double] + with HasVariance[Double] + with HasCdf + with HasInverseCdf: + + require(scale > 0, "scale must be positive") + require(shape > 0, "shape must be positive") + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + private val distribution = ParetoDistribution.of(scale, shape) + private val sampler = distribution.createSampler(rng) + + /** Draw a random sample from the Pareto distribution */ + def draw: Double = sampler.sample() + + /** Unnormalized log PDF */ + def unnormalizedLogPdf(x: Double): Double = + if x < scale then Double.NegativeInfinity + else distribution.logDensity(x) + + /** Log normalizer (Pareto is already normalized, so this is 0) */ + def logNormalizer: Double = 0.0 + + /** Probability that x < X <= y */ + def probability(x: Double, y: Double): Double = distribution.probability(x, y) + + /** Cumulative distribution function */ + def cdf(x: Double): Double = 
distribution.cumulativeProbability(x) + + /** Inverse CDF (quantile function) */ + def inverseCdf(p: Double): Double = distribution.inverseCumulativeProbability(p) + + /** Survival function P(X > x) */ + def survivalProbability(x: Double): Double = distribution.survivalProbability(x) + + /** Inverse survival probability */ + def inverseSurvivalProbability(p: Double): Double = distribution.inverseSurvivalProbability(p) + + def mean: Double = distribution.getMean() + + def variance: Double = distribution.getVariance() + + private def guessMaxXForPlot = shape match + case s if s > 2 => mean + 4 * math.sqrt(variance) // mean and variance are defined + case s if s > 1 => mean + 20 * scale // no well defined variance + case _ => scale * 10 // no well defined mean + + def plot(using viz.LowPriorityPlotTarget) = + + val linePlot = VegaPlot.fromResource("paretoPdf.vl.json") + val maxX = guessMaxXForPlot + val numPoints = 1000 + val data = (0 until numPoints).map { _ => + (x = draw) + } + linePlot.plot( + _.layer.head.data.values := data.asJson, + _.layer(1).data.sequence.start := scale, + _.layer(1).data.sequence.stop := maxX, + _.layer(1).data.sequence.step := (maxX - scale) / 200, + _ += (title = s"Pareto Distribution PDF (scale=$scale, shape=$shape)").asJson + ) + end plot + + def plotCdf(using viz.LowPriorityPlotTarget) = + val linePlot = VegaPlot.fromResource("paretoCdf.vl.json") + val maxX = guessMaxXForPlot + + linePlot.plot( + _.data.sequence.start := scale, + _.data.sequence.stop := maxX, + _.data.sequence.step := (maxX - scale) / 200, + _.transform.head.calculate := s"1 - pow($scale / datum.data, $shape)", + _ += (title = s"Pareto Distribution CDF (scale=$scale, shape=$shape)").asJson + ) + end plotCdf + +end Pareto diff --git a/vecxt_re/src/Dist.scala b/vecxt_re/src/dist/Dist.scala similarity index 100% rename from vecxt_re/src/Dist.scala rename to vecxt_re/src/dist/Dist.scala diff --git a/vecxt_re/src/Rand.scala b/vecxt_re/src/dist/Rand.scala similarity index 
100% rename from vecxt_re/src/Rand.scala rename to vecxt_re/src/dist/Rand.scala From 42e1795c66a9ba6400bb4beed4d406992d373722 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 26 Jan 2026 14:25:36 +0100 Subject: [PATCH 34/75] pareto plot --- vecxt_re/package.mill | 2 +- vecxt_re/resources/paretoPdf.vl.json | 84 +++++++---- vecxt_re/src-jvm/dist/NegativeBinomial.scala | 140 ++++++++++++++++++- vecxt_re/src-jvm/dist/Pareto.scala | 27 ++-- vecxt_re/test/src-jvm/NegBin.test.scala | 96 ++++++++++--- 5 files changed, 294 insertions(+), 55 deletions(-) diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill index 7c0e82ef..8a59dc6e 100644 --- a/vecxt_re/package.mill +++ b/vecxt_re/package.mill @@ -33,7 +33,7 @@ object `package` extends Module: override def mvnDeps = super.mvnDeps() ++ Seq( mvn"io.github.quafadas::scautable:0.0.35", - mvn"io.github.quafadas::dedav4s:0.10.4", + mvn"io.github.quafadas::dedav4s:0.10.5", mvn"org.apache.commons:commons-math4-core:4.0-beta1", mvn"org.apache.commons:commons-statistics-distribution:1.1", mvn"org.apache.commons:commons-rng-simple:1.6" diff --git a/vecxt_re/resources/paretoPdf.vl.json b/vecxt_re/resources/paretoPdf.vl.json index c37a7443..3584bd6a 100644 --- a/vecxt_re/resources/paretoPdf.vl.json +++ b/vecxt_re/resources/paretoPdf.vl.json @@ -1,42 +1,59 @@ { "$schema": "https://vega.github.io/schema/vega-lite/v5.json", "description": "Pareto PDF with correctly normalized log-binned histogram and analytic overlay", - "width": 600, - "height": 300, - + "width": "container", + "height": "container", + "resolve": { + "scale": { + "x": "shared" + } + }, "layer": [ { "data": { "values": [ - { "x": 1.02 }, { "x": 1.05 }, { "x": 1.08 }, { "x": 1.10 }, - { "x": 1.15 }, { "x": 1.20 }, { "x": 1.25 }, { "x": 1.30 }, - { "x": 1.40 }, { "x": 1.50 }, { "x": 1.70 }, { "x": 1.90 }, - { "x": 2.10 }, { "x": 2.40 }, { "x": 2.80 }, { "x": 3.20 }, - { "x": 3.80 }, { "x": 4.50 }, { "x": 5.30 }, { "x": 6.20 }, - { "x": 7.50 }, { "x": 9.00 }, { "x": 11.0 
}, { "x": 14.0 }, - { "x": 18.0 } + { + "x": 1.02 + }, + { + "x": 1.05 + } ] }, - "transform": [ { "calculate": "log(datum.x)", "as": "logx" }, { - "bin": { "maxbins": 25 }, + "bin": { + "maxbins": 50 + }, "field": "logx", - "as": ["logx0", "logx1"] + "as": [ + "logx0", + "logx1" + ] }, { "aggregate": [ - { "op": "count", "as": "count" } + { + "op": "count", + "as": "count" + } ], - "groupby": ["logx0", "logx1"] + "groupby": [ + "logx0", + "logx1" + ] }, { "joinaggregate": [ - { "op": "sum", "field": "count", "as": "N" } + { + "op": "sum", + "field": "count", + "as": "N" + } ] }, { @@ -52,18 +69,22 @@ "as": "density" } ], - "mark": { "type": "bar", "opacity": 0.6 }, - "encoding": { "x": { "field": "x0", "type": "quantitative", - "scale": { "type": "log" }, - "title": "x" + "title": "x", + "scale": { + "nice": false, + "domain": [ + 1, + 20 + ] + } }, "x2": { "field": "x1" @@ -74,12 +95,19 @@ "title": "PDF" }, "tooltip": [ - { "field": "count", "type": "quantitative", "title": "Count" }, - { "field": "density", "type": "quantitative", "title": "Density" } + { + "field": "count", + "type": "quantitative", + "title": "Count" + }, + { + "field": "density", + "type": "quantitative", + "title": "Density" + } ] } }, - { "data": { "sequence": { @@ -88,24 +116,24 @@ "step": 0.05 } }, - "transform": [ { - "calculate": "2 * pow(datum.data, -3)", + "calculate": "pow(datum.data, -2)", "as": "pdf" } ], - "mark": { "type": "line", "strokeWidth": 2 }, - "encoding": { "x": { "field": "data", "type": "quantitative", - "scale": { "type": "log" } + "scale": { + "nice": false, + "type": "log" + } }, "y": { "field": "pdf", diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index 4f5e4d8e..46d4abd2 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -126,7 +126,7 @@ object NegativeBinomial: * @return * Named tuple with `dist`: the fitted NegativeBinomial distribution, and 
`converged`: whether the optimizer converged within maxIter */ - def mle(observations: Array[Int], maxIter: Int = 100, tol: Double = 1e-8): (dist: NegativeBinomial, converged: Boolean) = + def mle(observations: Array[Int], maxIter: Int = 500, tol: Double = 1e-8): (dist: NegativeBinomial, converged: Boolean) = require(observations.nonEmpty, "observations must not be empty") require(observations.forall(_ >= 0), "all observations must be non-negative") @@ -184,4 +184,142 @@ object NegativeBinomial: (NegativeBinomial(a, bFinal), converged) end if end mle + + /** Maximum likelihood estimation for the volume-adjusted Negative Binomial. + * + * We observe pairs $(n_j, v_j)$ where $n_j$ is the count and $v_j$ is the volume ratio (historical volume / modeled volume). + * With parameters $(r, \beta)$ and $p = 1/(1+\beta v_j)$ the likelihood is + * $$ + * L(r,\beta) = \prod_j \frac{\Gamma(r+n_j)}{\Gamma(r)\,\Gamma(n_j+1)} \left(\frac{\beta v_j}{1+\beta v_j}\right)^{n_j} \left(\frac{1}{1+\beta v_j}\right)^r. + * $$ + * The log-likelihood is + * $$ + * \ell(r,\beta) = \sum_j \big[\log\Gamma(r+n_j) - \log\Gamma(r) - \log\Gamma(n_j+1) + n_j(\log(\beta v_j) - \log(1+\beta v_j)) - r\,\log(1+\beta v_j)\big]. + * $$ + * Gradient components: + * $$\partial_\beta \ell = \sum_j \Big( \frac{n_j}{\beta(1+\beta v_j)} - \frac{r v_j}{1+\beta v_j} \Big),\quad + * \partial_r \ell = \sum_j \big[\psi(r+n_j) - \psi(r) - \log(1+\beta v_j)\big],$$ + * and Hessian entries: + * $$\partial^2_{\beta\beta} \ell = \sum_j \Big( \frac{r v_j}{(1+\beta v_j)^2} - \frac{n_j(1+2\beta v_j)}{\beta^2(1+\beta v_j)^2} \Big),$$ + * $$\partial^2_{rr} \ell = \sum_j \big[\psi'(r+n_j) - \psi'(r)\big],\quad \partial^2_{\beta r} \ell = -\sum_j \frac{v_j}{1+\beta v_j}.$$ + * + * Implementation details: + * - Initialize from method of moments on rates $n_j / v_j$; if underdispersed, start at a small $\beta$. 
+ * - Newton updates solve the $2\times2$ system from the gradient/Hessian; a tiny ridge is added to keep the Hessian invertible. + * - Step halving is applied to enforce positivity of $r$ and $\beta$. + * + * @param observations + * non-negative counts $n_j$ + * @param volumes + * positive volume ratios $v_j$ (same units as modeled period) + * @param maxIter + * maximum Newton steps + * @param tol + * relative tolerance on both parameters + * @return + * tuple of fitted `NegativeBinomial(r, beta)` and a convergence flag + */ + def volweightedMle(observations: Array[Int], volumes: Array[Double], maxIter: Int = 500, tol: Double = 1e-8): (dist: NegativeBinomial, converged: Boolean) = + require(observations.nonEmpty, "observations must not be empty") + require(observations.length == volumes.length, "observations and volumes must have the same length") + require(observations.forall(_ >= 0), "all observations must be non-negative") + require(volumes.forall(v => v > 0 && v.isFinite), "volumes must be positive and finite") + + val nObs = observations.length + + var i = 0 + var sumRate = 0.0 + while i < nObs do + sumRate += observations(i) / volumes(i) + i += 1 + end while + + val meanRate = sumRate / nObs + require(meanRate > 0, "mean per unit volume must be positive for NB fitting") + + var varRate = 0.0 + i = 0 + while i < nObs do + val rate = observations(i) / volumes(i) + val diff = rate - meanRate + varRate += diff * diff + i += 1 + end while + varRate /= nObs.toDouble + + val betaFloor = 1e-6 + var beta = + if varRate <= meanRate then betaFloor + else math.max((varRate / meanRate) - 1.0, betaFloor) + var r = meanRate / beta + + var iter = 0 + var converged = false + val ridge = 1e-12 + + while iter < maxIter && !converged do + var gBeta = 0.0 + var gR = 0.0 + var hbb = 0.0 + var hrr = 0.0 + var hbr = 0.0 + + i = 0 + while i < nObs do + val n = observations(i).toDouble + val v = volumes(i) + val betaV = beta * v + val denom = 1.0 + betaV + val invDenom = 1.0 / denom 
+ val invDenom2 = invDenom * invDenom + val invBeta = 1.0 / beta + + gBeta += n * invBeta * invDenom - r * v * invDenom + gR += org.apache.commons.numbers.gamma.Digamma.value(r + n) - + org.apache.commons.numbers.gamma.Digamma.value(r) - + math.log(denom) + + hbb += r * v * invDenom2 - n * (1.0 + 2.0 * betaV) * invBeta * invBeta * invDenom2 + hrr += org.apache.commons.numbers.gamma.Trigamma.value(r + n) - + org.apache.commons.numbers.gamma.Trigamma.value(r) + hbr -= v * invDenom + i += 1 + end while + + val hbbAdj = hbb + ridge + val hrrAdj = hrr + ridge + val det = hbbAdj * hrrAdj - hbr * hbr + + if det.isNaN || det.isInfinite || math.abs(det) < 1e-18 then + iter = maxIter + else + val deltaBeta = (gBeta * hrrAdj - gR * hbr) / det + val deltaR = (hbbAdj * gR - hbr * gBeta) / det + + var step = 1.0 + var newBeta = beta - step * deltaBeta + var newR = r - step * deltaR + + while step > 1e-3 && (newBeta <= 0 || newR <= 0 || newBeta.isNaN || newR.isNaN) do + step *= 0.5 + newBeta = beta - step * deltaBeta + newR = r - step * deltaR + end while + + if newBeta > 0 && newR > 0 && newBeta.isFinite && newR.isFinite then + beta = newBeta + r = newR + converged = + math.abs(step * deltaBeta) <= tol * math.abs(beta) && + math.abs(step * deltaR) <= tol * math.abs(r) + else iter = maxIter + end if + + iter += 1 + end while + + (NegativeBinomial(r, beta), converged) + + inline def mleVolumeWeighted(observations: Array[Int], volumes: Array[Double], maxIter: Int = 100, tol: Double = 1e-8): (dist: NegativeBinomial, converged: Boolean) = volweightedMle(observations, volumes, maxIter, tol) + end NegativeBinomial diff --git a/vecxt_re/src-jvm/dist/Pareto.scala b/vecxt_re/src-jvm/dist/Pareto.scala index 868cf052..c550a5e0 100644 --- a/vecxt_re/src-jvm/dist/Pareto.scala +++ b/vecxt_re/src-jvm/dist/Pareto.scala @@ -72,17 +72,28 @@ case class Pareto(scale: Double, shape: Double) def plot(using viz.LowPriorityPlotTarget) = - val linePlot = VegaPlot.fromResource("paretoPdf.vl.json") - val 
maxX = guessMaxXForPlot - val numPoints = 1000 + val linePlot2 = VegaPlot.fromResource("paretoPdf.vl.json") + val maxX = guessMaxXForPlot * 5 + val numPoints = 10000 val data = (0 until numPoints).map { _ => - (x = draw) + (x = Math.min(maxX, draw)) } - linePlot.plot( + + // Analytic Pareto( scale=k, shape=α ) PDF: f(x) = α k^α / x^(α+1) for x >= k + // The Vega template contains a placeholder formula; we inject the parameterized one here. + val pdfExpr = s"$shape * pow($scale, $shape) * pow(datum.data, -(${shape + 1.0}))" + + linePlot2.plot( _.layer.head.data.values := data.asJson, - _.layer(1).data.sequence.start := scale, - _.layer(1).data.sequence.stop := maxX, - _.layer(1).data.sequence.step := (maxX - scale) / 200, + _.layer.head.encoding.x.scale.domain := List(scale, maxX).asJson, + _.layer.head.encoding.x.scale.nice := false, + _.layer.head.encoding.x.scale.domain := List(scale, maxX).asJson, + _.layer._1.encoding.x.scale.nice := false, + _.layer._1.data.sequence.start := scale, + _.layer._1.data.sequence.stop := maxX, + _.layer._1.data.sequence.step := (maxX - scale) / 200, + _.layer._1.transform.head.calculate := pdfExpr, + _ += (title = s"Pareto Distribution PDF (scale=$scale, shape=$shape)").asJson ) end plot diff --git a/vecxt_re/test/src-jvm/NegBin.test.scala b/vecxt_re/test/src-jvm/NegBin.test.scala index 2ab2b8e5..68aef872 100644 --- a/vecxt_re/test/src-jvm/NegBin.test.scala +++ b/vecxt_re/test/src-jvm/NegBin.test.scala @@ -6,18 +6,16 @@ import org.apache.commons.statistics.distribution.PoissonDistribution class NegBinTest extends FunSuite: - inline val localTests = false + inline val localTests = true test("pmf approximately normalizes") { val nb = NegativeBinomial(a = 2.5, b = 1.2) val mu = nb.mean val sd = math.sqrt(nb.variance) - val K = (mu + 15 * sd).toInt + val K = (mu + 15 * sd).toInt - val sum = (0 to K).map(nb.probabilityOf).sum - - println(sum) + val sum = (0 to K).map(nb.probabilityOf).sum assert(math.abs(sum - 1.0) < 1e-8) } @@ -37,8 
+35,8 @@ class NegBinTest extends FunSuite: test("approaches Poisson as b -> 0") { val mu = 4.0 - val b = 1e-6 - val a = mu / b + val b = 1e-6 + val a = mu / b val nb = NegativeBinomial(a, b) val pois = PoissonDistribution.of(mu) @@ -80,12 +78,12 @@ class NegBinTest extends FunSuite: assume(localTests, "Don't run local-only tests in CI ideally as they are slow") println("=============TURN OFF vecxt_re.NegBinTest.sampling mean and variance IN CI========") val nb = NegativeBinomial(5.0, 0.8) - val n = 2_000_000 + val n = 2_000_000 val xs = Array.fill(n)(nb.draw.toDouble) val mean = xs.sum / n - val varr = xs.map(x => (x - mean)*(x - mean)).sum / n + val varr = xs.map(x => (x - mean) * (x - mean)).sum / n assert(math.abs(mean - nb.mean) < 5e-3) assert(math.abs(varr - nb.variance) < 5e-2) @@ -95,10 +93,10 @@ class NegBinTest extends FunSuite: assume(localTests, "Don't run local-only tests in CI ideally as they are slow") println("=============TURN OFF vecxt_re.NegBinTest.sampling distribution matches pmf IN CI========") val nb = NegativeBinomial(2.0, 1.5) - val n = 500_000 + val n = 500_000 val samples = Array.fill(n)(nb.draw) - val counts = samples.groupBy(identity).view.mapValues(_.size).toMap + val counts = samples.groupBy(identity).view.mapValues(_.size).toMap val K = 20 (0 to K).foreach { k => @@ -113,16 +111,80 @@ class NegBinTest extends FunSuite: println("=============TURN OFF vecxt_re.MLE recovers parameters IN CI========") val trueNb = NegativeBinomial(4.0, 0.6) - val data = Array.fill(10_000)(trueNb.draw) + val data = Array.fill(10_000)(trueNb.draw) val (fitted, converged) = NegativeBinomial.mle(data) assert(converged) - println(s"True parameters: a=${trueNb.a}, b=${trueNb.b}") - println(s"Fitted parameters: a=${fitted.a}, b=${fitted.b}") + // println(s"True parameters: a=${trueNb.a}, b=${trueNb.b}") + // println(s"Fitted parameters: a=${fitted.a}, b=${fitted.b}") + + assertEqualsDouble(fitted.mean, trueNb.mean, 0.1) + assertEqualsDouble(fitted.b, trueNb.b, 
0.1) + } + + test("SLOW: vol weighted MLE follows standard case with uniform volumes ") { + + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.MLE recovers parameters IN CI========") + + val trueNb = NegativeBinomial(4.0, 0.6) + val data = Array.fill(10_000)(trueNb.draw) + + val (fitted, converged) = NegativeBinomial.mleVolumeWeighted(data, Array.fill(10_000)(1.0)) + assert(converged) + + assertEqualsDouble(fitted.mean, trueNb.mean, 0.1) + assertEqualsDouble(fitted.b, trueNb.b, 0.1) + } + - assertEqualsDouble(fitted.mean , trueNb.mean, 0.1) - assertEqualsDouble(fitted.b , trueNb.b, 0.1) + /** + * + * + * This directly exercises the volume factors: counts drawn with v = 0.5 use scale βv = 0.4, and with v = 2.0 use βv = 1.6; the fitter must undo that scaling to recover β = 0.8. + */ + test("SLOW: volume-weighted MLE recovers base params with mixed volumes") { + assume(localTests, "Skip heavy sampling in CI") + + val rTrue = 3.2 + val betaTrue = 0.8 + val seed = 12345L + val nPerBucket = 25_000 + val vols = Array.fill(nPerBucket)(0.5) ++ Array.fill(nPerBucket)(2.0) + + val rng = org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create(seed) + val gammaLow = org.apache.commons.statistics.distribution.GammaDistribution + .of(rTrue, betaTrue * 0.5) + .createSampler(rng) + val gammaHigh = org.apache.commons.statistics.distribution.GammaDistribution + .of(rTrue, betaTrue * 2.0) + .createSampler(rng) + + val data = new Array[Int](vols.length) + var i = 0 + while i < vols.length do + val lambda = + if i < nPerBucket then gammaLow.sample() + else gammaHigh.sample() + data(i) = org.apache.commons.statistics.distribution.PoissonDistribution + .of(lambda) + .createSampler(rng) + .sample() + i += 1 + end while + + val (fitted, converged) = NegativeBinomial.mleVolumeWeighted(data, vols, maxIter = 200, tol = 1e-8) + assert(converged) + assertEqualsDouble(fitted.a, rTrue, 0.1) + 
assertEqualsDouble(fitted.b, betaTrue, 0.1) + + // Ignoring volumes collapses a mixture of scaled NB's into a single NB, which should fit worse + // (at minimum: it should be less accurate on the modeled-period mean and dispersion). + val modeledMean = rTrue * betaTrue + val (unweighted, _) = NegativeBinomial.mle(data) + assert(math.abs(fitted.mean - modeledMean) <= math.abs(unweighted.mean - modeledMean)) + assert(math.abs(fitted.b - betaTrue) <= math.abs(unweighted.b - betaTrue)) } -end NegBinTest \ No newline at end of file +end NegBinTest From d38bf05f56952ada812ff04e03ba4d96044d7c05 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 26 Jan 2026 16:33:31 +0100 Subject: [PATCH 35/75] . --- vecxt_re/resources/empiricalCdf.vl.json | 12 ++ vecxt_re/resources/empiricalPdf.vl.json | 23 +++ vecxt_re/resources/hist.vg.json | 96 ++++++++++ vecxt_re/resources/paretoPdfFixed.vl.json | 40 ++++ vecxt_re/src-jvm/dist/Empirical.scala | 186 +++++++++++++++++++ vecxt_re/src-jvm/dist/NegativeBinomial.scala | 40 +++- vecxt_re/test/src-jvm/Empirical.test.scala | 101 ++++++++++ 7 files changed, 495 insertions(+), 3 deletions(-) create mode 100644 vecxt_re/resources/empiricalCdf.vl.json create mode 100644 vecxt_re/resources/empiricalPdf.vl.json create mode 100644 vecxt_re/resources/hist.vg.json create mode 100644 vecxt_re/resources/paretoPdfFixed.vl.json create mode 100644 vecxt_re/src-jvm/dist/Empirical.scala create mode 100644 vecxt_re/test/src-jvm/Empirical.test.scala diff --git a/vecxt_re/resources/empiricalCdf.vl.json b/vecxt_re/resources/empiricalCdf.vl.json new file mode 100644 index 00000000..10f01943 --- /dev/null +++ b/vecxt_re/resources/empiricalCdf.vl.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Empirical CDF (optionally weighted)", + "width": "container", + "height": "container", + "data": { "values": [ { "x": 0.0, "cdf": 0.0 } ] }, + "mark": { "type": "line", "interpolate": "step-after" }, + "encoding": { + 
"x": { "field": "x", "type": "quantitative", "title": "x" }, + "y": { "field": "cdf", "type": "quantitative", "title": "CDF" } + } +} diff --git a/vecxt_re/resources/empiricalPdf.vl.json b/vecxt_re/resources/empiricalPdf.vl.json new file mode 100644 index 00000000..82c6d343 --- /dev/null +++ b/vecxt_re/resources/empiricalPdf.vl.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Empirical distribution PDF via (optionally weighted) histogram", + "width": "container", + "height": "container", + "data": { "values": [ { "x": 0.0, "w": 1.0 } ] }, + "transform": [ + { "bin": { "maxbins": 60 }, "field": "x", "as": ["x0", "x1"] }, + { "aggregate": [ { "op": "sum", "field": "w", "as": "binWeight" } ], "groupby": ["x0", "x1"] }, + { "joinaggregate": [ { "op": "sum", "field": "binWeight", "as": "W" } ] }, + { "calculate": "datum.binWeight / (datum.W * (datum.x1 - datum.x0))", "as": "density" } + ], + "mark": { "type": "bar", "opacity": 0.65 }, + "encoding": { + "x": { "field": "x0", "type": "quantitative", "title": "x" }, + "x2": { "field": "x1" }, + "y": { "field": "density", "type": "quantitative", "title": "PDF" }, + "tooltip": [ + { "field": "binWeight", "type": "quantitative", "title": "Bin weight" }, + { "field": "density", "type": "quantitative", "title": "Density" } + ] + } +} diff --git a/vecxt_re/resources/hist.vg.json b/vecxt_re/resources/hist.vg.json new file mode 100644 index 00000000..2485e709 --- /dev/null +++ b/vecxt_re/resources/hist.vg.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "An interactive histogram for visualizing a univariate distribution.", + "width": 500, + "height": 100, + "padding": 5, + + "signals": [ + { "name": "binOffset", "value": 0, + "bind": {"input": "range", "min": 1, "max": 10} }, + { "name": "binStep", "value": 1, + "bind": {"input": "range", "min": 0.001, "max":0, "step": 1} } + ], + + "data": [ + { + "name": "points", + 
"values": [ + {"u": 0}, {"u": 1}, {"u": 2}, {"u": 3}, {"u": 4}, + {"u": 5}, {"u": 6}, {"u": 7}, {"u": 8}, {"u": 9}, {"u": 10} + + ] + }, + { + "name": "binned", + "source": "points", + "transform": [ + { + "type": "bin", "field": "u", + "extent": [0, 10], + "anchor": {"signal": "binOffset"}, + "step": {"signal": "binStep"}, + "nice": false + }, + { + "type": "aggregate", + "key": "bin0", "groupby": ["bin0", "bin1"], + "fields": ["bin0"], "ops": ["count"], "as": ["count"] + } + ] + } + ], + + "scales": [ + { + "name": "xscale", + "type": "linear", + "range": "width", + "domain": [0, 10] + }, + { + "name": "yscale", + "type": "linear", + "range": "height", "round": true, + "domain": {"data": "binned", "field": "count"}, + "zero": true, "nice": true + } + ], + + "axes": [ + {"orient": "bottom", "scale": "xscale", "zindex": 1}, + {"orient": "left", "scale": "yscale", "tickCount": 5, "zindex": 1} + ], + + "marks": [ + { + "type": "rect", + "from": {"data": "binned"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0"}, + "x2": {"scale": "xscale", "field": "bin1", + "offset": {"signal": "binStep > 0.02 ? 
-0.5 : 0"}}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "steelblue"} + }, + "hover": { "fill": {"value": "firebrick"} } + } + }, + { + "type": "rect", + "from": {"data": "points"}, + "encode": { + "enter": { + "x": {"scale": "xscale", "field": "u"}, + "width": {"value": 1}, + "y": {"value": 25, "offset": {"signal": "height"}}, + "height": {"value": 5}, + "fill": {"value": "steelblue"}, + "fillOpacity": {"value": 0.4} + } + } + } + ] +} \ No newline at end of file diff --git a/vecxt_re/resources/paretoPdfFixed.vl.json b/vecxt_re/resources/paretoPdfFixed.vl.json new file mode 100644 index 00000000..fb5fc58d --- /dev/null +++ b/vecxt_re/resources/paretoPdfFixed.vl.json @@ -0,0 +1,40 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Pareto PDF with log-binned histogram and analytic overlay", + "width": "container", + "height": "container", + "resolve": { + "scale": { + "x": "shared" + } + }, + "layer": [ + { + "data": { "values": [ { "x": 1.02 }, { "x": 1.05 } ] }, + "transform": [ + { "calculate": "log(datum.x)", "as": "logx" }, + { "bin": { "maxbins": 50 }, "field": "logx", "as": ["logx0", "logx1"] }, + { "aggregate": [ { "op": "count", "as": "count" } ], "groupby": ["logx0", "logx1"] }, + { "joinaggregate": [ { "op": "sum", "field": "count", "as": "N" } ] }, + { "calculate": "exp(datum.logx0)", "as": "x0" }, + { "calculate": "exp(datum.logx1)", "as": "x1" }, + { "calculate": "datum.count / (datum.N * (datum.x1 - datum.x0))", "as": "density" } + ], + "mark": { "type": "bar", "opacity": 0.6 }, + "encoding": { + "x": { "field": "x0", "type": "quantitative", "scale": { "type": "log" }, "title": "x" }, + "x2": { "field": "x1" }, + "y": { "field": "density", "type": "quantitative", "title": "PDF" } + } + }, + { + "data": { "sequence": { "start": 1, "stop": 20, "step": 0.05 } }, + "transform": [ { "calculate": "pow(datum.data, -2)", "as": "pdf" } ], + "mark": { "type": 
"line", "strokeWidth": 2 }, + "encoding": { + "x": { "field": "data", "type": "quantitative", "scale": { "type": "log" } }, + "y": { "field": "pdf", "type": "quantitative" } + } + } + ] +} diff --git a/vecxt_re/src-jvm/dist/Empirical.scala b/vecxt_re/src-jvm/dist/Empirical.scala new file mode 100644 index 00000000..72de5fb3 --- /dev/null +++ b/vecxt_re/src-jvm/dist/Empirical.scala @@ -0,0 +1,186 @@ +package vecxt_re + +import org.apache.commons.rng.simple.RandomSource +import io.github.quafadas.plots.SetupVega.{*, given} +import io.circe.syntax.* + +/** Empirical distribution (JVM only). + * + * This is a nonparametric distribution built directly from observed samples. + * It supports positive weights $w_i$. + * + * The distribution is represented as a discrete measure on the (possibly repeated) sample values: + * $$\mathbb{P}(X = x) = \sum_{i: x_i = x} \frac{w_i}{\sum_k w_k}.$$ + * + * Consequently, the CDF is a right-continuous step function + * $$F(t) = \mathbb{P}(X \le t) = \sum_{x \le t} \mathbb{P}(X=x).$$ + * + * Sampling is performed by inverse-transform sampling on the cumulative weights. + * + * Note: Since this is an empirical (atomic) distribution, a density/PDF in the usual continuous sense is not defined. + * The `plot` method instead displays a (weighted) histogram density estimate. 
+ */ +case class Empirical(values: IArray[Double], weights: IArray[Double]) + extends DiscreteDistr[Double] + with HasMean[Double] + with HasVariance[Double] + with HasCdf + with HasInverseCdf: + + require(values.nonEmpty, "values must not be empty") + require(values.forall(_.isFinite), "all values must be finite") + require(weights.length == values.length, "weights must be the same length as values") + require(weights.forall(w => w > 0 && w.isFinite), "weights must be positive and finite") + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + + private val n = values.length + + private val pairs: Array[(Double, Double)] = + val out = new Array[(Double, Double)](n) + var j = 0 + while j < n do + out(j) = (values(j), weights(j)) + j += 1 + end while + out + + scala.util.Sorting.stableSort(pairs, (a: (Double, Double), b: (Double, Double)) => a._1 < b._1) + + // Compress duplicates so we have unique support points. + private val xsBuf = scala.collection.mutable.ArrayBuffer.empty[Double] + private val wBuf = scala.collection.mutable.ArrayBuffer.empty[Double] + + private var totalWeight = 0.0 + private var i = 0 + while i < pairs.length do + val x = pairs(i)._1 + var wSum = 0.0 + while i < pairs.length && pairs(i)._1 == x do + val w = pairs(i)._2 + wSum += w + totalWeight += w + i += 1 + end while + xsBuf += x + wBuf += wSum + end while + + private val xs: Array[Double] = xsBuf.toArray + private val probs: Array[Double] = wBuf.toArray.map(_ / totalWeight) + + private val cdfVals: Array[Double] = + val out = new Array[Double](probs.length) + var acc = 0.0 + var j = 0 + while j < probs.length do + acc += probs(j) + out(j) = acc + j += 1 + end while + out + + private val meanVal: Double = + var s = 0.0 + var j = 0 + while j < xs.length do + s += xs(j) * probs(j) + j += 1 + end while + s + + private val varVal: Double = + var s2 = 0.0 + var j = 0 + while j < xs.length do + val d = xs(j) - meanVal + s2 += probs(j) * d * d + j += 1 + end while + s2 + + def mean: 
Double = meanVal + + def variance: Double = varVal + + /** Probability mass at exactly `x` (sums weights for duplicates). */ + def probabilityOf(x: Double): Double = + val idx = java.util.Arrays.binarySearch(xs, x) + if idx >= 0 then probs(idx) else 0.0 + + /** Draw a sample using inverse CDF sampling over the atomic masses. */ + def draw: Double = + val u = rng.nextDouble() + inverseCdf(u) + + /** CDF $F(t)=P(X\le t)$ (right-continuous). */ + def cdf(x: Double): Double = + if x < xs(0) then 0.0 + else if x >= xs(xs.length - 1) then 1.0 + else + // Find the last index with xs(idx) <= x + val ip = java.util.Arrays.binarySearch(xs, x) + val idx = if ip >= 0 then ip else -ip - 2 + cdfVals(idx) + + /** Probability that $x < X \le y$. */ + def probability(x: Double, y: Double): Double = + if y <= x then 0.0 + else cdf(y) - cdf(x) + + /** Inverse CDF (quantile function): returns the smallest `x` with `F(x) >= p`. */ + def inverseCdf(p: Double): Double = + require(p >= 0.0 && p <= 1.0, "p must be in [0,1]") + if p <= 0.0 then xs(0) + else + val ip = java.util.Arrays.binarySearch(cdfVals, p) + val idx = if ip >= 0 then ip else -ip - 1 + xs(math.min(idx, xs.length - 1)) + + /** Plot a (weighted) histogram density estimate. */ + def plot(using viz.LowPriorityPlotTarget) = + val plot = VegaPlot.fromResource("empiricalPdf.vl.json") + val data = (0 until n).map { i => (x = values(i), w = weights(i)) } + plot.plot( + _.data.values := data.asJson, + _ += (title = s"Empirical Distribution (n=$n)").asJson + ) + + /** Plot the empirical CDF (step function). */ + def plotCdf(using viz.LowPriorityPlotTarget) = + val plot = VegaPlot.fromResource("empiricalCdf.vl.json") + + // Add an initial point at (min, 0) so the step is visible from the left. 
+ val points = + val pts = scala.collection.mutable.ArrayBuffer.empty[(x: Double, cdf: Double)] + pts += ((x = xs(0), cdf = 0.0)) + var j = 0 + while j < xs.length do + pts += ((x = xs(j), cdf = cdfVals(j))) + j += 1 + end while + pts.toVector + + plot.plot( + _.data.values := points.asJson, + _ += (title = s"Empirical CDF (n=$n)").asJson + ) + +end Empirical + +object Empirical: + /** Construct an unweighted empirical distribution (all weights equal to $1$). + * + * Note: We intentionally avoid `apply` overloads here because `IArray[Double]` erases to `Array[Double]` + * on the JVM, which can create signature collisions with the case class companion methods. + */ + inline def equalWeights(values: Array[Double]): Empirical = + Empirical( + IArray.from(values), + IArray.from(Array.fill(values.length)(1.0)) + ) + + /** Construct a weighted empirical distribution from arrays. */ + inline def weighted(values: Array[Double], weights: Array[Double]): Empirical = + Empirical(IArray.from(values), IArray.from(weights)) + diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index 46d4abd2..908757d7 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -132,6 +132,30 @@ object NegativeBinomial: val n = observations.length.toDouble val xbar = observations.sum / n + val sumX = observations.sum.toDouble + // Constant term in the log-likelihood: -∑ log Γ(x_i+1) + var sumLogFact = 0.0 + var _i = 0 + while _i < observations.length do + sumLogFact += LogGamma.value(observations(_i) + 1) + _i += 1 + end while + + // Profile log-likelihood with b = xbar/a (equivalently p = a/(a+xbar)) + def profileLogLik(a: Double): Double = + if a <= 0 || !a.isFinite then Double.NegativeInfinity + else + val p = a / (a + xbar) + val logP = math.log(p) + val log1MinusP = math.log1p(-p) + var ll = n * a * logP + sumX * log1MinusP - sumLogFact - n * LogGamma.value(a) + var k = 0 + while k < 
observations.length do + ll += LogGamma.value(a + observations(k)) + k += 1 + end while + ll + val variance = observations.map(x => (x - xbar) * (x - xbar)).sum / n require(xbar > 0, "mean must be positive for NB fitting") @@ -171,12 +195,22 @@ object NegativeBinomial: end while val delta = score / negHessian - val aNew = a + delta - if aNew <= 0 then a = a / 2.0 + // Backtracking line search on the profile log-likelihood to improve robustness. + val llCur = profileLogLik(a) + var step = 1.0 + var aNew = a + step * delta + var llNew = profileLogLik(aNew) + while step > 1e-6 && llNew < llCur do + step *= 0.5 + aNew = a + step * delta + llNew = profileLogLik(aNew) + end while + + if aNew <= 0 || !aNew.isFinite then a = a / 2.0 else a = aNew - converged = math.abs(delta) < tol * math.abs(a) + converged = math.abs(step * delta) < tol * math.abs(a) iter += 1 end while diff --git a/vecxt_re/test/src-jvm/Empirical.test.scala b/vecxt_re/test/src-jvm/Empirical.test.scala new file mode 100644 index 00000000..93b849ad --- /dev/null +++ b/vecxt_re/test/src-jvm/Empirical.test.scala @@ -0,0 +1,101 @@ +package vecxt_re + +import munit.FunSuite + +class EmpiricalTest extends FunSuite: + + test("cdf/inverseCdf are consistent (unweighted)") { + val xs = Array(3.0, 1.0, 2.0, 2.0) + val emp = Empirical.equalWeights(xs) + + assertEqualsDouble(emp.cdf(0.5), 0.0, 1e-12) + assertEqualsDouble(emp.cdf(1.0), 0.25, 1e-12) + assertEqualsDouble(emp.cdf(1.5), 0.25, 1e-12) + assertEqualsDouble(emp.cdf(2.0), 0.75, 1e-12) + assertEqualsDouble(emp.cdf(10.0), 1.0, 1e-12) + + // Quantiles: smallest x with F(x) >= p + assertEqualsDouble(emp.inverseCdf(0.0), 1.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.25), 1.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.2500001), 2.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.75), 2.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(1.0), 3.0, 1e-12) + } + + test("mean/variance match weighted formulas") { + val xs = Array(1.0, 10.0) + val ws = Array(3.0, 1.0) // 
75% at 1, 25% at 10 + val emp = Empirical.weighted(xs, ws) + + val mean = 0.75 * 1.0 + 0.25 * 10.0 + val variance = 0.75 * (1.0 - mean) * (1.0 - mean) + 0.25 * (10.0 - mean) * (10.0 - mean) + + assertEqualsDouble(emp.mean, mean, 1e-12) + assertEqualsDouble(emp.variance, variance, 1e-12) + + assertEqualsDouble(emp.probabilityOf(1.0), 0.75, 1e-12) + assertEqualsDouble(emp.probabilityOf(10.0), 0.25, 1e-12) + assertEqualsDouble(emp.probability(1.0, 10.0), 0.25, 1e-12) // P(1 < X <= 10) = 0.25 + } + + test("single element distribution") { + val emp = Empirical.equalWeights(Array(42.0)) + assertEqualsDouble(emp.mean, 42.0, 1e-12) + assertEqualsDouble(emp.variance, 0.0, 1e-12) + assertEqualsDouble(emp.cdf(41.0), 0.0, 1e-12) + assertEqualsDouble(emp.cdf(42.0), 1.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.0), 42.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.5), 42.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(1.0), 42.0, 1e-12) + assertEqualsDouble(emp.probabilityOf(42.0), 1.0, 1e-12) + } + + test("all duplicate values are merged") { + val emp = Empirical.equalWeights(Array(5.0, 5.0, 5.0)) + assertEqualsDouble(emp.mean, 5.0, 1e-12) + assertEqualsDouble(emp.variance, 0.0, 1e-12) + assertEqualsDouble(emp.probabilityOf(5.0), 1.0, 1e-12) + } + + test("inverseCdf(1.0) returns maximum") { + val emp = Empirical.equalWeights(Array(1.0, 2.0, 100.0)) + assertEqualsDouble(emp.inverseCdf(1.0), 100.0, 1e-12) + } + + test("cdf at exact max value equals 1") { + val emp = Empirical.equalWeights(Array(1.0, 2.0, 3.0)) + assertEqualsDouble(emp.cdf(3.0), 1.0, 1e-12) + } + + test("draw returns values in support") { + val xs = Array(10.0, 20.0, 30.0) + val emp = Empirical.equalWeights(xs) + val samples = (1 to 100).map(_ => emp.draw) + assert(samples.forall(s => xs.contains(s))) + } + + test("construction fails on empty values") { + intercept[IllegalArgumentException] { + Empirical.equalWeights(Array.empty[Double]) + } + } + + test("construction fails on zero weight") { + 
intercept[IllegalArgumentException] { + Empirical.weighted(Array(1.0), Array(0.0)) + } + } + + test("construction fails on negative weight") { + intercept[IllegalArgumentException] { + Empirical.weighted(Array(1.0, 2.0), Array(1.0, -1.0)) + } + } + + test("construction fails on NaN in values") { + intercept[IllegalArgumentException] { + Empirical.equalWeights(Array(1.0, Double.NaN)) + } + } + +end EmpiricalTest From 76b87066f50ac3bc695d7288962d2cb3fdad7d1c Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 26 Jan 2026 16:45:22 +0100 Subject: [PATCH 36/75] . --- .github/workflows/autofix.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/autofix.yml diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml new file mode 100644 index 00000000..83266889 --- /dev/null +++ b/.github/workflows/autofix.yml @@ -0,0 +1,19 @@ +name: 'autofix.ci' +on: + pull_request: +jobs: + autofix: + if: github.event.pull_request.draft == false + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 25 + + - name: Run autoformat + run: ./mill mill.scalalib.scalafmt.ScalafmtModule/reformatAll __.sources + + - uses: autofix-ci/action@ff86a557419858bb967097bfc916833f5647fa8c \ No newline at end of file From 75eece26a5ab6746d75aff2321e7e06710a5c65e Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 26 Jan 2026 17:03:13 +0100 Subject: [PATCH 37/75] . 
--- vecxt_re/test/src-jvm/Empirical.test.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/vecxt_re/test/src-jvm/Empirical.test.scala b/vecxt_re/test/src-jvm/Empirical.test.scala index 93b849ad..6241978e 100644 --- a/vecxt_re/test/src-jvm/Empirical.test.scala +++ b/vecxt_re/test/src-jvm/Empirical.test.scala @@ -97,5 +97,6 @@ class EmpiricalTest extends FunSuite: Empirical.equalWeights(Array(1.0, Double.NaN)) } } + end EmpiricalTest From 6ad8d69c2a59b8f98cbf1f56824a38a9ea02380a Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 26 Jan 2026 16:04:52 +0000 Subject: [PATCH 38/75] [autofix.ci] apply automated fixes --- experiments/src/index.scala | 3 +- vecxt_re/src-jvm/dist/Empirical.scala | 31 ++++--- vecxt_re/src-jvm/dist/NegativeBinomial.scala | 64 ++++++++------ vecxt_re/src-jvm/dist/Pareto.scala | 7 +- vecxt_re/src/CalendarYearIndex.scala | 15 ++-- vecxt_re/src/IndexPerPeriod.scala | 89 +++++++++++-------- vecxt_re/src/dist/Dist.scala | 52 +++++------ vecxt_re/src/dist/Rand.scala | 93 +++++++++----------- vecxt_re/test/src-jvm/Empirical.test.scala | 1 - vecxt_re/test/src-jvm/NegBin.test.scala | 11 +-- 10 files changed, 192 insertions(+), 174 deletions(-) diff --git a/experiments/src/index.scala b/experiments/src/index.scala index 9f4f4380..af10a63b 100644 --- a/experiments/src/index.scala +++ b/experiments/src/index.scala @@ -8,4 +8,5 @@ import io.github.quafadas.plots.SetupVegaBrowser.{*, given} val calYrIdx = vecxt_re.CalendarYearIndex(2025, idx.year, idx.idx) println(calYrIdx) calYrIdx.plotIndex(1.0) - println("finished") \ No newline at end of file + println("finished") +end plotIndex diff --git a/vecxt_re/src-jvm/dist/Empirical.scala b/vecxt_re/src-jvm/dist/Empirical.scala index 72de5fb3..d287be9a 100644 --- a/vecxt_re/src-jvm/dist/Empirical.scala +++ b/vecxt_re/src-jvm/dist/Empirical.scala @@ -6,14 +6,13 @@ import io.circe.syntax.* /** Empirical distribution (JVM only). 
* - * This is a nonparametric distribution built directly from observed samples. - * It supports positive weights $w_i$. + * This is a nonparametric distribution built directly from observed samples. It supports positive weights $w_i$. * - * The distribution is represented as a discrete measure on the (possibly repeated) sample values: - * $$\mathbb{P}(X = x) = \sum_{i: x_i = x} \frac{w_i}{\sum_k w_k}.$$ + * The distribution is represented as a discrete measure on the (possibly repeated) sample values: $$\mathbb{P}(X = x) = + * \sum_{i: x_i = x} \frac{w_i}{\sum_k w_k}.$$ * - * Consequently, the CDF is a right-continuous step function - * $$F(t) = \mathbb{P}(X \le t) = \sum_{x \le t} \mathbb{P}(X=x).$$ + * Consequently, the CDF is a right-continuous step function $$F(t) = \mathbb{P}(X \le t) = \sum_{x \le t} + * \mathbb{P}(X=x).$$ * * Sampling is performed by inverse-transform sampling on the cumulative weights. * @@ -44,6 +43,7 @@ case class Empirical(values: IArray[Double], weights: IArray[Double]) j += 1 end while out + end pairs scala.util.Sorting.stableSort(pairs, (a: (Double, Double), b: (Double, Double)) => a._1 < b._1) @@ -79,6 +79,7 @@ case class Empirical(values: IArray[Double], weights: IArray[Double]) j += 1 end while out + end cdfVals private val meanVal: Double = var s = 0.0 @@ -88,6 +89,7 @@ case class Empirical(values: IArray[Double], weights: IArray[Double]) j += 1 end while s + end meanVal private val varVal: Double = var s2 = 0.0 @@ -98,6 +100,7 @@ case class Empirical(values: IArray[Double], weights: IArray[Double]) j += 1 end while s2 + end varVal def mean: Double = meanVal @@ -107,11 +110,14 @@ case class Empirical(values: IArray[Double], weights: IArray[Double]) def probabilityOf(x: Double): Double = val idx = java.util.Arrays.binarySearch(xs, x) if idx >= 0 then probs(idx) else 0.0 + end if + end probabilityOf /** Draw a sample using inverse CDF sampling over the atomic masses. 
*/ def draw: Double = val u = rng.nextDouble() inverseCdf(u) + end draw /** CDF $F(t)=P(X\le t)$ (right-continuous). */ def cdf(x: Double): Double = @@ -136,15 +142,18 @@ case class Empirical(values: IArray[Double], weights: IArray[Double]) val ip = java.util.Arrays.binarySearch(cdfVals, p) val idx = if ip >= 0 then ip else -ip - 1 xs(math.min(idx, xs.length - 1)) + end if + end inverseCdf /** Plot a (weighted) histogram density estimate. */ def plot(using viz.LowPriorityPlotTarget) = val plot = VegaPlot.fromResource("empiricalPdf.vl.json") - val data = (0 until n).map { i => (x = values(i), w = weights(i)) } + val data = (0 until n).map(i => (x = values(i), w = weights(i))) plot.plot( _.data.values := data.asJson, _ += (title = s"Empirical Distribution (n=$n)").asJson ) + end plot /** Plot the empirical CDF (step function). */ def plotCdf(using viz.LowPriorityPlotTarget) = @@ -160,19 +169,21 @@ case class Empirical(values: IArray[Double], weights: IArray[Double]) j += 1 end while pts.toVector + end points plot.plot( _.data.values := points.asJson, _ += (title = s"Empirical CDF (n=$n)").asJson ) + end plotCdf end Empirical object Empirical: /** Construct an unweighted empirical distribution (all weights equal to $1$). * - * Note: We intentionally avoid `apply` overloads here because `IArray[Double]` erases to `Array[Double]` - * on the JVM, which can create signature collisions with the case class companion methods. + * Note: We intentionally avoid `apply` overloads here because `IArray[Double]` erases to `Array[Double]` on the JVM, + * which can create signature collisions with the case class companion methods. */ inline def equalWeights(values: Array[Double]): Empirical = Empirical( @@ -183,4 +194,4 @@ object Empirical: /** Construct a weighted empirical distribution from arrays. 
*/ inline def weighted(values: Array[Double], weights: Array[Double]): Empirical = Empirical(IArray.from(values), IArray.from(weights)) - +end Empirical diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index 908757d7..a1ce4522 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -124,9 +124,14 @@ object NegativeBinomial: * @param tol * convergence tolerance for parameter 'a' * @return - * Named tuple with `dist`: the fitted NegativeBinomial distribution, and `converged`: whether the optimizer converged within maxIter + * Named tuple with `dist`: the fitted NegativeBinomial distribution, and `converged`: whether the optimizer + * converged within maxIter */ - def mle(observations: Array[Int], maxIter: Int = 500, tol: Double = 1e-8): (dist: NegativeBinomial, converged: Boolean) = + def mle( + observations: Array[Int], + maxIter: Int = 500, + tol: Double = 1e-8 + ): (dist: NegativeBinomial, converged: Boolean) = require(observations.nonEmpty, "observations must not be empty") require(observations.forall(_ >= 0), "all observations must be non-negative") @@ -209,6 +214,7 @@ object NegativeBinomial: if aNew <= 0 || !aNew.isFinite then a = a / 2.0 else a = aNew + end if converged = math.abs(step * delta) < tol * math.abs(a) iter += 1 @@ -221,25 +227,21 @@ object NegativeBinomial: /** Maximum likelihood estimation for the volume-adjusted Negative Binomial. * - * We observe pairs $(n_j, v_j)$ where $n_j$ is the count and $v_j$ is the volume ratio (historical volume / modeled volume). - * With parameters $(r, \beta)$ and $p = 1/(1+\beta v_j)$ the likelihood is - * $$ - * L(r,\beta) = \prod_j \frac{\Gamma(r+n_j)}{\Gamma(r)\,\Gamma(n_j+1)} \left(\frac{\beta v_j}{1+\beta v_j}\right)^{n_j} \left(\frac{1}{1+\beta v_j}\right)^r. 
- * $$ - * The log-likelihood is - * $$ - * \ell(r,\beta) = \sum_j \big[\log\Gamma(r+n_j) - \log\Gamma(r) - \log\Gamma(n_j+1) + n_j(\log(\beta v_j) - \log(1+\beta v_j)) - r\,\log(1+\beta v_j)\big]. - * $$ - * Gradient components: - * $$\partial_\beta \ell = \sum_j \Big( \frac{n_j}{\beta(1+\beta v_j)} - \frac{r v_j}{1+\beta v_j} \Big),\quad - * \partial_r \ell = \sum_j \big[\psi(r+n_j) - \psi(r) - \log(1+\beta v_j)\big],$$ - * and Hessian entries: - * $$\partial^2_{\beta\beta} \ell = \sum_j \Big( \frac{r v_j}{(1+\beta v_j)^2} - \frac{n_j(1+2\beta v_j)}{\beta^2(1+\beta v_j)^2} \Big),$$ - * $$\partial^2_{rr} \ell = \sum_j \big[\psi'(r+n_j) - \psi'(r)\big],\quad \partial^2_{\beta r} \ell = -\sum_j \frac{v_j}{1+\beta v_j}.$$ + * We observe pairs $(n_j, v_j)$ where $n_j$ is the count and $v_j$ is the volume ratio (historical volume / modeled + * volume). With parameters $(r, \beta)$ and $p = 1/(1+\beta v_j)$ the likelihood is $$ L(r,\beta) = \prod_j + * \frac{\Gamma(r+n_j)}{\Gamma(r)\,\Gamma(n_j+1)} \left(\frac{\beta v_j}{1+\beta v_j}\right)^{n_j} + * \left(\frac{1}{1+\beta v_j}\right)^r. $$ The log-likelihood is $$ \ell(r,\beta) = \sum_j \big[\log\Gamma(r+n_j) - + * \log\Gamma(r) - \log\Gamma(n_j+1) + n_j(\log(\beta v_j) - \log(1+\beta v_j)) - r\,\log(1+\beta v_j)\big]. $$ + * Gradient components: $$\partial_\beta \ell = \sum_j \Big( \frac{n_j}{\beta(1+\beta v_j)} - \frac{r v_j}{1+\beta + * v_j} \Big),\quad \partial_r \ell = \sum_j \big[\psi(r+n_j) - \psi(r) - \log(1+\beta v_j)\big],$$ and Hessian + * entries: $$\partial^2_{\beta\beta} \ell = \sum_j \Big( \frac{r v_j}{(1+\beta v_j)^2} - \frac{n_j(1+2\beta + * v_j)}{\beta^2(1+\beta v_j)^2} \Big),$$ $$\partial^2_{rr} \ell = \sum_j \big[\psi'(r+n_j) - \psi'(r)\big],\quad + * \partial^2_{\beta r} \ell = -\sum_j \frac{v_j}{1+\beta v_j}.$$ * * Implementation details: * - Initialize from method of moments on rates $n_j / v_j$; if underdispersed, start at a small $\beta$. 
- * - Newton updates solve the $2\times2$ system from the gradient/Hessian; a tiny ridge is added to keep the Hessian invertible. + * - Newton updates solve the $2\times2$ system from the gradient/Hessian; a tiny ridge is added to keep the + * Hessian invertible. * - Step halving is applied to enforce positivity of $r$ and $\beta$. * * @param observations @@ -253,7 +255,12 @@ object NegativeBinomial: * @return * tuple of fitted `NegativeBinomial(r, beta)` and a convergence flag */ - def volweightedMle(observations: Array[Int], volumes: Array[Double], maxIter: Int = 500, tol: Double = 1e-8): (dist: NegativeBinomial, converged: Boolean) = + def volweightedMle( + observations: Array[Int], + volumes: Array[Double], + maxIter: Int = 500, + tol: Double = 1e-8 + ): (dist: NegativeBinomial, converged: Boolean) = require(observations.nonEmpty, "observations must not be empty") require(observations.length == volumes.length, "observations and volumes must have the same length") require(observations.forall(_ >= 0), "all observations must be non-negative") @@ -324,8 +331,7 @@ object NegativeBinomial: val hrrAdj = hrr + ridge val det = hbbAdj * hrrAdj - hbr * hbr - if det.isNaN || det.isInfinite || math.abs(det) < 1e-18 then - iter = maxIter + if det.isNaN || det.isInfinite || math.abs(det) < 1e-18 then iter = maxIter else val deltaBeta = (gBeta * hrrAdj - gR * hbr) / det val deltaR = (hbbAdj * gR - hbr * gBeta) / det @@ -343,17 +349,23 @@ object NegativeBinomial: if newBeta > 0 && newR > 0 && newBeta.isFinite && newR.isFinite then beta = newBeta r = newR - converged = - math.abs(step * deltaBeta) <= tol * math.abs(beta) && - math.abs(step * deltaR) <= tol * math.abs(r) + converged = math.abs(step * deltaBeta) <= tol * math.abs(beta) && + math.abs(step * deltaR) <= tol * math.abs(r) else iter = maxIter + end if end if iter += 1 end while (NegativeBinomial(r, beta), converged) - - inline def mleVolumeWeighted(observations: Array[Int], volumes: Array[Double], maxIter: Int = 100, 
tol: Double = 1e-8): (dist: NegativeBinomial, converged: Boolean) = volweightedMle(observations, volumes, maxIter, tol) + end volweightedMle + + inline def mleVolumeWeighted( + observations: Array[Int], + volumes: Array[Double], + maxIter: Int = 100, + tol: Double = 1e-8 + ): (dist: NegativeBinomial, converged: Boolean) = volweightedMle(observations, volumes, maxIter, tol) end NegativeBinomial diff --git a/vecxt_re/src-jvm/dist/Pareto.scala b/vecxt_re/src-jvm/dist/Pareto.scala index c550a5e0..a441c54e 100644 --- a/vecxt_re/src-jvm/dist/Pareto.scala +++ b/vecxt_re/src-jvm/dist/Pareto.scala @@ -66,9 +66,9 @@ case class Pareto(scale: Double, shape: Double) def variance: Double = distribution.getVariance() private def guessMaxXForPlot = shape match - case s if s > 2 => mean + 4 * math.sqrt(variance) // mean and variance are defined - case s if s > 1 => mean + 20 * scale // no well defined variance - case _ => scale * 10 // no well defined mean + case s if s > 2 => mean + 4 * math.sqrt(variance) // mean and variance are defined + case s if s > 1 => mean + 20 * scale // no well defined variance + case _ => scale * 10 // no well defined mean def plot(using viz.LowPriorityPlotTarget) = @@ -93,7 +93,6 @@ case class Pareto(scale: Double, shape: Double) _.layer._1.data.sequence.stop := maxX, _.layer._1.data.sequence.step := (maxX - scale) / 200, _.layer._1.transform.head.calculate := pdfExpr, - _ += (title = s"Pareto Distribution PDF (scale=$scale, shape=$shape)").asJson ) end plot diff --git a/vecxt_re/src/CalendarYearIndex.scala b/vecxt_re/src/CalendarYearIndex.scala index ba5acb72..53d32f5b 100644 --- a/vecxt_re/src/CalendarYearIndex.scala +++ b/vecxt_re/src/CalendarYearIndex.scala @@ -92,14 +92,13 @@ object CalendarYearIndex: def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = val linePlot2 = VegaPlot.fromResource("index.vl.json") val cumulative = idx.onLevel(Array.fill(idx.years.length)(1.0), idx.years) - val factors = 
idx.years.zip(idx.indices).zip(cumulative).map { - case ((year, index), cumulative) => - ( - year = year, - index = index, - missing = 1 / cumulative, - threshold = idx.suggestedNewThreshold(reportingThreshold) - ) + val factors = idx.years.zip(idx.indices).zip(cumulative).map { case ((year, index), cumulative) => + ( + year = year, + index = index, + missing = 1 / cumulative, + threshold = idx.suggestedNewThreshold(reportingThreshold) + ) } linePlot2.plot( _.data.values := factors.asJson diff --git a/vecxt_re/src/IndexPerPeriod.scala b/vecxt_re/src/IndexPerPeriod.scala index e3c1d18f..894fced3 100644 --- a/vecxt_re/src/IndexPerPeriod.scala +++ b/vecxt_re/src/IndexPerPeriod.scala @@ -3,18 +3,20 @@ package vecxt_re import vecxt.all.* import vecxt.BoundsCheck.DoBoundsCheck.yes -/** - * Aims to provide a (very) simple index mapping for period-based models. +/** Aims to provide a (very) simple index mapping for period-based models. * - * Each period is associated with a unique index, which provided from period zero 0 going backwards for some historic number of periods. - * The basic goal of this is to "on-level" some historical dataset, which has labels corresponding to the periods here. + * Each period is associated with a unique index, which provided from period zero 0 going backwards for some historic + * number of periods. The basic goal of this is to "on-level" some historical dataset, which has labels corresponding + * to the periods here. * * This object provides methods to: - * - Retrieve the index for a given period. - * - Retrieve the cumulative index which will "on level" some historical number, from it's "historical period" to the "current period" + * - Retrieve the index for a given period. + * - Retrieve the cumulative index which will "on level" some historical number, from it's "historical period" to the + * "current period" * - * @param indices Array of indices where indices(0) is the current period (period 0) and indices(n) is n periods back. 
- * Each index typically represents a rate change factor for that period (e.g., 1.05 for 5% increase). + * @param indices + * Array of indices where indices(0) is the current period (period 0) and indices(n) is n periods back. Each index + * typically represents a rate change factor for that period (e.g., 1.05 for 5% increase). */ case class IndexPerPeriod(indices: Array[Double]): @@ -32,20 +34,21 @@ case class IndexPerPeriod(indices: Array[Double]): /** Number of periods available in the index */ inline def numPeriods: Int = indices.length - /** - * Get the index value for a specific period. + /** Get the index value for a specific period. * - * @param period The period number (0 = current, 1 = one period back, etc.) - * @return The index value for that period - * @throws IndexOutOfBoundsException if period is outside the available range + * @param period + * The period number (0 = current, 1 = one period back, etc.) + * @return + * The index value for that period + * @throws IndexOutOfBoundsException + * if period is outside the available range */ inline def indexAt(period: Int): Double = indices(period) - /** - * Calculate the cumulative on-leveling factor from a historical period to the current period. + /** Calculate the cumulative on-leveling factor from a historical period to the current period. * - * This multiplies all indices from period 0 up to (but not including) the specified historical period. - * The result is the factor needed to bring a value from the historical period to current levels. + * This multiplies all indices from period 0 up to (but not including) the specified historical period. The result is + * the factor needed to bring a value from the historical period to current levels. 
* * For example, if you have rate changes of 5% (1.05) each year for 3 years: * - indices = Array(1.05, 1.05, 1.05) @@ -54,8 +57,10 @@ case class IndexPerPeriod(indices: Array[Double]): * - cumulativeToCurrentFrom(2) = 1.05 * 1.05 = 1.1025 * - cumulativeToCurrentFrom(3) = 1.05 * 1.05 * 1.05 = 1.157625 * - * @param fromPeriod The historical period number (0 = current, positive = periods back) - * @return The cumulative factor to on-level from that period to current + * @param fromPeriod + * The historical period number (0 = current, positive = periods back) + * @return + * The cumulative factor to on-level from that period to current */ inline def cumulativeToCurrentFrom(fromPeriod: Int): Double = if fromPeriod <= 0 then 1.0 @@ -63,24 +68,28 @@ case class IndexPerPeriod(indices: Array[Double]): else cumulativeFactorsAll(fromPeriod) end cumulativeToCurrentFrom - /** - * Calculate cumulative on-leveling factors for all periods up to a given period. + /** Calculate cumulative on-leveling factors for all periods up to a given period. * - * @param upToPeriod The maximum period to calculate (exclusive) - * @return Array where result(i) is the cumulative factor from period i to current + * @param upToPeriod + * The maximum period to calculate (exclusive) + * @return + * Array where result(i) is the cumulative factor from period i to current */ inline def cumulativeFactors(upToPeriod: Int): Array[Double] = val n = math.min(upToPeriod, cumulativeFactorsAll.length) if n == cumulativeFactorsAll.length then cumulativeFactorsAll.clone() else Array.tabulate(n)(i => cumulativeFactorsAll(i)) + end if end cumulativeFactors - /** - * Apply on-leveling to an array of values, given their corresponding period labels. + /** Apply on-leveling to an array of values, given their corresponding period labels. 
* - * @param values The historical values to on-level - * @param periods The period label for each value (same length as values) - * @return Array of on-leveled values + * @param values + * The historical values to on-level + * @param periods + * The period label for each value (same length as values) + * @return + * Array of on-leveled values */ inline def onLevel(values: Array[Double], periods: Array[Int]): Array[Double] = require(values.length == periods.length, "values and periods must have the same length") @@ -90,6 +99,7 @@ case class IndexPerPeriod(indices: Array[Double]): if p <= 0 then 1.0 else if p >= cumulativeFactorsAll.length then cumulativeFactorsAll.last else cumulativeFactorsAll(p) + end if } values * (factors: Array[Double]) end onLevel @@ -98,23 +108,26 @@ end IndexPerPeriod object IndexPerPeriod: - /** - * Create an IndexPerPeriod from an array of rate changes (as percentages). + /** Create an IndexPerPeriod from an array of rate changes (as percentages). * - * @param rateChanges Array of rate changes where each value is the percentage change. - * e.g., 5.0 means a 5% increase, -3.0 means a 3% decrease. - * @return IndexPerPeriod with the rate changes converted to factors + * @param rateChanges + * Array of rate changes where each value is the percentage change. e.g., 5.0 means a 5% increase, -3.0 means a 3% + * decrease. + * @return + * IndexPerPeriod with the rate changes converted to factors */ inline def fromRateChanges(rateChanges: Array[Double]): IndexPerPeriod = IndexPerPeriod((rateChanges / 100.0) + 1.0) end fromRateChanges - /** - * Create an IndexPerPeriod with a constant rate change for all periods. + /** Create an IndexPerPeriod with a constant rate change for all periods. 
* - * @param numPeriods Number of historical periods - * @param factor The constant factor for each period (e.g., 1.05 for 5% per period) - * @return IndexPerPeriod with constant factors + * @param numPeriods + * Number of historical periods + * @param factor + * The constant factor for each period (e.g., 1.05 for 5% per period) + * @return + * IndexPerPeriod with constant factors */ inline def constant(numPeriods: Int, factor: Double): IndexPerPeriod = IndexPerPeriod(Array.fill(numPeriods)(factor)) diff --git a/vecxt_re/src/dist/Dist.scala b/vecxt_re/src/dist/Dist.scala index 5a3ec223..6d3fd522 100644 --- a/vecxt_re/src/dist/Dist.scala +++ b/vecxt_re/src/dist/Dist.scala @@ -1,55 +1,51 @@ package vecxt_re -trait Density[T] { +trait Density[T]: - /** Returns the unnormalized value of the measure*/ + /** Returns the unnormalized value of the measure */ def apply(x: T): Double - /** Returns the log unnormalized value of the measure*/ + /** Returns the log unnormalized value of the measure */ def logApply(x: T): Double = math.log(apply(x)) -} +end Density -/** - * Represents a continuous Distribution. - */ -trait ContinuousDistr[T] extends Density[T] with Rand[T] { +/** Represents a continuous Distribution. + */ +trait ContinuousDistr[T] extends Density[T] with Rand[T]: - /** Returns the probability density function at that point.*/ + /** Returns the probability density function at that point. */ def pdf(x: T): Double = math.exp(logPdf(x)) def logPdf(x: T): Double = unnormalizedLogPdf(x) - logNormalizer - /** Returns the probability density function up to a constant at that point.*/ + /** Returns the probability density function up to a constant at that point. 
*/ def unnormalizedPdf(x: T): Double = math.exp(unnormalizedLogPdf(x)) def unnormalizedLogPdf(x: T): Double def logNormalizer: Double // 1/Z where Z = exp(logNormalizer) - lazy val normalizer - : Double = math.exp(-logNormalizer) + lazy val normalizer: Double = math.exp(-logNormalizer) def apply(x: T) = unnormalizedPdf(x) override def logApply(x: T) = unnormalizedLogPdf(x) -} +end ContinuousDistr -trait HasCdf { +trait HasCdf: def probability(x: Double, y: Double): Double // Probability that P(x < X <= y) def cdf(x: Double): Double // experimental plotting support def plot(using viz.LowPriorityPlotTarget): viz.VizReturn def plotCdf(using viz.LowPriorityPlotTarget): viz.VizReturn +end HasCdf -} +trait HasInverseCdf: + def inverseCdf(p: Double): Double // Compute the quantile of p +end HasInverseCdf -trait HasInverseCdf { - def inverseCdf(p: Double): Double //Compute the quantile of p -} - -/** - * Represents a discrete Distribution - */ -trait DiscreteDistr[T] extends Density[T] with Rand[T] { +/** Represents a discrete Distribution + */ +trait DiscreteDistr[T] extends Density[T] with Rand[T]: /** Returns the probability of that draw. */ def probabilityOf(x: T): Double @@ -61,12 +57,12 @@ trait DiscreteDistr[T] extends Density[T] with Rand[T] { def apply(x: T) = unnormalizedProbabilityOf(x) override def logApply(x: T) = unnormalizedLogProbabilityOf(x) -} +end DiscreteDistr -trait HasMean[T] { +trait HasMean[T]: def mean: T -} +end HasMean -trait HasVariance[T] { +trait HasVariance[T]: def variance: T -} \ No newline at end of file +end HasVariance diff --git a/vecxt_re/src/dist/Rand.scala b/vecxt_re/src/dist/Rand.scala index ecfe1102..6e9ec7b3 100644 --- a/vecxt_re/src/dist/Rand.scala +++ b/vecxt_re/src/dist/Rand.scala @@ -1,13 +1,12 @@ package vecxt_re -/** - * A trait for monadic distributions. Provides support for use in for-comprehensions - */ -trait Rand[T] { outer => +/** A trait for monadic distributions. 
Provides support for use in for-comprehensions + */ +trait Rand[T]: + outer => - /** - * Gets one sample from the distribution. Equivalent to sample - */ + /** Gets one sample from the distribution. Equivalent to sample + */ def draw: T inline def get = draw @@ -15,84 +14,76 @@ trait Rand[T] { outer => /** Overridden by filter/map/flatmap for monadic invocations. Basically, rejection samplers will return None here */ def drawOpt: Option[T] = Some(draw) - /** - * Gets one sample from the distribution. Equivalent to get - */ + /** Gets one sample from the distribution. Equivalent to get + */ inline def sample = get - /** - * Gets n samples from the distribution. - */ + /** Gets n samples from the distribution. + */ inline def sample(n: Int): IndexedSeq[T] = IndexedSeq.fill(n)(draw) - /** - * Gets n samples from the distribution into a specified collection type. - */ - inline def sampleTo[C](n: Int)(using factory: scala.collection.Factory[T, C]): C = { + /** Gets n samples from the distribution into a specified collection type. + */ + inline def sampleTo[C](n: Int)(using factory: scala.collection.Factory[T, C]): C = val builder = factory.newBuilder builder.sizeHint(n) var i = 0 - while (i < n) { + while i < n do builder += draw i += 1 - } + end while builder.result() - } + end sampleTo - /** - * An infinitely long iterator that samples repeatedly from the Rand - * @return an iterator that repeatedly samples - */ + /** An infinitely long iterator that samples repeatedly from the Rand + * @return + * an iterator that repeatedly samples + */ inline def samples: Iterator[T] = Iterator.continually(draw) - /** - * Converts a random sampler of one type to a random sampler of another type. - * Examples: - * uniform.map(_*2) gives a Rand[Double] in the range [0,2] - * Equivalently, for(x <- uniform) yield 2*x - * - * @param f the transform to apply to the sampled value. - * - */ + /** Converts a random sampler of one type to a random sampler of another type. 
Examples: uniform.map(_*2) gives a + * Rand[Double] in the range [0,2] Equivalently, for(x <- uniform) yield 2*x + * + * @param f + * the transform to apply to the sampled value. + */ def map[E](f: T => E): Rand[E] = MappedRand(outer, f) def flatMap[E](f: T => Rand[E]): Rand[E] = FlatMappedRand(outer, f) def withFilter(p: T => Boolean): Rand[T] = FilteredRand(outer, p) - -} +end Rand private final case class MappedRand[@specialized(Int, Double) T, @specialized(Int, Double) U]( rand: Rand[T], - func: T => U) - extends Rand[U] { + func: T => U +) extends Rand[U]: override def draw: U = func(rand.draw) override def drawOpt: Option[U] = rand.drawOpt.map(func) override def map[E](f: U => E): Rand[E] = MappedRand(rand, (x: T) => f(func(x))) -} +end MappedRand private final case class FlatMappedRand[@specialized(Int, Double) T, @specialized(Int, Double) U]( rand: Rand[T], - func: T => Rand[U]) - extends Rand[U] { + func: T => Rand[U] +) extends Rand[U]: override def draw: U = func(rand.draw).draw override def drawOpt: Option[U] = rand.drawOpt.flatMap(x => func(x).drawOpt) override def flatMap[E](f: U => Rand[E]): Rand[E] = FlatMappedRand(rand, (x: T) => func(x).flatMap(f)) -} +end FlatMappedRand -private final case class FilteredRand[@specialized(Int, Double) T]( - rand: Rand[T], - predicate: T => Boolean) - extends Rand[T] { - override def draw: T = { +private final case class FilteredRand[@specialized(Int, Double) T](rand: Rand[T], predicate: T => Boolean) + extends Rand[T]: + override def draw: T = var result = rand.draw var attempts = 0 - while (!predicate(result)) { + while !predicate(result) do attempts += 1 - if (attempts > 100000) throw new RuntimeException("Rejection sampling exceeded max attempts") + if attempts > 100000 then throw new RuntimeException("Rejection sampling exceeded max attempts") + end if result = rand.draw - } + end while result - } + end draw override def drawOpt: Option[T] = rand.drawOpt.filter(predicate) -} \ No newline at end of file +end 
FilteredRand diff --git a/vecxt_re/test/src-jvm/Empirical.test.scala b/vecxt_re/test/src-jvm/Empirical.test.scala index 6241978e..93b849ad 100644 --- a/vecxt_re/test/src-jvm/Empirical.test.scala +++ b/vecxt_re/test/src-jvm/Empirical.test.scala @@ -97,6 +97,5 @@ class EmpiricalTest extends FunSuite: Empirical.equalWeights(Array(1.0, Double.NaN)) } } - end EmpiricalTest diff --git a/vecxt_re/test/src-jvm/NegBin.test.scala b/vecxt_re/test/src-jvm/NegBin.test.scala index 68aef872..64b49497 100644 --- a/vecxt_re/test/src-jvm/NegBin.test.scala +++ b/vecxt_re/test/src-jvm/NegBin.test.scala @@ -15,7 +15,7 @@ class NegBinTest extends FunSuite: val sd = math.sqrt(nb.variance) val K = (mu + 15 * sd).toInt - val sum = (0 to K).map(nb.probabilityOf).sum + val sum = (0 to K).map(nb.probabilityOf).sum assert(math.abs(sum - 1.0) < 1e-8) } @@ -138,12 +138,9 @@ class NegBinTest extends FunSuite: assertEqualsDouble(fitted.b, trueNb.b, 0.1) } - - /** - * - * - * This directly exercises the volume factors: counts drawn with v = 0.5 use scale βv = 0.4, and with v = 2.0 use βv = 1.6; the fitter must undo that scaling to recover β = 0.8. - */ + /** This directly exercises the volume factors: counts drawn with v = 0.5 use scale βv = 0.4, and with v = 2.0 use βv = + * 1.6; the fitter must undo that scaling to recover β = 0.8. + */ test("SLOW: volume-weighted MLE recovers base params with mixed volumes") { assume(localTests, "Skip heavy sampling in CI") From 6087d3486a5f0dd6727761b00c7773b7e535cea2 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 26 Jan 2026 17:09:22 +0100 Subject: [PATCH 39/75] . 
--- vecxt_re/{src => src-jvm}/dist/Dist.scala | 0 vecxt_re/{src => src-jvm}/dist/Rand.scala | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename vecxt_re/{src => src-jvm}/dist/Dist.scala (100%) rename vecxt_re/{src => src-jvm}/dist/Rand.scala (100%) diff --git a/vecxt_re/src/dist/Dist.scala b/vecxt_re/src-jvm/dist/Dist.scala similarity index 100% rename from vecxt_re/src/dist/Dist.scala rename to vecxt_re/src-jvm/dist/Dist.scala diff --git a/vecxt_re/src/dist/Rand.scala b/vecxt_re/src-jvm/dist/Rand.scala similarity index 100% rename from vecxt_re/src/dist/Rand.scala rename to vecxt_re/src-jvm/dist/Rand.scala From 10b4e944a25d6e90a04999999a13483a3462f1e9 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 26 Jan 2026 17:13:43 +0100 Subject: [PATCH 40/75] . --- vecxt_re/src/CalendarYearIndex.scala | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/vecxt_re/src/CalendarYearIndex.scala b/vecxt_re/src/CalendarYearIndex.scala index 53d32f5b..330403b1 100644 --- a/vecxt_re/src/CalendarYearIndex.scala +++ b/vecxt_re/src/CalendarYearIndex.scala @@ -1,7 +1,5 @@ package vecxt_re -import io.github.quafadas.plots.SetupVega.{*, given} -import viz.macros.VegaPlot import io.circe.syntax.* /** A calendar year-based wrapper around IndexPerPeriod for on-leveling historical data. 
@@ -88,23 +86,6 @@ end CalendarYearIndex object CalendarYearIndex: - extension (idx: CalendarYearIndex) - def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = - val linePlot2 = VegaPlot.fromResource("index.vl.json") - val cumulative = idx.onLevel(Array.fill(idx.years.length)(1.0), idx.years) - val factors = idx.years.zip(idx.indices).zip(cumulative).map { case ((year, index), cumulative) => - ( - year = year, - index = index, - missing = 1 / cumulative, - threshold = idx.suggestedNewThreshold(reportingThreshold) - ) - } - linePlot2.plot( - _.data.values := factors.asJson - ) - end extension - /** Create a CalendarYearIndex from arrays of years and their corresponding indices. Years should be provided in * descending order (most recent first). * From e49340991f14c248eb343026ff48a26ea5ca8b75 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 26 Jan 2026 17:13:55 +0100 Subject: [PATCH 41/75] . --- vecxt_re/src-jvm/plots.scala | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 669b2c92..5e2b6f41 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -8,6 +8,24 @@ object Plots: lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density + + extension (idx: CalendarYearIndex) + def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = + val linePlot2 = VegaPlot.fromResource("index.vl.json") + val cumulative = idx.onLevel(Array.fill(idx.years.length)(1.0), idx.years) + val factors = idx.years.zip(idx.indices).zip(cumulative).map { case ((year, index), cumulative) => + ( + year = year, + index = index, + missing = 1 / cumulative, + threshold = idx.suggestedNewThreshold(reportingThreshold) + ) + } + linePlot2.plot( + _.data.values := factors.asJson + ) + end extension + extension (scenario: Scenarr) 
inline def plotSeasonality(highlight: Option[(year: Int, month: Int)] = None)(using tgt: viz.LowPriorityPlotTarget From c4f73282b0c321c07cb62051872b2280951d9ee9 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 26 Jan 2026 16:15:33 +0000 Subject: [PATCH 42/75] [autofix.ci] apply automated fixes --- vecxt_re/src-jvm/plots.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 5e2b6f41..6b183d70 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -8,7 +8,6 @@ object Plots: lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density - extension (idx: CalendarYearIndex) def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = val linePlot2 = VegaPlot.fromResource("index.vl.json") From 76f99b84300e9ae036041434367a21a0833f6a13 Mon Sep 17 00:00:00 2001 From: partens Date: Mon, 26 Jan 2026 17:20:04 +0100 Subject: [PATCH 43/75] . --- vecxt_re/src/CalendarYearIndex.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/vecxt_re/src/CalendarYearIndex.scala b/vecxt_re/src/CalendarYearIndex.scala index 330403b1..6f0f12ef 100644 --- a/vecxt_re/src/CalendarYearIndex.scala +++ b/vecxt_re/src/CalendarYearIndex.scala @@ -1,7 +1,5 @@ package vecxt_re -import io.circe.syntax.* - /** A calendar year-based wrapper around IndexPerPeriod for on-leveling historical data. * * This class maps calendar years to index factors, allowing on-leveling of datasets where data points are labeled with From b7dc52afbcd95e88c951a67109b8a88eca30cffb Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Mon, 26 Jan 2026 21:49:13 +0100 Subject: [PATCH 44/75] . 
--- .vscode/tasks.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 46f8cb22..ad9b443e 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -4,7 +4,7 @@ { "label": "compiledClassesAndSemanticDbFiles", "type": "shell", - "command": "./mill __.compiledClassesAndSemanticDbFiles", + "command": "./mill __.jvm.compiledClassesAndSemanticDbFiles", "runOptions": { "runOn": "folderOpen" }, From c3c2a03e74c660e96b0a6f2fe2fc6904c1e10d1a Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Mon, 26 Jan 2026 22:48:53 +0100 Subject: [PATCH 45/75] scenarr is now a monoid --- build.mill | 3 + experiments/src/pricing_fun.scala | 2 +- laws/package.mill | 12 +- vecxt/src-js-native/LongArrays.scala | 14 +- vecxt/src-jvm/LongArrays.scala | 11 + vecxt/src/all.scala | 4 +- vecxt_re/package.mill | 14 +- vecxt_re/src/scenarr.scala | 189 ++++++++- vecxt_re/test/src-jvm/NegBin.test.scala | 2 +- .../test/src-jvm/ScenarrMonoidLawsSpec.scala | 102 +++++ vecxt_re/test/src/scenario.test.scala | 14 +- vecxt_re/test/src/scenarr.monoid.test.scala | 383 ++++++++++++++++++ vecxt_re/test/src/scenarr.test.scala | 22 +- vecxt_re/test/src/vecEquals.scala | 9 + 14 files changed, 742 insertions(+), 39 deletions(-) create mode 100644 vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala create mode 100644 vecxt_re/test/src/scenarr.monoid.test.scala diff --git a/build.mill b/build.mill index 9840f95e..6554ae09 100644 --- a/build.mill +++ b/build.mill @@ -38,6 +38,9 @@ object V: val blas: Dep = mvn"dev.ludovic.netlib:blas:3.0.4" val lapack: Dep = mvn"dev.ludovic.netlib:lapack:3.0.4" val scalaJavaTime: Dep = mvn"io.github.cquiroz::scala-java-time::2.6.0" + val catsVersion = "2.13.0" + val disciplineVersion = "2.0.0" + val scalacheckVersion = "1.17.0" end V trait VecxtPublishModule extends PublishModule, ScalaModule, ScalafixModule: diff --git a/experiments/src/pricing_fun.scala b/experiments/src/pricing_fun.scala index d81202db..02bc3afd 100644 
--- a/experiments/src/pricing_fun.scala +++ b/experiments/src/pricing_fun.scala @@ -9,7 +9,7 @@ import vecxt.BoundsCheck.DoBoundsCheck.yes val data = CSV.resource("losses.csv", CsvOpts(TypeInferrer.FromAllRows, ReadAs.Columns)) - val scen = Scenarr( + val scen = Scenarr.withGeneratedIds( iterations = data.year, days = data.day, amounts = data.amount, diff --git a/laws/package.mill b/laws/package.mill index 63298887..5b513ec3 100644 --- a/laws/package.mill +++ b/laws/package.mill @@ -7,13 +7,9 @@ import mill.api.Task.Simple object `package` extends Module: - val catsVersion = "2.10.0" - val disciplineVersion = "2.0.0" - val scalacheckVersion = "1.17.0" - trait LawsModule extends PlatformScalaModule with build.VecxtPublishModule: def mvnDeps = super.mvnDeps() ++ Seq( - mvn"org.typelevel::cats-kernel:$catsVersion" + mvn"org.typelevel::cats-kernel:${build.V.catsVersion}" ) end LawsModule @@ -27,9 +23,9 @@ object `package` extends Module: def mvnDeps = super.mvnDeps() ++ Seq( mvn"org.scalameta::munit::${build.V.munitVersion}", - mvn"org.typelevel::cats-kernel-laws:$catsVersion", - mvn"org.typelevel::discipline-munit:$disciplineVersion", - mvn"org.scalacheck::scalacheck:$scalacheckVersion" + mvn"org.typelevel::cats-kernel-laws:${build.V.catsVersion}", + mvn"org.typelevel::discipline-munit:${build.V.disciplineVersion}", + mvn"org.scalacheck::scalacheck:${build.V.scalacheckVersion}" ) override def forkArgs: Simple[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag diff --git a/vecxt/src-js-native/LongArrays.scala b/vecxt/src-js-native/LongArrays.scala index 2a693bf4..f85ce848 100644 --- a/vecxt/src-js-native/LongArrays.scala +++ b/vecxt/src-js-native/LongArrays.scala @@ -2,7 +2,19 @@ package vecxt object LongArrays: - extension (arr: Array[Long]) inline def sumSIMD: Long = ??? 
+ extension (arr: Array[Long]) + inline def select(indicies: Array[Int]): Array[Long] = + val len = indicies.length + val out = Array.ofDim[Long](len) + var i = 0 + while i < len do + out(i) = arr(indicies(i)) + i += 1 + end while + out + end select + + inline def sumSIMD: Long = ??? end extension end LongArrays diff --git a/vecxt/src-jvm/LongArrays.scala b/vecxt/src-jvm/LongArrays.scala index e774f37a..1e2da7da 100644 --- a/vecxt/src-jvm/LongArrays.scala +++ b/vecxt/src-jvm/LongArrays.scala @@ -7,6 +7,17 @@ object LongArrays: final val length = spl.length() extension (arr: Array[Long]) + inline def select(indicies: Array[Int]): Array[Long] = + val len = indicies.length + val out = Array.ofDim[Long](len) + var i = 0 + while i < len do + out(i) = arr(indicies(i)) + i += 1 + end while + out + end select + /** Computes the sum of all elements in the array using SIMD (Single Instruction, Multiple Data) operations. * * This method leverages the Vector API to perform parallel addition operations on chunks of the array, improving diff --git a/vecxt/src/all.scala b/vecxt/src/all.scala index b340baf9..58a5bbd5 100644 --- a/vecxt/src/all.scala +++ b/vecxt/src/all.scala @@ -26,6 +26,7 @@ object all: export vecxt.JvmNativeDoubleMatrix.* export vecxt.dimensionExtender.DimensionExtender.* export vecxt.IntArrays.* + export vecxt.LongArrays.* export vecxt.Determinant.* // Import determinant implementations export vecxt.Svd.* // JS and native are stubs export vecxt.Cholesky.* // JS and native are stubs @@ -35,7 +36,4 @@ object all: export vecxt.QR.* // JS and native are stubs // Random export vecxt.cosineSimilarity - - // Longs - export vecxt.LongArrays.* end all diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill index 8a59dc6e..a37a2c5f 100644 --- a/vecxt_re/package.mill +++ b/vecxt_re/package.mill @@ -36,11 +36,17 @@ object `package` extends Module: mvn"io.github.quafadas::dedav4s:0.10.5", mvn"org.apache.commons:commons-math4-core:4.0-beta1", 
mvn"org.apache.commons:commons-statistics-distribution:1.1", - mvn"org.apache.commons:commons-rng-simple:1.6" + mvn"org.apache.commons:commons-rng-simple:1.6", + mvn"org.typelevel::cats-kernel:${build.V.catsVersion}" ) object test extends VexctReTest, ScalaTests: def moduleDeps = Seq(jvm) + override def mvnDeps = super.mvnDeps() ++ Seq( + mvn"org.typelevel::cats-kernel-laws:${build.V.catsVersion}", + mvn"org.typelevel::discipline-munit:${build.V.disciplineVersion}", + mvn"org.scalacheck::scalacheck:${build.V.scalacheckVersion}" + ) override def forkArgs: Simple[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag end test end jvm @@ -48,7 +54,8 @@ object `package` extends Module: object js extends VexctReModule with build.CommonJS: def moduleDeps = Seq(build.vecxt.js) override def mvnDeps = super.mvnDeps() ++ Seq( - build.V.scalaJavaTime + build.V.scalaJavaTime, + mvn"org.typelevel::cats-kernel::${build.V.catsVersion}" ) def sources = Task(super.sources() ++ jsNativeSharedSources()) def moduleKind = ModuleKind.ESModule @@ -64,7 +71,8 @@ object `package` extends Module: object native extends VexctReModule with build.CommonNative: def moduleDeps = Seq(build.vecxt.native) override def mvnDeps = super.mvnDeps() ++ Seq( - build.V.scalaJavaTime + build.V.scalaJavaTime, + mvn"org.typelevel::cats-kernel::${build.V.catsVersion}" ) def sources = Task(super.sources() ++ jsNativeSharedSources() ++ jvmNativeSharedSources()) override def enableBsp = false diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index bd7d42ef..e275e8bb 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -1,15 +1,18 @@ package vecxt_re import vecxt.all.* +import cats.kernel.Monoid import java.time.LocalDate import java.time.temporal.ChronoUnit import java.time.Month +import scala.collection.mutable case class Scenarr( iterations: Array[Int], days: Array[Int], amounts: Array[Double], + ids: Array[Long], numberIterations: Int, threshold: Double = 0d, day1: 
LocalDate = LocalDate.of(2019, 1, 1), @@ -17,7 +20,10 @@ case class Scenarr( id: Long = scala.util.Random.nextLong(), isSorted: Boolean = false ): - assert(iterations.length == days.length && days.length == amounts.length) + assert( + iterations.length == days.length && days.length == amounts.length && amounts.length == ids.length, + s"Array lengths must match: iterations=${iterations.length}, days=${days.length}, amounts=${amounts.length}, ids=${ids.length}" + ) lazy val freq: Array[Int] = assert(isSorted, "Scenario must be sorted to compute frequency") @@ -66,8 +72,8 @@ case class Scenarr( lazy val meanLoss: Double = amounts.sum / numberIterations - lazy val itrDayAmount: Array[(itr: Int, day: Int, amnt: Double)] = - iterations.zip(days).zip(amounts).map { case ((i, d), a) => (itr = i, day = d, amnt = a) } + lazy val itrDayAmount: Array[(itr: Int, day: Int, amnt: Double, id: Long)] = + iterations.zip(days).zip(amounts).zip(ids).map { case (((i, d), a), id) => (itr = i, day = d, amnt = a, id = id) } lazy val period: (firstLoss: LocalDate, lastLoss: LocalDate) = (day1.plusDays((days.minSIMD - 1).toLong), day1.plusDays((days.maxSIMD - 1).toLong)) @@ -75,6 +81,177 @@ case class Scenarr( end Scenarr object Scenarr: + /** The empty Scenarr - identity element for the monoid. + * Combining any scenario with empty returns the original scenario unchanged. + */ + val empty: Scenarr = new Scenarr( + Array.emptyIntArray, + Array.emptyIntArray, + Array.emptyDoubleArray, + Array.emptyLongArray, + numberIterations = 0, + threshold = 0d, + day1 = LocalDate.of(2019, 1, 1), + name = "empty", + id = 0L, + isSorted = true + ) + + /** Combine two Scenarr instances following monoid laws. 
+ * + * The combination semantics are: + * - Events with matching IDs have their amounts aggregated (with validation that iteration/day match) + * - Thresholds are summed, and claims below the new threshold are filtered out + * - Day1 is the earlier of the two; the later scenario's days are adjusted to align calendar dates + * - Number of iterations must match (unless one is empty) + * - Result is always sorted by (iteration, day) + * + * @throws IllegalArgumentException if events with same ID have different iteration/day + * @throws IllegalArgumentException if numberIterations don't match (for non-empty scenarios) + */ + def combine(s1: Scenarr, s2: Scenarr): Scenarr = + // Handle empty cases - identity element + if s1.amounts.isEmpty then return s2 + if s2.amounts.isEmpty then return s1 + + // Check iteration count matches for non-empty scenarios + require( + s1.numberIterations == s2.numberIterations, + s"Cannot combine scenarios with different iteration counts: ${s1.numberIterations} vs ${s2.numberIterations}" + ) + + // Determine new day1 (earlier of the two) + val newDay1 = if s1.day1.isBefore(s2.day1) then s1.day1 else s2.day1 + + // Calculate day offsets to align both scenarios to newDay1 + val dayOffset1 = ChronoUnit.DAYS.between(newDay1, s1.day1).toInt + val dayOffset2 = ChronoUnit.DAYS.between(newDay1, s2.day1).toInt + + // Sum thresholds + val newThreshold = s1.threshold + s2.threshold + + // Build a map: id -> (iteration, adjustedDay, totalAmount) + // This aggregates amounts for events with the same ID + val idMap = mutable.HashMap.empty[Long, (Int, Int, Double)] + + // Process s1 events + var i = 0 + while i < s1.ids.length do + val id = s1.ids(i) + val iter = s1.iterations(i) + val day = s1.days(i) + dayOffset1 + val amount = s1.amounts(i) + + idMap.get(id) match + case None => + idMap(id) = (iter, day, amount) + case Some((existingIter, existingDay, existingAmount)) => + require( + existingIter == iter && existingDay == day, + s"Event with ID $id 
has inconsistent iteration/day: ($existingIter, $existingDay) vs ($iter, $day)" + ) + idMap(id) = (iter, day, existingAmount + amount) + i += 1 + end while + + // Process s2 events + i = 0 + while i < s2.ids.length do + val id = s2.ids(i) + val iter = s2.iterations(i) + val day = s2.days(i) + dayOffset2 + val amount = s2.amounts(i) + + idMap.get(id) match + case None => + idMap(id) = (iter, day, amount) + case Some((existingIter, existingDay, existingAmount)) => + require( + existingIter == iter && existingDay == day, + s"Event with ID $id has inconsistent iteration/day: ($existingIter, $existingDay) vs ($iter, $day)" + ) + idMap(id) = (iter, day, existingAmount + amount) + i += 1 + end while + + // Convert to arrays, sorted by (iteration, day), filtering by threshold + val filtered = idMap.iterator.filter(_._2._3 > newThreshold).toArray + val sorted = filtered.sortBy { case (_, (iter, day, _)) => (iter, day) } + + val finalIds = sorted.map(_._1) + val finalIterations = sorted.map(_._2._1) + val finalDays = sorted.map(_._2._2) + val finalAmounts = sorted.map(_._2._3) + + new Scenarr( + finalIterations, + finalDays, + finalAmounts, + finalIds, + s1.numberIterations, + newThreshold, + newDay1, + s"concat: [${s1.name} + ${s2.name}]", + scala.util.Random.nextLong(), + isSorted = true + ) + end combine + + /** Infix operator for combining scenarios */ + extension (s1: Scenarr) def |+|(s2: Scenarr): Scenarr = combine(s1, s2) + + /** Cats Monoid instance for Scenarr. + * + * This instance requires that all combined scenarios have the same `numberIterations`. + * The identity element is `Scenarr.empty` with `numberIterations = 0`. + * + * Important: This monoid is only valid for scenarios with matching `numberIterations`. + * Combining scenarios with different iteration counts will throw an IllegalArgumentException. 
+ * + * @param numIterations The fixed number of iterations for this monoid instance + */ + def monoidForIterations(numIterations: Int): Monoid[Scenarr] = new Monoid[Scenarr]: + def empty: Scenarr = new Scenarr( + Array.emptyIntArray, + Array.emptyIntArray, + Array.emptyDoubleArray, + Array.emptyLongArray, + numberIterations = numIterations, + threshold = 0d, + day1 = LocalDate.of(2019, 1, 1), + name = "empty", + id = 0L, + isSorted = true + ) + def combine(x: Scenarr, y: Scenarr): Scenarr = Scenarr.combine(x, y) + end monoidForIterations + + /** Default Monoid instance for Scenarr. + * Uses the general `combine` which treats empty scenarios as identity. + */ + given Monoid[Scenarr] with + def empty: Scenarr = Scenarr.empty + def combine(x: Scenarr, y: Scenarr): Scenarr = Scenarr.combine(x, y) + end given + + /** Create a Scenarr with automatically generated random IDs for each event. + * Use this factory when you don't need to specify event IDs explicitly. + */ + def withGeneratedIds( + iterations: Array[Int], + days: Array[Int], + amounts: Array[Double], + numberIterations: Int, + threshold: Double = 0d, + day1: LocalDate = LocalDate.of(2019, 1, 1), + name: String = "", + id: Long = scala.util.Random.nextLong(), + isSorted: Boolean = false + ): Scenarr = + val ids = Array.fill(iterations.length)(scala.util.Random.nextLong()) + new Scenarr(iterations, days, amounts, ids, numberIterations, threshold, day1, name, id, isSorted) + end withGeneratedIds + extension (scenario: Scenarr) inline def sorted: Scenarr = val indicies = scenario.iterations.zipWithIndex @@ -89,6 +266,7 @@ object Scenarr: scenario.iterations.select(indicies), scenario.days.select(indicies), scenario.amounts.select(indicies), + scenario.ids.select(indicies), scenario.numberIterations, scenario.threshold, scenario.day1, @@ -106,6 +284,7 @@ object Scenarr: scenario.iterations.mask(idx), scenario.days.mask(idx), scenario.amounts.mask(idx), + scenario.ids.mask(idx), i, scenario.threshold, 
scenario.day1, @@ -116,7 +295,7 @@ object Scenarr: end takeFirstNIterations inline def scaleAmntBy(scale: Double): Scenarr = - scenario.copy(amounts = scenario.amounts * scale, threshold = scenario.threshold * scale) + scenario.copy(amounts = scenario.amounts * scale, ids = scenario.ids, threshold = scenario.threshold * scale) inline def iteration(num: Int) = assert(num > 0 && num <= scenario.numberIterations) @@ -126,6 +305,7 @@ object Scenarr: scenario.iterations.mask(idx), scenario.days.mask(idx), scenario.amounts.mask(idx), + scenario.ids.mask(idx), scenario.numberIterations, scenario.threshold, scenario.day1, @@ -167,6 +347,7 @@ object Scenarr: scenario.iterations.mask(idx)(using false), scenario.days.mask(idx)(using false), scenario.amounts.mask(idx)(using false), + scenario.ids.mask(idx)(using false), scenario.numberIterations, newThresh, scenario.day1, diff --git a/vecxt_re/test/src-jvm/NegBin.test.scala b/vecxt_re/test/src-jvm/NegBin.test.scala index 64b49497..89318eca 100644 --- a/vecxt_re/test/src-jvm/NegBin.test.scala +++ b/vecxt_re/test/src-jvm/NegBin.test.scala @@ -6,7 +6,7 @@ import org.apache.commons.statistics.distribution.PoissonDistribution class NegBinTest extends FunSuite: - inline val localTests = true + inline val localTests = false test("pmf approximately normalizes") { val nb = NegativeBinomial(a = 2.5, b = 1.2) diff --git a/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala b/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala new file mode 100644 index 00000000..be942c6b --- /dev/null +++ b/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala @@ -0,0 +1,102 @@ +package vecxt_re + +import cats.kernel.Eq +import cats.kernel.laws.discipline.MonoidTests +import munit.DisciplineSuite +import org.scalacheck.{Arbitrary, Gen} +import java.time.LocalDate + +/** Law-based tests for Scenarr Monoid using cats-kernel-laws and discipline. 
+ * + * These tests verify that the Scenarr Monoid satisfies all required laws: + * - Left identity: empty |+| a = a + * - Right identity: a |+| empty = a + * - Associativity: (a |+| b) |+| c = a |+| (b |+| c) + */ +class ScenarrMonoidLawsSpec extends DisciplineSuite: + + // Fixed parameters for testing + private val TestIterations = 50 + private val TestDay1 = LocalDate.of(2019, 1, 1) + + // Generator for positive amounts (above any reasonable threshold) + private val amountGen: Gen[Double] = Gen.choose(100.0, 10000.0) + + // Generator for days (1-365) + private val dayGen: Gen[Int] = Gen.choose(1, 365) + + // Generator for iterations (1-TestIterations) + private val iterationGen: Gen[Int] = Gen.choose(1, TestIterations) + + // Generator for event count (0-20 events) + private val eventCountGen: Gen[Int] = Gen.choose(0, 25) + + // Small ID space to encourage clashes across Scenarrs being combined + private val idGen: Gen[Long] = Gen.choose(1L, 100L) + + /** Deterministically derive (iteration, day) from an ID. + * This ensures that when the same ID appears in different Scenarrs, + * it always has the same iteration and day - making the combine valid. + * Amounts can differ and will be aggregated. + */ + private def iterationForId(id: Long): Int = ((id % TestIterations) + 1).toInt + private def dayForId(id: Long): Int = ((id % 365) + 1).toInt + + /** Generate a valid Scenarr with fixed numberIterations and day1. + * + * Uses a small ID space (1-100) to encourage clashes across Scenarrs. + * Iteration and day are derived deterministically from ID, so clashing + * IDs always have consistent (iteration, day) pairs - the amounts get + * aggregated as expected by the monoid. 
+ */ + private val scenarrrGen: Gen[Scenarr] = for + n <- eventCountGen + ids <- Gen.listOfN(n, idGen).map(_.distinct) // unique within this Scenarr + amounts <- Gen.listOfN(ids.length, amountGen) + threshold <- Gen.const(0.0) // Use 0 threshold to avoid filtering + yield + val iterations = ids.map(iterationForId).toArray + val days = ids.map(dayForId).toArray + new Scenarr( + iterations, + days, + amounts.toArray, + ids.toArray, + TestIterations, + threshold, + TestDay1, + s"test-${ids.length}", + scala.util.Random.nextLong(), + isSorted = false + ) + + given Arbitrary[Scenarr] = Arbitrary(scenarrrGen) + + /** Equality for Scenarr that compares the semantic content. + * + * Two Scenarrs are equal if they have: + * - Same numberIterations + * - Same threshold + * - Same day1 + * - Same events (id -> (iteration, day, amount)) regardless of order + */ + given Eq[Scenarr] = Eq.instance { (a, b) => + if a.numberIterations != b.numberIterations then false + else if Math.abs(a.threshold - b.threshold) > 1e-10 then false + else if a.day1 != b.day1 then false + else if a.ids.length != b.ids.length then false + else + // Compare events by creating a map of id -> (iter, day, amount) + a.iterations.sameElements(b.iterations) && + a.days.sameElements(b.days) && + a.amounts.zip(b.amounts).forall((x, y) => Math.abs(x - y) < 1e-10) && + a.ids.sameElements(b.ids) + } + + // Use the fixed-iteration monoid for law testing + given cats.kernel.Monoid[Scenarr] = Scenarr.monoidForIterations(TestIterations) + + // Run all Monoid law tests + checkAll("Scenarr.MonoidLaws", MonoidTests[Scenarr].monoid) + +end ScenarrMonoidLawsSpec diff --git a/vecxt_re/test/src/scenario.test.scala b/vecxt_re/test/src/scenario.test.scala index 6a974580..5cbee954 100644 --- a/vecxt_re/test/src/scenario.test.scala +++ b/vecxt_re/test/src/scenario.test.scala @@ -45,7 +45,7 @@ class ScenarioSuite extends munit.FunSuite: } test("scaleAmntBy doubles amounts and threshold, preserves other fields"): - val base = 
Scenarr( + val base = Scenarr.withGeneratedIds( iterations = Array(1, 1, 2), days = Array(1, 2, 3), amounts = Array(100.0, 200.0, 300.0), @@ -65,14 +65,14 @@ class ScenarioSuite extends munit.FunSuite: assertEquals(scaled.isSorted, base.isSorted) test("scaleAmntBy with zero scale results in zero amounts and zero threshold"): - val base = Scenarr(Array(1), Array(1), Array(123.0), numberIterations = 1, threshold = 7.5) + val base = Scenarr.withGeneratedIds(Array(1), Array(1), Array(123.0), numberIterations = 1, threshold = 7.5) val scaled0 = base.scaleAmntBy(0.0) assertEquals(scaled0.amounts.toSeq, Seq(0.0)) assertEquals(scaled0.threshold, 0.0) test("scaleAmntBy supports negative scaling and does not mutate original"): val originalAmounts = Array(10.0, 20.0, 30.0) - val base = Scenarr(Array(1, 1, 1), Array(1, 2, 3), originalAmounts.clone(), numberIterations = 1, threshold = 5.0) + val base = Scenarr.withGeneratedIds(Array(1, 1, 1), Array(1, 2, 3), originalAmounts.clone(), numberIterations = 1, threshold = 5.0) val scaled = base.scaleAmntBy(-1.5) assertEquals(scaled.amounts.toSeq, Seq(-15.0, -30.0, -45.0)) assertEquals(scaled.threshold, -7.5) @@ -81,7 +81,7 @@ class ScenarioSuite extends munit.FunSuite: assertEquals(base.threshold, 5.0) test("applyThreshold increases threshold and filters claims"): - val base = Scenarr( + val base = Scenarr.withGeneratedIds( iterations = Array(1, 2, 3), days = Array(10, 20, 30), amounts = Array(10.0, 20.0, 30.0), @@ -100,12 +100,12 @@ class ScenarioSuite extends munit.FunSuite: assertEquals(base.threshold, 5.0) test("applyThreshold throws if newThresh is not greater than current threshold"): - val base2 = Scenarr(Array(1), Array(1), Array(100.0), numberIterations = 1, threshold = 50.0) + val base2 = Scenarr.withGeneratedIds(Array(1), Array(1), Array(100.0), numberIterations = 1, threshold = 50.0) val ex = intercept[Exception](base2.applyThreshold(50.0)) assert(ex.getMessage.contains("Threshold may only be increased")) 
test("applyThreshold may result in no claims"): - val base3 = Scenarr(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 5.0) + val base3 = Scenarr.withGeneratedIds(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 5.0) val appliedEmpty = base3.applyThreshold(100.0) assertEquals(appliedEmpty.amounts.toSeq, Seq()) assertEquals(appliedEmpty.iterations.toSeq, Seq()) @@ -113,7 +113,7 @@ class ScenarioSuite extends munit.FunSuite: assertEquals(appliedEmpty.threshold, 100.0) test("claimDates maps day 1 to day1 property"): - val base = Scenarr( + val base = Scenarr.withGeneratedIds( iterations = Array(1, 2), days = Array(1, 100), amounts = Array(10.0, 20.0), diff --git a/vecxt_re/test/src/scenarr.monoid.test.scala b/vecxt_re/test/src/scenarr.monoid.test.scala new file mode 100644 index 00000000..bcb2d6e0 --- /dev/null +++ b/vecxt_re/test/src/scenarr.monoid.test.scala @@ -0,0 +1,383 @@ +package vecxt_re + +import munit.FunSuite +import java.time.LocalDate +import cats.kernel.Monoid + +class ScenarrMonoidSuite extends FunSuite: + + test("cats Monoid instance is available via given") { + val monoid = summon[Monoid[Scenarr]] + assertEquals(monoid.empty.amounts.length, 0) + assertEquals(monoid.empty.isSorted, true) + } + + test("cats Monoid.combine works like |+|") { + val s1 = Scenarr.withGeneratedIds(Array(1), Array(10), Array(100.0), numberIterations = 1) + val s2 = Scenarr.withGeneratedIds(Array(1), Array(20), Array(200.0), numberIterations = 1) + + val monoid = summon[Monoid[Scenarr]] + val result = monoid.combine(s1, s2) + + assertEquals(result.amounts.length, 2) + assertEquals(result.isSorted, true) + } + + test("cats Monoid.combineAll works for multiple scenarios") { + val scenarios = List( + new Scenarr(Array(1), Array(10), Array(100.0), Array(1L), numberIterations = 2), + new Scenarr(Array(1), Array(20), Array(200.0), Array(2L), numberIterations = 2), + new Scenarr(Array(2), Array(15), Array(150.0), Array(3L), 
numberIterations = 2) + ) + + val monoid = Scenarr.monoidForIterations(2) + val result = monoid.combineAll(scenarios) + + assertEquals(result.amounts.length, 3) + assertEquals(result.amounts.sum, 450.0) + } + + test("empty is left identity: empty |+| s = s") { + val s = Scenarr.withGeneratedIds( + Array(1, 2), + Array(10, 20), + Array(100.0, 200.0), + numberIterations = 2, + threshold = 5.0, + day1 = LocalDate.of(2020, 6, 15), + name = "test" + ) + + val result = Scenarr.empty |+| s + assertEquals(result.iterations.toSeq, s.iterations.toSeq) + assertEquals(result.days.toSeq, s.days.toSeq) + assertEquals(result.amounts.toSeq, s.amounts.toSeq) + assertEquals(result.numberIterations, s.numberIterations) + assertEquals(result.threshold, s.threshold) + assertEquals(result.day1, s.day1) + } + + test("empty is right identity: s |+| empty = s") { + val s = Scenarr.withGeneratedIds( + Array(1, 2), + Array(10, 20), + Array(100.0, 200.0), + numberIterations = 2, + threshold = 5.0, + day1 = LocalDate.of(2020, 6, 15), + name = "test" + ) + + val result = s |+| Scenarr.empty + assertEquals(result.iterations.toSeq, s.iterations.toSeq) + assertEquals(result.days.toSeq, s.days.toSeq) + assertEquals(result.amounts.toSeq, s.amounts.toSeq) + assertEquals(result.numberIterations, s.numberIterations) + assertEquals(result.threshold, s.threshold) + assertEquals(result.day1, s.day1) + } + + test("empty |+| empty = empty") { + val result = Scenarr.empty |+| Scenarr.empty + assertEquals(result.amounts.length, 0) + assertEquals(result.isSorted, true) + } + + test("combining disjoint events concatenates them") { + val s1 = Scenarr.withGeneratedIds( + Array(1, 1), + Array(10, 20), + Array(100.0, 200.0), + numberIterations = 2, + threshold = 0.0, + name = "s1" + ) + val s2 = Scenarr.withGeneratedIds( + Array(2, 2), + Array(15, 25), + Array(150.0, 250.0), + numberIterations = 2, + threshold = 0.0, + name = "s2" + ) + + val result = s1 |+| s2 + assertEquals(result.amounts.length, 4) + 
assertEquals(result.isSorted, true) + // Should be sorted by (iteration, day) + assertEquals(result.iterations.toSeq, Seq(1, 1, 2, 2)) + } + + test("events with same ID aggregate their amounts") { + val sharedId = 12345L + val s1 = new Scenarr( + Array(1), + Array(10), + Array(100.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s1" + ) + val s2 = new Scenarr( + Array(1), + Array(10), + Array(50.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s2" + ) + + val result = s1 |+| s2 + assertEquals(result.amounts.length, 1) + assertEquals(result.amounts(0), 150.0) // 100 + 50 + assertEquals(result.ids(0), sharedId) + } + + test("same ID with different iteration throws exception") { + val sharedId = 12345L + val s1 = new Scenarr( + Array(1), + Array(10), + Array(100.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s1" + ) + val s2 = new Scenarr( + Array(2), // different iteration! + Array(10), + Array(50.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s2" + ) + + intercept[IllegalArgumentException] { + s1 |+| s2 + } + } + + test("same ID with different day throws exception") { + val sharedId = 12345L + val s1 = new Scenarr( + Array(1), + Array(10), + Array(100.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s1" + ) + val s2 = new Scenarr( + Array(1), + Array(20), // different day! 
+ Array(50.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s2" + ) + + intercept[IllegalArgumentException] { + s1 |+| s2 + } + } + + test("different numberIterations throws exception") { + val s1 = Scenarr.withGeneratedIds(Array(1), Array(10), Array(100.0), numberIterations = 2) + val s2 = Scenarr.withGeneratedIds(Array(1), Array(10), Array(100.0), numberIterations = 3) + + intercept[IllegalArgumentException] { + s1 |+| s2 + } + } + + test("thresholds are summed and claims filtered") { + val s1 = Scenarr.withGeneratedIds( + Array(1, 1), + Array(10, 20), + Array(30.0, 100.0), + numberIterations = 2, + threshold = 10.0 + ) + val s2 = Scenarr.withGeneratedIds( + Array(2), + Array(15), + Array(50.0), + numberIterations = 2, + threshold = 15.0 + ) + + val result = s1 |+| s2 + assertEquals(result.threshold, 25.0) // 10 + 15 + // Only claims > 25 survive: 30.0, 100.0, 50.0 all > 25 + assertEquals(result.amounts.length, 3) + } + + test("threshold filtering removes small claims") { + val s1 = Scenarr.withGeneratedIds( + Array(1), + Array(10), + Array(20.0), // will be filtered: 20 <= 25 + numberIterations = 2, + threshold = 10.0 + ) + val s2 = Scenarr.withGeneratedIds( + Array(2), + Array(15), + Array(50.0), // survives: 50 > 25 + numberIterations = 2, + threshold = 15.0 + ) + + val result = s1 |+| s2 + assertEquals(result.threshold, 25.0) + assertEquals(result.amounts.length, 1) + assertEquals(result.amounts(0), 50.0) + } + + test("day1 is the earlier of the two and days are adjusted") { + val earlierDay1 = LocalDate.of(2019, 1, 1) + val laterDay1 = LocalDate.of(2019, 1, 11) // 10 days later + + val s1 = Scenarr.withGeneratedIds( + Array(1), + Array(5), // day 5 relative to 2019-01-01 = Jan 5 + Array(100.0), + numberIterations = 1, + day1 = earlierDay1 + ) + val s2 = Scenarr.withGeneratedIds( + Array(1), + Array(1), // day 1 relative to 2019-01-11 = Jan 11, which is day 11 relative to Jan 1 + Array(200.0), + 
numberIterations = 1, + day1 = laterDay1 + ) + + val result = s1 |+| s2 + assertEquals(result.day1, earlierDay1) + // s1's day 5 stays as 5 + // s2's day 1 becomes 1 + 10 = 11 + assert(result.days.contains(5)) + assert(result.days.contains(11)) + } + + test("day1 adjustment works when s2 has earlier day1") { + val earlierDay1 = LocalDate.of(2019, 1, 1) + val laterDay1 = LocalDate.of(2019, 1, 11) + + val s1 = Scenarr.withGeneratedIds( + Array(1), + Array(1), // day 1 relative to Jan 11 = Jan 11 + Array(100.0), + numberIterations = 1, + day1 = laterDay1 + ) + val s2 = Scenarr.withGeneratedIds( + Array(1), + Array(5), // day 5 relative to Jan 1 = Jan 5 + Array(200.0), + numberIterations = 1, + day1 = earlierDay1 + ) + + val result = s1 |+| s2 + assertEquals(result.day1, earlierDay1) + // s1's day 1 becomes 1 + 10 = 11 + // s2's day 5 stays as 5 + assert(result.days.contains(5)) + assert(result.days.contains(11)) + } + + test("result is always sorted by iteration then day") { + val s1 = Scenarr.withGeneratedIds( + Array(2, 1), + Array(30, 10), + Array(200.0, 100.0), + numberIterations = 2 + ) + val s2 = Scenarr.withGeneratedIds( + Array(1, 2), + Array(20, 5), + Array(150.0, 50.0), + numberIterations = 2 + ) + + val result = s1 |+| s2 + assertEquals(result.isSorted, true) + // Expected order: (1,10), (1,20), (2,5), (2,30) + assertEquals(result.iterations.toSeq, Seq(1, 1, 2, 2)) + assertEquals(result.days.toSeq, Seq(10, 20, 5, 30)) + } + + test("name is formatted as concat: [s1 + s2]") { + val s1 = Scenarr.withGeneratedIds(Array(1), Array(10), Array(100.0), numberIterations = 1, name = "alpha") + val s2 = Scenarr.withGeneratedIds(Array(1), Array(20), Array(200.0), numberIterations = 1, name = "beta") + + val result = s1 |+| s2 + assertEquals(result.name, "concat: [alpha + beta]") + } + + test("associativity: (a |+| b) |+| c = a |+| (b |+| c)") { + // Use explicit IDs to avoid ID collisions that could cause issues + val a = new Scenarr( + Array(1), + Array(10), + 
Array(100.0), + Array(1L), + numberIterations = 2, + threshold = 0.0 + ) + val b = new Scenarr( + Array(1), + Array(20), + Array(200.0), + Array(2L), + numberIterations = 2, + threshold = 0.0 + ) + val c = new Scenarr( + Array(2), + Array(15), + Array(150.0), + Array(3L), + numberIterations = 2, + threshold = 0.0 + ) + + val leftAssoc = (a |+| b) |+| c + val rightAssoc = a |+| (b |+| c) + + // Core data should match + assertEquals(leftAssoc.iterations.toSeq, rightAssoc.iterations.toSeq) + assertEquals(leftAssoc.days.toSeq, rightAssoc.days.toSeq) + assertEquals(leftAssoc.amounts.toSeq, rightAssoc.amounts.toSeq) + assertEquals(leftAssoc.ids.sorted.toSeq, rightAssoc.ids.sorted.toSeq) + assertEquals(leftAssoc.numberIterations, rightAssoc.numberIterations) + assertEquals(leftAssoc.threshold, rightAssoc.threshold) + assertEquals(leftAssoc.day1, rightAssoc.day1) + } + + test("ID aggregation across multiple combines") { + val sharedId = 999L + val a = new Scenarr(Array(1), Array(10), Array(100.0), Array(sharedId), numberIterations = 1) + val b = new Scenarr(Array(1), Array(10), Array(50.0), Array(sharedId), numberIterations = 1) + val c = new Scenarr(Array(1), Array(10), Array(25.0), Array(sharedId), numberIterations = 1) + + val result = a |+| b |+| c + assertEquals(result.amounts.length, 1) + assertEquals(result.amounts(0), 175.0) // 100 + 50 + 25 + } + +end ScenarrMonoidSuite diff --git a/vecxt_re/test/src/scenarr.test.scala b/vecxt_re/test/src/scenarr.test.scala index df2e03e8..bd7778df 100644 --- a/vecxt_re/test/src/scenarr.test.scala +++ b/vecxt_re/test/src/scenarr.test.scala @@ -8,7 +8,7 @@ class ScenarrSuite extends FunSuite: test("constructor should enforce array length equality") { intercept[AssertionError] { - Scenarr(Array(1), Array(1, 2), Array(1.0), 2) + Scenarr.withGeneratedIds(Array(1), Array(1, 2), Array(1.0), 2) } } @@ -16,7 +16,7 @@ class ScenarrSuite extends FunSuite: val iterations = Array(1, 1, 1, 2, 3) val days = Array(1, 2, 3, 4, 5) val amounts = 
Array(10.0, 20.0, 30.0, 40.0, 50.0) - val sc = Scenarr(iterations, days, amounts, numberIterations = 3, isSorted = true) + val sc = Scenarr.withGeneratedIds(iterations, days, amounts, numberIterations = 3, isSorted = true) // Expected counts per iteration 1..3 => [3,1,1] val expectedFreq = Array(3, 1, 1) @@ -37,7 +37,7 @@ class ScenarrSuite extends FunSuite: val iterations = Array(1, 2, 1, 3, 1) val days = Array(1, 2, 3, 4, 5) val amounts = Array(10.0, 20.0, 30.0, 40.0, 50.0) - val sc = Scenarr(iterations, days, amounts, numberIterations = 3) + val sc = Scenarr.withGeneratedIds(iterations, days, amounts, numberIterations = 3) val sortedScen = sc.sorted @@ -53,7 +53,7 @@ class ScenarrSuite extends FunSuite: test("claimDates and monthYear mapping") { val days = Array(1, 2) - val sc = Scenarr(Array(1, 1), days, Array(10.0, 20.0), numberIterations = 1) + val sc = Scenarr.withGeneratedIds(Array(1, 1), days, Array(10.0, 20.0), numberIterations = 1) val claimDates = sc.claimDates assertEquals(claimDates(0), LocalDate.of(2019, 1, 1)) assertEquals(claimDates(1), LocalDate.of(2019, 1, 2)) @@ -64,14 +64,14 @@ class ScenarrSuite extends FunSuite: } test("numSeasons accounts for days spanning multiple years") { - val sc = Scenarr(Array(1, 1), Array(1, 400), Array(1.0, 2.0), numberIterations = 1) + val sc = Scenarr.withGeneratedIds(Array(1, 1), Array(1, 400), Array(1.0, 2.0), numberIterations = 1) println(sc.numSeasons) assertEquals(sc.numSeasons, 2) } test("itrDayAmount and period produce expected tuples") { val days = Array(10, 100, 365, 366) - val sc = Scenarr(Array(1, 1, 1, 1), days, Array(5.0, 6.0, 7.0, 8.0), numberIterations = 1) + val sc = Scenarr.withGeneratedIds(Array(1, 1, 1, 1), days, Array(5.0, 6.0, 7.0, 8.0), numberIterations = 1) val itda = sc.itrDayAmount assertVecEquals(itda.map(_.itr), Array(1, 1, 1, 1)) assertVecEquals(itda.map(_.day), days) @@ -83,7 +83,7 @@ class ScenarrSuite extends FunSuite: } test("hasOccurence false for empty amounts") { - val sc = 
Scenarr(Array.emptyIntArray, Array.emptyIntArray, Array.emptyDoubleArray, numberIterations = 0) + val sc = Scenarr.withGeneratedIds(Array.emptyIntArray, Array.emptyIntArray, Array.emptyDoubleArray, numberIterations = 0) assertEquals(sc.hasOccurence, false) } @@ -91,7 +91,7 @@ class ScenarrSuite extends FunSuite: val iter = Array(2, 1, 2) val days = Array(10, 5, 8) val amts = Array(20.0, 10.0, 15.0) - val sc = Scenarr(iter, days, amts, numberIterations = 2, isSorted = false) + val sc = Scenarr.withGeneratedIds(iter, days, amts, numberIterations = 2, isSorted = false) val ssorted = sc.sorted assertEquals(ssorted.isSorted, true) @@ -101,7 +101,7 @@ class ScenarrSuite extends FunSuite: } test("scaleAmntBy multiplies amounts and threshold") { - val sc = Scenarr(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 100.0) + val sc = Scenarr.withGeneratedIds(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 100.0) val scaled = sc.scaleAmntBy(2.0) assertEquals(scaled.threshold, 200.0) assertEquals(scaled.amounts.toList, Array(20.0, 40.0).toList) @@ -111,14 +111,14 @@ class ScenarrSuite extends FunSuite: val iters = Array(2, 1, 2, 1) val days = Array(1, 2, 3, 4) val amts = Array(10.0, 11.0, 12.0, 13.0) - val sc = Scenarr(iters, days, amts, numberIterations = 2) + val sc = Scenarr.withGeneratedIds(iters, days, amts, numberIterations = 2) val only2 = sc.iteration(2) assert(only2.iterations.forall(_ == 2)) assertEquals(only2.amounts.toList, Array(10.0, 12.0).toList) } test("applyThreshold filters amounts and only allows increasing threshold") { - val sc = Scenarr(Array(1, 1, 1), Array(1, 2, 3), Array(10.0, 50.0, 200.0), numberIterations = 1, threshold = 0.0) + val sc = Scenarr.withGeneratedIds(Array(1, 1, 1), Array(1, 2, 3), Array(10.0, 50.0, 200.0), numberIterations = 1, threshold = 0.0) val filtered = sc.applyThreshold(49.0) // keep > 49 => 50 and 200 assertEquals(filtered.amounts.toList, Array(50.0, 200.0).toList) diff 
--git a/vecxt_re/test/src/vecEquals.scala b/vecxt_re/test/src/vecEquals.scala index 166df902..924a940c 100644 --- a/vecxt_re/test/src/vecEquals.scala +++ b/vecxt_re/test/src/vecEquals.scala @@ -20,3 +20,12 @@ def assertVecEquals(v1: Array[Int], v2: Array[Int])(implicit loc: munit.Location i += 1 end while end assertVecEquals + +def assertVecEquals(v1: Array[Long], v2: Array[Long])(implicit loc: munit.Location): Unit = + assert(v1.length == v2.length) + var i: Int = 0; + while i < v1.length do + assertEquals(v1(i), v2(i), clue = s"at index $i") + i += 1 + end while +end assertVecEquals From 4957d321ff500ad1125ebc4b0648e38ee601f999 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 26 Jan 2026 21:50:06 +0000 Subject: [PATCH 46/75] [autofix.ci] apply automated fixes --- experiments/src/pricing_fun.scala | 16 +++++---- vecxt/src-jvm/LongArrays.scala | 1 + vecxt_re/src/scenarr.scala | 33 +++++++++++-------- .../test/src-jvm/ScenarrMonoidLawsSpec.scala | 12 +++---- vecxt_re/test/src/scenario.test.scala | 11 +++++-- vecxt_re/test/src/scenarr.test.scala | 14 ++++++-- 6 files changed, 55 insertions(+), 32 deletions(-) diff --git a/experiments/src/pricing_fun.scala b/experiments/src/pricing_fun.scala index 02bc3afd..4635fd16 100644 --- a/experiments/src/pricing_fun.scala +++ b/experiments/src/pricing_fun.scala @@ -9,13 +9,15 @@ import vecxt.BoundsCheck.DoBoundsCheck.yes val data = CSV.resource("losses.csv", CsvOpts(TypeInferrer.FromAllRows, ReadAs.Columns)) - val scen = Scenarr.withGeneratedIds( - iterations = data.year, - days = data.day, - amounts = data.amount, - numberIterations = 10, - threshold = 0.0 - ).sorted + val scen = Scenarr + .withGeneratedIds( + iterations = data.year, + days = data.day, + amounts = data.amount, + numberIterations = 10, + threshold = 0.0 + ) + .sorted // val scen1 = scen.iteration(1).copy(numberIterations = 1) diff --git a/vecxt/src-jvm/LongArrays.scala 
b/vecxt/src-jvm/LongArrays.scala index 1e2da7da..802eb1f8 100644 --- a/vecxt/src-jvm/LongArrays.scala +++ b/vecxt/src-jvm/LongArrays.scala @@ -44,6 +44,7 @@ object LongArrays: i += 1 end while total + end sumSIMD end extension end LongArrays diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index e275e8bb..a9218670 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -81,8 +81,8 @@ case class Scenarr( end Scenarr object Scenarr: - /** The empty Scenarr - identity element for the monoid. - * Combining any scenario with empty returns the original scenario unchanged. + /** The empty Scenarr - identity element for the monoid. Combining any scenario with empty returns the original + * scenario unchanged. */ val empty: Scenarr = new Scenarr( Array.emptyIntArray, @@ -106,13 +106,17 @@ object Scenarr: * - Number of iterations must match (unless one is empty) * - Result is always sorted by (iteration, day) * - * @throws IllegalArgumentException if events with same ID have different iteration/day - * @throws IllegalArgumentException if numberIterations don't match (for non-empty scenarios) + * @throws IllegalArgumentException + * if events with same ID have different iteration/day + * @throws IllegalArgumentException + * if numberIterations don't match (for non-empty scenarios) */ def combine(s1: Scenarr, s2: Scenarr): Scenarr = // Handle empty cases - identity element if s1.amounts.isEmpty then return s2 + end if if s2.amounts.isEmpty then return s1 + end if // Check iteration count matches for non-empty scenarios require( @@ -151,6 +155,7 @@ object Scenarr: s"Event with ID $id has inconsistent iteration/day: ($existingIter, $existingDay) vs ($iter, $day)" ) idMap(id) = (iter, day, existingAmount + amount) + end match i += 1 end while @@ -171,6 +176,7 @@ object Scenarr: s"Event with ID $id has inconsistent iteration/day: ($existingIter, $existingDay) vs ($iter, $day)" ) idMap(id) = (iter, day, existingAmount + amount) + end match i 
+= 1 end while @@ -199,16 +205,18 @@ object Scenarr: /** Infix operator for combining scenarios */ extension (s1: Scenarr) def |+|(s2: Scenarr): Scenarr = combine(s1, s2) + end extension /** Cats Monoid instance for Scenarr. * - * This instance requires that all combined scenarios have the same `numberIterations`. - * The identity element is `Scenarr.empty` with `numberIterations = 0`. + * This instance requires that all combined scenarios have the same `numberIterations`. The identity element is + * `Scenarr.empty` with `numberIterations = 0`. * - * Important: This monoid is only valid for scenarios with matching `numberIterations`. - * Combining scenarios with different iteration counts will throw an IllegalArgumentException. + * Important: This monoid is only valid for scenarios with matching `numberIterations`. Combining scenarios with + * different iteration counts will throw an IllegalArgumentException. * - * @param numIterations The fixed number of iterations for this monoid instance + * @param numIterations + * The fixed number of iterations for this monoid instance */ def monoidForIterations(numIterations: Int): Monoid[Scenarr] = new Monoid[Scenarr]: def empty: Scenarr = new Scenarr( @@ -226,16 +234,15 @@ object Scenarr: def combine(x: Scenarr, y: Scenarr): Scenarr = Scenarr.combine(x, y) end monoidForIterations - /** Default Monoid instance for Scenarr. - * Uses the general `combine` which treats empty scenarios as identity. + /** Default Monoid instance for Scenarr. Uses the general `combine` which treats empty scenarios as identity. */ given Monoid[Scenarr] with def empty: Scenarr = Scenarr.empty def combine(x: Scenarr, y: Scenarr): Scenarr = Scenarr.combine(x, y) end given - /** Create a Scenarr with automatically generated random IDs for each event. - * Use this factory when you don't need to specify event IDs explicitly. + /** Create a Scenarr with automatically generated random IDs for each event. 
Use this factory when you don't need to + * specify event IDs explicitly. */ def withGeneratedIds( iterations: Array[Int], diff --git a/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala b/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala index be942c6b..61733897 100644 --- a/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala +++ b/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala @@ -34,19 +34,17 @@ class ScenarrMonoidLawsSpec extends DisciplineSuite: // Small ID space to encourage clashes across Scenarrs being combined private val idGen: Gen[Long] = Gen.choose(1L, 100L) - /** Deterministically derive (iteration, day) from an ID. - * This ensures that when the same ID appears in different Scenarrs, - * it always has the same iteration and day - making the combine valid. - * Amounts can differ and will be aggregated. + /** Deterministically derive (iteration, day) from an ID. This ensures that when the same ID appears in different + * Scenarrs, it always has the same iteration and day - making the combine valid. Amounts can differ and will be + * aggregated. */ private def iterationForId(id: Long): Int = ((id % TestIterations) + 1).toInt private def dayForId(id: Long): Int = ((id % 365) + 1).toInt /** Generate a valid Scenarr with fixed numberIterations and day1. * - * Uses a small ID space (1-100) to encourage clashes across Scenarrs. - * Iteration and day are derived deterministically from ID, so clashing - * IDs always have consistent (iteration, day) pairs - the amounts get + * Uses a small ID space (1-100) to encourage clashes across Scenarrs. Iteration and day are derived + * deterministically from ID, so clashing IDs always have consistent (iteration, day) pairs - the amounts get * aggregated as expected by the monoid. 
*/ private val scenarrrGen: Gen[Scenarr] = for diff --git a/vecxt_re/test/src/scenario.test.scala b/vecxt_re/test/src/scenario.test.scala index 5cbee954..85b54ab3 100644 --- a/vecxt_re/test/src/scenario.test.scala +++ b/vecxt_re/test/src/scenario.test.scala @@ -72,7 +72,13 @@ class ScenarioSuite extends munit.FunSuite: test("scaleAmntBy supports negative scaling and does not mutate original"): val originalAmounts = Array(10.0, 20.0, 30.0) - val base = Scenarr.withGeneratedIds(Array(1, 1, 1), Array(1, 2, 3), originalAmounts.clone(), numberIterations = 1, threshold = 5.0) + val base = Scenarr.withGeneratedIds( + Array(1, 1, 1), + Array(1, 2, 3), + originalAmounts.clone(), + numberIterations = 1, + threshold = 5.0 + ) val scaled = base.scaleAmntBy(-1.5) assertEquals(scaled.amounts.toSeq, Seq(-15.0, -30.0, -45.0)) assertEquals(scaled.threshold, -7.5) @@ -105,7 +111,8 @@ class ScenarioSuite extends munit.FunSuite: assert(ex.getMessage.contains("Threshold may only be increased")) test("applyThreshold may result in no claims"): - val base3 = Scenarr.withGeneratedIds(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 5.0) + val base3 = + Scenarr.withGeneratedIds(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 5.0) val appliedEmpty = base3.applyThreshold(100.0) assertEquals(appliedEmpty.amounts.toSeq, Seq()) assertEquals(appliedEmpty.iterations.toSeq, Seq()) diff --git a/vecxt_re/test/src/scenarr.test.scala b/vecxt_re/test/src/scenarr.test.scala index bd7778df..c6c08d1b 100644 --- a/vecxt_re/test/src/scenarr.test.scala +++ b/vecxt_re/test/src/scenarr.test.scala @@ -83,7 +83,8 @@ class ScenarrSuite extends FunSuite: } test("hasOccurence false for empty amounts") { - val sc = Scenarr.withGeneratedIds(Array.emptyIntArray, Array.emptyIntArray, Array.emptyDoubleArray, numberIterations = 0) + val sc = + Scenarr.withGeneratedIds(Array.emptyIntArray, Array.emptyIntArray, Array.emptyDoubleArray, numberIterations = 0) 
assertEquals(sc.hasOccurence, false) } @@ -101,7 +102,8 @@ class ScenarrSuite extends FunSuite: } test("scaleAmntBy multiplies amounts and threshold") { - val sc = Scenarr.withGeneratedIds(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 100.0) + val sc = + Scenarr.withGeneratedIds(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 100.0) val scaled = sc.scaleAmntBy(2.0) assertEquals(scaled.threshold, 200.0) assertEquals(scaled.amounts.toList, Array(20.0, 40.0).toList) @@ -118,7 +120,13 @@ class ScenarrSuite extends FunSuite: } test("applyThreshold filters amounts and only allows increasing threshold") { - val sc = Scenarr.withGeneratedIds(Array(1, 1, 1), Array(1, 2, 3), Array(10.0, 50.0, 200.0), numberIterations = 1, threshold = 0.0) + val sc = Scenarr.withGeneratedIds( + Array(1, 1, 1), + Array(1, 2, 3), + Array(10.0, 50.0, 200.0), + numberIterations = 1, + threshold = 0.0 + ) val filtered = sc.applyThreshold(49.0) // keep > 49 => 50 and 200 assertEquals(filtered.amounts.toList, Array(50.0, 200.0).toList) From 40aa4903d79b88a81bdf72c3373d4a5515d0d588 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Mon, 26 Jan 2026 23:06:34 +0100 Subject: [PATCH 47/75] oep --- experiments/src/index.scala | 1 + vecxt_re/src/groupSums.scala | 51 ++++++++++++++++++++++ vecxt_re/src/scenarr.scala | 40 +++++++++++++++++ vecxt_re/test/src/groupSumCount.test.scala | 39 +++++++++++++++++ 4 files changed, 131 insertions(+) diff --git a/experiments/src/index.scala b/experiments/src/index.scala index af10a63b..5b151093 100644 --- a/experiments/src/index.scala +++ b/experiments/src/index.scala @@ -2,6 +2,7 @@ package experiments import io.github.quafadas.table.{*, given} import io.github.quafadas.plots.SetupVegaBrowser.{*, given} +import experiments.RPT.* @main def plotIndex = val idx = CSV.resource("idx.csv", CsvOpts(TypeInferrer.FromAllRows, ReadAs.Columns)) diff --git a/vecxt_re/src/groupSums.scala b/vecxt_re/src/groupSums.scala 
index 442530ac..27e3d229 100644 --- a/vecxt_re/src/groupSums.scala +++ b/vecxt_re/src/groupSums.scala @@ -101,3 +101,54 @@ inline def groupCount(groups: Array[Int], nitr: Int): Array[Int] = result end groupCount + +/** Compute the maximum of values for each group identified by an integer index. + * + * The function expects `groups` to be sorted in non-decreasing order and that `groups` and `values` have the same + * length. Group indices are 1-based and must be in the range 1..nitr. The returned array has length `nitr`; element at + * position `i` (0-based) contains the max of values for group index `i+1`. Groups with no entries produce + * Double.NegativeInfinity in the corresponding slot. + * + * Preconditions: + * - groups.length == values.length + * - groups is sorted (runs of identical indices are contiguous) + * - every g in groups satisfies 1 <= g <= nitr + * + * Complexity: O(groups.length) time, O(nitr) extra space. + * + * This method is unsafe and performs no checks that these conditions are satisfied. It is the responsibility of the + * caller. 
+ * + * @param groups + * sorted array of 1-based group indices (length L) + * @param values + * array of values corresponding to each group index (length L) + * @param nitr + * number of groups (size of the returned array) + * @return + * an Array[Double] of length `nitr` where each element is the max for that group + * @throws ArrayIndexOutOfBoundsException + * if a group index is outside 1..nitr + * @throws IllegalArgumentException + * if groups.length != values.length + * + * Example: groups = Array(1, 1, 3), values = Array(1.0, 2.0, 4.0), nitr = 4 result = Array(2.0, -Inf, 4.0, -Inf) + */ +inline def groupMax(groups: Array[Int], values: Array[Double], nitr: Int): Array[Double] = + val result = Array.fill(nitr)(Double.NegativeInfinity) + val l = groups.length + var i = 0 + while i < l do + val g = groups(i) + var groupMax = Double.NegativeInfinity + // Process block of same group, computing max + while i < l && groups(i) == g do + if values(i) > groupMax then groupMax = values(i) + i += 1 + end while + result(g - 1) = groupMax + end while + + result +end groupMax + diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index a9218670..795a5cd7 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -38,6 +38,17 @@ case class Scenarr( groupSum(iterations, amounts, numberIterations) end agg + lazy val aep = agg.sorted(using Ordering[Double].reverse).zipWithIndex.map { case (amt, idx) => + (returnPeriod = numberIterations.toDouble / (idx + 1).toDouble, amount = amt) + } + + lazy val oep = groupMax(iterations, amounts, numberIterations).sorted(using Ordering[Double].reverse).zipWithIndex.map { + case (amt, idx) => + (returnPeriod = numberIterations.toDouble / (idx + 1).toDouble, amount = amt) + } + + + lazy val claimDates: Array[LocalDate] = (days - 1).map(d => ChronoUnit.DAYS.addTo(this.day1, d)) lazy val monthYear: Array[(month: Month, year: Int)] = claimDates.map(d => (d.getMonth, d.getYear)) @@ -97,6 +108,35 @@ object Scenarr: 
isSorted = true ) + /** Generate a small random Scenarr for experimentation. + * + * @param numClaims number of claim events to generate (default 15) + * @param seed optional random seed for reproducibility + * @return a small Scenarr with 10 iterations + */ + def sample(numClaims: Int = 15, seed: Option[Long] = None): Scenarr = + val rng = seed.fold(scala.util.Random())(s => scala.util.Random(s)) + val n = numClaims + + val iterations = Array.fill(n)(rng.nextInt(10) + 1) + val days = Array.fill(n)(rng.nextInt(365) + 1) + val amounts = Array.fill(n)(rng.nextDouble() * 1000.0 + 100.0) // 100-1100 + val ids = Array.fill(n)(rng.nextLong()) + + new Scenarr( + iterations = iterations, + days = days, + amounts = amounts, + ids = ids, + numberIterations = 10, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "sample", + id = rng.nextLong(), + isSorted = false + ) + end sample + /** Combine two Scenarr instances following monoid laws. * * The combination semantics are: diff --git a/vecxt_re/test/src/groupSumCount.test.scala b/vecxt_re/test/src/groupSumCount.test.scala index cf80a2b8..baf2c8f3 100644 --- a/vecxt_re/test/src/groupSumCount.test.scala +++ b/vecxt_re/test/src/groupSumCount.test.scala @@ -49,4 +49,43 @@ class GroupSumCountSuite extends FunSuite: assertVecEquals(sumResult, expectedSum) assertVecEquals(countResult, expectedCount) } + + test("groupMax finds max per 1-based group index with gaps") { + val groups = Array(1, 1, 2, 4, 4) + val values = Array(2.0, 3.0, 5.0, 10.0, 20.0) + + val result = groupMax(groups, values, nitr = 4) + + assertEquals(result.length, 4) + assertVecEquals(result, Array(3.0, 5.0, Double.NegativeInfinity, 20.0)) + } + + test("groupMax handles empty input by returning -Inf buckets") { + val groups = Array.empty[Int] + val values = Array.empty[Double] + + val maxResult = groupMax(groups, values, nitr = 3) + + assertEquals(maxResult.length, 3) + assert(maxResult.forall(_ == Double.NegativeInfinity)) + } + + test("groupMax single 
group spanning all entries") { + val groups = Array(3, 3, 3) + val values = Array(1.5, 2.5, -4.0) + + val maxResult = groupMax(groups, values, nitr = 4) + + val expectedMax = Array(Double.NegativeInfinity, Double.NegativeInfinity, 2.5, Double.NegativeInfinity) + assertVecEquals(maxResult, expectedMax) + } + + test("groupMax with negative values") { + val groups = Array(1, 1, 2, 2) + val values = Array(-5.0, -2.0, -10.0, -3.0) + + val result = groupMax(groups, values, nitr = 2) + + assertVecEquals(result, Array(-2.0, -3.0)) + } end GroupSumCountSuite From 16000eb4ee88b49cd5c086a9ca5ea02140564957 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Tue, 27 Jan 2026 00:06:15 +0100 Subject: [PATCH 48/75] . --- vecxt_re/resources/mixedCdf.vl.json | 12 ++ vecxt_re/resources/mixedPdf.vl.json | 23 ++ vecxt_re/src-jvm/dist/Mixed.scala | 281 +++++++++++++++++++++++++ vecxt_re/test/src-jvm/Mixed.test.scala | 144 +++++++++++++ 4 files changed, 460 insertions(+) create mode 100644 vecxt_re/resources/mixedCdf.vl.json create mode 100644 vecxt_re/resources/mixedPdf.vl.json create mode 100644 vecxt_re/src-jvm/dist/Mixed.scala create mode 100644 vecxt_re/test/src-jvm/Mixed.test.scala diff --git a/vecxt_re/resources/mixedCdf.vl.json b/vecxt_re/resources/mixedCdf.vl.json new file mode 100644 index 00000000..8c9f5e95 --- /dev/null +++ b/vecxt_re/resources/mixedCdf.vl.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Mixed distribution CDF (empirical body + Pareto tail)", + "width": "container", + "height": "container", + "data": { "values": [ { "x": 0.0, "cdf": 0.0 } ] }, + "mark": { "type": "line" }, + "encoding": { + "x": { "field": "x", "type": "quantitative", "title": "x" }, + "y": { "field": "cdf", "type": "quantitative", "title": "CDF" } + } +} diff --git a/vecxt_re/resources/mixedPdf.vl.json b/vecxt_re/resources/mixedPdf.vl.json new file mode 100644 index 00000000..b40bfb1d --- /dev/null +++ 
b/vecxt_re/resources/mixedPdf.vl.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Mixed distribution PDF (empirical body + Pareto tail) via histogram of samples", + "width": "container", + "height": "container", + "data": { "values": [ { "x": 0.0 } ] }, + "transform": [ + { "bin": { "maxbins": 80 }, "field": "x", "as": ["x0", "x1"] }, + { "aggregate": [ { "op": "count", "as": "binCount" } ], "groupby": ["x0", "x1"] }, + { "joinaggregate": [ { "op": "sum", "field": "binCount", "as": "N" } ] }, + { "calculate": "datum.binCount / (datum.N * (datum.x1 - datum.x0))", "as": "density" } + ], + "mark": { "type": "bar", "opacity": 0.65 }, + "encoding": { + "x": { "field": "x0", "type": "quantitative", "title": "x" }, + "x2": { "field": "x1" }, + "y": { "field": "density", "type": "quantitative", "title": "PDF" }, + "tooltip": [ + { "field": "binCount", "type": "quantitative", "title": "Bin count" }, + { "field": "density", "type": "quantitative", "title": "Density" } + ] + } +} diff --git a/vecxt_re/src-jvm/dist/Mixed.scala b/vecxt_re/src-jvm/dist/Mixed.scala new file mode 100644 index 00000000..0c539df8 --- /dev/null +++ b/vecxt_re/src-jvm/dist/Mixed.scala @@ -0,0 +1,281 @@ +package vecxt_re + +import org.apache.commons.rng.simple.RandomSource +import io.github.quafadas.plots.SetupVega.{*, given} +import io.circe.syntax.* + +/** Mixed distribution: Empirical body with Pareto tail. + * + * This distribution combines an empirical distribution for the body (values below the mixing point) with a Pareto + * distribution for the tail (values at or above the mixing point). + * + * The distribution is parameterized by: + * - An empirical distribution of observed values + * - A mixing point $m$ (threshold between body and tail) + * - A Pareto shape parameter $\alpha$ (for the tail) + * + * The CDF is continuous at the mixing point. Let $p_m = F_{\text{emp}}(m^-)$ be the empirical CDF just below the + * mixing point. 
Then: + * - For $x < m$: $F(x) = F_{\text{emp}}(x)$ + * - For $x \ge m$: $F(x) = p_m + (1 - p_m) \cdot F_{\text{Pareto}}(x)$ + * + * where the Pareto distribution has scale = $m$ and shape = $\alpha$. + * + * @param empirical + * The empirical distribution for the body + * @param mixingPoint + * The threshold where we switch from empirical to Pareto tail + * @param paretoShape + * The shape parameter (α) for the Pareto tail + */ +case class Mixed(empirical: Empirical, mixingPoint: Double, paretoShape: Double) + extends ContinuousDistr[Double] + with HasMean[Double] + with HasVariance[Double] + with HasCdf + with HasInverseCdf: + + require(mixingPoint > 0, "mixing point must be positive") + require(paretoShape > 0, "Pareto shape must be positive") + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + + // The Pareto tail with scale = mixing point + private val paretoTail = Pareto(mixingPoint, paretoShape) + + // Probability mass in the empirical body (CDF at the mixing point) + // We want P(X < mixingPoint) from the empirical, which is the sum of all mass points strictly below mixingPoint + private val bodyWeight: Double = + // The empirical CDF is right-continuous, so cdf(m) includes P(X <= m). + // We want mass strictly below m for the body, and P(X >= m) goes to the tail. + // However, for simplicity and continuity, we use cdf(m-epsilon) conceptually. + // In practice, we compute the probability of all empirical points strictly below the mixing point. 
+ var w = 0.0 + val vals = empirical.values + val weights = empirical.weights + var totalW = 0.0 + var i = 0 + while i < vals.length do + totalW += weights(i) + if vals(i) < mixingPoint then w += weights(i) + i += 1 + end while + w / totalW + end bodyWeight + + // Tail weight is the complement + private val tailWeight: Double = 1.0 - bodyWeight + + /** Draw a random sample from the mixed distribution */ + def draw: Double = + val u = rng.nextDouble() + inverseCdf(u) + end draw + + /** Unnormalized log PDF. + * + * For the body (empirical), this is technically undefined in the continuous sense since the empirical distribution + * is discrete. We return the log of the weighted probability mass if x exactly matches an empirical point, otherwise + * negative infinity. + * + * For the tail (Pareto), we return the properly weighted log PDF. + */ + def unnormalizedLogPdf(x: Double): Double = + if x < mixingPoint then + // Discrete mass in the body + val prob = empirical.probabilityOf(x) + if prob > 0 then math.log(prob) else Double.NegativeInfinity + else + // Continuous Pareto tail, scaled by tail weight + if tailWeight > 0 then paretoTail.unnormalizedLogPdf(x) + math.log(tailWeight) + else Double.NegativeInfinity + end if + end unnormalizedLogPdf + + /** Log normalizer (distribution is already normalized) */ + def logNormalizer: Double = 0.0 + + /** Probability that x < X <= y */ + def probability(x: Double, y: Double): Double = + if y <= x then 0.0 + else cdf(y) - cdf(x) + + /** Cumulative distribution function. 
+ * + * For x < mixingPoint: F(x) = bodyWeight * (empirical CDF normalized to body) + * For x >= mixingPoint: F(x) = bodyWeight + tailWeight * F_Pareto(x) + */ + def cdf(x: Double): Double = + if x < mixingPoint then + // Use empirical CDF, but only count points below mixing point + // The CDF here is P(X <= x) for X in the body region, scaled by bodyWeight + val empCdfAtX = empirical.cdf(x) + // Scale: empirical CDF goes up to 1, but we only want it to contribute bodyWeight + math.min(empCdfAtX, bodyWeight) // Cap at bodyWeight since empirical points >= mixingPoint don't count + else + // In the tail region + bodyWeight + tailWeight * paretoTail.cdf(x) + end if + end cdf + + /** Inverse CDF (quantile function) */ + def inverseCdf(p: Double): Double = + require(p >= 0.0 && p <= 1.0, "p must be in [0,1]") + if p <= 0.0 then + // Return minimum of empirical or mixing point + empirical.inverseCdf(0.0) + else if p <= bodyWeight then + // In the body region - use empirical inverse CDF + // Scale p to [0, 1] within the body + val scaledP = p / bodyWeight + val q = empirical.inverseCdf(math.min(scaledP, 1.0)) + // Ensure we don't exceed mixing point + math.min(q, mixingPoint - Double.MinPositiveValue) + else + // In the tail region - use Pareto inverse CDF + val tailP = (p - bodyWeight) / tailWeight + paretoTail.inverseCdf(tailP) + end if + end inverseCdf + + /** Mean of the mixed distribution. + * + * E[X] = bodyWeight * E[X | X < m] + tailWeight * E[X_Pareto] + * + * Note: For Pareto, mean is only defined when shape > 1. 
+ */ + def mean: Double = + // Compute conditional mean of empirical given X < mixingPoint + var empMean = 0.0 + var empWeight = 0.0 + val vals = empirical.values + val weights = empirical.weights + var totalW = 0.0 + var i = 0 + while i < vals.length do + totalW += weights(i) + i += 1 + end while + i = 0 + while i < vals.length do + if vals(i) < mixingPoint then + val w = weights(i) / totalW + empMean += vals(i) * w + empWeight += w + end if + i += 1 + end while + val condEmpMean = if empWeight > 0 then empMean / empWeight else 0.0 + + bodyWeight * condEmpMean + tailWeight * paretoTail.mean + end mean + + /** Variance of the mixed distribution. + * + * Uses the law of total variance. + * + * Note: For Pareto, variance is only defined when shape > 2. + */ + def variance: Double = + val m = mean + // Compute E[X^2] for the body + var empSecondMoment = 0.0 + var empWeight = 0.0 + val vals = empirical.values + val weights = empirical.weights + var totalW = 0.0 + var i = 0 + while i < vals.length do + totalW += weights(i) + i += 1 + end while + i = 0 + while i < vals.length do + if vals(i) < mixingPoint then + val w = weights(i) / totalW + empSecondMoment += vals(i) * vals(i) * w + empWeight += w + end if + i += 1 + end while + val condEmpSecondMoment = if empWeight > 0 then empSecondMoment / empWeight else 0.0 + + // E[X^2] for Pareto + val paretoSecondMoment = paretoTail.variance + paretoTail.mean * paretoTail.mean + + // Total E[X^2] + val totalSecondMoment = bodyWeight * condEmpSecondMoment + tailWeight * paretoSecondMoment + + // Var(X) = E[X^2] - E[X]^2 + totalSecondMoment - m * m + end variance + + /** Plot the mixed distribution PDF/histogram. 
*/ + def plot(using viz.LowPriorityPlotTarget) = + val plot = VegaPlot.fromResource("mixedPdf.vl.json") + val numSamples = 10000 + val samples = (0 until numSamples).map(_ => (x = draw)) + + plot.plot( + _.data.values := samples.asJson, + _ += (title = s"Mixed Distribution (mixingPoint=$mixingPoint, paretoShape=$paretoShape)").asJson + ) + end plot + + /** Plot the mixed CDF. */ + def plotCdf(using viz.LowPriorityPlotTarget) = + val plot = VegaPlot.fromResource("mixedCdf.vl.json") + + // Generate points for the CDF + val minX = empirical.inverseCdf(0.0) + val maxX = paretoTail.inverseCdf(0.99) // 99th percentile of tail + val numPoints = 500 + val step = (maxX - minX) / numPoints + + val points = (0 to numPoints).map { i => + val x = minX + i * step + (x = x, cdf = cdf(x)) + } + + plot.plot( + _.data.values := points.asJson, + _ += (title = s"Mixed Distribution CDF (mixingPoint=$mixingPoint, paretoShape=$paretoShape)").asJson + ) + end plotCdf + +end Mixed + +object Mixed: + + /** Create a mixed distribution from raw empirical data. + * + * @param values + * The empirical sample values + * @param mixingPoint + * The threshold between body and tail + * @param paretoShape + * The Pareto shape parameter for the tail + */ + inline def fromValues(values: Array[Double], mixingPoint: Double, paretoShape: Double): Mixed = + Mixed(Empirical.equalWeights(values), mixingPoint, paretoShape) + + /** Create a mixed distribution from weighted empirical data. 
+ * + * @param values + * The empirical sample values + * @param weights + * The weights for each sample value + * @param mixingPoint + * The threshold between body and tail + * @param paretoShape + * The Pareto shape parameter for the tail + */ + inline def fromWeightedValues( + values: Array[Double], + weights: Array[Double], + mixingPoint: Double, + paretoShape: Double + ): Mixed = + Mixed(Empirical.weighted(values, weights), mixingPoint, paretoShape) + +end Mixed diff --git a/vecxt_re/test/src-jvm/Mixed.test.scala b/vecxt_re/test/src-jvm/Mixed.test.scala new file mode 100644 index 00000000..793ad518 --- /dev/null +++ b/vecxt_re/test/src-jvm/Mixed.test.scala @@ -0,0 +1,144 @@ +package vecxt_re + +import munit.FunSuite + +class MixedTest extends FunSuite: + + test("CDF is continuous at mixing point") { + val values = Array(1.0, 2.0, 3.0, 4.0, 5.0) + val mixingPoint = 4.0 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + // CDF just below and at the mixing point should be close + val cdfBelow = mixed.cdf(mixingPoint - 0.0001) + val cdfAt = mixed.cdf(mixingPoint) + + // At mixingPoint, the Pareto CDF starts at 0, so cdf should equal bodyWeight + // which is the fraction of empirical values strictly below mixingPoint + // Values < 4.0 are: 1.0, 2.0, 3.0 (3 out of 5) = 0.6 + val expectedBodyWeight = 0.6 + assertEqualsDouble(cdfBelow, expectedBodyWeight, 0.01) + assertEqualsDouble(cdfAt, expectedBodyWeight, 1e-12) // Pareto CDF at scale is 0 + } + + test("CDF goes from 0 to 1") { + val values = Array(1.0, 2.0, 3.0, 5.0, 10.0) + val mixingPoint = 4.0 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + assertEqualsDouble(mixed.cdf(0.0), 0.0, 1e-12) + assertEqualsDouble(mixed.cdf(1000000.0), 1.0, 1e-6) + } + + test("inverseCdf and cdf are consistent") { + val values = Array(1.0, 2.0, 3.0, 5.0, 10.0) + val mixingPoint = 4.0 + val paretoShape = 2.5 + val mixed = Mixed.fromValues(values, 
mixingPoint, paretoShape) + + // Test a range of quantiles + val quantiles = Array(0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99) + for p <- quantiles do + val x = mixed.inverseCdf(p) + val recoveredP = mixed.cdf(x) + // For discrete parts, we only expect recoveredP >= p + assert(recoveredP >= p - 1e-6, s"Failed at p=$p: inverseCdf($p)=$x, cdf($x)=$recoveredP") + end for + } + + test("draw returns values in valid range") { + val values = Array(1.0, 2.0, 3.0) + val mixingPoint = 2.5 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + val samples = (1 to 1000).map(_ => mixed.draw) + val minSample = samples.min + val maxSample = samples.max + + // Min should be from empirical (>= 1.0) + assert(minSample >= 1.0, s"Min sample $minSample should be >= 1.0") + // Should have some tail samples above mixing point + assert(maxSample > mixingPoint, s"Max sample $maxSample should be > $mixingPoint (Pareto tail)") + } + + test("body weight calculation is correct") { + val values = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0) + val mixingPoint = 5.5 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + // Values < 5.5 are: 1, 2, 3, 4, 5 (5 out of 10) = 0.5 + // CDF at mixing point should equal bodyWeight + assertEqualsDouble(mixed.cdf(mixingPoint), 0.5, 1e-12) + } + + test("mean calculation for shape > 1") { + val values = Array(1.0, 2.0, 3.0) + val mixingPoint = 2.5 + val paretoShape = 2.5 // Mean is defined for shape > 1 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + val mean = mixed.mean + // Mean should be between minimum empirical and some reasonable upper bound + assert(mean > 1.0, s"Mean $mean should be > 1.0") + assert(mean.isFinite, s"Mean should be finite") + } + + test("variance calculation for shape > 2") { + val values = Array(1.0, 2.0, 3.0, 4.0) + val mixingPoint = 3.0 + val paretoShape = 3.0 // Variance is defined for shape > 2 + val mixed = 
Mixed.fromValues(values, mixingPoint, paretoShape) + + val variance = mixed.variance + assert(variance > 0.0, s"Variance $variance should be > 0") + assert(variance.isFinite, s"Variance should be finite") + } + + test("weighted empirical works correctly") { + val values = Array(1.0, 2.0, 3.0) + val weights = Array(1.0, 2.0, 1.0) // 25%, 50%, 25% + val mixingPoint = 2.5 + val paretoShape = 2.0 + val mixed = Mixed.fromWeightedValues(values, weights, mixingPoint, paretoShape) + + // Values < 2.5 are 1.0 and 2.0 with weights 1.0 and 2.0 + // bodyWeight = (1.0 + 2.0) / (1.0 + 2.0 + 1.0) = 0.75 + assertEqualsDouble(mixed.cdf(mixingPoint), 0.75, 1e-12) + } + + test("probability method works correctly") { + val values = Array(1.0, 2.0, 3.0, 4.0, 5.0) + val mixingPoint = 3.5 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + // P(2 < X <= 3) should be 1/5 = 0.2 (only value 3 is in this range) + assertEqualsDouble(mixed.probability(2.0, 3.0), 0.2, 1e-12) + + // P(0 < X <= 10) should be close to cdf(10) + assertEqualsDouble(mixed.probability(0.0, 10.0), mixed.cdf(10.0), 1e-12) + } + + test("construction fails with non-positive mixing point") { + intercept[IllegalArgumentException] { + Mixed.fromValues(Array(1.0, 2.0), 0.0, 2.0) + } + intercept[IllegalArgumentException] { + Mixed.fromValues(Array(1.0, 2.0), -1.0, 2.0) + } + } + + test("construction fails with non-positive pareto shape") { + intercept[IllegalArgumentException] { + Mixed.fromValues(Array(1.0, 2.0), 1.5, 0.0) + } + intercept[IllegalArgumentException] { + Mixed.fromValues(Array(1.0, 2.0), 1.5, -1.0) + } + } + +end MixedTest From 7f0f05dcb69b7db7feb3089d5c19263191a6de74 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Tue, 27 Jan 2026 00:06:24 +0100 Subject: [PATCH 49/75] . 
--- experiments/package.mill | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/experiments/package.mill b/experiments/package.mill index 5a85357c..c270710f 100644 --- a/experiments/package.mill +++ b/experiments/package.mill @@ -19,7 +19,8 @@ object `package` extends ScalaModule: override def mvnDeps = super.mvnDeps() ++ Seq( mvn"com.lihaoyi::os-lib::0.10.4", mvn"io.github.quafadas::scautable::0.0.35", - mvn"io.github.quafadas::dedav4s::0.10.4" + mvn"io.github.quafadas::dedav4s::0.10.4", + mvn"org.apache.logging.log4j:log4j-core:2.24.3" // Required by Apache POI for Excel ) end `package` From 78b540897a4e5bd07a5bf982b3aaba2a61aa21e0 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 26 Jan 2026 23:07:36 +0000 Subject: [PATCH 50/75] [autofix.ci] apply automated fixes --- vecxt_re/src-jvm/dist/Mixed.scala | 6 ++++-- vecxt_re/src/groupSums.scala | 2 +- vecxt_re/src/scenarr.scala | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/vecxt_re/src-jvm/dist/Mixed.scala b/vecxt_re/src-jvm/dist/Mixed.scala index 0c539df8..24a2bf7d 100644 --- a/vecxt_re/src-jvm/dist/Mixed.scala +++ b/vecxt_re/src-jvm/dist/Mixed.scala @@ -58,6 +58,7 @@ case class Mixed(empirical: Empirical, mixingPoint: Double, paretoShape: Double) while i < vals.length do totalW += weights(i) if vals(i) < mixingPoint then w += weights(i) + end if i += 1 end while w / totalW @@ -85,6 +86,7 @@ case class Mixed(empirical: Empirical, mixingPoint: Double, paretoShape: Double) // Discrete mass in the body val prob = empirical.probabilityOf(x) if prob > 0 then math.log(prob) else Double.NegativeInfinity + end if else // Continuous Pareto tail, scaled by tail weight if tailWeight > 0 then paretoTail.unnormalizedLogPdf(x) + math.log(tailWeight) @@ -102,8 +104,8 @@ case class Mixed(empirical: Empirical, mixingPoint: Double, paretoShape: Double) /** Cumulative distribution function. 
* - * For x < mixingPoint: F(x) = bodyWeight * (empirical CDF normalized to body) - * For x >= mixingPoint: F(x) = bodyWeight + tailWeight * F_Pareto(x) + * For x < mixingPoint: F(x) = bodyWeight * (empirical CDF normalized to body) For x >= mixingPoint: F(x) = + * bodyWeight + tailWeight * F_Pareto(x) */ def cdf(x: Double): Double = if x < mixingPoint then diff --git a/vecxt_re/src/groupSums.scala b/vecxt_re/src/groupSums.scala index 27e3d229..08956a36 100644 --- a/vecxt_re/src/groupSums.scala +++ b/vecxt_re/src/groupSums.scala @@ -144,6 +144,7 @@ inline def groupMax(groups: Array[Int], values: Array[Double], nitr: Int): Array // Process block of same group, computing max while i < l && groups(i) == g do if values(i) > groupMax then groupMax = values(i) + end if i += 1 end while result(g - 1) = groupMax @@ -151,4 +152,3 @@ inline def groupMax(groups: Array[Int], values: Array[Double], nitr: Int): Array result end groupMax - diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index 795a5cd7..7532eb39 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -42,12 +42,11 @@ case class Scenarr( (returnPeriod = numberIterations.toDouble / (idx + 1).toDouble, amount = amt) } - lazy val oep = groupMax(iterations, amounts, numberIterations).sorted(using Ordering[Double].reverse).zipWithIndex.map { - case (amt, idx) => - (returnPeriod = numberIterations.toDouble / (idx + 1).toDouble, amount = amt) - } - - + lazy val oep = + groupMax(iterations, amounts, numberIterations).sorted(using Ordering[Double].reverse).zipWithIndex.map { + case (amt, idx) => + (returnPeriod = numberIterations.toDouble / (idx + 1).toDouble, amount = amt) + } lazy val claimDates: Array[LocalDate] = (days - 1).map(d => ChronoUnit.DAYS.addTo(this.day1, d)) @@ -110,9 +109,12 @@ object Scenarr: /** Generate a small random Scenarr for experimentation. 
* - * @param numClaims number of claim events to generate (default 15) - * @param seed optional random seed for reproducibility - * @return a small Scenarr with 10 iterations + * @param numClaims + * number of claim events to generate (default 15) + * @param seed + * optional random seed for reproducibility + * @return + * a small Scenarr with 10 iterations */ def sample(numClaims: Int = 15, seed: Option[Long] = None): Scenarr = val rng = seed.fold(scala.util.Random())(s => scala.util.Random(s)) From 7570450ef06a533cc5a7120700f85dc188f8e43e Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Tue, 27 Jan 2026 20:57:35 +0100 Subject: [PATCH 51/75] fix --- .vscode/launch.json | 10 +++ build.mill | 2 +- laws/src/VectorCommutativeGroup.scala | 4 +- laws/src/VectorCommutativeMonoid.scala | 4 +- laws/src/VectorMonoid.scala | 4 +- laws/src/instances/DoubleInstances.scala | 10 +-- vecxt/src-js-native/eig.scala | 4 +- vecxt/src-js-native/solve.scala | 2 +- vecxt/src-js/array.scala | 2 +- vecxt/src-js/doublematrix.scala | 2 +- vecxt/src-jvm/cholesky.scala | 7 +- vecxt/src-jvm/eig.scala | 10 +-- vecxt/src-jvm/lu.scala | 10 +-- vecxt/src-jvm/qr.scala | 10 +-- vecxt/src-jvm/solve.scala | 8 ++- vecxt/src-jvm/svd.scala | 16 +++-- vecxt/src/MatrixHelper.scala | 2 +- vecxt/src/dimMatCheck.scala | 2 +- vecxt/src/intarray.scala | 5 +- vecxt_re/src-jvm/SplitLosses.scala | 5 +- vecxt_re/src-jvm/dist/Empirical.scala | 3 +- vecxt_re/src-jvm/dist/Mixed.scala | 3 +- vecxt_re/src-jvm/dist/NegativeBinomial.scala | 5 +- vecxt_re/src-jvm/dist/Pareto.scala | 5 +- vecxt_re/src-jvm/rpt.scala | 2 +- vecxt_re/src-native/rpt.scala | 3 +- vecxt_re/src/IndexPerPeriod.scala | 2 +- vecxt_re/src/scenario.scala | 4 +- vecxt_re/src/scenarr.scala | 10 +-- vecxtensions/src-js/mathtags.scala | 72 -------------------- vecxtensions/src-js/matmul.scala | 8 ++- 31 files changed, 102 insertions(+), 134 deletions(-) delete mode 100644 vecxtensions/src-js/mathtags.scala diff --git a/.vscode/launch.json 
b/.vscode/launch.json index 3e72c3a0..4d5851b5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -10,6 +10,16 @@ "args": [], "jvmOptions": [], "env": {} + }, + { + "type": "scala", + "request": "launch", + "name": "readXl", + "mainClass": "experiments.readXl", + "buildTarget": "file:///Users/simon/Code/vecxt/experiments", + "args": [], + "jvmOptions": [], + "env": {} } ] } \ No newline at end of file diff --git a/build.mill b/build.mill index 6554ae09..7d7deb03 100644 --- a/build.mill +++ b/build.mill @@ -1,4 +1,4 @@ -//| mill-version: 1.1.0-RC4 +//| mill-version: 1.1.0 //| mill-jvm-version: 24 //| mill-jvm-opts: [ "--add-modules", "jdk.incubator.vector"] //| mvnDeps: diff --git a/laws/src/VectorCommutativeGroup.scala b/laws/src/VectorCommutativeGroup.scala index 15e1fadc..4bc85956 100644 --- a/laws/src/VectorCommutativeGroup.scala +++ b/laws/src/VectorCommutativeGroup.scala @@ -1,8 +1,10 @@ package vecxt.laws -import cats.kernel.{CommutativeGroup, Semigroup} import vecxt.BoundsCheck +import cats.kernel.CommutativeGroup +import cats.kernel.Semigroup + /** A CommutativeGroup for Array[A] scoped to a specific dimension. * * This trait extends both VectorMonoid and cats.kernel.CommutativeGroup, making it compatible with cats group laws diff --git a/laws/src/VectorCommutativeMonoid.scala b/laws/src/VectorCommutativeMonoid.scala index 4199bd7e..e520178d 100644 --- a/laws/src/VectorCommutativeMonoid.scala +++ b/laws/src/VectorCommutativeMonoid.scala @@ -1,8 +1,10 @@ package vecxt.laws -import cats.kernel.{CommutativeMonoid, Semigroup} import vecxt.BoundsCheck +import cats.kernel.CommutativeMonoid +import cats.kernel.Semigroup + /** A CommutativeMonoid for Array[A] scoped to a specific dimension. 
* * This trait extends both VectorMonoid and cats.kernel.CommutativeMonoid, making it compatible with cats commutative diff --git a/laws/src/VectorMonoid.scala b/laws/src/VectorMonoid.scala index 85c41859..f8bced93 100644 --- a/laws/src/VectorMonoid.scala +++ b/laws/src/VectorMonoid.scala @@ -1,8 +1,10 @@ package vecxt.laws -import cats.kernel.{Monoid, Semigroup} import vecxt.BoundsCheck +import cats.kernel.Monoid +import cats.kernel.Semigroup + /** A Monoid for Array[A] scoped to a specific dimension. * * This trait extends cats.kernel.Monoid, making it compatible with the entire cats laws testing infrastructure. diff --git a/laws/src/instances/DoubleInstances.scala b/laws/src/instances/DoubleInstances.scala index 568d34e5..b926da0b 100644 --- a/laws/src/instances/DoubleInstances.scala +++ b/laws/src/instances/DoubleInstances.scala @@ -1,9 +1,12 @@ package vecxt.laws.instances -import cats.kernel.Semigroup -import vecxt.laws.{Dimension, VectorCommutativeGroup, VectorCommutativeMonoid} import vecxt.BoundsCheck -import vecxt.all.{given, *} +import vecxt.all.{*, given} +import vecxt.laws.Dimension +import vecxt.laws.VectorCommutativeGroup +import vecxt.laws.VectorCommutativeMonoid + +import cats.kernel.Semigroup object double: @@ -20,7 +23,6 @@ object double: x + y , inverseFn = (a) => - import vecxt.BoundsCheck.DoBoundsCheck.yes -a ) end vectorAdditionGroup diff --git a/vecxt/src-js-native/eig.scala b/vecxt/src-js-native/eig.scala index 0f4c13da..a4a86058 100644 --- a/vecxt/src-js-native/eig.scala +++ b/vecxt/src-js-native/eig.scala @@ -1,7 +1,7 @@ package vecxt -import all.* -import BoundsCheck.BoundsCheck +import vecxt.BoundsCheck.BoundsCheck +import vecxt.all.* object Eigenvalues: inline def eig(m: Matrix[Double])(using diff --git a/vecxt/src-js-native/solve.scala b/vecxt/src-js-native/solve.scala index b90546b8..d635f0db 100644 --- a/vecxt/src-js-native/solve.scala +++ b/vecxt/src-js-native/solve.scala @@ -1,7 +1,7 @@ package vecxt -import vecxt.matrix.Matrix 
import vecxt.BoundsCheck.BoundsCheck +import vecxt.matrix.Matrix /** Linear system solver placeholder for JS and Native platforms. * diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index cb8021c2..b35c4a91 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -5,8 +5,8 @@ import scala.scalajs.js import scala.scalajs.js.typedarray.Float64Array import scala.util.chaining.* -import vecxt.BoundsCheck.BoundsCheck import vecxt.BooleanArrays.* +import vecxt.BoundsCheck.BoundsCheck object arrayUtil: extension [A](d: Array[A]) def printArr: String = d.mkString("[", ",", "]") diff --git a/vecxt/src-js/doublematrix.scala b/vecxt/src-js/doublematrix.scala index 138e64f7..91af1c9c 100644 --- a/vecxt/src-js/doublematrix.scala +++ b/vecxt/src-js/doublematrix.scala @@ -1,10 +1,10 @@ package vecxt +import scala.scalajs.js.JSConverters.* import scala.scalajs.js.typedarray.Float64Array import vecxt.BoundsCheck.BoundsCheck import vecxt.matrix.* -import scala.scalajs.js.JSConverters.* object JsDoubleMatrix: diff --git a/vecxt/src-jvm/cholesky.scala b/vecxt/src-jvm/cholesky.scala index 42e6a90a..6c56f809 100644 --- a/vecxt/src-jvm/cholesky.scala +++ b/vecxt/src-jvm/cholesky.scala @@ -1,13 +1,14 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix import vecxt.BoundsCheck.BoundsCheck import vecxt.MatrixHelper.zeros -import vecxt.all.update import vecxt.MatrixInstance.apply +import vecxt.all.update +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK // https://github.com/scalanlp/breeze/blob/fd73d09976a1a50d68b91a53e3896980502d335e/math/src/main/scala/breeze/linalg/functions/svd.scala#L13 object Cholesky: diff --git a/vecxt/src-jvm/eig.scala b/vecxt/src-jvm/eig.scala index 5b362aed..aa7dcbe5 100644 --- a/vecxt/src-jvm/eig.scala +++ b/vecxt/src-jvm/eig.scala @@ -1,11 +1,13 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import 
vecxt.matrix.Matrix -import vecxt.MatrixInstance.* -import vecxt.MatrixHelper.zeros + import vecxt.BoundsCheck.BoundsCheck +import vecxt.MatrixHelper.zeros +import vecxt.MatrixInstance.* +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK // https://github.com/scalanlp/breeze/blob/fd73d09976a1a50d68b91a53e3896980502d335e/math/src/main/scala/breeze/linalg/functions/eig.scala#L25 object Eigenvalues: diff --git a/vecxt/src-jvm/lu.scala b/vecxt/src-jvm/lu.scala index e65f5324..d1271974 100644 --- a/vecxt/src-jvm/lu.scala +++ b/vecxt/src-jvm/lu.scala @@ -1,11 +1,13 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix -import vecxt.MatrixInstance.* -import vecxt.MatrixHelper.zeros + import vecxt.BoundsCheck.BoundsCheck +import vecxt.MatrixHelper.zeros +import vecxt.MatrixInstance.* +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK /** LU decomposition with partial pivoting using LAPACK. 
* diff --git a/vecxt/src-jvm/qr.scala b/vecxt/src-jvm/qr.scala index dfeb5cb6..3637283c 100644 --- a/vecxt/src-jvm/qr.scala +++ b/vecxt/src-jvm/qr.scala @@ -1,11 +1,13 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix -import vecxt.MatrixInstance.* -import vecxt.MatrixHelper.zeros + import vecxt.BoundsCheck.BoundsCheck +import vecxt.MatrixHelper.zeros +import vecxt.MatrixInstance.* +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK object QR: private lazy final val lapack = JavaLAPACK.getInstance() diff --git a/vecxt/src-jvm/solve.scala b/vecxt/src-jvm/solve.scala index 8f588a1d..24aaa019 100644 --- a/vecxt/src-jvm/solve.scala +++ b/vecxt/src-jvm/solve.scala @@ -1,10 +1,12 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix -import vecxt.MatrixInstance.* + import vecxt.BoundsCheck.BoundsCheck +import vecxt.MatrixInstance.* +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK /** Linear system solver using LAPACK. 
* diff --git a/vecxt/src-jvm/svd.scala b/vecxt/src-jvm/svd.scala index 3ae8dcee..6e13fce7 100644 --- a/vecxt/src-jvm/svd.scala +++ b/vecxt/src-jvm/svd.scala @@ -1,16 +1,18 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix + +import vecxt.BooleanArrays.trues +import vecxt.BoundsCheck.BoundsCheck +import vecxt.DoubleMatrix.matmul +import vecxt.MatrixHelper.zeros import vecxt.MatrixInstance.* -import vecxt.arrays.maxSIMD import vecxt.arrays.> -import vecxt.BooleanArrays.trues +import vecxt.arrays.maxSIMD +import vecxt.matrix.Matrix import vecxt.matrixUtil.transpose -import vecxt.MatrixHelper.zeros -import vecxt.DoubleMatrix.matmul -import vecxt.BoundsCheck.BoundsCheck + +import dev.ludovic.netlib.lapack.JavaLAPACK // https://github.com/scalanlp/breeze/blob/fd73d09976a1a50d68b91a53e3896980502d335e/math/src/main/scala/breeze/linalg/functions/svd.scala#L13 object Svd: diff --git a/vecxt/src/MatrixHelper.scala b/vecxt/src/MatrixHelper.scala index 10cef0d4..8ef82bd9 100644 --- a/vecxt/src/MatrixHelper.scala +++ b/vecxt/src/MatrixHelper.scala @@ -3,8 +3,8 @@ package vecxt import scala.reflect.ClassTag import vecxt.BoundsCheck.BoundsCheck -import vecxt.matrix.* import vecxt.MatrixInstance.apply +import vecxt.matrix.* object MatrixHelper: extension (m: Matrix.type) diff --git a/vecxt/src/dimMatCheck.scala b/vecxt/src/dimMatCheck.scala index 3e76c6f4..f67753a0 100644 --- a/vecxt/src/dimMatCheck.scala +++ b/vecxt/src/dimMatCheck.scala @@ -1,8 +1,8 @@ package vecxt import vecxt.BoundsCheck.BoundsCheck -import vecxt.matrix.* import vecxt.MatrixInstance.* +import vecxt.matrix.* object dimMatCheck: inline def apply[A](a: Matrix[A], b: Matrix[A])(using inline doCheck: BoundsCheck) = diff --git a/vecxt/src/intarray.scala b/vecxt/src/intarray.scala index 6eb0bbe8..1c1f64f0 100644 --- a/vecxt/src/intarray.scala +++ b/vecxt/src/intarray.scala @@ -1,9 +1,10 @@ package vecxt -import scala.util.control.Breaks.* import 
scala.reflect.ClassTag -import vecxt.BoundsCheck.BoundsCheck +import scala.util.control.Breaks.* + import vecxt.BooleanArrays.trues +import vecxt.BoundsCheck.BoundsCheck object IntArrays: diff --git a/vecxt_re/src-jvm/SplitLosses.scala b/vecxt_re/src-jvm/SplitLosses.scala index 5903629d..a194e5bb 100644 --- a/vecxt_re/src-jvm/SplitLosses.scala +++ b/vecxt_re/src-jvm/SplitLosses.scala @@ -2,9 +2,12 @@ package vecxt_re import java.util.concurrent.Executors -import jdk.incubator.vector.{DoubleVector, VectorOperators, VectorSpecies} import vecxt.BoundsCheck.BoundsCheck +import jdk.incubator.vector.DoubleVector +import jdk.incubator.vector.VectorOperators +import jdk.incubator.vector.VectorSpecies + object SplitLosses: extension (tower: Tower) /** High-performance SIMD optimized version for small number of layers (1 -5) and large number of claims. diff --git a/vecxt_re/src-jvm/dist/Empirical.scala b/vecxt_re/src-jvm/dist/Empirical.scala index d287be9a..72224010 100644 --- a/vecxt_re/src-jvm/dist/Empirical.scala +++ b/vecxt_re/src-jvm/dist/Empirical.scala @@ -1,8 +1,9 @@ package vecxt_re import org.apache.commons.rng.simple.RandomSource -import io.github.quafadas.plots.SetupVega.{*, given} + import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} /** Empirical distribution (JVM only). * diff --git a/vecxt_re/src-jvm/dist/Mixed.scala b/vecxt_re/src-jvm/dist/Mixed.scala index 24a2bf7d..3528e023 100644 --- a/vecxt_re/src-jvm/dist/Mixed.scala +++ b/vecxt_re/src-jvm/dist/Mixed.scala @@ -1,8 +1,9 @@ package vecxt_re import org.apache.commons.rng.simple.RandomSource -import io.github.quafadas.plots.SetupVega.{*, given} + import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} /** Mixed distribution: Empirical body with Pareto tail. 
* diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index a1ce4522..4ab4ffea 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -1,11 +1,12 @@ package vecxt_re import org.apache.commons.numbers.gamma.LogGamma +import org.apache.commons.rng.simple.RandomSource import org.apache.commons.statistics.distribution.GammaDistribution import org.apache.commons.statistics.distribution.PoissonDistribution -import org.apache.commons.rng.simple.RandomSource -import io.github.quafadas.plots.SetupVega.{*, given} + import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} /** Negative Binomial Distribution with alternative parameterization. * diff --git a/vecxt_re/src-jvm/dist/Pareto.scala b/vecxt_re/src-jvm/dist/Pareto.scala index a441c54e..7d838082 100644 --- a/vecxt_re/src-jvm/dist/Pareto.scala +++ b/vecxt_re/src-jvm/dist/Pareto.scala @@ -1,9 +1,10 @@ package vecxt_re -import org.apache.commons.statistics.distribution.ParetoDistribution import org.apache.commons.rng.simple.RandomSource -import io.github.quafadas.plots.SetupVega.{*, given} +import org.apache.commons.statistics.distribution.ParetoDistribution + import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} /** Pareto Type I Distribution. 
* diff --git a/vecxt_re/src-jvm/rpt.scala b/vecxt_re/src-jvm/rpt.scala index 6ae24f75..24847600 100644 --- a/vecxt_re/src-jvm/rpt.scala +++ b/vecxt_re/src-jvm/rpt.scala @@ -1,7 +1,7 @@ package vecxt_re +import vecxt.all.* import vecxt_re.Limits.* import vecxt_re.Retentions.* -import vecxt.all.* import jdk.incubator.vector.DoubleVector import jdk.incubator.vector.VectorSpecies diff --git a/vecxt_re/src-native/rpt.scala b/vecxt_re/src-native/rpt.scala index ec06e90e..e4099b38 100644 --- a/vecxt_re/src-native/rpt.scala +++ b/vecxt_re/src-native/rpt.scala @@ -1,8 +1,7 @@ package vecxt_re +import vecxt.all.* import vecxt_re.Limits.Limit import vecxt_re.Retentions.Retention -import vecxt.all.* -import vecxt.all.given /* diff --git a/vecxt_re/src/IndexPerPeriod.scala b/vecxt_re/src/IndexPerPeriod.scala index 894fced3..2e1e2c9f 100644 --- a/vecxt_re/src/IndexPerPeriod.scala +++ b/vecxt_re/src/IndexPerPeriod.scala @@ -1,7 +1,7 @@ package vecxt_re -import vecxt.all.* import vecxt.BoundsCheck.DoBoundsCheck.yes +import vecxt.all.* /** Aims to provide a (very) simple index mapping for period-based models. 
* diff --git a/vecxt_re/src/scenario.scala b/vecxt_re/src/scenario.scala index abcda8fd..915655a7 100644 --- a/vecxt_re/src/scenario.scala +++ b/vecxt_re/src/scenario.scala @@ -1,10 +1,10 @@ package vecxt_re -import vecxt.all.* - import java.time.LocalDate import java.time.temporal.ChronoUnit +import vecxt.all.* + case class Event(eventId: Long = scala.util.Random.nextLong(), iteration: Int = 0, day: Int = 0, loss: Double = 0): def multiplyBy(scale: Double): Event = this.copy(loss = loss * scale) end Event diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala index 7532eb39..53633654 100644 --- a/vecxt_re/src/scenarr.scala +++ b/vecxt_re/src/scenarr.scala @@ -1,13 +1,15 @@ package vecxt_re -import vecxt.all.* -import cats.kernel.Monoid - import java.time.LocalDate -import java.time.temporal.ChronoUnit import java.time.Month +import java.time.temporal.ChronoUnit + import scala.collection.mutable +import vecxt.all.* + +import cats.kernel.Monoid + case class Scenarr( iterations: Array[Int], days: Array[Int], diff --git a/vecxtensions/src-js/mathtags.scala b/vecxtensions/src-js/mathtags.scala deleted file mode 100644 index b85f43ba..00000000 --- a/vecxtensions/src-js/mathtags.scala +++ /dev/null @@ -1,72 +0,0 @@ -package vecxtensions - -import scalatags.Text.all.* // Imports commonly used ScalaTags elements like `Tag`, `attrs`, etc. 
-import scalatags.Text.tags -import vecxt.all.* - -import com.raquo.laminar.api.L.{*, given} -import com.raquo.laminar.tags.* -import com.raquo.laminar.codecs.StringAsIsCodec -import vecxt.BoundsCheck.DoBoundsCheck.no - -object MathTagsLaminar: - - extension (m: Matrix[Double]) - def printMl = - mfenced( - mtable( - for i <- 0 until m.rows - yield mtr( - for j <- 0 until m.cols - yield mtd( - mn(m((j, i))) - ) - ) - ) - ) - end extension - - val xmlns1 = htmlAttr[String]("xmlns", StringAsIsCodec) - val math = htmlTag("math") - // Basic content elements - val mi = CustomHtmlTag("mi") - val mn = CustomHtmlTag("mn") - val mo = CustomHtmlTag("mo") - - val mtext = CustomHtmlTag("mtext") - val mfrac = CustomHtmlTag("mfrac") - val msup = CustomHtmlTag("msup") - val msub = CustomHtmlTag("msub") - val msupsub = CustomHtmlTag("msubsup") - val msqrt = CustomHtmlTag("msqrt") - val mroot = CustomHtmlTag("mroot") - val mfenced = CustomHtmlTag("mfenced") - val menclose = CustomHtmlTag("menclose") - val mtable = CustomHtmlTag("mtable") - val mtr = CustomHtmlTag("mtr") - val mtd = CustomHtmlTag("mtd") - val maligngroup = CustomHtmlTag("maligngroup") - val malignmark = CustomHtmlTag("malignmark") - val mspace = CustomHtmlTag("mspace") - val mrow = CustomHtmlTag("mrow") - val mphantom = CustomHtmlTag("mphantom") - val merror = CustomHtmlTag("merror") - val munderover = CustomHtmlTag("munderover") - val mover = CustomHtmlTag("mover") - val munder = CustomHtmlTag("munder") - val msubsup = CustomHtmlTag("msubsup") - val munder_accent = CustomHtmlTag("munder") - val mover_accent = CustomHtmlTag("mover") - val mmultiscripts = CustomHtmlTag("mmultiscripts") - val mstyle = CustomHtmlTag("mstyle") - val mtag = CustomHtmlTag("mtag") - val mlongdiv = CustomHtmlTag("mlongdiv") - val mprescripts = CustomHtmlTag("mprescripts") - val none = CustomHtmlTag("none") - val semantics = CustomHtmlTag("semantics") - val annotation = CustomHtmlTag("annotation") - val annotation_xml = 
CustomHtmlTag("annotation-xml") - val msum = CustomHtmlTag("msum") - val mprod = CustomHtmlTag("mprod") - val mint = CustomHtmlTag("mint") -end MathTagsLaminar diff --git a/vecxtensions/src-js/matmul.scala b/vecxtensions/src-js/matmul.scala index 27632569..dde848e0 100644 --- a/vecxtensions/src-js/matmul.scala +++ b/vecxtensions/src-js/matmul.scala @@ -1,11 +1,13 @@ package vecxtensions -import spire.implicits.* -import spire.algebra.Ring import scala.reflect.ClassTag + import vecxt.* -import vecxt.all.* import vecxt.BoundsCheck.BoundsCheck +import vecxt.all.* + +import spire.algebra.Ring +import spire.implicits.* object SpireExt: From 668941d7fb1c270a2af4a19a921911dde4cbe256 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Tue, 27 Jan 2026 22:24:38 +0100 Subject: [PATCH 52/75] . --- laws/src/instances/DoubleInstances.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/laws/src/instances/DoubleInstances.scala b/laws/src/instances/DoubleInstances.scala index b926da0b..571e2477 100644 --- a/laws/src/instances/DoubleInstances.scala +++ b/laws/src/instances/DoubleInstances.scala @@ -22,8 +22,7 @@ object double: import vecxt.BoundsCheck.DoBoundsCheck.yes x + y , - inverseFn = (a) => - -a + inverseFn = (a) => -a ) end vectorAdditionGroup From 9b45a5b41e3ec408cf2d2be53988b01590ab840e Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Tue, 27 Jan 2026 22:26:00 +0100 Subject: [PATCH 53/75] . 
--- build.mill | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.mill b/build.mill index 7d7deb03..845483d2 100644 --- a/build.mill +++ b/build.mill @@ -2,7 +2,7 @@ //| mill-jvm-version: 24 //| mill-jvm-opts: [ "--add-modules", "jdk.incubator.vector"] //| mvnDeps: -//| - io.github.quafadas:millSite_mill1_3.8:0.0.56 +//| - io.github.quafadas:millSite_mill1_3.8:0.0.57 //| - com.goyeau:mill-scalafix_mill1_3:0.6.0 //| - com.lihaoyi::mill-contrib-jmh:$MILL_VERSION From 6a0f372d91f3281dfe7268ec557f71f966047bf8 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Wed, 28 Jan 2026 19:33:08 +0100 Subject: [PATCH 54/75] . --- vecxt_re/src-jvm/plots.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 6b183d70..d3e1262b 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -4,9 +4,10 @@ import io.circe.syntax.* import io.github.quafadas.plots.SetupVega.{*, given} object Plots: - lazy val timeline = VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate - lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount - lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density + // These must be private otherwise scaladoc get crazy. + private lazy val timeline = VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate + private lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount + private lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density extension (idx: CalendarYearIndex) def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = From 4444a29504b4a1355eddbe3376de2a8235c2a5f6 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Wed, 28 Jan 2026 19:33:21 +0100 Subject: [PATCH 55/75] . 
--- .vscode/launch.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index 4d5851b5..e3ca2737 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -20,6 +20,16 @@ "args": [], "jvmOptions": [], "env": {} + }, + { + "type": "scala", + "request": "launch", + "name": "pricingFun", + "mainClass": "experiments.pricingFun", + "buildTarget": "file:///Users/simon/Code/vecxt/experiments", + "args": [], + "jvmOptions": [], + "env": {} } ] } \ No newline at end of file From f786575d388a0fd2100dd99526d02edbe5d950f4 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Wed, 28 Jan 2026 20:47:03 +0100 Subject: [PATCH 56/75] . --- .../stemChartWithRollingAverage.vl.json | 108 ++++++++++++++++++ vecxt_re/resources/stemPlot.vl.json | 74 ++++++++++++ vecxt_re/src-jvm/dist/NegativeBinomial.scala | 16 +-- 3 files changed, 191 insertions(+), 7 deletions(-) create mode 100644 vecxt_re/resources/stemChartWithRollingAverage.vl.json create mode 100644 vecxt_re/resources/stemPlot.vl.json diff --git a/vecxt_re/resources/stemChartWithRollingAverage.vl.json b/vecxt_re/resources/stemChartWithRollingAverage.vl.json new file mode 100644 index 00000000..a656fa15 --- /dev/null +++ b/vecxt_re/resources/stemChartWithRollingAverage.vl.json @@ -0,0 +1,108 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Stem plot with daily time unit showing amounts", + "title": "Stem Plot", + "width": "container", + "height": "container", + "datasets": { + "stemData": [ + { "date": "2022-01-01", "amount": 10, "normalisedAmount": 12 }, + { "date": "2023-01-02", "amount": 5, "normalisedAmount": 7 }, + { "date": "2024-01-03", "amount": 15, "normalisedAmount": 20 }, + { "date": "2022-06-04", "amount": 10, "normalisedAmount": 12 }, + { "date": "2023-03-05", "amount": 20, "normalisedAmount": 22 } + ], + "rollingAverageData": [ + { "endYear": "2022", "rollingAverage": 8 }, + { "endYear": "2023", "rollingAverage": 9 }, + { 
"endYear": "2024", "rollingAverage": 10 } + ] + }, + "data": { "name": "stemData" }, + "encoding": { + "x": { + "field": "date", + "type": "temporal", + "timeUnit": "yearmonthdate", + "title": "Date" + } + }, + "layer": [ + { + "mark": { "type": "rule" }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative", + "title": "Amount" + }, + "y2": { "datum": 0 }, + "color": { "datum": "Amount", "legend": { "title": "Series" } } + } + }, + { + "mark": { "type": "rule" }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative" + }, + "y2": { "field": "normalisedAmount" }, + "color": { "datum": "Normalised Amount" } + } + }, + { + "mark": { + "type": "circle", + "size": 100, + "tooltip": { "content": "data" } + }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative" + }, + "color": { "datum": "Amount" } + } + }, + { + "mark": { + "type": "circle", + "size": 100, + "tooltip": { "content": "data" } + }, + "encoding": { + "y": { + "field": "normalisedAmount", + "type": "quantitative" + }, + "color": { "datum": "Normalised Amount" } + } + }, + { + "data": { "name": "rollingAverageData" }, + "mark": { + "type": "line", + "strokeWidth": 2, + "tooltip": { "content": "data" } + }, + "encoding": { + "x": { + "field": "endYear", + "type": "temporal", + "timeUnit": "year" + }, + "y": { + "field": "rollingAverage", + "type": "quantitative" + }, + "color": { "datum": "Rolling Average" } + } + } + ], + "config": { + "range": { + "category": ["steelblue", "orange", "green"] + } + } +} diff --git a/vecxt_re/resources/stemPlot.vl.json b/vecxt_re/resources/stemPlot.vl.json new file mode 100644 index 00000000..7fa3b54d --- /dev/null +++ b/vecxt_re/resources/stemPlot.vl.json @@ -0,0 +1,74 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Stem plot with daily time unit showing amounts", + "title": "Stem Plot", + "width": "container", + "height": "container", + "data": { + "values": [ + { "date": 
"2023-01-01", "amount": 10, "normalisedAmount": 12 }, + { "date": "2023-01-02", "amount": 5, "normalisedAmount": 7 }, + { "date": "2023-01-03", "amount": 15, "normalisedAmount": 20 }, + { "date": "2023-01-04", "amount": 10, "normalisedAmount": 12 }, + { "date": "2023-01-05", "amount": 20, "normalisedAmount": 22 } + ] + }, + "encoding": { + "x": { + "field": "date", + "type": "temporal", + "timeUnit": "yearmonthdate", + "title": "Date" + } + }, + "layer": [ + { + "mark": { "type": "rule", "color": "steelblue" }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative", + "title": "Amount" + }, + "y2": { "datum": 0 } + } + }, + { + "mark": { "type": "rule", "color": "orange" }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative" + }, + "y2": { "field": "normalisedAmount" } + } + }, + { + "mark": { + "type": "circle", + "size": 100, + "color": "steelblue", + "tooltip": true + }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative" + } + } + },{ + "mark": { + "type": "circle", + "size": 100, + "color": "orange", + "tooltip": "datum" + }, + "encoding": { + "y": { + "field": "normalisedAmount", + "type": "quantitative" + } + } + } + ] +} diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index 4ab4ffea..8752d30b 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -6,6 +6,7 @@ import org.apache.commons.statistics.distribution.GammaDistribution import org.apache.commons.statistics.distribution.PoissonDistribution import io.circe.syntax.* +import vecxt.all.* import io.github.quafadas.plots.SetupVega.{*, given} /** Negative Binomial Distribution with alternative parameterization. 
@@ -137,8 +138,11 @@ object NegativeBinomial: require(observations.forall(_ >= 0), "all observations must be non-negative") val n = observations.length.toDouble - val xbar = observations.sum / n - val sumX = observations.sum.toDouble + val (xbar, variance) = observations.meanAndVariance + val sumX = observations.sumSIMD.toDouble + + require(xbar > 0, "mean must be positive for NB fitting") + // Constant term in the log-likelihood: -∑ log Γ(x_i+1) var sumLogFact = 0.0 var _i = 0 @@ -148,7 +152,7 @@ object NegativeBinomial: end while // Profile log-likelihood with b = xbar/a (equivalently p = a/(a+xbar)) - def profileLogLik(a: Double): Double = + inline def profileLogLik(a: Double): Double = if a <= 0 || !a.isFinite then Double.NegativeInfinity else val p = a / (a + xbar) @@ -162,9 +166,7 @@ object NegativeBinomial: end while ll - val variance = observations.map(x => (x - xbar) * (x - xbar)).sum / n - require(xbar > 0, "mean must be positive for NB fitting") // If variance <= mean, data is underdispersed relative to Poisson // In this case, return near-Poisson (small b) @@ -205,11 +207,11 @@ object NegativeBinomial: // Backtracking line search on the profile log-likelihood to improve robustness. val llCur = profileLogLik(a) var step = 1.0 - var aNew = a + step * delta + var aNew = Math.fma(step, delta, a) var llNew = profileLogLik(aNew) while step > 1e-6 && llNew < llCur do step *= 0.5 - aNew = a + step * delta + aNew = Math.fma(step, delta, a) llNew = profileLogLik(aNew) end while From 3c849e5908b8d059da1baf6f4a9a208545b877f2 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Wed, 28 Jan 2026 21:00:05 +0100 Subject: [PATCH 57/75] . 
--- vecxt_re/src-jvm/dist/Dist.scala | 6 ++--- vecxt_re/src-jvm/dist/Empirical.scala | 2 +- vecxt_re/src-jvm/dist/Mixed.scala | 2 +- vecxt_re/src-jvm/dist/NegativeBinomial.scala | 14 ++++++++++ vecxt_re/src-jvm/dist/Pareto.scala | 2 +- vecxt_re/src-jvm/plots.scala | 28 ++++++++++++++++++++ 6 files changed, 48 insertions(+), 6 deletions(-) diff --git a/vecxt_re/src-jvm/dist/Dist.scala b/vecxt_re/src-jvm/dist/Dist.scala index 6d3fd522..fa26382b 100644 --- a/vecxt_re/src-jvm/dist/Dist.scala +++ b/vecxt_re/src-jvm/dist/Dist.scala @@ -30,9 +30,9 @@ trait ContinuousDistr[T] extends Density[T] with Rand[T]: override def logApply(x: T) = unnormalizedLogPdf(x) end ContinuousDistr -trait HasCdf: - def probability(x: Double, y: Double): Double // Probability that P(x < X <= y) - def cdf(x: Double): Double +trait HasCdf[T]: + def probability(x: T, y: T): Double // Probability that P(x < X <= y) + def cdf(x: T): Double // experimental plotting support def plot(using viz.LowPriorityPlotTarget): viz.VizReturn diff --git a/vecxt_re/src-jvm/dist/Empirical.scala b/vecxt_re/src-jvm/dist/Empirical.scala index 72224010..bd59d3ef 100644 --- a/vecxt_re/src-jvm/dist/Empirical.scala +++ b/vecxt_re/src-jvm/dist/Empirical.scala @@ -24,7 +24,7 @@ case class Empirical(values: IArray[Double], weights: IArray[Double]) extends DiscreteDistr[Double] with HasMean[Double] with HasVariance[Double] - with HasCdf + with HasCdf[Double] with HasInverseCdf: require(values.nonEmpty, "values must not be empty") diff --git a/vecxt_re/src-jvm/dist/Mixed.scala b/vecxt_re/src-jvm/dist/Mixed.scala index 3528e023..6f539915 100644 --- a/vecxt_re/src-jvm/dist/Mixed.scala +++ b/vecxt_re/src-jvm/dist/Mixed.scala @@ -33,7 +33,7 @@ case class Mixed(empirical: Empirical, mixingPoint: Double, paretoShape: Double) extends ContinuousDistr[Double] with HasMean[Double] with HasVariance[Double] - with HasCdf + with HasCdf[Double] with HasInverseCdf: require(mixingPoint > 0, "mixing point must be positive") diff --git 
a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index 8752d30b..b8ffbfbb 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -35,6 +35,7 @@ import io.github.quafadas.plots.SetupVega.{*, given} case class NegativeBinomial(a: Double, b: Double) extends DiscreteDistr[Int] with HasMean[Double] + with HasCdf[Int] with HasVariance[Double]: require(a > 0, "a must be positive") require(b > 0, "b must be positive") @@ -77,6 +78,19 @@ case class NegativeBinomial(a: Double, b: Double) def variance: Double = a * b * (1.0 + b) + + override def probability(x: Int, y: Int): Double = + if x >= y then 0.0 + else cdf(y) - cdf(x) + + override def cdf(x: Int): Double = + if x < 0 then 0.0 + else + // CDF of NegBin(r, p) at k = I_p(r, k+1) + // where I_p(a, b) is the regularized incomplete beta function + // For our parameterization: p = 1/(1+b) is the success probability + org.apache.commons.numbers.gamma.RegularizedBeta.value(p, a, x.toDouble + 1.0) + def plot(using viz.LowPriorityPlotTarget) = val linePlot = VegaPlot.fromResource("negBinProb.vl.json") val maxX = (mean + 4 * math.sqrt(variance)).toInt diff --git a/vecxt_re/src-jvm/dist/Pareto.scala b/vecxt_re/src-jvm/dist/Pareto.scala index 7d838082..b99f3555 100644 --- a/vecxt_re/src-jvm/dist/Pareto.scala +++ b/vecxt_re/src-jvm/dist/Pareto.scala @@ -26,7 +26,7 @@ case class Pareto(scale: Double, shape: Double) extends ContinuousDistr[Double] with HasMean[Double] with HasVariance[Double] - with HasCdf + with HasCdf[Double] with HasInverseCdf: require(scale > 0, "scale must be positive") diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index d3e1262b..0408b9b2 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -48,4 +48,32 @@ object Plots: _.data.values := sorted.asJson ) end extension + + // extension (negBin: NegativeBinomial) + // inline def plotPdf(using 
viz.LowPriorityPlotTarget) = + // val numPoints = 1000 + // val maxX = negBin.mean + 4 * math.sqrt(negBin.variance) + // val data = (0 until numPoints).map { i => + // val x = i.toDouble * maxX / numPoints + // (value = x, density = negBin.probabilityOf(x.round.toInt)) + // } + + // distributionDensity.plot( + // _.title(s"Negative Binomial Distribution Density (a=${negBin.a}, b=${negBin.b})"), + // _.data.values := data.asJson + // ) + + // inline def plotCdf(using viz.LowPriorityPlotTarget) = + // val numPoints = 1000 + // val maxX = negBin.mean + 4 * math.sqrt(negBin.variance) + // val data = (0 until numPoints).map { i => + // val x = i.toDouble * maxX / numPoints + // (value = x, density = negBin.cdf(x)) + // } + + // distributionDensity.plot( + // _.title(s"Negative Binomial Distribution CDF (a=${negBin.a}, b=${negBin.b})"), + // _.data.values := data.asJson + // ) + // end extension end Plots From bde22f197d4f3689ab88037159d3ac48d7fca86e Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 09:06:27 +0100 Subject: [PATCH 58/75] poisson --- .../resources/negBinCumul_vsSample.vl.json | 51 ++++ vecxt_re/resources/poissonCumul.vl.json | 18 ++ vecxt_re/resources/poissonProb.vl.json | 18 ++ vecxt_re/src-jvm/dist/Poisson.scala | 247 ++++++++++++++++++ vecxt_re/src-jvm/plots.scala | 23 ++ vecxt_re/test/src-jvm/Poisson.test.scala | 206 +++++++++++++++ 6 files changed, 563 insertions(+) create mode 100644 vecxt_re/resources/negBinCumul_vsSample.vl.json create mode 100644 vecxt_re/resources/poissonCumul.vl.json create mode 100644 vecxt_re/resources/poissonProb.vl.json create mode 100644 vecxt_re/src-jvm/dist/Poisson.scala create mode 100644 vecxt_re/test/src-jvm/Poisson.test.scala diff --git a/vecxt_re/resources/negBinCumul_vsSample.vl.json b/vecxt_re/resources/negBinCumul_vsSample.vl.json new file mode 100644 index 00000000..c5f95805 --- /dev/null +++ b/vecxt_re/resources/negBinCumul_vsSample.vl.json @@ -0,0 +1,51 @@ +{ + "$schema": 
"https://vega.github.io/schema/vega-lite/v6.json", + "description": "Negative Binomial CDF with samples overlaid", + "width": "container", + "height": "container", + "title": "Negative Binomial CDF with Sample Points", + "layer": [ + { + "data": { + "values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.18}, + {"value": 3, "prob": 0.30}, + {"value": 4, "prob": 0.45} + ] + }, + "mark": { + "type": "line", + "interpolate": "step-after", + "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative", "title": "k"}, + "y": {"field": "prob", "type": "quantitative", "title": "CDF"} + } + }, + + { + "data": { + "values": [ + {"value": 1, "prob": 0.125}, + {"value": 2.0, "prob": 0.35}, + {"value": 3, "prob": 0.55}, + {"value": 4, "prob": 0.85} + ] + }, + "mark": { + "type": "point", + "shape": "cross", + "color": "red", + "size": 100, + "strokeWidth": 2 + }, + "encoding": { + "x": {"field": "value", "type": "quantitative", "title": "k"}, + "y": {"field": "prob", "type": "quantitative", "title": "CDF"} + } + } + ] +} diff --git a/vecxt_re/resources/poissonCumul.vl.json b/vecxt_re/resources/poissonCumul.vl.json new file mode 100644 index 00000000..2ea62753 --- /dev/null +++ b/vecxt_re/resources/poissonCumul.vl.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Poisson distribution cumulative probabilities.", + "data": {"values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.28} + ]}, + "mark": { + "type": "line", "tooltip": true, "point": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative"}, + "y": {"field": "prob", "type": "quantitative"} + }, + "width": "container", + "height": "container" +} diff --git a/vecxt_re/resources/poissonProb.vl.json b/vecxt_re/resources/poissonProb.vl.json new file mode 100644 index 00000000..42b12a9e --- /dev/null +++ b/vecxt_re/resources/poissonProb.vl.json @@ 
-0,0 +1,18 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Poisson distribution probabilities.", + "data": {"values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.18} + ]}, + "mark": { + "type": "bar", "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative"}, + "y": {"field": "prob", "type": "quantitative"} + }, + "width": "container", + "height": "container" +} diff --git a/vecxt_re/src-jvm/dist/Poisson.scala b/vecxt_re/src-jvm/dist/Poisson.scala new file mode 100644 index 00000000..3b8cad38 --- /dev/null +++ b/vecxt_re/src-jvm/dist/Poisson.scala @@ -0,0 +1,247 @@ +package vecxt_re + +import org.apache.commons.numbers.gamma.LogGamma +import org.apache.commons.rng.simple.RandomSource +import org.apache.commons.statistics.distribution.PoissonDistribution + +import io.circe.syntax.* +import vecxt.all.* +import io.github.quafadas.plots.SetupVega.{*, given} + +/** Poisson Distribution. + * + * The Poisson distribution models the number of events occurring in a fixed interval of time or space, given that + * these events occur with a known constant mean rate and independently of the time since the last event. + * + * Parameterization: + * - λ (lambda) = mean = variance + * + * PMF: P(X = k) = λ^k * e^(-λ) / k! + * + * The Poisson distribution is a limiting case of the Negative Binomial distribution as the dispersion parameter b → + * 0. 
+ * + * @param lambda + * the rate parameter (must be positive) + */ +case class Poisson(lambda: Double) + extends DiscreteDistr[Int] + with HasMean[Double] + with HasCdf[Int] + with HasVariance[Double]: + require(lambda > 0, "lambda must be positive") + require(lambda.isFinite, "lambda must be finite") + + private val logLambda: Double = math.log(lambda) + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + private val poissonDistribution = PoissonDistribution.of(lambda) + private val poissonSampler = poissonDistribution.createSampler(rng) + + /** Draw a sample from the Poisson distribution */ + inline def draw: Int = poissonSampler.sample() + + /** PMF: P(X = k) = λ^k * e^(-λ) / k! */ + def probabilityOf(x: Int): Double = + if x < 0 then 0.0 + else math.exp(logProbabilityOf(x)) + + /** Log PMF: log P(X = k) = k*log(λ) - λ - log(k!) */ + override def logProbabilityOf(x: Int): Double = + if x < 0 then Double.NegativeInfinity + else x * logLambda - lambda - LogGamma.value(x + 1) + + inline def mean: Double = lambda + + inline def variance: Double = lambda + + override def probability(x: Int, y: Int): Double = + if x >= y then 0.0 + else cdf(y) - cdf(x) + + override def cdf(x: Int): Double = + if x < 0 then 0.0 + else + // CDF using regularized incomplete gamma function + // P(X <= k) = Q(k+1, λ) = Γ(k+1, λ) / Γ(k+1) + // which is the upper regularized gamma function + org.apache.commons.numbers.gamma.RegularizedGamma.Q.value(x.toDouble + 1.0, lambda) + + def plot(using viz.LowPriorityPlotTarget) = + val linePlot = VegaPlot.fromResource("poissonProb.vl.json") + val maxX = (mean + 4 * math.sqrt(variance)).toInt + val data = (0 to maxX).map { k => + (value = k, prob = probabilityOf(k)) + } + linePlot.plot( + _.data.values := data.asJson, + _ += (title = s"Poisson Distribution Marginal Probabilities (λ=$lambda)").asJson + ) + end plot + + def plotCdf(using viz.LowPriorityPlotTarget) = + val linePlot = VegaPlot.fromResource("poissonCumul.vl.json") + val maxX 
= (mean + 4 * math.sqrt(variance)).toInt + var cumProb = 0.0 + val data = (0 to maxX).map { k => + cumProb += probabilityOf(k) + (value = k, prob = cumProb) + } + linePlot.plot( + _.data.values := data.asJson, + _ += (title = s"Poisson Distribution Cumulative Probabilities (λ=$lambda)").asJson + ) + end plotCdf +end Poisson + +object Poisson: + /** Create a Poisson distribution from the mean. + * + * @param mu + * the mean (rate) parameter + * @return + * a Poisson distribution with the given mean + */ + inline def fromMean(mu: Double): Poisson = Poisson(mu) + + /** Maximum likelihood estimation for Poisson parameter. + * + * For Poisson, the MLE of λ is simply the sample mean. This is exact and always converges in one step. + * + * @param observations + * array of non-negative integer observations + * @return + * Named tuple with `dist`: the fitted Poisson distribution, and `converged`: always true for Poisson MLE + */ + def mle(observations: Array[Int]): (dist: Poisson, converged: Boolean) = + require(observations.nonEmpty, "observations must not be empty") + require(observations.forall(_ >= 0), "all observations must be non-negative") + + val lambdaHat = observations.mean + require(lambdaHat > 0, "mean must be positive for Poisson fitting") + + (Poisson(lambdaHat), true) + end mle + + /** Maximum likelihood estimation for volume-adjusted Poisson. + * + * For observations $n_j$ with corresponding volumes $v_j$, the Poisson model assumes $n_j \sim + * \text{Poisson}(\lambda v_j)$. 
+ * + * The MLE for $\lambda$ is: $$ \hat{\lambda} = \frac{\sum_j n_j}{\sum_j v_j} $$ + * + * @param observations + * non-negative counts $n_j$ + * @param volumes + * positive volume ratios $v_j$ (same units as modeled period) + * @return + * tuple of fitted `Poisson(lambda)` and a convergence flag (always true for Poisson) + */ + def volweightedMle( + observations: Array[Int], + volumes: Array[Double] + ): (dist: Poisson, converged: Boolean) = + require(observations.nonEmpty, "observations must not be empty") + require(observations.length == volumes.length, "observations and volumes must have the same length") + require(observations.forall(_ >= 0), "all observations must be non-negative") + require(volumes.forall(v => v > 0 && v.isFinite), "volumes must be positive and finite") + + val sumN = observations.sumSIMD.toDouble + val sumV = volumes.sum + val lambdaHat = sumN / sumV + + require(lambdaHat > 0, "rate must be positive for Poisson fitting") + + (Poisson(lambdaHat), true) + end volweightedMle + + inline def mleVolumeWeighted( + observations: Array[Int], + volumes: Array[Double] + ): (dist: Poisson, converged: Boolean) = volweightedMle(observations, volumes) + + /** Perform a chi-squared goodness-of-fit test to assess whether the observed data follows a Poisson distribution. + * + * Groups observations into bins and computes the chi-squared statistic comparing observed to expected frequencies. 
+ * + * @param observations + * array of non-negative integer observations + * @param lambda + * the Poisson rate parameter (if None, uses MLE from data) + * @param minExpected + * minimum expected frequency per bin (bins are combined to meet this threshold) + * @return + * Named tuple with `statistic`: the chi-squared test statistic, `degreesOfFreedom`: the degrees of freedom, and + * `pValue`: the p-value of the test + */ + def goodnessOfFit( + observations: Array[Int], + lambda: Option[Double] = None, + minExpected: Double = 5.0 + ): (statistic: Double, degreesOfFreedom: Int, pValue: Double) = + require(observations.nonEmpty, "observations must not be empty") + require(minExpected > 0, "minExpected must be positive") + + val n = observations.length.toDouble + val lambdaEst = lambda.getOrElse(observations.sumSIMD.toDouble / n) + val poisson = Poisson(lambdaEst) + + // Find the max observation to determine bin range + var maxObs = observations.maxSIMD + + + // Count observations in each bin + val counts = new Array[Int](maxObs + 2) // +1 for the "maxObs or more" bin + var i = 0 + while i < observations.length do + val obs = observations(i) + if obs >= counts.length - 1 then counts(counts.length - 1) += 1 + else counts(obs) += 1 + i += 1 + end while + + // Compute expected frequencies + val expected = new Array[Double](counts.length) + i = 0 + while i < expected.length - 1 do + expected(i) = n * poisson.probabilityOf(i) + i += 1 + end while + // Last bin is "maxObs or more" + expected(expected.length - 1) = n * (1.0 - poisson.cdf(expected.length - 2)) + + // Combine bins with expected < minExpected + var chiSq = 0.0 + var df = -1 // Start at -1 because we estimated lambda + var obsAccum = 0 + var expAccum = 0.0 + + i = 0 + while i < counts.length do + obsAccum += counts(i) + expAccum += expected(i) + if expAccum >= minExpected then + chiSq += (obsAccum - expAccum) * (obsAccum - expAccum) / expAccum + df += 1 + obsAccum = 0 + expAccum = 0.0 + end if + i += 1 + end 
while + + // Handle remaining accumulated values + if expAccum > 0 then + // Add to previous bin's chi-squared contribution + chiSq += (obsAccum - expAccum) * (obsAccum - expAccum) / expAccum + df += 1 + end if + + // Compute p-value using chi-squared distribution + val pValue = + if df <= 0 then 1.0 + else 1.0 - org.apache.commons.numbers.gamma.RegularizedGamma.P.value(df.toDouble / 2.0, chiSq / 2.0) + + (chiSq, df, pValue) + end goodnessOfFit + +end Poisson diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 0408b9b2..a8bee60b 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -8,6 +8,7 @@ object Plots: private lazy val timeline = VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate private lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount private lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density + private lazy val negBinCdfWSample = VegaPlot.fromResource("negBinCumul_vsSample.vl.json") // value, density extension (idx: CalendarYearIndex) def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = @@ -26,6 +27,28 @@ object Plots: ) end extension + extension (nb: NegativeBinomial) + inline def plotCdfWithSamples(samples: Array[Double])(using viz.LowPriorityPlotTarget) = + val maxX = nb.mean + 4 * math.sqrt(nb.variance) + var cumProb = 0.0 + val data = (0 to (maxX.toInt + 1)).map { k => + cumProb += nb.probabilityOf(k) + (value = k, prob = cumProb) + } + + val sampleData = samples.sorted + val n = sampleData.length + val empiricalProb = sampleData.zipWithIndex.map { case (value, idx) => + (value = value.toInt, prob = (idx + 1).toDouble / n) + } + + negBinCdfWSample.plot( + _.title(s"Negative Binomial Distribution Density (a=${nb.a}, b=${nb.b}) vs Sample Data"), + _.layer._0.data.values := data.asJson, + _.layer._1.data.values := data.asJson + ) + end extension + extension (scenario: 
Scenarr) inline def plotSeasonality(highlight: Option[(year: Int, month: Int)] = None)(using tgt: viz.LowPriorityPlotTarget diff --git a/vecxt_re/test/src-jvm/Poisson.test.scala b/vecxt_re/test/src-jvm/Poisson.test.scala new file mode 100644 index 00000000..f7dfa6d6 --- /dev/null +++ b/vecxt_re/test/src-jvm/Poisson.test.scala @@ -0,0 +1,206 @@ +package vecxt_re + +import munit.FunSuite +import vecxt.all.* +import org.apache.commons.statistics.distribution.PoissonDistribution as ApachePoisson + +class PoissonTest extends FunSuite: + + inline val localTests = true + + test("pmf approximately normalizes") { + val pois = Poisson(lambda = 5.0) + + val mu = pois.mean + val sd = math.sqrt(pois.variance) + val K = (mu + 15 * sd).toInt + + val sum = (0 to K).map(pois.probabilityOf).sum + + assert(math.abs(sum - 1.0) < 1e-8) + } + + test("pmf mean and variance match theory") { + val lambda = 7.5 + val pois = Poisson(lambda) + + val K = 500 + val probs = (0 to K).map(k => pois.probabilityOf(k)) + + val mean = probs.zipWithIndex.map { case (p, k) => p * k }.sum + val varr = probs.zipWithIndex.map { case (p, k) => p * k * k }.sum - mean * mean + + // For Poisson, mean = variance = lambda + assert(math.abs(mean - lambda) < 1e-6) + assert(math.abs(varr - lambda) < 1e-6) + assert(math.abs(pois.mean - lambda) < 1e-10) + assert(math.abs(pois.variance - lambda) < 1e-10) + } + + test("matches Apache Commons Poisson distribution") { + val lambda = 4.0 + val pois = Poisson(lambda) + val apachePois = ApachePoisson.of(lambda) + + assert(pois.probabilityOf(-1) == 0.0) + assert(pois.logProbabilityOf(-1).isNegInfinity) + + (0 to 20).foreach { k => + val diff = math.abs(pois.probabilityOf(k) - apachePois.probability(k)) + assert(diff < 1e-14, s"PMF mismatch at k=$k: ${pois.probabilityOf(k)} vs ${apachePois.probability(k)}") + } + } + + test("cdf matches Apache Commons") { + val lambda = 6.0 + val pois = Poisson(lambda) + val apachePois = ApachePoisson.of(lambda) + + (0 to 25).foreach { k => + 
val diff = math.abs(pois.cdf(k) - apachePois.cumulativeProbability(k)) + assert(diff < 1e-12, s"CDF mismatch at k=$k: ${pois.cdf(k)} vs ${apachePois.cumulativeProbability(k)}") + } + } + + test("probability(x, y) equals cdf(y) - cdf(x)") { + val pois = Poisson(5.0) + + for + x <- 0 to 10 + y <- (x + 1) to 15 + do + val expected = pois.cdf(y) - pois.cdf(x) + val actual = pois.probability(x, y) + assert(math.abs(actual - expected) < 1e-14) + } + + test("small lambda works correctly") { + val pois = Poisson(0.1) + + val K = 50 + val sum = (0 to K).map(pois.probabilityOf).sum + assert(math.abs(sum - 1.0) < 1e-10) + + // P(X=0) = e^(-0.1) ≈ 0.9048 + assert(math.abs(pois.probabilityOf(0) - math.exp(-0.1)) < 1e-14) + } + + test("large lambda works correctly") { + val pois = Poisson(100.0) + + // For large lambda, distribution is approximately normal with mean=variance=lambda + val K = 250 + val probs = (0 to K).map(k => pois.probabilityOf(k)) + val sum = probs.sum + + assert(math.abs(sum - 1.0) < 1e-6) + + val mean = probs.zipWithIndex.map { case (p, k) => p * k }.sum + assert(math.abs(mean - 100.0) < 1e-4) + } + + // Ignored in CI as slow + test("SLOW: sampling mean and variance") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.PoissonTest.sampling mean and variance IN CI========") + val pois = Poisson(8.0) + val n = 2_000_000 + + val xs = Array.fill(n)(pois.draw.toDouble) + + val mean = xs.sum / n + val varr = xs.map(x => (x - mean) * (x - mean)).sum / n + + assert(math.abs(mean - pois.mean) < 5e-3) + assert(math.abs(varr - pois.variance) < 5e-2) + } + + test("SLOW: sampling distribution matches pmf") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.PoissonTest.sampling distribution matches pmf IN CI========") + val pois = Poisson(4.0) + val n = 500_000 + + val samples = Array.fill(n)(pois.draw) + val counts = 
samples.groupBy(identity).view.mapValues(_.size).toMap + + (0 to 15).foreach { k => + val empirical = counts.getOrElse(k, 0).toDouble / n + val theoretical = pois.probabilityOf(k) + val diff = math.abs(empirical - theoretical) + assert(diff < 0.01, s"At k=$k: empirical=$empirical, theoretical=$theoretical, diff=$diff") + } + } + + test("SLOW: MLE recovers true parameter") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF MLE recovers true parameter distribution matches pmf IN CI========") + + val trueLambda = 6.0 + val apachePois = ApachePoisson.of(trueLambda) + val sampler = apachePois.createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + + val data = Array.fill(10_000)(sampler.sample()) + val (fitted, converged) = Poisson.mle(data) + + assert(converged) + assert(math.abs(fitted.lambda - trueLambda) < 0.1, s"Fitted lambda=${fitted.lambda}, true=$trueLambda") + } + + test("SLOW: MLE volume-weighted with uniform volumes equals regular MLE") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF MLE volume-weighted with uniform volumes equals regular MLE IN CI========") + + + val trueLambda = 5.0 + val apachePois = ApachePoisson.of(trueLambda) + val sampler = apachePois.createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + + val data = Array.fill(10_000)(sampler.sample()) + val uniformVolumes = Array.fill(10_000)(1.0) + + val (fitted, converged) = Poisson.mle(data) + val (fittedVol, convergedVol) = Poisson.volweightedMle(data, uniformVolumes) + + assert(converged) + assert(convergedVol) + assert(math.abs(fitted.lambda - fittedVol.lambda) < 1e-10) + } + + test("SLOW: volume-weighted MLE correctly adjusts for volumes") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF volume-weighted MLE correctly adjusts 
for volumes IN CI========") + // If we have counts n_j from volumes v_j, the rate lambda should be sum(n_j) / sum(v_j) + val observations = Array(10, 20, 15, 25) + val volumes = Array(2.0, 4.0, 3.0, 5.0) + + val expectedLambda = observations.sum.toDouble / volumes.sum // = 70 / 14 = 5.0 + val (fitted, converged) = Poisson.volweightedMle(observations, volumes) + + assert(converged) + assert(math.abs(fitted.lambda - expectedLambda) < 1e-10) + } + + test("SLOW: goodness-of-fit test accepts Poisson data") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF goodness-of-fit test accepts Poisson data IN CI========") + val trueLambda = 5.0 + val apachePois = ApachePoisson.of(trueLambda) + val sampler = apachePois.createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + + val data = Array.fill(1000)(sampler.sample()) + val (statistic, df, pValue) = Poisson.goodnessOfFit(data) + + // With Poisson data, we should not reject at α=0.05 + assert(pValue > 0.01, s"p-value=$pValue is suspiciously low for Poisson data") + } + + test("fromMean creates distribution with correct lambda") { + val mu = 7.5 + val pois = Poisson.fromMean(mu) + assert(pois.lambda == mu) + assert(pois.mean == mu) + assert(pois.variance == mu) + } + +end PoissonTest From aac13fbc8b453841ba07e58522c7ae49476a9f5e Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 09:07:32 +0100 Subject: [PATCH 59/75] . 
--- vecxt_re/src-jvm/plots.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index a8bee60b..115c4492 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -43,9 +43,9 @@ object Plots: } negBinCdfWSample.plot( - _.title(s"Negative Binomial Distribution Density (a=${nb.a}, b=${nb.b}) vs Sample Data"), + _.title(s"Negative Binomial CDF (a=${nb.a}, b=${nb.b}) vs Sample Data"), _.layer._0.data.values := data.asJson, - _.layer._1.data.values := data.asJson + _.layer._1.data.values := empiricalProb.asJson ) end extension From 4b873f5f28fe490b1da368dd4d20be08c75b9050 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 09:09:39 +0100 Subject: [PATCH 60/75] . --- vecxt_re/src/all.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vecxt_re/src/all.scala b/vecxt_re/src/all.scala index f0361aaf..dec3af34 100644 --- a/vecxt_re/src/all.scala +++ b/vecxt_re/src/all.scala @@ -10,4 +10,8 @@ object all: export vecxt_re.ReReporting.* export vecxt_re.IndexPerPeriod export vecxt_re.CalendarYearIndex + export vecxt_re.NegativeBinomial + export vecxt_re.Poisson + export vecxt_re.Empirical + export vecxt_re.Pareto end all From 731d737a869729ba9a433605ea585c9d4bc2ee20 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 09:12:22 +0100 Subject: [PATCH 61/75] . 
--- vecxt_re/src-jvm/plots.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 115c4492..503fb826 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -28,7 +28,7 @@ object Plots: end extension extension (nb: NegativeBinomial) - inline def plotCdfWithSamples(samples: Array[Double])(using viz.LowPriorityPlotTarget) = + inline def plotCdfWithSamples(samples: IndexedSeq[Double])(using viz.LowPriorityPlotTarget) = val maxX = nb.mean + 4 * math.sqrt(nb.variance) var cumProb = 0.0 val data = (0 to (maxX.toInt + 1)).map { k => From 0c5bdd61d4ee97e7201d4896eced3dfd838999b6 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 09:13:19 +0100 Subject: [PATCH 62/75] . --- vecxt_re/src-jvm/plots.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 503fb826..8f068147 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -28,7 +28,7 @@ object Plots: end extension extension (nb: NegativeBinomial) - inline def plotCdfWithSamples(samples: IndexedSeq[Double])(using viz.LowPriorityPlotTarget) = + inline def plotCdfWithSamples(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = val maxX = nb.mean + 4 * math.sqrt(nb.variance) var cumProb = 0.0 val data = (0 to (maxX.toInt + 1)).map { k => From 0fb4ed7074fb413ee43a6d71fa1e315ac44e3a37 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 11:41:39 +0100 Subject: [PATCH 63/75] stats --- vecxt_re/resources/ecdfVsCdf.vl.json | 54 ++ vecxt_re/resources/pearsonResiduals.vl.json | 43 ++ vecxt_re/resources/poissonTrend.vl.json | 110 ++++ vecxt_re/resources/rootogram.vl.json | 38 ++ vecxt_re/src-jvm/TrendAnalysis.scala | 482 ++++++++++++++++++ vecxt_re/src-jvm/dist/NegativeBinomial.scala | 28 +- vecxt_re/src-jvm/plots.scala | 289 ++++++++++- vecxt_re/src/all.scala | 2 + 
vecxt_re/test/src-jvm/NegBin.test.scala | 66 ++- vecxt_re/test/src-jvm/Poisson.test.scala | 2 +- .../test/src-jvm/TrendAnalysis.test.scala | 181 +++++++ 11 files changed, 1255 insertions(+), 40 deletions(-) create mode 100644 vecxt_re/resources/ecdfVsCdf.vl.json create mode 100644 vecxt_re/resources/pearsonResiduals.vl.json create mode 100644 vecxt_re/resources/poissonTrend.vl.json create mode 100644 vecxt_re/resources/rootogram.vl.json create mode 100644 vecxt_re/src-jvm/TrendAnalysis.scala create mode 100644 vecxt_re/test/src-jvm/TrendAnalysis.test.scala diff --git a/vecxt_re/resources/ecdfVsCdf.vl.json b/vecxt_re/resources/ecdfVsCdf.vl.json new file mode 100644 index 00000000..50fafe39 --- /dev/null +++ b/vecxt_re/resources/ecdfVsCdf.vl.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "ECDF vs Theoretical CDF comparison for count data", + "width": "container", + "height": "container", + "title": "ECDF vs Theoretical CDF", + "layer": [ + { + "data": { + "values": [ + {"value": 0, "prob": 0.1}, + {"value": 1, "prob": 0.3}, + {"value": 2, "prob": 0.6}, + {"value": 3, "prob": 0.85}, + {"value": 4, "prob": 0.95} + ] + }, + "mark": { + "type": "line", + "interpolate": "step-after", + "strokeWidth": 2, + "color": "steelblue", + "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative", "title": "k"}, + "y": {"field": "prob", "type": "quantitative", "title": "Cumulative Probability"} + } + }, + { + "data": { + "values": [ + {"value": 0, "prob": 0.12}, + {"value": 1, "prob": 0.32}, + {"value": 2, "prob": 0.58}, + {"value": 3, "prob": 0.82}, + {"value": 4, "prob": 0.97} + ] + }, + "mark": { + "type": "line", + "interpolate": "step-after", + "strokeWidth": 2, + "strokeDash": [4, 4], + "color": "orange", + "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative", "title": "k"}, + "y": {"field": "prob", "type": "quantitative", "title": "Cumulative Probability"} + 
} + } + ] +} diff --git a/vecxt_re/resources/pearsonResiduals.vl.json b/vecxt_re/resources/pearsonResiduals.vl.json new file mode 100644 index 00000000..666570fa --- /dev/null +++ b/vecxt_re/resources/pearsonResiduals.vl.json @@ -0,0 +1,43 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Pearson residuals plot for count data", + "width": "container", + "height": "container", + "title": "Pearson Residuals", + "data": { + "values": [ + {"k": 0, "residual": 0.5}, + {"k": 1, "residual": -1.2}, + {"k": 2, "residual": 0.8}, + {"k": 3, "residual": -0.3} + ] + }, + "layer": [ + { + "mark": {"type": "bar", "color": "steelblue"}, + "encoding": { + "x": {"field": "k", "type": "ordinal", "title": "Count (k)"}, + "y": {"field": "residual", "type": "quantitative", "title": "Pearson Residual"}, + "color": { + "condition": { + "test": "abs(datum.residual) > 2", + "value": "red" + }, + "value": "steelblue" + } + } + }, + { + "mark": {"type": "rule", "color": "black", "strokeDash": [4, 4]}, + "encoding": {"y": {"datum": 2}} + }, + { + "mark": {"type": "rule", "color": "black", "strokeDash": [4, 4]}, + "encoding": {"y": {"datum": -2}} + }, + { + "mark": {"type": "rule", "color": "gray"}, + "encoding": {"y": {"datum": 0}} + } + ] +} diff --git a/vecxt_re/resources/poissonTrend.vl.json b/vecxt_re/resources/poissonTrend.vl.json new file mode 100644 index 00000000..5ad89210 --- /dev/null +++ b/vecxt_re/resources/poissonTrend.vl.json @@ -0,0 +1,110 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Poisson GLM trend with confidence intervals", + "width": "container", + "height": "container", + "title": "Frequency Trend", + "layer": [ + { + "data": { + "values": [{"year": 2000, "lower": 0.3, "upper": 1.2}] + }, + "transform": [{"calculate": "'CI @ 95%'", "as": "legend"}], + "mark": { + "type": "area", + "opacity": 0.15 + }, + "encoding": { + "x": {"field": "year", "type": "quantitative", "title": "Year", "scale": 
{"zero": false}}, + "y": {"field": "lower", "type": "quantitative", "title": "Count"}, + "y2": {"field": "upper"}, + "color": { + "field": "legend", + "type": "nominal", + "scale": {"range": ["red"]}, + "legend": {"symbolType": "square", "symbolOpacity": 0.15, "title": null} + } + } + }, + { + "data": { + "values": [{"year": 2000, "lower": 0.3}] + }, + "mark": { + "type": "line", + "strokeWidth": 2, + "strokeDash": [6, 4], + "color": "red" + }, + "encoding": { + "x": {"field": "year", "type": "quantitative"}, + "y": {"field": "lower", "type": "quantitative"} + } + }, + { + "data": { + "values": [{"year": 2000, "upper": 1.2}] + }, + "mark": { + "type": "line", + "strokeWidth": 2, + "strokeDash": [6, 4], + "color": "red" + }, + "encoding": { + "x": {"field": "year", "type": "quantitative"}, + "y": {"field": "upper", "type": "quantitative", "title": "Count"} + } + }, + { + "data": { + "values": [{"year": 2000, "fit": 0.7}] + }, + "transform": [{"calculate": "'Fit'", "as": "legend"}], + "mark": { + "type": "line", + "strokeWidth": 2.5, + "tooltip": true + }, + "encoding": { + "x": {"field": "year", "type": "quantitative"}, + "y": {"field": "fit", "type": "quantitative"}, + "stroke": { + "field": "legend", + "type": "nominal", + "scale": {"range": ["red"]}, + "legend": {"symbolType": "stroke", "symbolStrokeWidth": 2.5, "title": null} + } + } + }, + { + "data": { + "values": [{"year": 2000, "count": 1}] + }, + "transform": [{"calculate": "'Observation'", "as": "legend"}], + "mark": { + "type": "point", + "shape": "cross", + "size": 80, + "strokeWidth": 2, + "tooltip": true + }, + "encoding": { + "x": {"field": "year", "type": "quantitative"}, + "y": {"field": "count", "type": "quantitative"}, + "stroke": { + "field": "legend", + "type": "nominal", + "scale": {"range": ["steelblue"]}, + "legend": {"symbolType": "cross", "symbolStrokeWidth": 2, "title": null} + } + } + } + ], + "config": { + "legend": { + "orient": "top-right", + "offset": 5 + } + } +} diff --git 
a/vecxt_re/resources/rootogram.vl.json b/vecxt_re/resources/rootogram.vl.json new file mode 100644 index 00000000..e290d945 --- /dev/null +++ b/vecxt_re/resources/rootogram.vl.json @@ -0,0 +1,38 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Hanging rootogram for count data diagnostics", + "width": "container", + "height": "container", + "title": "Hanging Rootogram", + "data": { + "values": [ + {"k": 0, "sqrtExpected": 2.0, "sqrtObserved": 1.8, "hanging": 0.2}, + {"k": 1, "sqrtExpected": 3.0, "sqrtObserved": 3.2, "hanging": -0.2}, + {"k": 2, "sqrtExpected": 2.5, "sqrtObserved": 2.3, "hanging": 0.2} + ] + }, + "encoding": { + "x": {"field": "k", "type": "ordinal", "title": "Count (k)"} + }, + "layer": [ + { + "mark": {"type": "bar", "color": "steelblue", "opacity": 0.7}, + "encoding": { + "y": {"field": "sqrtExpected", "type": "quantitative", "title": "√Frequency"}, + "y2": {"field": "hanging", "type": "quantitative"} + } + }, + { + "mark": {"type": "line", "color": "red", "strokeWidth": 2, "point": true}, + "encoding": { + "y": {"field": "sqrtExpected", "type": "quantitative"} + } + }, + { + "mark": {"type": "rule", "color": "black", "strokeDash": [4, 4]}, + "encoding": { + "y": {"datum": 0} + } + } + ] +} diff --git a/vecxt_re/src-jvm/TrendAnalysis.scala b/vecxt_re/src-jvm/TrendAnalysis.scala new file mode 100644 index 00000000..1ceb2d0a --- /dev/null +++ b/vecxt_re/src-jvm/TrendAnalysis.scala @@ -0,0 +1,482 @@ +package vecxt_re + +import org.apache.commons.math3.special.Gamma.logGamma + +/** Result of fitting a GLM trend model: log(μ) = β₀ + β₁·year + * + * Contains coefficient estimates, standard errors, test statistics, and goodness-of-fit measures. 
+ * + * @param nObs + * Number of observations + * @param dfResidual + * Residual degrees of freedom (n - 2) + * @param intercept + * Estimated intercept (β₀) + * @param slope + * Estimated year coefficient (β₁) + * @param seIntercept + * Standard error of intercept + * @param seSlope + * Standard error of slope + * @param zIntercept + * z-statistic for intercept (β₀ / SE(β₀)) + * @param zSlope + * z-statistic for slope (β₁ / SE(β₁)) + * @param pValueIntercept + * Two-tailed p-value for intercept (H₀: β₀ = 0) + * @param pValueSlope + * Two-tailed p-value for slope (H₀: β₁ = 0) - this tests for significant trend + * @param nullDeviance + * Deviance of intercept-only model + * @param residualDeviance + * Deviance of full model + * @param dispersion + * Estimated dispersion parameter (1.0 for Poisson, estimated for NegBin) + * @param fStatistic + * F-statistic for model vs intercept-only (using dispersion) + * @param fPValue + * p-value for F-statistic + * @param aic + * Akaike Information Criterion + * @param logLikelihood + * Log-likelihood of the fitted model + */ +case class TrendFitResult( + nObs: Int, + dfResidual: Int, + intercept: Double, + slope: Double, + seIntercept: Double, + seSlope: Double, + zIntercept: Double, + zSlope: Double, + pValueIntercept: Double, + pValueSlope: Double, + nullDeviance: Double, + residualDeviance: Double, + dispersion: Double, + fStatistic: Double, + fPValue: Double, + aic: Double, + logLikelihood: Double +): + + /** Test whether there is a statistically significant trend at the given alpha level */ + def hasSignificantTrend(alpha: Double = 0.05): Boolean = pValueSlope < alpha + + /** Nicely formatted summary string, similar to R's glm summary output */ + def summary: String = + val sb = new StringBuilder + sb.append("Generalized Linear Model: log(Count) ~ 1 + Year\n") + sb.append("=" * 60 + "\n\n") + + sb.append("Coefficients:\n") + sb.append(f"${""}%-15s ${"Estimate"}%12s ${"Std. 
Error"}%12s ${"z value"}%10s ${"Pr(>|z|)"}%12s\n") + sb.append("-" * 60 + "\n") + sb.append( + f"(Intercept)${" "}%-4s $intercept%12.5f $seIntercept%12.5f $zIntercept%10.3f $pValueIntercept%12.6f${significanceCode(pValueIntercept)}%s\n" + ) + sb.append( + f"Year${" "}%-11s $slope%12.7f $seSlope%12.7f $zSlope%10.3f $pValueSlope%12.6f${significanceCode(pValueSlope)}%s\n" + ) + sb.append("-" * 60 + "\n") + sb.append("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n\n") + + sb.append(f"$nObs observations, $dfResidual residual degrees of freedom\n") + sb.append(f"Estimated Dispersion: $dispersion%.3f\n") + sb.append(f"Null Deviance: $nullDeviance%.3f\n") + sb.append(f"Residual Deviance: $residualDeviance%.3f\n") + sb.append(f"AIC: $aic%.3f\n") + sb.append(f"Log-Likelihood: $logLikelihood%.3f\n\n") + + sb.append(f"F-statistic vs. constant model: $fStatistic%.3f, p-value = $fPValue%.6f\n") + + sb.toString + + private def significanceCode(p: Double): String = + if p < 0.001 then " ***" + else if p < 0.01 then " **" + else if p < 0.05 then " *" + else if p < 0.1 then " ." 
+ else "" +end TrendFitResult + +object TrendAnalysis: + + /** Two-tailed p-value from z-statistic using normal approximation */ + private def pValueFromZ(z: Double): Double = + if z.isNaN || z.isInfinite then Double.NaN + else 2.0 * (1.0 - normalCdf(math.abs(z))) + + /** Standard normal CDF approximation (Abramowitz & Stegun) */ + private def normalCdf(x: Double): Double = + val a1 = 0.254829592 + val a2 = -0.284496736 + val a3 = 1.421413741 + val a4 = -1.453152027 + val a5 = 1.061405429 + val p = 0.3275911 + val sign = if x < 0 then -1 else 1 + val absX = math.abs(x) + val t = 1.0 / (1.0 + p * absX) + val y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * math.exp(-absX * absX / 2) + 0.5 * (1.0 + sign * y) + + /** F-distribution CDF approximation for p-value calculation */ + private def fDistPValue(f: Double, df1: Int, df2: Int): Double = + if f <= 0 || df1 <= 0 || df2 <= 0 then 1.0 + else + // Use beta distribution relationship: F ~ Beta(df1/2, df2/2) transform + val x = df2.toDouble / (df2 + df1 * f) + 1.0 - incompleteBeta(df2 / 2.0, df1 / 2.0, x) + + /** Incomplete beta function approximation using continued fraction */ + private def incompleteBeta(a: Double, b: Double, x: Double): Double = + if x <= 0 then 0.0 + else if x >= 1 then 1.0 + else + val bt = + if x == 0 || x == 1 then 0.0 + else + math.exp( + logGamma(a + b) - logGamma(a) - logGamma(b) + + a * math.log(x) + b * math.log(1 - x) + ) + if x < (a + 1) / (a + b + 2) then bt * betaCF(a, b, x) / a + else 1.0 - bt * betaCF(b, a, 1 - x) / b + + /** Continued fraction for incomplete beta */ + private def betaCF(a: Double, b: Double, x: Double): Double = + val maxIter = 100 + val eps = 1e-10 + var c = 1.0 + var d = 1.0 / math.max(1.0 - (a + b) * x / (a + 1), eps) + var h = d + var m = 1 + while m <= maxIter do + val m2 = 2 * m + var aa = m * (b - m) * x / ((a + m2 - 1) * (a + m2)) + d = 1.0 / math.max(1.0 + aa * d, eps) + c = math.max(1.0 + aa / c, eps) + h *= d * c + aa = -(a + m) * (a + b + 
m) * x / ((a + m2) * (a + m2 + 1)) + d = 1.0 / math.max(1.0 + aa * d, eps) + c = math.max(1.0 + aa / c, eps) + val del = d * c + h *= del + if math.abs(del - 1.0) < eps then return h + m += 1 + h + + extension (p: Poisson) + /** Fit a Poisson GLM trend model: log(μ) = β₀ + β₁·year + * + * Uses IRLS to fit the model and computes test statistics for assessing whether there is a statistically + * significant trend over time. + * + * @param years + * the year for each observation + * @param counts + * the count for each observation (same length as years) + * @return + * TrendFitResult containing coefficients, standard errors, p-values, and goodness-of-fit statistics + */ + def fitTrend(years: IndexedSeq[Int], counts: IndexedSeq[Int]): TrendFitResult = + require(years.length == counts.length, "years and counts must have the same length") + require(years.length >= 3, "need at least 3 observations to fit a trend") + + val n = years.length + val yearsD = years.map(_.toDouble) + val countsD = counts.map(_.toDouble) + + // Fit full model: log(μ) = β₀ + β₁·year via IRLS + val meanY = countsD.sum / n + var beta0 = math.log(math.max(meanY, 0.1)) + var beta1 = 0.0 + + for _ <- 0 until 25 do + val mu = yearsD.map(y => math.exp(beta0 + beta1 * y)) + val z = (0 until n).map { i => + val eta = beta0 + beta1 * yearsD(i) + eta + (countsD(i) - mu(i)) / math.max(mu(i), 1e-10) + } + val w = mu.map(m => math.max(m, 1e-10)) + + var xtwx00, xtwx01, xtwx11 = 0.0 + var xtwz0, xtwz1 = 0.0 + var i = 0 + while i < n do + val wi = w(i) + val yi = yearsD(i) + val zi = z(i) + xtwx00 += wi + xtwx01 += wi * yi + xtwx11 += wi * yi * yi + xtwz0 += wi * zi + xtwz1 += wi * yi * zi + i += 1 + end while + + val det = xtwx00 * xtwx11 - xtwx01 * xtwx01 + if math.abs(det) > 1e-15 then + beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det + beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + + // Fit null model: log(μ) = β₀ only + val nullBeta0 = math.log(meanY) + val muNull = Array.fill(n)(meanY) + + // Compute 
deviances + // Poisson deviance: 2 * Σ[yᵢ·log(yᵢ/μᵢ) - (yᵢ - μᵢ)] + def poissonDeviance(observed: IndexedSeq[Double], fitted: IndexedSeq[Double]): Double = + var dev = 0.0 + var i = 0 + while i < n do + val y = observed(i) + val mu = fitted(i) + if y > 0 then dev += y * math.log(y / mu) + dev -= (y - mu) + i += 1 + end while + 2.0 * dev + + val muFull = yearsD.map(y => math.exp(beta0 + beta1 * y)) + val nullDeviance = poissonDeviance(countsD, muNull.toIndexedSeq) + val residualDeviance = poissonDeviance(countsD, muFull) + + // Fisher information and standard errors + var i00, i01, i11 = 0.0 + var j = 0 + while j < n do + val mi = muFull(j) + val yi = yearsD(j) + i00 += mi + i01 += mi * yi + i11 += mi * yi * yi + j += 1 + end while + + val detI = i00 * i11 - i01 * i01 + val seBeta0 = if detI > 1e-15 then math.sqrt(i11 / detI) else Double.NaN + val seBeta1 = if detI > 1e-15 then math.sqrt(i00 / detI) else Double.NaN + + // z-statistics and p-values + val zBeta0 = beta0 / seBeta0 + val zBeta1 = beta1 / seBeta1 + val pBeta0 = pValueFromZ(zBeta0) + val pBeta1 = pValueFromZ(zBeta1) + + // For Poisson, dispersion = 1 by assumption + val dispersion = 1.0 + + // Pearson dispersion estimate (for diagnostics) + var pearsonChi2 = 0.0 + var k = 0 + while k < n do + val y = countsD(k) + val mu = muFull(k) + pearsonChi2 += (y - mu) * (y - mu) / math.max(mu, 1e-10) + k += 1 + end while + val estimatedDispersion = pearsonChi2 / (n - 2) + + // F-statistic: (null deviance - residual deviance) / dispersion + val fStat = (nullDeviance - residualDeviance) / dispersion + val fPVal = fDistPValue(fStat, 1, n - 2) + + // Log-likelihood + def poissonLogLik(observed: IndexedSeq[Double], fitted: IndexedSeq[Double]): Double = + var ll = 0.0 + var i = 0 + while i < n do + val y = observed(i).toInt + val mu = fitted(i) + ll += y * math.log(mu) - mu - logGamma(y + 1) + i += 1 + end while + ll + + val logLik = poissonLogLik(countsD, muFull) + val aic = -2 * logLik + 2 * 2 // 2 parameters + + 
TrendFitResult( + nObs = n, + dfResidual = n - 2, + intercept = beta0, + slope = beta1, + seIntercept = seBeta0, + seSlope = seBeta1, + zIntercept = zBeta0, + zSlope = zBeta1, + pValueIntercept = pBeta0, + pValueSlope = pBeta1, + nullDeviance = nullDeviance, + residualDeviance = residualDeviance, + dispersion = estimatedDispersion, + fStatistic = fStat, + fPValue = fPVal, + aic = aic, + logLikelihood = logLik + ) + end extension + + extension (nb: NegativeBinomial) + /** Fit a Negative Binomial GLM trend model: log(μ) = β₀ + β₁·year + * + * Uses IRLS with the NB2 variance function (Var = μ + μ²/θ where θ = a). This accounts for overdispersion in + * count data. + * + * @param years + * the year for each observation + * @param counts + * the count for each observation (same length as years) + * @return + * TrendFitResult containing coefficients, standard errors, p-values, and goodness-of-fit statistics + */ + def fitTrend(years: IndexedSeq[Int], counts: IndexedSeq[Int]): TrendFitResult = + require(years.length == counts.length, "years and counts must have the same length") + require(years.length >= 3, "need at least 3 observations to fit a trend") + + val n = years.length + val yearsD = years.map(_.toDouble) + val countsD = counts.map(_.toDouble) + val theta = nb.a // overdispersion parameter + + // Fit full model via IRLS with NB variance function + val meanY = countsD.sum / n + var beta0 = math.log(math.max(meanY, 0.1)) + var beta1 = 0.0 + + for _ <- 0 until 25 do + val mu = yearsD.map(y => math.exp(beta0 + beta1 * y)) + + // NB2 variance: Var = μ + μ²/θ, so weight = μ / (1 + μ/θ) + val w = mu.map { m => + val v = m + m * m / theta + math.max(m * m / v, 1e-10) + } + + val z = (0 until n).map { i => + val eta = beta0 + beta1 * yearsD(i) + eta + (countsD(i) - mu(i)) / math.max(mu(i), 1e-10) + } + + var xtwx00, xtwx01, xtwx11 = 0.0 + var xtwz0, xtwz1 = 0.0 + var i = 0 + while i < n do + val wi = w(i) + val yi = yearsD(i) + val zi = z(i) + xtwx00 += wi + xtwx01 += wi 
* yi + xtwx11 += wi * yi * yi + xtwz0 += wi * zi + xtwz1 += wi * yi * zi + i += 1 + end while + + val det = xtwx00 * xtwx11 - xtwx01 * xtwx01 + if math.abs(det) > 1e-15 then + beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det + beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + + // Null model + val nullBeta0 = math.log(meanY) + + // Negative binomial deviance: 2 * Σ[yᵢ·log(yᵢ/μᵢ) - (yᵢ + θ)·log((yᵢ + θ)/(μᵢ + θ))] + def nbDeviance(observed: IndexedSeq[Double], fitted: IndexedSeq[Double]): Double = + var dev = 0.0 + var i = 0 + while i < n do + val y = observed(i) + val mu = fitted(i) + if y > 0 then dev += y * math.log(y / mu) + dev -= (y + theta) * math.log((y + theta) / (mu + theta)) + i += 1 + end while + 2.0 * dev + + val muFull = yearsD.map(y => math.exp(beta0 + beta1 * y)) + val muNull = IndexedSeq.fill(n)(meanY) + val nullDeviance = nbDeviance(countsD, muNull) + val residualDeviance = nbDeviance(countsD, muFull) + + // Fisher information with NB variance + var i00, i01, i11 = 0.0 + var j = 0 + while j < n do + val mi = muFull(j) + val yi = yearsD(j) + val wi = mi * mi / (mi + mi * mi / theta) + i00 += wi + i01 += wi * yi + i11 += wi * yi * yi + j += 1 + end while + + val detI = i00 * i11 - i01 * i01 + val seBeta0 = if detI > 1e-15 then math.sqrt(i11 / detI) else Double.NaN + val seBeta1 = if detI > 1e-15 then math.sqrt(i00 / detI) else Double.NaN + + val zBeta0 = beta0 / seBeta0 + val zBeta1 = beta1 / seBeta1 + val pBeta0 = pValueFromZ(zBeta0) + val pBeta1 = pValueFromZ(zBeta1) + + // Estimated dispersion (Pearson) + var pearsonChi2 = 0.0 + var k = 0 + while k < n do + val y = countsD(k) + val mu = muFull(k) + val v = mu + mu * mu / theta + pearsonChi2 += (y - mu) * (y - mu) / v + k += 1 + end while + val dispersion = pearsonChi2 / (n - 2) + + // F-statistic + val fStat = (nullDeviance - residualDeviance) / dispersion + val fPVal = fDistPValue(fStat, 1, n - 2) + + // NB log-likelihood + def nbLogLik(observed: IndexedSeq[Double], fitted: IndexedSeq[Double]): 
Double = + var ll = 0.0 + var i = 0 + while i < n do + val y = observed(i).toInt + val mu = fitted(i) + // log P(Y=y) = log Γ(y+θ) - log Γ(θ) - log(y!) + θ·log(θ/(θ+μ)) + y·log(μ/(θ+μ)) + ll += logGamma(y + theta) - logGamma(theta) - logGamma(y + 1) + ll += theta * math.log(theta / (theta + mu)) + ll += y * math.log(mu / (theta + mu)) + i += 1 + end while + ll + + val logLik = nbLogLik(countsD, muFull) + val aic = -2 * logLik + 2 * 2 // 2 parameters (not counting θ as estimated here) + + TrendFitResult( + nObs = n, + dfResidual = n - 2, + intercept = beta0, + slope = beta1, + seIntercept = seBeta0, + seSlope = seBeta1, + zIntercept = zBeta0, + zSlope = zBeta1, + pValueIntercept = pBeta0, + pValueSlope = pBeta1, + nullDeviance = nullDeviance, + residualDeviance = residualDeviance, + dispersion = dispersion, + fStatistic = fStat, + fPValue = fPVal, + aic = aic, + logLikelihood = logLik + ) + end extension + +end TrendAnalysis diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index b8ffbfbb..304fd9f8 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -140,14 +140,14 @@ object NegativeBinomial: * @param tol * convergence tolerance for parameter 'a' * @return - * Named tuple with `dist`: the fitted NegativeBinomial distribution, and `converged`: whether the optimizer - * converged within maxIter + * Named tuple with `dist`: the fitted distribution (Poisson if no overdispersion, otherwise NegativeBinomial), + * and `converged`: whether the optimizer converged within maxIter */ def mle( observations: Array[Int], maxIter: Int = 500, tol: Double = 1e-8 - ): (dist: NegativeBinomial, converged: Boolean) = + ): (dist: Poisson | NegativeBinomial, converged: Boolean) = require(observations.nonEmpty, "observations must not be empty") require(observations.forall(_ >= 0), "all observations must be non-negative") @@ -183,8 +183,8 @@ object NegativeBinomial: // If 
variance <= mean, data is underdispersed relative to Poisson - // In this case, return near-Poisson (small b) - if variance <= xbar then (NegativeBinomial(xbar / 1e-10, 1e-10), true) + // In this case, return Poisson distribution + if variance <= xbar then (Poisson(xbar), true) else // Method of moments initial estimates: // b = variance/mean - 1 @@ -270,14 +270,14 @@ object NegativeBinomial: * @param tol * relative tolerance on both parameters * @return - * tuple of fitted `NegativeBinomial(r, beta)` and a convergence flag + * tuple of fitted distribution (Poisson if no overdispersion, otherwise NegativeBinomial) and a convergence flag */ def volweightedMle( observations: Array[Int], volumes: Array[Double], maxIter: Int = 500, tol: Double = 1e-8 - ): (dist: NegativeBinomial, converged: Boolean) = + ): (dist: Poisson | NegativeBinomial, converged: Boolean) = require(observations.nonEmpty, "observations must not be empty") require(observations.length == volumes.length, "observations and volumes must have the same length") require(observations.forall(_ >= 0), "all observations must be non-negative") @@ -305,10 +305,16 @@ object NegativeBinomial: end while varRate /= nObs.toDouble + // If variance <= mean, data is underdispersed relative to Poisson + // Return Poisson distribution with rate = sum(n) / sum(v) + if varRate <= meanRate then + val sumN = observations.sumSIMD.toDouble + val sumV = volumes.sum + return (Poisson(sumN / sumV), true) + end if + val betaFloor = 1e-6 - var beta = - if varRate <= meanRate then betaFloor - else math.max((varRate / meanRate) - 1.0, betaFloor) + var beta = math.max((varRate / meanRate) - 1.0, betaFloor) var r = meanRate / beta var iter = 0 @@ -383,6 +389,6 @@ object NegativeBinomial: volumes: Array[Double], maxIter: Int = 100, tol: Double = 1e-8 - ): (dist: NegativeBinomial, converged: Boolean) = volweightedMle(observations, volumes, maxIter, tol) + ): (dist: Poisson | NegativeBinomial, converged: Boolean) = 
volweightedMle(observations, volumes, maxIter, tol) end NegativeBinomial diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 8f068147..e967e299 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -9,6 +9,10 @@ object Plots: private lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount private lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density private lazy val negBinCdfWSample = VegaPlot.fromResource("negBinCumul_vsSample.vl.json") // value, density + private lazy val ecdfVsCdf = VegaPlot.fromResource("ecdfVsCdf.vl.json") // theoretical and empirical CDF + private lazy val rootogram = VegaPlot.fromResource("rootogram.vl.json") // hanging rootogram + private lazy val pearsonResiduals = VegaPlot.fromResource("pearsonResiduals.vl.json") // residual plot + private lazy val poissonTrend = VegaPlot.fromResource("poissonTrend.vl.json") // Poisson GLM trend extension (idx: CalendarYearIndex) def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = @@ -28,24 +32,287 @@ object Plots: end extension extension (nb: NegativeBinomial) - inline def plotCdfWithSamples(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = - val maxX = nb.mean + 4 * math.sqrt(nb.variance) + + + /** Plot ECDF vs theoretical CDF as step functions for visual goodness-of-fit assessment. + * + * Both curves are step functions. Deviations between the orange (empirical) and blue (theoretical) lines indicate + * potential model misfit. 
+ */ + inline def plotEcdfVsCdf(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val maxX = math.max(samples.max, (nb.mean + 4 * math.sqrt(nb.variance)).toInt) + + // Theoretical CDF var cumProb = 0.0 - val data = (0 to (maxX.toInt + 1)).map { k => + val theoreticalCdf = (0 to maxX).map { k => cumProb += nb.probabilityOf(k) (value = k, prob = cumProb) } - val sampleData = samples.sorted - val n = sampleData.length - val empiricalProb = sampleData.zipWithIndex.map { case (value, idx) => - (value = value.toInt, prob = (idx + 1).toDouble / n) + // Empirical CDF (step function at each unique value) + val n = samples.length.toDouble + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + var empiricalCum = 0.0 + val empiricalCdf = (0 to maxX).map { k => + empiricalCum += counts.getOrElse(k, 0) + (value = k, prob = empiricalCum / n) + } + + ecdfVsCdf.plot( + _.title(s"NegBin(a=${nb.a}, b=${nb.b}) ECDF vs Theoretical CDF"), + _.layer._0.data.values := theoreticalCdf.asJson, + _.layer._1.data.values := empiricalCdf.asJson + ) + + /** Plot a hanging rootogram for count data diagnostics. + * + * A rootogram displays sqrt(expected) as the reference curve and hangs bars from it down to sqrt(observed). When + * the model fits well, bars hang close to the zero line. Bars extending below zero indicate under-prediction; + * bars stopping above zero indicate over-prediction. 
+ */ + inline def plotRootogram(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val n = samples.length.toDouble + val maxK = math.max(samples.max, (nb.mean + 3 * math.sqrt(nb.variance)).toInt) + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + + val data = (0 to maxK).map { k => + val observed = counts.getOrElse(k, 0) + val expected = nb.probabilityOf(k) * n + val sqrtObs = math.sqrt(observed) + val sqrtExp = math.sqrt(expected) + // Hanging: bar goes from sqrtExp down by sqrtObs, ending at sqrtExp - sqrtObs + (k = k, sqrtExpected = sqrtExp, sqrtObserved = sqrtObs, hanging = sqrtExp - sqrtObs) + } + + rootogram.plot( + _.title(s"NegBin(a=${nb.a}, b=${nb.b}) Hanging Rootogram"), + _.data.values := data.asJson + ) + + /** Plot Pearson residuals: (observed - expected) / sqrt(expected). + * + * Residuals beyond ±2 (shown in red) indicate significant deviation from the fitted model. For Negative Binomial, + * we use the variance = μ(1 + μ/a) for the denominator when available. 
+ */ + inline def plotPearsonResiduals(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val n = samples.length.toDouble + val maxK = math.max(samples.max, (nb.mean + 3 * math.sqrt(nb.variance)).toInt) + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + + val data = (0 to maxK).flatMap { k => + val observed = counts.getOrElse(k, 0) + val expected = nb.probabilityOf(k) * n + // Only include if expected > 0 to avoid division by zero + if expected > 0.001 then + // For NegBin, variance of count = expected * (1 + expected/(n*a)) approximately + // Simplify to Pearson: (O - E) / sqrt(E) + val residual = (observed - expected) / math.sqrt(expected) + Some((k = k, residual = residual)) + else None + } + + pearsonResiduals.plot( + _.title(s"NegBin(a=${nb.a}, b=${nb.b}) Pearson Residuals"), + _.data.values := data.asJson + ) + end extension + + extension (p: Poisson) + /** Plot ECDF vs theoretical CDF as step functions for visual goodness-of-fit assessment. */ + inline def plotEcdfVsCdf(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val maxX = math.max(samples.max, (p.mean + 4 * math.sqrt(p.variance)).toInt) + + // Theoretical CDF + var cumProb = 0.0 + val theoreticalCdf = (0 to maxX).map { k => + cumProb += p.probabilityOf(k) + (value = k, prob = cumProb) + } + + // Empirical CDF + val n = samples.length.toDouble + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + var empiricalCum = 0.0 + val empiricalCdf = (0 to maxX).map { k => + empiricalCum += counts.getOrElse(k, 0) + (value = k, prob = empiricalCum / n) + } + + ecdfVsCdf.plot( + _.title(s"Poisson(λ=${p.lambda}) ECDF vs Theoretical CDF"), + _.layer._0.data.values := theoreticalCdf.asJson, + _.layer._1.data.values := empiricalCdf.asJson + ) + + /** Plot a hanging rootogram for Poisson count data diagnostics. + * + * Bars hang from sqrt(expected) down to sqrt(expected) - sqrt(observed). Good fit means bars end near zero. 
+ */ + inline def plotRootogram(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val n = samples.length.toDouble + val maxK = math.max(samples.max, (p.mean + 3 * math.sqrt(p.variance)).toInt) + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + + val data = (0 to maxK).map { k => + val observed = counts.getOrElse(k, 0) + val expected = p.probabilityOf(k) * n + val sqrtObs = math.sqrt(observed) + val sqrtExp = math.sqrt(expected) + (k = k, sqrtExpected = sqrtExp, sqrtObserved = sqrtObs, hanging = sqrtExp - sqrtObs) + } + + rootogram.plot( + _.title(s"Poisson(λ=${p.lambda}) Hanging Rootogram"), + _.data.values := data.asJson + ) + + /** Plot Pearson residuals for Poisson: (observed - expected) / sqrt(expected). + * + * For Poisson, variance = mean, so the denominator is simply sqrt(expected). Residuals beyond ±2 (red) suggest + * significant deviation. Systematic patterns may indicate overdispersion (consider Negative Binomial). + */ + inline def plotPearsonResiduals(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val n = samples.length.toDouble + val maxK = math.max(samples.max, (p.mean + 3 * math.sqrt(p.variance)).toInt) + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + + val data = (0 to maxK).flatMap { k => + val observed = counts.getOrElse(k, 0) + val expected = p.probabilityOf(k) * n + if expected > 0.001 then + val residual = (observed - expected) / math.sqrt(expected) + Some((k = k, residual = residual)) + else None + } + + pearsonResiduals.plot( + _.title(s"Poisson(λ=${p.lambda}) Pearson Residuals"), + _.data.values := data.asJson + ) + + /** Plot a Poisson GLM trend: log(Count) ~ 1 + Year with 95% confidence intervals. 
+ * + * Fits a Poisson regression to count data over years and displays: - Observations (blue X markers) - Fitted trend + * line (solid red) - 95% confidence interval band (dashed red lines with shaded area) + * + * The coefficients (intercept, year slope) and their standard errors are estimated via iteratively reweighted + * least squares (IRLS). The confidence intervals use normal approximation on the log scale. + * + * @param years + * the year for each observation + * @param counts + * the count for each observation (same length as years) + */ + inline def plotTrend(years: IndexedSeq[Int], counts: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + require(years.length == counts.length, "years and counts must have the same length") + + val n = years.length + val yearsD = years.map(_.toDouble) + val countsD = counts.map(_.toDouble) + + // Fit Poisson GLM via IRLS: log(μ) = β₀ + β₁·year + // Design matrix: X = [1 | year], each row is [1, yearᵢ] + val meanY = countsD.sum / n + var beta0 = math.log(math.max(meanY, 0.1)) + var beta1 = 0.0 + + // IRLS iterations + for _ <- 0 until 25 do + // Fitted values: μ = exp(Xβ) + val mu = yearsD.map(y => math.exp(beta0 + beta1 * y)) + + // Working response: z = η + (y - μ)/μ where η = Xβ + val z = (0 until n).map { i => + val eta = beta0 + beta1 * yearsD(i) + eta + (countsD(i) - mu(i)) / math.max(mu(i), 1e-10) + } + + // Weights: W = diag(μ) for Poisson canonical link + val w = mu.map(m => math.max(m, 1e-10)) + + // Solve weighted least squares: (XᵀWX)β = XᵀWz + // XᵀWX is 2×2 symmetric: [[Σwᵢ, Σwᵢyᵢ], [Σwᵢyᵢ, Σwᵢyᵢ²]] + // XᵀWz is 2×1: [Σwᵢzᵢ, Σwᵢyᵢzᵢ] + var xtwx00, xtwx01, xtwx11 = 0.0 + var xtwz0, xtwz1 = 0.0 + + var i = 0 + while i < n do + val wi = w(i) + val yi = yearsD(i) + val zi = z(i) + xtwx00 += wi + xtwx01 += wi * yi + xtwx11 += wi * yi * yi + xtwz0 += wi * zi + xtwz1 += wi * yi * zi + i += 1 + end while + + // Solve 2×2 system via Cramer's rule: [xtwx00, xtwx01; xtwx01, xtwx11] * β = [xtwz0; xtwz1] + val det = 
xtwx00 * xtwx11 - xtwx01 * xtwx01 + if math.abs(det) > 1e-15 then + beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det + beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + + // Fisher information matrix: I = XᵀWX at final β + // I is 2×2 symmetric: [[i00, i01], [i01, i11]] + val muFinal = yearsD.map(y => math.exp(beta0 + beta1 * y)) + var i00, i01, i11 = 0.0 + var j = 0 + while j < n do + val mi = muFinal(j) + val yi = yearsD(j) + i00 += mi + i01 += mi * yi + i11 += mi * yi * yi + j += 1 + end while + + // Standard errors from Cov(β) = I⁻¹ + val detI = i00 * i11 - i01 * i01 + val seBeta0 = if detI > 1e-15 then math.sqrt(i11 / detI) else Double.NaN + val seBeta1 = if detI > 1e-15 then math.sqrt(i00 / detI) else Double.NaN + + // Covariance matrix: Cov(β) = I⁻¹ = (1/det) * [[i11, -i01], [-i01, i00]] + val covBeta = + if detI > 1e-15 then + Some( + ( + v00 = i11 / detI, + v01 = -i01 / detI, + v11 = i00 / detI + ) + ) + else None + + // Generate fitted curve with CI + val minYear = years.min + val maxYear = years.max + val yearRange = (minYear to maxYear).toVector + + val ciData = yearRange.map { y => + val eta = beta0 + beta1 * y.toDouble + // Var(η) = xᵀ Cov(β) x where x = [1, year]ᵀ + val varEta = covBeta.map { c => + c.v00 + 2 * y * c.v01 + y.toDouble * y.toDouble * c.v11 + }.getOrElse(0.0) + val seEta = math.sqrt(math.max(varEta, 0.0)) + val fit = math.exp(eta) + val lower = math.exp(eta - 1.96 * seEta) + val upper = math.exp(eta + 1.96 * seEta) + (year = y, fit = fit, lower = lower, upper = upper) } - negBinCdfWSample.plot( - _.title(s"Negative Binomial CDF (a=${nb.a}, b=${nb.b}) vs Sample Data"), - _.layer._0.data.values := data.asJson, - _.layer._1.data.values := empiricalProb.asJson + val obsData = years.zip(counts).map { case (y, c) => (year = y, count = c) } + + poissonTrend.plot( + _.title(s"Poisson Trend: β₀=${f"$beta0%.3f"}±${f"$seBeta0%.3f"}, β₁=${f"$beta1%.5f"}±${f"$seBeta1%.5f"}"), + _.layer._0.data.values := ciData.asJson, + _.layer._1.data.values := 
ciData.asJson, + _.layer._2.data.values := ciData.asJson, + _.layer._3.data.values := ciData.asJson, + _.layer._4.data.values := obsData.asJson ) end extension diff --git a/vecxt_re/src/all.scala b/vecxt_re/src/all.scala index dec3af34..904f31ef 100644 --- a/vecxt_re/src/all.scala +++ b/vecxt_re/src/all.scala @@ -4,6 +4,8 @@ object all: export vecxt_re.Scenario export vecxt_re.Scenarr export vecxt_re.Plots.* + export vecxt_re.TrendAnalysis.* + export vecxt_re.TrendFitResult export vecxt_re.SplitLosses.* export vecxt_re.SplitScenario.* export vecxt_re.DeductibleType.* diff --git a/vecxt_re/test/src-jvm/NegBin.test.scala b/vecxt_re/test/src-jvm/NegBin.test.scala index 89318eca..c548c746 100644 --- a/vecxt_re/test/src-jvm/NegBin.test.scala +++ b/vecxt_re/test/src-jvm/NegBin.test.scala @@ -6,7 +6,7 @@ import org.apache.commons.statistics.distribution.PoissonDistribution class NegBinTest extends FunSuite: - inline val localTests = false + inline val localTests = true test("pmf approximately normalizes") { val nb = NegativeBinomial(a = 2.5, b = 1.2) @@ -116,11 +116,12 @@ class NegBinTest extends FunSuite: val (fitted, converged) = NegativeBinomial.mle(data) assert(converged) - // println(s"True parameters: a=${trueNb.a}, b=${trueNb.b}") - // println(s"Fitted parameters: a=${fitted.a}, b=${fitted.b}") - - assertEqualsDouble(fitted.mean, trueNb.mean, 0.1) - assertEqualsDouble(fitted.b, trueNb.b, 0.1) + fitted match + case nb: NegativeBinomial => + assertEqualsDouble(nb.mean, trueNb.mean, 0.1) + assertEqualsDouble(nb.b, trueNb.b, 0.1) + case _: Poisson => + fail("Expected NegativeBinomial but got Poisson") } test("SLOW: vol weighted MLE follows standard case with uniform volumes ") { @@ -134,8 +135,12 @@ class NegBinTest extends FunSuite: val (fitted, converged) = NegativeBinomial.mleVolumeWeighted(data, Array.fill(10_000)(1.0)) assert(converged) - assertEqualsDouble(fitted.mean, trueNb.mean, 0.1) - assertEqualsDouble(fitted.b, trueNb.b, 0.1) + fitted match + case nb: 
NegativeBinomial => + assertEqualsDouble(nb.mean, trueNb.mean, 0.1) + assertEqualsDouble(nb.b, trueNb.b, 0.1) + case _: Poisson => + fail("Expected NegativeBinomial but got Poisson") } /** This directly exercises the volume factors: counts drawn with v = 0.5 use scale βv = 0.4, and with v = 2.0 use βv = @@ -173,15 +178,42 @@ class NegBinTest extends FunSuite: val (fitted, converged) = NegativeBinomial.mleVolumeWeighted(data, vols, maxIter = 200, tol = 1e-8) assert(converged) - assertEqualsDouble(fitted.a, rTrue, 0.1) - assertEqualsDouble(fitted.b, betaTrue, 0.1) - - // Ignoring volumes collapses a mixture of scaled NB's into a single NB, which should fit worse - // (at minimum: it should be less accurate on the modeled-period mean and dispersion). - val modeledMean = rTrue * betaTrue - val (unweighted, _) = NegativeBinomial.mle(data) - assert(math.abs(fitted.mean - modeledMean) <= math.abs(unweighted.mean - modeledMean)) - assert(math.abs(fitted.b - betaTrue) <= math.abs(unweighted.b - betaTrue)) + fitted match + case nb: NegativeBinomial => + assertEqualsDouble(nb.a, rTrue, 0.1) + assertEqualsDouble(nb.b, betaTrue, 0.1) + + // Ignoring volumes collapses a mixture of scaled NB's into a single NB, which should fit worse + // (at minimum: it should be less accurate on the modeled-period mean and dispersion). 
+ val modeledMean = rTrue * betaTrue + val (unweighted, _) = NegativeBinomial.mle(data) + unweighted match + case unNb: NegativeBinomial => + assert(math.abs(nb.mean - modeledMean) <= math.abs(unNb.mean - modeledMean)) + assert(math.abs(nb.b - betaTrue) <= math.abs(unNb.b - betaTrue)) + case _: Poisson => + // Unweighted returned Poisson, just check that the weighted NB is reasonable + assert(math.abs(nb.mean - modeledMean) < 0.5) + case _: Poisson => + fail("Expected NegativeBinomial but got Poisson for overdispersed data") + } + + test("MLE returns Poisson for underdispersed data") { + // Data with variance <= mean should return Poisson + val poissonData = Array.fill(10000)(org.apache.commons.statistics.distribution.PoissonDistribution + .of(5.0) + .createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + .sample()) + + val (fitted, converged) = NegativeBinomial.mle(poissonData) + assert(converged) + + fitted match + case pois: Poisson => + assertEqualsDouble(pois.lambda, 5.0, 0.3) + case _: NegativeBinomial => + // Also acceptable if slightly overdispersed due to sampling variance + () } end NegBinTest diff --git a/vecxt_re/test/src-jvm/Poisson.test.scala b/vecxt_re/test/src-jvm/Poisson.test.scala index f7dfa6d6..0dab37ae 100644 --- a/vecxt_re/test/src-jvm/Poisson.test.scala +++ b/vecxt_re/test/src-jvm/Poisson.test.scala @@ -6,7 +6,7 @@ import org.apache.commons.statistics.distribution.PoissonDistribution as ApacheP class PoissonTest extends FunSuite: - inline val localTests = true + inline val localTests = false test("pmf approximately normalizes") { val pois = Poisson(lambda = 5.0) diff --git a/vecxt_re/test/src-jvm/TrendAnalysis.test.scala b/vecxt_re/test/src-jvm/TrendAnalysis.test.scala new file mode 100644 index 00000000..3dd46ef5 --- /dev/null +++ b/vecxt_re/test/src-jvm/TrendAnalysis.test.scala @@ -0,0 +1,181 @@ +package vecxt_re + +import munit.FunSuite + +class TrendAnalysisTest extends FunSuite: + + import 
TrendAnalysis.* + + // Test data: synthetic counts with known trend + val yearsNoTrend = Vector(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009) + val countsNoTrend = Vector(1, 1, 1, 1, 1, 1, 1, 1, 1, 1) // No trend + + val yearsWithTrend = Vector(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009) + val countsWithTrend = Vector(1, 1, 2, 2, 3, 4, 5, 6, 8, 10) // Clear upward trend + + // Example + val realYears = Vector( + 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, + 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, + 2018, 2019, 2020, 2021, 2022, 2023, 2024 + ) + val realCounts = Vector( + 1, 0, 1, 0, 1, 0, 1, 2, 1, 0, + 0, 3, 1, 0, 0, 1, 1, 2, 1, 2, + 1, 3, 1, 1, 1, 0, 1 + ) + + test("Poisson fitTrend returns valid result structure") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsNoTrend, countsNoTrend) + + assertEquals(result.nObs, 10) + assertEquals(result.dfResidual, 8) + assert(!result.intercept.isNaN, "intercept should not be NaN") + assert(!result.slope.isNaN, "slope should not be NaN") + assert(!result.seIntercept.isNaN, "seIntercept should not be NaN") + assert(!result.seSlope.isNaN, "seSlope should not be NaN") + assert(result.pValueSlope >= 0 && result.pValueSlope <= 1, "p-value should be in [0,1]") + } + + test("Poisson fitTrend detects no significant trend in flat data") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsNoTrend, countsNoTrend) + + // Slope should be close to zero + assert(math.abs(result.slope) < 0.1, s"slope should be near zero, got ${result.slope}") + // p-value should be high (not significant) + assert(result.pValueSlope > 0.1, s"p-value should be > 0.1 for no trend, got ${result.pValueSlope}") + assert(!result.hasSignificantTrend(0.05), "should not detect significant trend in flat data") + } + + test("Poisson fitTrend detects significant trend in increasing data") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + // 
Slope should be positive + assert(result.slope > 0, s"slope should be positive, got ${result.slope}") + // p-value should be low (significant) + assert(result.pValueSlope < 0.05, s"p-value should be < 0.05 for clear trend, got ${result.pValueSlope}") + assert(result.hasSignificantTrend(0.05), "should detect significant trend in increasing data") + } + + test("Poisson fitTrend residual deviance less than null deviance for trending data") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + assert( + result.residualDeviance < result.nullDeviance, + s"residual deviance (${result.residualDeviance}) should be less than null deviance (${result.nullDeviance})" + ) + } + + test("NegativeBinomial fitTrend returns valid result structure") { + val nb = NegativeBinomial(a = 1.0, b = 1.0) + val result = nb.fitTrend(yearsNoTrend, countsNoTrend) + + assertEquals(result.nObs, 10) + assertEquals(result.dfResidual, 8) + assert(!result.intercept.isNaN, "intercept should not be NaN") + assert(!result.slope.isNaN, "slope should not be NaN") + assert(!result.seIntercept.isNaN, "seIntercept should not be NaN") + assert(!result.seSlope.isNaN, "seSlope should not be NaN") + assert(result.pValueSlope >= 0 && result.pValueSlope <= 1, "p-value should be in [0,1]") + } + + test("NegativeBinomial fitTrend detects no significant trend in flat data") { + val nb = NegativeBinomial(a = 1.0, b = 1.0) + val result = nb.fitTrend(yearsNoTrend, countsNoTrend) + + assert(math.abs(result.slope) < 0.1, s"slope should be near zero, got ${result.slope}") + assert(!result.hasSignificantTrend(0.05), "should not detect significant trend in flat data") + } + + test("NegativeBinomial fitTrend detects significant trend in increasing data") { + val nb = NegativeBinomial(a = 2.0, b = 0.5) + val result = nb.fitTrend(yearsWithTrend, countsWithTrend) + + assert(result.slope > 0, s"slope should be positive, got ${result.slope}") + assert(result.hasSignificantTrend(0.05), "should 
detect significant trend in increasing data") + } + + test("Poisson fitTrend on realistic data produces sensible coefficients") { + val pois = Poisson(1.0) + val result = pois.fitTrend(realYears, realCounts) + + assertEquals(result.nObs, 27) + assertEquals(result.dfResidual, 25) + + // The image shows β₀ ≈ -91.887, β₁ ≈ 0.0456 for similar data + // Our parameterization may differ slightly, but signs should match + // Slope should be small and positive (slight upward trend) + assert(result.slope > -0.1 && result.slope < 0.2, s"slope should be small, got ${result.slope}") + + // Check that summary doesn't throw + val summary = result.summary + assert(summary.nonEmpty, "summary should not be empty") + assert(summary.contains("Coefficients"), "summary should contain 'Coefficients'") + assert(summary.contains("Year"), "summary should contain 'Year'") + } + + test("TrendFitResult summary formatting") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + val summary = result.summary + + // Check summary contains expected sections + assert(summary.contains("Generalized Linear Model"), "should have model header") + assert(summary.contains("(Intercept)"), "should show intercept") + assert(summary.contains("Year"), "should show year coefficient") + assert(summary.contains("Null Deviance"), "should show null deviance") + assert(summary.contains("Residual Deviance"), "should show residual deviance") + assert(summary.contains("AIC"), "should show AIC") + assert(summary.contains("F-statistic"), "should show F-statistic") + } + + test("fitTrend requires minimum 3 observations") { + val pois = Poisson(1.0) + + intercept[IllegalArgumentException] { + pois.fitTrend(Vector(2000, 2001), Vector(1, 2)) + } + } + + test("fitTrend requires equal length years and counts") { + val pois = Poisson(1.0) + + intercept[IllegalArgumentException] { + pois.fitTrend(Vector(2000, 2001, 2002), Vector(1, 2)) + } + } + + test("F-statistic p-value is valid") { + val 
pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + assert(result.fStatistic > 0, "F-statistic should be positive for trending data") + assert(result.fPValue >= 0 && result.fPValue <= 1, "F p-value should be in [0,1]") + } + + test("AIC is finite") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + assert(result.aic.isFinite, "AIC should be finite") + assert(result.logLikelihood.isFinite, "log-likelihood should be finite") + } + + test("Poisson and NegBin give similar results for low dispersion") { + // When NegBin has high 'a' (low overdispersion), should approximate Poisson + val pois = Poisson(1.0) + val nb = NegativeBinomial(a = 100.0, b = 0.01) // High a = low overdispersion + + val poisResult = pois.fitTrend(yearsWithTrend, countsWithTrend) + val nbResult = nb.fitTrend(yearsWithTrend, countsWithTrend) + + // Slopes should be in the same ballpark + val slopeDiff = math.abs(poisResult.slope - nbResult.slope) + assert(slopeDiff < 0.5, s"slopes should be similar: Poisson=${poisResult.slope}, NegBin=${nbResult.slope}") + } + +end TrendAnalysisTest From e76b10ee233cd2857d35b17fff20a9e249dd85ed Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 13:11:50 +0100 Subject: [PATCH 64/75] . 
--- .gitignore | 2 + vecxt_re/resources/poissonTrend.vl.json | 15 +- vecxt_re/src-jvm/TrendAnalysis.scala | 72 ++------- vecxt_re/test/src-jvm/NegBin.test.scala | 2 +- .../test/src-jvm/TrendAnalysis.test.scala | 102 +++++++++++-- vecxt_re/test/statchek.ipynb | 143 ++++++++++++++++++ 6 files changed, 260 insertions(+), 76 deletions(-) create mode 100644 vecxt_re/test/statchek.ipynb diff --git a/.gitignore b/.gitignore index f2596f11..5e4d2f2e 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,5 @@ weights2.csv biases1.csv biases2.csv .DS_Store + +.venv/ \ No newline at end of file diff --git a/vecxt_re/resources/poissonTrend.vl.json b/vecxt_re/resources/poissonTrend.vl.json index 5ad89210..de54bdf6 100644 --- a/vecxt_re/resources/poissonTrend.vl.json +++ b/vecxt_re/resources/poissonTrend.vl.json @@ -1,8 +1,8 @@ { "$schema": "https://vega.github.io/schema/vega-lite/v6.json", "description": "Poisson GLM trend with confidence intervals", - "width": "container", - "height": "container", + "width": 600, + "height": 600, "title": "Frequency Trend", "layer": [ { @@ -81,22 +81,23 @@ "data": { "values": [{"year": 2000, "count": 1}] }, - "transform": [{"calculate": "'Observation'", "as": "legend"}], + "transform": [{"calculate": "'Observation'", "as": "obsLegend"}], "mark": { "type": "point", "shape": "cross", "size": 80, "strokeWidth": 2, - "tooltip": true + "tooltip": true, + "stroke": "steelblue" }, "encoding": { "x": {"field": "year", "type": "quantitative"}, "y": {"field": "count", "type": "quantitative"}, - "stroke": { - "field": "legend", + "fill": { + "field": "obsLegend", "type": "nominal", "scale": {"range": ["steelblue"]}, - "legend": {"symbolType": "cross", "symbolStrokeWidth": 2, "title": null} + "legend": {"symbolType": "cross", "symbolStrokeColor": "steelblue", "title": null} } } } diff --git a/vecxt_re/src-jvm/TrendAnalysis.scala b/vecxt_re/src-jvm/TrendAnalysis.scala index 1ceb2d0a..75314ce1 100644 --- a/vecxt_re/src-jvm/TrendAnalysis.scala +++ 
b/vecxt_re/src-jvm/TrendAnalysis.scala @@ -102,71 +102,27 @@ case class TrendFitResult( end TrendFitResult object TrendAnalysis: + private val normDist = org.apache.commons.math3.distribution.NormalDistribution(0.0, 1.0) /** Two-tailed p-value from z-statistic using normal approximation */ - private def pValueFromZ(z: Double): Double = + private inline def pValueFromZ(z: Double): Double = if z.isNaN || z.isInfinite then Double.NaN else 2.0 * (1.0 - normalCdf(math.abs(z))) - /** Standard normal CDF approximation (Abramowitz & Stegun) */ - private def normalCdf(x: Double): Double = - val a1 = 0.254829592 - val a2 = -0.284496736 - val a3 = 1.421413741 - val a4 = -1.453152027 - val a5 = 1.061405429 - val p = 0.3275911 - val sign = if x < 0 then -1 else 1 - val absX = math.abs(x) - val t = 1.0 / (1.0 + p * absX) - val y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * math.exp(-absX * absX / 2) - 0.5 * (1.0 + sign * y) - - /** F-distribution CDF approximation for p-value calculation */ - private def fDistPValue(f: Double, df1: Int, df2: Int): Double = + /** Standard normal CDF using Apache Commons Math */ + private inline def normalCdf(x: Double): Double = + normDist.cumulativeProbability(x) + + /** F-distribution p-value: P(F > f) for right-tailed test */ + private inline def fDistPValue(f: Double, df1: Int, df2: Int): Double = if f <= 0 || df1 <= 0 || df2 <= 0 then 1.0 else - // Use beta distribution relationship: F ~ Beta(df1/2, df2/2) transform - val x = df2.toDouble / (df2 + df1 * f) - 1.0 - incompleteBeta(df2 / 2.0, df1 / 2.0, x) - - /** Incomplete beta function approximation using continued fraction */ - private def incompleteBeta(a: Double, b: Double, x: Double): Double = - if x <= 0 then 0.0 - else if x >= 1 then 1.0 - else - val bt = - if x == 0 || x == 1 then 0.0 - else - math.exp( - logGamma(a + b) - logGamma(a) - logGamma(b) + - a * math.log(x) + b * math.log(1 - x) - ) - if x < (a + 1) / (a + b + 2) then bt * betaCF(a, b, x) / a - else 1.0 - 
bt * betaCF(b, a, 1 - x) / b - - /** Continued fraction for incomplete beta */ - private def betaCF(a: Double, b: Double, x: Double): Double = - val maxIter = 100 - val eps = 1e-10 - var c = 1.0 - var d = 1.0 / math.max(1.0 - (a + b) * x / (a + 1), eps) - var h = d - var m = 1 - while m <= maxIter do - val m2 = 2 * m - var aa = m * (b - m) * x / ((a + m2 - 1) * (a + m2)) - d = 1.0 / math.max(1.0 + aa * d, eps) - c = math.max(1.0 + aa / c, eps) - h *= d * c - aa = -(a + m) * (a + b + m) * x / ((a + m2) * (a + m2 + 1)) - d = 1.0 / math.max(1.0 + aa * d, eps) - c = math.max(1.0 + aa / c, eps) - val del = d * c - h *= del - if math.abs(del - 1.0) < eps then return h - m += 1 - h + val fDist = new org.apache.commons.math3.distribution.FDistribution(df1.toDouble, df2.toDouble) + 1.0 - fDist.cumulativeProbability(f) + + /** Regularized incomplete beta function using Apache Commons Math */ + private inline def incompleteBeta(a: Double, b: Double, x: Double): Double = + org.apache.commons.math3.special.Beta.regularizedBeta(x, a, b) extension (p: Poisson) /** Fit a Poisson GLM trend model: log(μ) = β₀ + β₁·year diff --git a/vecxt_re/test/src-jvm/NegBin.test.scala b/vecxt_re/test/src-jvm/NegBin.test.scala index c548c746..7b335cde 100644 --- a/vecxt_re/test/src-jvm/NegBin.test.scala +++ b/vecxt_re/test/src-jvm/NegBin.test.scala @@ -6,7 +6,7 @@ import org.apache.commons.statistics.distribution.PoissonDistribution class NegBinTest extends FunSuite: - inline val localTests = true + inline val localTests = false test("pmf approximately normalizes") { val nb = NegativeBinomial(a = 2.5, b = 1.2) diff --git a/vecxt_re/test/src-jvm/TrendAnalysis.test.scala b/vecxt_re/test/src-jvm/TrendAnalysis.test.scala index 3dd46ef5..60385e45 100644 --- a/vecxt_re/test/src-jvm/TrendAnalysis.test.scala +++ b/vecxt_re/test/src-jvm/TrendAnalysis.test.scala @@ -2,6 +2,8 @@ package vecxt_re import munit.FunSuite +import vecxt.all.* + class TrendAnalysisTest extends FunSuite: import TrendAnalysis.* @@ 
-13,20 +15,40 @@ class TrendAnalysisTest extends FunSuite: val yearsWithTrend = Vector(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009) val countsWithTrend = Vector(1, 1, 2, 2, 3, 4, 5, 6, 8, 10) // Clear upward trend - // Example - val realYears = Vector( - 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, - 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, - 2018, 2019, 2020, 2021, 2022, 2023, 2024 + val realYears = Array( + 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025 ) - val realCounts = Vector( - 1, 0, 1, 0, 1, 0, 1, 2, 1, 0, - 0, 3, 1, 0, 0, 1, 1, 2, 1, 2, - 1, 3, 1, 1, 1, 0, 1 + val realCounts = Array( + 1,0,0,0,1,0,0,0,2,1,1,2,3,0,0,1,2,1,0,1,2,3,1,2,1,0,1 ) + // Example results from numpy / statsmodels for realYears +/** +Fitted Poisson with lambda = 0.9629629629629629 + +Generalized Linear Model: log(Count) ~ 1 + Year +============================================================ + Generalized Linear Model Regression Results +============================================================================== +Dep. Variable: y No. Observations: 27 +Model: GLM Df Residuals: 25 +Model Family: Poisson Df Model: 1 +Link Function: Log Scale: 1.0000 +Method: IRLS Log-Likelihood: -32.760 +Date: Thu, 29 Jan 2026 Deviance: 26.468 +Time: 12:14:51 Pearson chi2: 22.3 +No. Iterations: 5 Pseudo R-squ. 
(CS): 0.08983 +Covariance Type: nonrobust +============================================================================== + coef std err z P>|z| [0.025 0.975] +------------------------------------------------------------------------------ +const -82.0576 52.254 -1.570 0.116 -184.473 20.358 +x1 0.0407 0.026 1.571 0.116 -0.010 0.092 +============================================================================== +*/ + test("Poisson fitTrend returns valid result structure") { - val pois = Poisson(1.0) + val pois = Poisson(realCounts.mean) val result = pois.fitTrend(yearsNoTrend, countsNoTrend) assertEquals(result.nObs, 10) @@ -36,6 +58,8 @@ class TrendAnalysisTest extends FunSuite: assert(!result.seIntercept.isNaN, "seIntercept should not be NaN") assert(!result.seSlope.isNaN, "seSlope should not be NaN") assert(result.pValueSlope >= 0 && result.pValueSlope <= 1, "p-value should be in [0,1]") + + } test("Poisson fitTrend detects no significant trend in flat data") { @@ -118,6 +142,38 @@ class TrendAnalysisTest extends FunSuite: assert(summary.contains("Year"), "summary should contain 'Year'") } + test("Poisson fitTrend matches Python statsmodels GLM results") { + // Python statsmodels GLM output for realYears/realCounts: + // Fitted Poisson with lambda = 0.9629629629629629 + // No. 
Observations: 27, Df Residuals: 25, Df Model: 1 + // Log-Likelihood: -32.760, Deviance: 26.468, Pearson chi2: 22.3 + // const: -82.0576 (std err 52.254), z=-1.570, p=0.116 + // x1: 0.0407 (std err 0.026), z= 1.571, p=0.116 + val pois = Poisson(realCounts.mean) + val result = pois.fitTrend(realYears, realCounts) + + // Observations and degrees of freedom + assertEquals(result.nObs, 27) + assertEquals(result.dfResidual, 25) + + // Coefficients (tolerance for numerical differences) + assertEqualsDouble(result.intercept, -82.0576, 0.5) + assertEqualsDouble(result.slope, 0.0407, 0.001) + + // Standard errors + assertEqualsDouble(result.seIntercept, 52.254, 0.5) + assertEqualsDouble(result.seSlope, 0.026, 0.001) + + // P-value for slope (Python: 0.116, some variation expected due to CDF approximation) + assertEqualsDouble(result.pValueSlope, 0.116, 0.03) + + // Log-likelihood + assertEqualsDouble(result.logLikelihood, -32.760, 0.1) + + // Residual deviance + assertEqualsDouble(result.residualDeviance, 26.468, 0.1) + } + test("TrendFitResult summary formatting") { val pois = Poisson(1.0) val result = pois.fitTrend(yearsWithTrend, countsWithTrend) @@ -157,6 +213,32 @@ class TrendAnalysisTest extends FunSuite: assert(result.fPValue >= 0 && result.fPValue <= 1, "F p-value should be in [0,1]") } + test("F-statistic p-value is small for significant trend") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + // For significant trend, F p-value should be small (< 0.05) + // F ~ 2.5 with df1=1, df2=8 should have p-value around 0.15 + // For our strongly trending data, F should be larger and p-value smaller + assert( + result.fPValue < 0.2, + s"F p-value should be small for significant trend, got ${result.fPValue} with F=${result.fStatistic}" + ) + } + + test("F-statistic p-value matches expected range for known F values") { + // Sanity check: for trending data, higher F should mean lower p-value + val pois = Poisson(1.0) + val trendResult 
= pois.fitTrend(yearsWithTrend, countsWithTrend) + val flatResult = pois.fitTrend(yearsNoTrend, countsNoTrend) + + // Trending data should have higher F-stat and lower p-value than flat data + assert( + trendResult.fStatistic > flatResult.fStatistic || flatResult.fStatistic <= 0, + s"Trending F (${trendResult.fStatistic}) should be >= flat F (${flatResult.fStatistic})" + ) + } + test("AIC is finite") { val pois = Poisson(1.0) val result = pois.fitTrend(yearsWithTrend, countsWithTrend) diff --git a/vecxt_re/test/statchek.ipynb b/vecxt_re/test/statchek.ipynb new file mode 100644 index 00000000..3f66ff67 --- /dev/null +++ b/vecxt_re/test/statchek.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "id": "24260c73", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1999: 1\n", + "2000: 0\n", + "2001: 0\n", + "2002: 0\n", + "2003: 1\n", + "2004: 0\n", + "2005: 0\n", + "2006: 0\n", + "2007: 2\n", + "2008: 1\n", + "2009: 1\n", + "2010: 2\n", + "2011: 3\n", + "2012: 0\n", + "2013: 0\n", + "2014: 1\n", + "2015: 2\n", + "2016: 1\n", + "2017: 0\n", + "2018: 1\n", + "2019: 2\n", + "2020: 3\n", + "2021: 1\n", + "2022: 2\n", + "2023: 1\n", + "2024: 0\n", + "2025: 1\n" + ] + } + ], + "source": [ + "real_years = [\n", + " 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025\n", + "]\n", + "real_counts = [\n", + " 1,0,0,0,1,0,0,0,2,1,1,2,3,0,0,1,2,1,0,1,2,3,1,2,1,0,1\n", + "]\n", + "\n", + "for year, count in zip(real_years, real_counts):\n", + " print(f\"{year}: {count}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cef5959c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: numpy in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (2.4.1)\n", + "Requirement already satisfied: statsmodels in 
/Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (0.14.6)\n", + "Requirement already satisfied: scipy!=1.9.2,>=1.8 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from statsmodels) (1.17.0)\n", + "Requirement already satisfied: pandas!=2.1.0,>=1.4 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from statsmodels) (3.0.0)\n", + "Requirement already satisfied: patsy>=0.5.6 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from statsmodels) (1.0.2)\n", + "Requirement already satisfied: packaging>=21.3 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from statsmodels) (26.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from pandas!=2.1.0,>=1.4->statsmodels) (2.9.0.post0)\n", + "Requirement already satisfied: six>=1.5 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from python-dateutil>=2.8.2->pandas!=2.1.0,>=1.4->statsmodels) (1.17.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Fitted Poisson with lambda = 0.9629629629629629\n", + "\n", + "Generalized Linear Model: log(Count) ~ 1 + Year\n", + "============================================================\n", + " Generalized Linear Model Regression Results \n", + "==============================================================================\n", + "Dep. Variable: y No. Observations: 27\n", + "Model: GLM Df Residuals: 25\n", + "Model Family: Poisson Df Model: 1\n", + "Link Function: Log Scale: 1.0000\n", + "Method: IRLS Log-Likelihood: -32.760\n", + "Date: Thu, 29 Jan 2026 Deviance: 26.468\n", + "Time: 12:14:23 Pearson chi2: 22.3\n", + "No. Iterations: 5 Pseudo R-squ. 
(CS): 0.08983\n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const -82.0576 52.254 -1.570 0.116 -184.473 20.358\n", + "x1 0.0407 0.026 1.571 0.116 -0.010 0.092\n", + "==============================================================================\n" + ] + } + ], + "source": [ + "\n", + "import numpy as np\n", + "import statsmodels.api as sm\n", + "\n", + "# Create arrays from the data\n", + "years = np.array(real_years)\n", + "counts = np.array(real_counts)\n", + "\n", + "# Fitted Poisson lambda (mean of counts)\n", + "poisson_lambda = np.mean(counts)\n", + "print(f\"Fitted Poisson with lambda = {poisson_lambda}\")\n", + "\n", + "# Fit Poisson GLM: log(Count) ~ 1 + Year\n", + "X = sm.add_constant(years) # Add intercept\n", + "poisson_model = sm.GLM(counts, X, family=sm.families.Poisson())\n", + "result = poisson_model.fit()\n", + "\n", + "print(\"\\nGeneralized Linear Model: log(Count) ~ 1 + Year\")\n", + "print(\"=\" * 60)\n", + "print(result.summary())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 018cb646f08e418b19661cc7498cf350001a690d Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 16:22:58 +0100 Subject: [PATCH 65/75] . 
--- vecxt_re/test/statchek.ipynb | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/vecxt_re/test/statchek.ipynb b/vecxt_re/test/statchek.ipynb index 3f66ff67..4e477713 100644 --- a/vecxt_re/test/statchek.ipynb +++ b/vecxt_re/test/statchek.ipynb @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "cef5959c", "metadata": {}, "outputs": [ @@ -62,15 +62,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: numpy in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (2.4.1)\n", - "Requirement already satisfied: statsmodels in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (0.14.6)\n", - "Requirement already satisfied: scipy!=1.9.2,>=1.8 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from statsmodels) (1.17.0)\n", - "Requirement already satisfied: pandas!=2.1.0,>=1.4 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from statsmodels) (3.0.0)\n", - "Requirement already satisfied: patsy>=0.5.6 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from statsmodels) (1.0.2)\n", - "Requirement already satisfied: packaging>=21.3 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from statsmodels) (26.0)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from pandas!=2.1.0,>=1.4->statsmodels) (2.9.0.post0)\n", - "Requirement already satisfied: six>=1.5 in /Users/simon/Code/vecxt/.venv/lib/python3.14/site-packages (from python-dateutil>=2.8.2->pandas!=2.1.0,>=1.4->statsmodels) (1.17.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", "Fitted Poisson with lambda = 0.9629629629629629\n", "\n", "Generalized Linear Model: log(Count) ~ 1 + Year\n", @@ -83,7 +74,7 @@ "Link Function: Log Scale: 1.0000\n", "Method: IRLS Log-Likelihood: -32.760\n", "Date: Thu, 29 Jan 2026 Deviance: 26.468\n", - "Time: 
12:14:23 Pearson chi2: 22.3\n", + "Time: 12:14:51 Pearson chi2: 22.3\n", "No. Iterations: 5 Pseudo R-squ. (CS): 0.08983\n", "Covariance Type: nonrobust \n", "==============================================================================\n", From 997deadd6efa904b32bebc73cad8cf3f3250c970 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Thu, 29 Jan 2026 15:24:12 +0000 Subject: [PATCH 66/75] [autofix.ci] apply automated fixes --- vecxt_re/src-jvm/TrendAnalysis.scala | 15 +++++- vecxt_re/src-jvm/dist/NegativeBinomial.scala | 7 +-- vecxt_re/src-jvm/dist/Poisson.scala | 5 +- vecxt_re/src-jvm/plots.scala | 24 ++++++--- vecxt_re/test/src-jvm/NegBin.test.scala | 15 ++++-- vecxt_re/test/src-jvm/Poisson.test.scala | 2 +- .../test/src-jvm/TrendAnalysis.test.scala | 52 +++++++++---------- 7 files changed, 72 insertions(+), 48 deletions(-) diff --git a/vecxt_re/src-jvm/TrendAnalysis.scala b/vecxt_re/src-jvm/TrendAnalysis.scala index 75314ce1..72626bee 100644 --- a/vecxt_re/src-jvm/TrendAnalysis.scala +++ b/vecxt_re/src-jvm/TrendAnalysis.scala @@ -92,6 +92,7 @@ case class TrendFitResult( sb.append(f"F-statistic vs. 
constant model: $fStatistic%.3f, p-value = $fPValue%.6f\n") sb.toString + end summary private def significanceCode(p: Double): String = if p < 0.001 then " ***" @@ -177,6 +178,8 @@ object TrendAnalysis: if math.abs(det) > 1e-15 then beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + end if + end for // Fit null model: log(μ) = β₀ only val nullBeta0 = math.log(meanY) @@ -191,10 +194,12 @@ object TrendAnalysis: val y = observed(i) val mu = fitted(i) if y > 0 then dev += y * math.log(y / mu) + end if dev -= (y - mu) i += 1 end while 2.0 * dev + end poissonDeviance val muFull = yearsD.map(y => math.exp(beta0 + beta1 * y)) val nullDeviance = poissonDeviance(countsD, muNull.toIndexedSeq) @@ -251,6 +256,7 @@ object TrendAnalysis: i += 1 end while ll + end poissonLogLik val logLik = poissonLogLik(countsD, muFull) val aic = -2 * logLik + 2 * 2 // 2 parameters @@ -279,8 +285,8 @@ object TrendAnalysis: extension (nb: NegativeBinomial) /** Fit a Negative Binomial GLM trend model: log(μ) = β₀ + β₁·year * - * Uses IRLS with the NB2 variance function (Var = μ + μ²/θ where θ = a). This accounts for overdispersion in - * count data. + * Uses IRLS with the NB2 variance function (Var = μ + μ²/θ where θ = a). This accounts for overdispersion in count + * data. 
* * @param years * the year for each observation @@ -336,6 +342,8 @@ object TrendAnalysis: if math.abs(det) > 1e-15 then beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + end if + end for // Null model val nullBeta0 = math.log(meanY) @@ -348,10 +356,12 @@ object TrendAnalysis: val y = observed(i) val mu = fitted(i) if y > 0 then dev += y * math.log(y / mu) + end if dev -= (y + theta) * math.log((y + theta) / (mu + theta)) i += 1 end while 2.0 * dev + end nbDeviance val muFull = yearsD.map(y => math.exp(beta0 + beta1 * y)) val muNull = IndexedSeq.fill(n)(meanY) @@ -410,6 +420,7 @@ object TrendAnalysis: i += 1 end while ll + end nbLogLik val logLik = nbLogLik(countsD, muFull) val aic = -2 * logLik + 2 * 2 // 2 parameters (not counting θ as estimated here) diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index 304fd9f8..063c6ac9 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -78,7 +78,6 @@ case class NegativeBinomial(a: Double, b: Double) def variance: Double = a * b * (1.0 + b) - override def probability(x: Int, y: Int): Double = if x >= y then 0.0 else cdf(y) - cdf(x) @@ -140,8 +139,8 @@ object NegativeBinomial: * @param tol * convergence tolerance for parameter 'a' * @return - * Named tuple with `dist`: the fitted distribution (Poisson if no overdispersion, otherwise NegativeBinomial), - * and `converged`: whether the optimizer converged within maxIter + * Named tuple with `dist`: the fitted distribution (Poisson if no overdispersion, otherwise NegativeBinomial), and + * `converged`: whether the optimizer converged within maxIter */ def mle( observations: Array[Int], @@ -180,8 +179,6 @@ object NegativeBinomial: end while ll - - // If variance <= mean, data is underdispersed relative to Poisson // In this case, return Poisson distribution if variance <= xbar then (Poisson(xbar), true) diff --git 
a/vecxt_re/src-jvm/dist/Poisson.scala b/vecxt_re/src-jvm/dist/Poisson.scala index 3b8cad38..8f215cd7 100644 --- a/vecxt_re/src-jvm/dist/Poisson.scala +++ b/vecxt_re/src-jvm/dist/Poisson.scala @@ -18,8 +18,7 @@ import io.github.quafadas.plots.SetupVega.{*, given} * * PMF: P(X = k) = λ^k * e^(-λ) / k! * - * The Poisson distribution is a limiting case of the Negative Binomial distribution as the dispersion parameter b → - * 0. + * The Poisson distribution is a limiting case of the Negative Binomial distribution as the dispersion parameter b → 0. * * @param lambda * the rate parameter (must be positive) @@ -189,7 +188,6 @@ object Poisson: // Find the max observation to determine bin range var maxObs = observations.maxSIMD - // Count observations in each bin val counts = new Array[Int](maxObs + 2) // +1 for the "maxObs or more" bin var i = 0 @@ -197,6 +195,7 @@ object Poisson: val obs = observations(i) if obs >= counts.length - 1 then counts(counts.length - 1) += 1 else counts(obs) += 1 + end if i += 1 end while diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index e967e299..681f73d8 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -33,7 +33,6 @@ object Plots: extension (nb: NegativeBinomial) - /** Plot ECDF vs theoretical CDF as step functions for visual goodness-of-fit assessment. * * Both curves are step functions. Deviations between the orange (empirical) and blue (theoretical) lines indicate @@ -63,12 +62,13 @@ object Plots: _.layer._0.data.values := theoreticalCdf.asJson, _.layer._1.data.values := empiricalCdf.asJson ) + end plotEcdfVsCdf /** Plot a hanging rootogram for count data diagnostics. * * A rootogram displays sqrt(expected) as the reference curve and hangs bars from it down to sqrt(observed). When - * the model fits well, bars hang close to the zero line. Bars extending below zero indicate under-prediction; - * bars stopping above zero indicate over-prediction. 
+ * the model fits well, bars hang close to the zero line. Bars extending below zero indicate under-prediction; bars + * stopping above zero indicate over-prediction. */ inline def plotRootogram(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = val n = samples.length.toDouble @@ -88,6 +88,7 @@ object Plots: _.title(s"NegBin(a=${nb.a}, b=${nb.b}) Hanging Rootogram"), _.data.values := data.asJson ) + end plotRootogram /** Plot Pearson residuals: (observed - expected) / sqrt(expected). * @@ -109,12 +110,14 @@ object Plots: val residual = (observed - expected) / math.sqrt(expected) Some((k = k, residual = residual)) else None + end if } pearsonResiduals.plot( _.title(s"NegBin(a=${nb.a}, b=${nb.b}) Pearson Residuals"), _.data.values := data.asJson ) + end plotPearsonResiduals end extension extension (p: Poisson) @@ -143,6 +146,7 @@ object Plots: _.layer._0.data.values := theoreticalCdf.asJson, _.layer._1.data.values := empiricalCdf.asJson ) + end plotEcdfVsCdf /** Plot a hanging rootogram for Poisson count data diagnostics. * @@ -165,6 +169,7 @@ object Plots: _.title(s"Poisson(λ=${p.lambda}) Hanging Rootogram"), _.data.values := data.asJson ) + end plotRootogram /** Plot Pearson residuals for Poisson: (observed - expected) / sqrt(expected). * @@ -183,12 +188,14 @@ object Plots: val residual = (observed - expected) / math.sqrt(expected) Some((k = k, residual = residual)) else None + end if } pearsonResiduals.plot( _.title(s"Poisson(λ=${p.lambda}) Pearson Residuals"), _.data.values := data.asJson ) + end plotPearsonResiduals /** Plot a Poisson GLM trend: log(Count) ~ 1 + Year with 95% confidence intervals. 
* @@ -254,6 +261,8 @@ object Plots: if math.abs(det) > 1e-15 then beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + end if + end for // Fisher information matrix: I = XᵀWX at final β // I is 2×2 symmetric: [[i00, i01], [i01, i11]] @@ -294,9 +303,11 @@ object Plots: val ciData = yearRange.map { y => val eta = beta0 + beta1 * y.toDouble // Var(η) = xᵀ Cov(β) x where x = [1, year]ᵀ - val varEta = covBeta.map { c => - c.v00 + 2 * y * c.v01 + y.toDouble * y.toDouble * c.v11 - }.getOrElse(0.0) + val varEta = covBeta + .map { c => + c.v00 + 2 * y * c.v01 + y.toDouble * y.toDouble * c.v11 + } + .getOrElse(0.0) val seEta = math.sqrt(math.max(varEta, 0.0)) val fit = math.exp(eta) val lower = math.exp(eta - 1.96 * seEta) @@ -314,6 +325,7 @@ object Plots: _.layer._3.data.values := ciData.asJson, _.layer._4.data.values := obsData.asJson ) + end plotTrend end extension extension (scenario: Scenarr) diff --git a/vecxt_re/test/src-jvm/NegBin.test.scala b/vecxt_re/test/src-jvm/NegBin.test.scala index 7b335cde..e05b6355 100644 --- a/vecxt_re/test/src-jvm/NegBin.test.scala +++ b/vecxt_re/test/src-jvm/NegBin.test.scala @@ -122,6 +122,7 @@ class NegBinTest extends FunSuite: assertEqualsDouble(nb.b, trueNb.b, 0.1) case _: Poisson => fail("Expected NegativeBinomial but got Poisson") + end match } test("SLOW: vol weighted MLE follows standard case with uniform volumes ") { @@ -141,6 +142,7 @@ class NegBinTest extends FunSuite: assertEqualsDouble(nb.b, trueNb.b, 0.1) case _: Poisson => fail("Expected NegativeBinomial but got Poisson") + end match } /** This directly exercises the volume factors: counts drawn with v = 0.5 use scale βv = 0.4, and with v = 2.0 use βv = @@ -194,16 +196,20 @@ class NegBinTest extends FunSuite: case _: Poisson => // Unweighted returned Poisson, just check that the weighted NB is reasonable assert(math.abs(nb.mean - modeledMean) < 0.5) + end match case _: Poisson => fail("Expected NegativeBinomial but got Poisson 
for overdispersed data") + end match } test("MLE returns Poisson for underdispersed data") { // Data with variance <= mean should return Poisson - val poissonData = Array.fill(10000)(org.apache.commons.statistics.distribution.PoissonDistribution - .of(5.0) - .createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) - .sample()) + val poissonData = Array.fill(10000)( + org.apache.commons.statistics.distribution.PoissonDistribution + .of(5.0) + .createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + .sample() + ) val (fitted, converged) = NegativeBinomial.mle(poissonData) assert(converged) @@ -214,6 +220,7 @@ class NegBinTest extends FunSuite: case _: NegativeBinomial => // Also acceptable if slightly overdispersed due to sampling variance () + end match } end NegBinTest diff --git a/vecxt_re/test/src-jvm/Poisson.test.scala b/vecxt_re/test/src-jvm/Poisson.test.scala index 0dab37ae..2179d9e3 100644 --- a/vecxt_re/test/src-jvm/Poisson.test.scala +++ b/vecxt_re/test/src-jvm/Poisson.test.scala @@ -72,6 +72,7 @@ class PoissonTest extends FunSuite: val expected = pois.cdf(y) - pois.cdf(x) val actual = pois.probability(x, y) assert(math.abs(actual - expected) < 1e-14) + end for } test("small lambda works correctly") { @@ -151,7 +152,6 @@ class PoissonTest extends FunSuite: assume(localTests, "Don't run local-only tests in CI ideally as they are slow") println("=============TURN OFF MLE volume-weighted with uniform volumes equals regular MLE IN CI========") - val trueLambda = 5.0 val apachePois = ApachePoisson.of(trueLambda) val sampler = apachePois.createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) diff --git a/vecxt_re/test/src-jvm/TrendAnalysis.test.scala b/vecxt_re/test/src-jvm/TrendAnalysis.test.scala index 60385e45..ac10ef7b 100644 --- a/vecxt_re/test/src-jvm/TrendAnalysis.test.scala +++ b/vecxt_re/test/src-jvm/TrendAnalysis.test.scala @@ -16,36 +16,35 @@ class 
TrendAnalysisTest extends FunSuite: val countsWithTrend = Vector(1, 1, 2, 2, 3, 4, 5, 6, 8, 10) // Clear upward trend val realYears = Array( - 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025 + 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, + 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025 ) val realCounts = Array( - 1,0,0,0,1,0,0,0,2,1,1,2,3,0,0,1,2,1,0,1,2,3,1,2,1,0,1 + 1, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 2, 3, 0, 0, 1, 2, 1, 0, 1, 2, 3, 1, 2, 1, 0, 1 ) // Example results from numpy / statsmodels for realYears -/** -Fitted Poisson with lambda = 0.9629629629629629 - -Generalized Linear Model: log(Count) ~ 1 + Year -============================================================ - Generalized Linear Model Regression Results -============================================================================== -Dep. Variable: y No. Observations: 27 -Model: GLM Df Residuals: 25 -Model Family: Poisson Df Model: 1 -Link Function: Log Scale: 1.0000 -Method: IRLS Log-Likelihood: -32.760 -Date: Thu, 29 Jan 2026 Deviance: 26.468 -Time: 12:14:51 Pearson chi2: 22.3 -No. Iterations: 5 Pseudo R-squ. (CS): 0.08983 -Covariance Type: nonrobust -============================================================================== - coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------- -const -82.0576 52.254 -1.570 0.116 -184.473 20.358 -x1 0.0407 0.026 1.571 0.116 -0.010 0.092 -============================================================================== -*/ - + /** Fitted Poisson with lambda = 0.9629629629629629 + * + * Generalized Linear Model: log(Count) ~ 1 + Year + * ============================================================ + * Generalized Linear Model Regression Results + * ============================================================================== + * Dep. Variable: y No. 
Observations: 27 + * Model: GLM Df Residuals: 25 + * Model Family: Poisson Df Model: 1 + * Link Function: Log Scale: 1.0000 + * Method: IRLS Log-Likelihood: -32.760 + * Date: Thu, 29 Jan 2026 Deviance: 26.468 + * Time: 12:14:51 Pearson chi2: 22.3 + * No. Iterations: 5 Pseudo R-squ. (CS): 0.08983 + * Covariance Type: nonrobust + * ============================================================================== + * coef std err z P>|z| [0.025 0.975] + * ------------------------------------------------------------------------------ + * const -82.0576 52.254 -1.570 0.116 -184.473 20.358 + * x1 0.0407 0.026 1.571 0.116 -0.010 0.092 + * ============================================================================== + */ test("Poisson fitTrend returns valid result structure") { val pois = Poisson(realCounts.mean) @@ -59,7 +58,6 @@ x1 0.0407 0.026 1.571 0.116 -0.010 0.092 assert(!result.seSlope.isNaN, "seSlope should not be NaN") assert(result.pValueSlope >= 0 && result.pValueSlope <= 1, "p-value should be in [0,1]") - } test("Poisson fitTrend detects no significant trend in flat data") { From 884e0bbe413bf1278049a2d3db2f124eb7c62240 Mon Sep 17 00:00:00 2001 From: partens Date: Thu, 29 Jan 2026 16:41:14 +0100 Subject: [PATCH 67/75] . --- vecxt_re/resources/squareDiag.vg.json | 161 ++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 vecxt_re/resources/squareDiag.vg.json diff --git a/vecxt_re/resources/squareDiag.vg.json b/vecxt_re/resources/squareDiag.vg.json new file mode 100644 index 00000000..0bbfc78c --- /dev/null +++ b/vecxt_re/resources/squareDiag.vg.json @@ -0,0 +1,161 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v6.json", + "description": "Treemap of transactions sized by 'size'. 
Grouped by transaction → company.", + "width": 700, + "height": 500, + "padding": 2.5, + "autosize": "none", + + "signals": [ + { + "name": "layout", "value": "squarify", + "bind": { + "input": "select", + "options": ["squarify", "binary", "slicedice"] + } + }, + { + "name": "aspectRatio", "value": 1.6, + "bind": { "input": "range", "min": 1, "max": 5, "step": 0.1 } + } + ], + + "data": [ + { + "name": "transactions", + "values": [ + { "company": "A", "size": 40, "transaction": "Yey" }, + { "company": "A", "size": 23, "transaction": "B" }, + { "company": "B", "size": 29, "transaction": "cat" }, + { "company": "C", "size": 17, "transaction": "dog" } + ] + }, + + { + "name": "tree", + "source": "transactions", + "transform": [ + { "type": "nest", "keys": ["transaction", "company"] }, + { + "type": "treemap", + "field": "size", + "sort": { "field": "value" }, + "round": true, + "method": { "signal": "layout" }, + "ratio": { "signal": "aspectRatio" }, + "size": [{ "signal": "width" }, { "signal": "height" }] + } + ] + }, + { + "name": "nodes", + "source": "tree", + "transform": [{ "type": "filter", "expr": "datum.children" }] + }, + { + "name": "leaves", + "source": "tree", + "transform": [{ "type": "filter", "expr": "!datum.children" }] + } + ], + + "scales": [ + { + "name": "color_companies", + "type": "ordinal", + "domain": { "data": "leaves", "field": "company" }, + "range": {"scheme": "dark2"} + }, + { + "name": "labelSize", + "type": "ordinal", + "domain": [0, 1, 2, 3], + "range": [256, 28, 20, 14] + }, + { + "name": "labelOpacity", + "type": "ordinal", + "domain": [0, 1, 2, 3], + "range": [0.15, 0.5, 0.8, 1.0] + } + ], + + "marks": [ + + { + "type": "rect", + "from": { "data": "nodes" }, + "interactive": false, + "encode": { + "enter": { + "fill": { "value": "#f3f3f3" } + }, + "update": { + "x": { "field": "x0" }, + "y": { "field": "y0" }, + "x2": { "field": "x1" }, + "y2": { "field": "y1" } + } + } + }, + + + { + "type": "rect", + "from": { "data": "leaves" }, 
+ "encode": { + "enter": { + "stroke": { "value": "#fff" }, + "fill": { "scale": "color_companies", "field": "company" }, + "tooltip": { + "signal": "{'Transaction': datum.transaction, 'Company': datum.company, 'Size': datum.size}" + } + }, + "update": { + "x": { "field": "x0" }, + "y": { "field": "y0" }, + "x2": { "field": "x1" }, + "y2": { "field": "y1" } + }, + "hover": { + "stroke": { "value": "#000" } + } + } + }, + + + { + "type": "text", + "from": { "data": "leaves" }, + "interactive": false, + "encode": { + "enter": { + "font": { "value": "Helvetica Neue, Arial" }, + "align": { "value": "center" }, + "baseline": { "value": "middle" }, + "fill": { "value": "#000" }, + "fontWeight": { "value": "bold" } + }, + "update": { + "x": { "signal": "(datum.x0 + datum.x1) / 2" }, + "y": { "signal": "(datum.y0 + datum.y1) / 2" }, + + "text": { + "signal": + "[datum.company, datum.transaction, datum.size + 'mUSD']" + }, + + "fontSize": { + "signal": + "15" + }, + + "fillOpacity": { + "signal": + "((datum.x1-datum.x0) * (datum.y1-datum.y0)) < 2500 ? 0 : 1" + } + } + } +} + ] +} From 1a9ca44f4a6ca9463226a3b45cfe8c802a52bd03 Mon Sep 17 00:00:00 2001 From: partens Date: Thu, 29 Jan 2026 16:41:37 +0100 Subject: [PATCH 68/75] . --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5e4d2f2e..fbb16a79 100644 --- a/.gitignore +++ b/.gitignore @@ -39,4 +39,5 @@ biases1.csv biases2.csv .DS_Store -.venv/ \ No newline at end of file +.venv/ +experiments/src/bhd.scala From 98696e285d640d4b2ffb98f683500b66405d817a Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 17:23:17 +0100 Subject: [PATCH 69/75] . 
--- vecxt_re/{src => src-jvm}/all.scala | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename vecxt_re/{src => src-jvm}/all.scala (100%) diff --git a/vecxt_re/src/all.scala b/vecxt_re/src-jvm/all.scala similarity index 100% rename from vecxt_re/src/all.scala rename to vecxt_re/src-jvm/all.scala From 02bd306074d63bd78c431e36c5ed0667e67626f1 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 20:58:46 +0100 Subject: [PATCH 70/75] . --- vecxt/src/intarray.scala | 20 ++ vecxt/test/src/intarray.test.scala | 27 ++ vecxt_re/resources/hillPlot.vl.json | 33 +++ vecxt_re/resources/loglogCdf.vl.json | 56 ++++ vecxt_re/src-jvm/all.scala | 5 + vecxt_re/src-jvm/plots.scala | 106 ++++++++ vecxt_re/src/HillEstimator.scala | 207 ++++++++++++++ vecxt_re/src/PickandsEstimator.scala | 252 ++++++++++++++++++ vecxt_re/test/src/hillEstimator.test.scala | 151 +++++++++++ .../test/src/pickandsEstimator.test.scala | 145 ++++++++++ 10 files changed, 1002 insertions(+) create mode 100644 vecxt_re/resources/hillPlot.vl.json create mode 100644 vecxt_re/resources/loglogCdf.vl.json create mode 100644 vecxt_re/src/HillEstimator.scala create mode 100644 vecxt_re/src/PickandsEstimator.scala create mode 100644 vecxt_re/test/src/hillEstimator.test.scala create mode 100644 vecxt_re/test/src/pickandsEstimator.test.scala diff --git a/vecxt/src/intarray.scala b/vecxt/src/intarray.scala index 1c1f64f0..533aec7c 100644 --- a/vecxt/src/intarray.scala +++ b/vecxt/src/intarray.scala @@ -5,6 +5,7 @@ import scala.util.control.Breaks.* import vecxt.BooleanArrays.trues import vecxt.BoundsCheck.BoundsCheck +import vecxt.arrays.sumSIMD object IntArrays: @@ -35,6 +36,25 @@ object IntArrays: out end select + inline def countsToIdx: Array[Int] = + var total = arr.sumSIMD + var i = 0 + val out = new Array[Int](total) + var j = 0 + while i < arr.length do + val count = arr(i) + val idx = i + 1 + var k = 0 + while k < count do + out(j) = idx + j += 1 + k += 1 + end while + i += 1 + end while + out + 
end countsToIdx + inline def contiguous: Boolean = var i = 1 var out = true diff --git a/vecxt/test/src/intarray.test.scala b/vecxt/test/src/intarray.test.scala index 99616db6..2700b903 100644 --- a/vecxt/test/src/intarray.test.scala +++ b/vecxt/test/src/intarray.test.scala @@ -133,4 +133,31 @@ class IntArrayExtensionSuite extends munit.FunSuite: assertVecEquals(base.select(idx), Array.emptyIntArray) } + test("countsToIdx basic") { + val counts = Array(2, 3, 1) + // 2 ones, 3 twos, 1 three => [1, 1, 2, 2, 2, 3] + assertVecEquals(counts.countsToIdx, Array(1, 1, 2, 2, 2, 3)) + } + + test("countsToIdx with zeros") { + val counts = Array(1, 0, 2, 0, 1) + // 1 one, 0 twos, 2 threes, 0 fours, 1 five => [1, 3, 3, 5] + assertVecEquals(counts.countsToIdx, Array(1, 3, 3, 5)) + } + + test("countsToIdx empty array") { + val counts = Array.emptyIntArray + assertVecEquals(counts.countsToIdx, Array.emptyIntArray) + } + + test("countsToIdx all zeros") { + val counts = Array(0, 0, 0) + assertVecEquals(counts.countsToIdx, Array.emptyIntArray) + } + + test("countsToIdx single element") { + val counts = Array(5) + assertVecEquals(counts.countsToIdx, Array(1, 1, 1, 1, 1)) + } + end IntArrayExtensionSuite diff --git a/vecxt_re/resources/hillPlot.vl.json b/vecxt_re/resources/hillPlot.vl.json new file mode 100644 index 00000000..cc8987ad --- /dev/null +++ b/vecxt_re/resources/hillPlot.vl.json @@ -0,0 +1,33 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "title": "Hill Plot", + "width": "container", + "height": "container", + "data": { + "values": [ + { "k": 10, "estimate": 2.1 }, + { "k": 20, "estimate": 2.05 }, + { "k": 30, "estimate": 1.98 }, + { "k": 40, "estimate": 2.02 }, + { "k": 50, "estimate": 1.95 } + ] + }, + "mark": { + "type": "line", + "point": { "filled": true, "size": 40 }, + "tooltip": { "content": "data" } + }, + "encoding": { + "x": { + "field": "k", + "type": "quantitative", + "title": "k (number of upper order statistics)" + }, + "y": { + 
"field": "estimate", + "type": "quantitative", + "title": "Tail Index Estimate α̂(k)" + }, + "color": { "value": "steelblue" } + } +} diff --git a/vecxt_re/resources/loglogCdf.vl.json b/vecxt_re/resources/loglogCdf.vl.json new file mode 100644 index 00000000..f92be3f7 --- /dev/null +++ b/vecxt_re/resources/loglogCdf.vl.json @@ -0,0 +1,56 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "title": "Log-Log CDF Plot", + "width": "container", + "height": "container", + "data": { + "values": [ + { "x": 1, "y": 0.9, "source": "model" }, + { "x": 10, "y": 0.5, "source": "model" }, + { "x": 100, "y": 0.1, "source": "model" }, + { "x": 1000, "y": 0.01, "source": "model" }, + { "x": 2, "y": 0.85, "source": "empirical" }, + { "x": 15, "y": 0.55, "source": "empirical" }, + { "x": 80, "y": 0.15, "source": "empirical" }, + { "x": 500, "y": 0.05, "source": "empirical" } + ] + }, + "layer": [ + { + "transform": [{ "filter": "datum.source === 'model'" }], + "mark": "line", + "encoding": { + "x": { + "field": "x", + "type": "quantitative", + "scale": { "type": "log", "domainMin": 1 }, + "title": "X (log scale)" + }, + "y": { + "field": "y", + "type": "quantitative", + "scale": { "type": "log" }, + "title": "Survival Probability S(x)" + }, + "color": { "value": "steelblue" } + } + }, + { + "transform": [{ "filter": "datum.source === 'empirical'" }], + "mark": { "type": "point", "shape": "cross", "size": 100, "tooltip": {"content": "data"} }, + "encoding": { + "x": { + "field": "x", + "type": "quantitative", + "scale": { "type": "log", "domainMin": 1 } + }, + "y": { + "field": "y", + "type": "quantitative", + "scale": { "type": "log" } + }, + "color": { "value": "red" } + } + } + ] +} diff --git a/vecxt_re/src-jvm/all.scala b/vecxt_re/src-jvm/all.scala index 904f31ef..0bd4a108 100644 --- a/vecxt_re/src-jvm/all.scala +++ b/vecxt_re/src-jvm/all.scala @@ -3,6 +3,7 @@ package vecxt_re object all: export vecxt_re.Scenario export vecxt_re.Scenarr + export 
vecxt_re.Tower export vecxt_re.Plots.* export vecxt_re.TrendAnalysis.* export vecxt_re.TrendFitResult @@ -16,4 +17,8 @@ object all: export vecxt_re.Poisson export vecxt_re.Empirical export vecxt_re.Pareto + export vecxt_re.Mixed + export vecxt_re.HillEstimator + export vecxt_re.PickandsEstimator + end all diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 681f73d8..35ccd016 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -2,6 +2,8 @@ package vecxt_re import io.circe.syntax.* import io.github.quafadas.plots.SetupVega.{*, given} +import vecxt_re.HillEstimator.HillPlotResult +import vecxt_re.PickandsEstimator.PickandsPlotResult object Plots: // These must be private otherwise scaladoc get crazy. @@ -13,6 +15,8 @@ object Plots: private lazy val rootogram = VegaPlot.fromResource("rootogram.vl.json") // hanging rootogram private lazy val pearsonResiduals = VegaPlot.fromResource("pearsonResiduals.vl.json") // residual plot private lazy val poissonTrend = VegaPlot.fromResource("poissonTrend.vl.json") // Poisson GLM trend + private lazy val logLogPlot = VegaPlot.fromResource("loglogCdf.vl.json") // log-log CDF plot + private lazy val hillPlotSpec = VegaPlot.fromResource("hillPlot.vl.json") // Hill plot for tail index extension (idx: CalendarYearIndex) def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = @@ -351,6 +355,108 @@ object Plots: ) end extension + extension (mixed: Mixed) + /** Plot log-log comparison of theoretical Mixed distribution vs empirical sample data. + * + * This plot shows the complementary CDF (1 - CDF) on log-log scale, which is useful for visualizing tail behavior + * of heavy-tailed distributions. The x-axis shows log(value) and y-axis shows log(1 - CDF). 
+ */ + def plotLogLogVsSample(samples: IndexedSeq[Double], threshold: Double)(using viz.LowPriorityPlotTarget) = + val sortedSamples = samples.sorted + val n = sortedSamples.length.toDouble + + // Empirical survival function using Hazen plotting position + val empiricalData = sortedSamples.zipWithIndex.collect { + case (x, i) if x > 0 => + val survivalProb = (n - i - 0.5) / n + if survivalProb > 0 then Some((x = x, y = survivalProb, source = "empirical")) + else None + end if + }.flatten + + // Theoretical survival function (1 - CDF) + // For Pareto: S(x) = (xₘ/x)^α, so log(S) = α·log(xₘ) - α·log(x) is linear + val minX = sortedSamples.filter(_ > 0).headOption.getOrElse(1.0) + val maxX = sortedSamples.last + val numPoints = 200 + val theoreticalData = (0 until numPoints).flatMap { i => + val x = minX * math.pow(maxX / minX, i.toDouble / (numPoints - 1)) + val survivalProb = 1.0 - mixed.cdf(x) + if survivalProb > 1e-10 && x > 0 then Some((x = x, y = survivalProb, source = "model")) + else None + end if + } + + val allData = theoreticalData ++ empiricalData + + logLogPlot.plot( + _.title(s"Mixed Distribution Log-Log Plot"), + _.data.values := allData.asJson, + _.layer._0.encoding.x.scale.domainMin := threshold + ) + end plotLogLogVsSample + end extension + + extension (hp: HillPlotResult) + /** Plot a Hill plot showing tail index estimates α̂(k) vs k. + * + * A Hill plot helps identify the optimal number of upper order statistics to use for Pareto tail estimation. Look + * for a stable plateau region where the estimate is relatively constant - this indicates the range of k where the + * Pareto assumption holds. Too small k gives high variance; too large k includes non-tail observations. 
+ */ + def plotHill(using viz.LowPriorityPlotTarget) = + val data = hp.kValues.zip(hp.estimates).map { case (k, est) => + (k = k, estimate = est) + } + + hillPlotSpec.plot( + _.title("Hill Plot for Pareto Tail Index Estimation"), + _.data.values := data.asJson + ) + end plotHill + end extension + + extension (pp: PickandsPlotResult) + /** Plot a Pickands plot showing tail index estimates α̂(k) = 1/γ̂(k) vs k. + * + * The Pickands estimator is more robust than Hill to model misspecification but has higher variance. Look for a + * stable plateau region. Unlike the Hill plot, the Pickands estimator can give negative γ values for light-tailed + * distributions; only positive γ (and thus positive α) indicates heavy tails. + */ + def plotPickands(using viz.LowPriorityPlotTarget) = + val data = pp.kValues + .zip(pp.alphaEstimates) + .filter { case (_, est) => !est.isNaN && est.isFinite && est > 0 } + .map { case (k, est) => + (k = k, estimate = est) + } + + hillPlotSpec.plot( + _.title("Pickands Plot for Pareto Tail Index Estimation"), + _.data.values := data.asJson + ) + end plotPickands + + /** Plot the raw extreme value index γ̂(k) from Pickands estimator. + * + * For heavy-tailed data, γ > 0. For light-tailed data (e.g., exponential), γ = 0. For bounded distributions, γ < + * 0. 
+ */ + def plotPickandsGamma(using viz.LowPriorityPlotTarget) = + val data = pp.kValues + .zip(pp.gammaEstimates) + .filter { case (_, est) => !est.isNaN && est.isFinite } + .map { case (k, est) => + (k = k, estimate = est) + } + + hillPlotSpec.plot( + _.title("Pickands Plot for Extreme Value Index γ"), + _.data.values := data.asJson + ) + end plotPickandsGamma + end extension + // extension (negBin: NegativeBinomial) // inline def plotPdf(using viz.LowPriorityPlotTarget) = // val numPoints = 1000 diff --git a/vecxt_re/src/HillEstimator.scala b/vecxt_re/src/HillEstimator.scala new file mode 100644 index 00000000..7fb6e670 --- /dev/null +++ b/vecxt_re/src/HillEstimator.scala @@ -0,0 +1,207 @@ +package vecxt_re + +/** Hill estimator for Pareto tail index estimation. + * + * The Hill estimator is used to estimate the shape parameter (α) of a Pareto distribution from the upper tail of the + * data. For a Pareto distribution with survival function S(x) = (x_min/x)^α, the tail index α determines how heavy + * the tail is: + * - α < 2: Infinite variance + * - α < 1: Infinite mean + * - Larger α means lighter tails + * + * The estimator uses the k largest order statistics: α̂ = 1 / (1/k * Σᵢ₌₁ᵏ ln(X₍ₙ₋ᵢ₊₁₎) - ln(X₍ₙ₋ₖ₎)) + */ +object HillEstimator: + + /** Computes the Hill estimator for the Pareto tail index using the k largest observations. 
+ * + * @param data + * The data array (will be sorted internally) + * @param k + * The number of upper order statistics to use (must be between 1 and n-1) + * @return + * The estimated tail index α + * @throws IllegalArgumentException + * if k is out of valid range or data is empty + */ + def apply(data: Array[Double], k: Int): Double = + require(data.length > 1, "Data must have at least 2 observations") + require(k >= 1 && k < data.length, s"k must be between 1 and ${data.length - 1}, got $k") + + val sorted = data.clone() + java.util.Arrays.sort(sorted) + + val n = sorted.length + val threshold = sorted(n - k - 1) // X_(n-k) + + require(threshold > 0, "Threshold (k-th largest value) must be positive for Pareto estimation") + + var sumLogRatios = 0.0 + var i = 0 + while i < k do + val xi = sorted(n - 1 - i) // X_(n-i+1) for i = 1..k + sumLogRatios += math.log(xi) - math.log(threshold) + i += 1 + end while + + k.toDouble / sumLogRatios + end apply + + /** Result of a Hill plot computation containing k values and corresponding tail index estimates. + * + * @param kValues + * Array of k values used + * @param estimates + * Corresponding tail index estimates α̂(k) + */ + case class HillPlotResult( + kValues: Array[Int], + estimates: Array[Double] + ): + /** Find a stable region in the Hill plot by looking for low variance segments. 
+ * + * @param windowSize + * Size of the sliding window for variance calculation + * @param threshold + * Maximum coefficient of variation to consider "stable" + * @return + * Optional tuple of (start k, end k, mean estimate) for the most stable region + */ + def findStableRegion(windowSize: Int = 10, threshold: Double = 0.1): Option[(bestStart: Int, bestEnd: Int, meanEstimate: Double)] = + if kValues.length < windowSize then None + else + var bestVariance = Double.MaxValue + var bestStart = 0 + var bestMean = 0.0 + + var i = 0 + while i <= estimates.length - windowSize do + var sum = 0.0 + var sumSq = 0.0 + var j = 0 + while j < windowSize do + val v = estimates(i + j) + sum += v + sumSq += v * v + j += 1 + end while + val mean = sum / windowSize + val variance = sumSq / windowSize - mean * mean + val cv = if mean != 0 then math.sqrt(variance) / math.abs(mean) else Double.MaxValue + + if cv < bestVariance && cv < threshold then + bestVariance = cv + bestStart = i + bestMean = mean + end if + i += 1 + end while + + if bestVariance < threshold then Some((bestStart = kValues(bestStart), bestEnd = kValues(bestStart + windowSize - 1), meanEstimate = bestMean)) + else None + end if + end findStableRegion + end HillPlotResult + + /** Computes a Hill plot: tail index estimates for a range of k values. + * + * A Hill plot shows how the estimate varies with k. A good estimate should show a stable plateau region. Too small k + * gives high variance; too large k includes non-tail observations. 
+ * + * @param data + * The data array + * @param kMin + * Minimum k value (default: 2) + * @param kMax + * Maximum k value (default: n/2 or n-1, whichever is smaller) + * @param step + * Step size for k values (default: 1) + * @return + * HillPlotResult containing k values and corresponding estimates + */ + def hillPlot( + data: Array[Double], + kMin: Int = 2, + kMax: Int = -1, + step: Int = 1 + ): HillPlotResult = + require(data.length > 2, "Data must have at least 3 observations") + + val sorted = data.clone() + java.util.Arrays.sort(sorted) + + val n = sorted.length + val actualKMax = if kMax < 0 then math.min(n / 2, n - 1) else math.min(kMax, n - 1) + val actualKMin = math.max(kMin, 1) + + require(actualKMin < actualKMax, s"kMin ($actualKMin) must be less than kMax ($actualKMax)") + + // Pre-compute log values for efficiency + val logValues = new Array[Double](n) + var i = 0 + while i < n do + logValues(i) = math.log(sorted(i)) + i += 1 + end while + + // Calculate number of k values + val numK = (actualKMax - actualKMin) / step + 1 + val kValues = new Array[Int](numK) + val estimates = new Array[Double](numK) + + var idx = 0 + var k = actualKMin + while k <= actualKMax do + val threshold = sorted(n - k - 1) + val logThreshold = logValues(n - k - 1) + + var sumLogRatios = 0.0 + var j = 0 + while j < k do + sumLogRatios += logValues(n - 1 - j) - logThreshold + j += 1 + end while + + kValues(idx) = k + estimates(idx) = k.toDouble / sumLogRatios + idx += 1 + k += step + end while + + HillPlotResult(kValues, estimates) + end hillPlot + +end HillEstimator + +object HillEstimatorExtensions: + + extension (vec: Array[Double]) + + /** Computes the Hill estimator for the Pareto tail index. + * + * @param k + * The number of upper order statistics to use + * @return + * The estimated tail index α + */ + inline def hillEstimator(k: Int): Double = HillEstimator(vec, k) + + /** Computes a Hill plot for this data. 
+ * + * @param kMin + * Minimum k value (default: 2) + * @param kMax + * Maximum k value (default: n/2) + * @param step + * Step size for k values (default: 1) + * @return + * HillPlotResult with k values and estimates + */ + inline def hillPlot( + kMin: Int = 2, + kMax: Int = -1, + step: Int = 1 + ): HillEstimator.HillPlotResult = + HillEstimator.hillPlot(vec, kMin, kMax, step) + +end HillEstimatorExtensions diff --git a/vecxt_re/src/PickandsEstimator.scala b/vecxt_re/src/PickandsEstimator.scala new file mode 100644 index 00000000..5acb202d --- /dev/null +++ b/vecxt_re/src/PickandsEstimator.scala @@ -0,0 +1,252 @@ +package vecxt_re + +/** Pickands estimator for extreme value index (tail index) estimation. + * + * It is unlikely to be useful, given the amount of data typically available in reinsurance. + * + * The Pickands estimator is a robust, non-parametric estimator for the extreme value index (EVI) of a distribution. + * Unlike the Hill estimator which assumes a Pareto-type tail, the Pickands estimator works for all three domains of + * attraction (Fréchet, Gumbel, Weibull). + * + * For heavy-tailed distributions (Fréchet domain), the EVI γ > 0 corresponds to a Pareto tail index α = 1/γ. + * + * The estimator uses order statistics at positions n-k, n-2k, n-4k: γ̂ = (1/ln2) * ln((X₍ₙ₋ₖ₎ - X₍ₙ₋₂ₖ₎) / (X₍ₙ₋₂ₖ₎ - + * X₍ₙ₋₄ₖ₎)) + * + * Properties: + * - More robust to model misspecification than Hill + * - Higher variance than Hill for pure Pareto data + * - Works for all extreme value distributions, not just Pareto + * - Consistent and asymptotically normal + */ +object PickandsEstimator: + + private val ln2 = math.log(2.0) + + /** Computes the Pickands estimator for the extreme value index. 
+ * + * @param data + * The data array (will be sorted internally) + * @param k + * The tuning parameter (must satisfy 4k < n) + * @return + * The estimated extreme value index γ (for Pareto, α = 1/γ) + * @throws IllegalArgumentException + * if k is out of valid range or data is too small + */ + def apply(data: Array[Double], k: Int): Double = + require(data.length >= 5, "Data must have at least 5 observations") + require(k >= 1, s"k must be at least 1, got $k") + require(4 * k < data.length, s"4*k must be less than n=${data.length}, got 4*$k=${4 * k}") + + val sorted = data.clone() + java.util.Arrays.sort(sorted) + + val n = sorted.length + + // Order statistics (using 1-based indexing convention, converted to 0-based) + // X_(n-k+1), X_(n-2k+1), X_(n-4k+1) in 1-based + // = sorted(n-k), sorted(n-2k), sorted(n-4k) in 0-based + val x_nk = sorted(n - k) + val x_n2k = sorted(n - 2 * k) + val x_n4k = sorted(n - 4 * k) + + val numerator = x_nk - x_n2k + val denominator = x_n2k - x_n4k + + require(denominator > 0, "Denominator (X_(n-2k) - X_(n-4k)) must be positive") + require(numerator > 0, "Numerator (X_(n-k) - X_(n-2k)) must be positive") + + val ratio = numerator / denominator + math.log(ratio) / ln2 + end apply + + /** Computes the Pareto tail index α from the Pickands estimate. + * + * For heavy-tailed distributions in the Fréchet domain, γ > 0 and α = 1/γ. + * + * @param data + * The data array + * @param k + * The tuning parameter + * @return + * The estimated Pareto tail index α = 1/γ + */ + def tailIndex(data: Array[Double], k: Int): Double = + val gamma = apply(data, k) + require(gamma > 0, s"Pickands estimate γ=$gamma is not positive; data may not be heavy-tailed") + 1.0 / gamma + end tailIndex + + /** Result of a Pickands plot computation. 
+ * + * @param kValues + * Array of k values used + * @param gammaEstimates + * Corresponding EVI estimates γ̂(k) + * @param alphaEstimates + * Corresponding tail index estimates α̂(k) = 1/γ̂(k) (NaN if γ ≤ 0) + */ + case class PickandsPlotResult( + kValues: Array[Int], + gammaEstimates: Array[Double], + alphaEstimates: Array[Double] + ): + /** Find a stable region by looking for low variance segments in gamma estimates. + * + * @param windowSize + * Size of the sliding window + * @param threshold + * Maximum coefficient of variation to consider "stable" + * @return + * Optional tuple of (start k, end k, mean gamma, mean alpha) + */ + def findStableRegion(windowSize: Int = 5, threshold: Double = 0.2): Option[(Int, Int, Double, Double)] = + if kValues.length < windowSize then None + else + var bestVariance = Double.MaxValue + var bestStart = 0 + var bestMeanGamma = 0.0 + + var i = 0 + while i <= gammaEstimates.length - windowSize do + var sum = 0.0 + var sumSq = 0.0 + var validCount = 0 + var j = 0 + while j < windowSize do + val v = gammaEstimates(i + j) + if !v.isNaN && v.isFinite then + sum += v + sumSq += v * v + validCount += 1 + end if + j += 1 + end while + + if validCount == windowSize then + val mean = sum / windowSize + val variance = sumSq / windowSize - mean * mean + val cv = if mean != 0 then math.sqrt(math.abs(variance)) / math.abs(mean) else Double.MaxValue + + if cv < bestVariance && cv < threshold then + bestVariance = cv + bestStart = i + bestMeanGamma = mean + end if + end if + i += 1 + end while + + if bestVariance < threshold && bestMeanGamma > 0 then + Some((kValues(bestStart), kValues(bestStart + windowSize - 1), bestMeanGamma, 1.0 / bestMeanGamma)) + else None + end if + end findStableRegion + end PickandsPlotResult + + /** Computes a Pickands plot: EVI estimates for a range of k values. 
+ * + * @param data + * The data array + * @param kMin + * Minimum k value (default: 1) + * @param kMax + * Maximum k value (default: (n-1)/4) + * @param step + * Step size for k values (default: 1) + * @return + * PickandsPlotResult containing k values and estimates + */ + def pickandsPlot( + data: Array[Double], + kMin: Int = 1, + kMax: Int = -1, + step: Int = 1 + ): PickandsPlotResult = + require(data.length >= 5, "Data must have at least 5 observations") + + val sorted = data.clone() + java.util.Arrays.sort(sorted) + + val n = sorted.length + // Maximum valid k is floor((n-1)/4) since we need 4k < n + val maxValidK = (n - 1) / 4 + val actualKMax = if kMax < 0 then maxValidK else math.min(kMax, maxValidK) + val actualKMin = math.max(kMin, 1) + + require(actualKMin <= actualKMax, s"kMin ($actualKMin) must be <= kMax ($actualKMax), n=$n allows k up to $maxValidK") + + // Calculate number of k values + val numK = (actualKMax - actualKMin) / step + 1 + val kValues = new Array[Int](numK) + val gammaEstimates = new Array[Double](numK) + val alphaEstimates = new Array[Double](numK) + + var idx = 0 + var k = actualKMin + while k <= actualKMax do + val x_nk = sorted(n - k) + val x_n2k = sorted(n - 2 * k) + val x_n4k = sorted(n - 4 * k) + + val numerator = x_nk - x_n2k + val denominator = x_n2k - x_n4k + + val gamma = + if denominator > 0 && numerator > 0 then math.log(numerator / denominator) / ln2 + else Double.NaN + + kValues(idx) = k + gammaEstimates(idx) = gamma + alphaEstimates(idx) = if gamma > 0 then 1.0 / gamma else Double.NaN + idx += 1 + k += step + end while + + PickandsPlotResult(kValues, gammaEstimates, alphaEstimates) + end pickandsPlot + +end PickandsEstimator + +object PickandsEstimatorExtensions: + + extension (vec: Array[Double]) + + /** Computes the Pickands estimator for the extreme value index γ. 
+ * + * @param k + * The tuning parameter (must satisfy 4k < n) + * @return + * The estimated extreme value index γ + */ + inline def pickandsEstimator(k: Int): Double = PickandsEstimator(vec, k) + + /** Computes the Pareto tail index α using the Pickands estimator. + * + * @param k + * The tuning parameter + * @return + * The estimated tail index α = 1/γ + */ + inline def pickandsTailIndex(k: Int): Double = PickandsEstimator.tailIndex(vec, k) + + /** Computes a Pickands plot for this data. + * + * @param kMin + * Minimum k value (default: 1) + * @param kMax + * Maximum k value (default: (n-1)/4) + * @param step + * Step size for k values (default: 1) + * @return + * PickandsPlotResult with k values and estimates + */ + inline def pickandsPlot( + kMin: Int = 1, + kMax: Int = -1, + step: Int = 1 + ): PickandsEstimator.PickandsPlotResult = + PickandsEstimator.pickandsPlot(vec, kMin, kMax, step) + +end PickandsEstimatorExtensions diff --git a/vecxt_re/test/src/hillEstimator.test.scala b/vecxt_re/test/src/hillEstimator.test.scala new file mode 100644 index 00000000..2ce36447 --- /dev/null +++ b/vecxt_re/test/src/hillEstimator.test.scala @@ -0,0 +1,151 @@ +package vecxt_re + +import HillEstimatorExtensions.* + +class HillEstimatorSuite extends munit.FunSuite: + + // Helper to generate Pareto samples using inverse transform + def generatePareto(n: Int, alpha: Double, xMin: Double = 1.0, seed: Long = 42L): Array[Double] = + val rng = new scala.util.Random(seed) + Array.fill(n) { + val u = rng.nextDouble() + xMin / math.pow(u, 1.0 / alpha) + } + + test("Hill estimator basic sanity check") { + // Simple case: known sorted data + val data = Array(1.0, 2.0, 4.0, 8.0, 16.0) + // Using k=2 means we use the 2 largest: 16, 8 + // Threshold is at position n-k-1 = 5-2-1 = 2, which is 4.0 + // sum = ln(16/4) + ln(8/4) = ln(4) + ln(2) = 2*ln(2) + ln(2) = 3*ln(2) + // estimate = 2 / (3*ln(2)) + val expected = 2.0 / (3.0 * math.log(2.0)) + val estimate = data.hillEstimator(2) + 
assertEqualsDouble(estimate, expected, 1e-10) + } + + test("Hill estimator converges for Pareto(2.0) distribution") { + val alpha = 2.0 + val data = generatePareto(10000, alpha) + // With large sample, estimate should be close to true alpha + val estimate = data.hillEstimator(500) + // Allow 15% error for statistical estimation + assertEqualsDouble(estimate, alpha, alpha * 0.15) + } + + test("Hill estimator converges for Pareto(1.5) distribution") { + val alpha = 1.5 + val data = generatePareto(10000, alpha) + val estimate = data.hillEstimator(500) + assertEqualsDouble(estimate, alpha, alpha * 0.15) + } + + test("Hill estimator converges for Pareto(3.0) distribution") { + val alpha = 3.0 + val data = generatePareto(10000, alpha) + val estimate = data.hillEstimator(500) + assertEqualsDouble(estimate, alpha, alpha * 0.15) + } + + test("Hill estimator rejects invalid k values") { + val data = Array(1.0, 2.0, 3.0, 4.0, 5.0) + + intercept[IllegalArgumentException] { + data.hillEstimator(0) // k must be >= 1 + } + + intercept[IllegalArgumentException] { + data.hillEstimator(5) // k must be < n + } + + intercept[IllegalArgumentException] { + data.hillEstimator(10) // k must be < n + } + } + + test("Hill estimator rejects empty or single-element arrays") { + intercept[IllegalArgumentException] { + Array.empty[Double].hillEstimator(1) + } + + intercept[IllegalArgumentException] { + Array(1.0).hillEstimator(1) + } + } + + test("Hill plot produces valid output") { + val alpha = 2.0 + val data = generatePareto(1000, alpha) + val result = data.hillPlot(kMin = 10, kMax = 100, step = 5) + + assertEquals(result.kValues.length, result.estimates.length) + assert(result.kValues.head == 10) + assert(result.kValues.last == 100) + assert(result.kValues.length == 19) // (100-10)/5 + 1 = 19 + } + + test("Hill plot estimates are positive for valid Pareto data") { + val data = generatePareto(500, 2.0) + val result = data.hillPlot(kMin = 5, kMax = 50) + + result.estimates.foreach { est => + 
assert(est > 0, s"Expected positive estimate, got $est") + } + } + + test("Hill plot default kMax is sensible") { + val data = generatePareto(100, 2.0) + val result = data.hillPlot() + + // Default kMax should be min(n/2, n-1) = 50 + assert(result.kValues.last <= 50) + assert(result.kValues.head == 2) // default kMin + } + + test("Hill plot findStableRegion identifies plateau") { + // Generate clean Pareto data + val alpha = 2.0 + val data = generatePareto(5000, alpha, seed = 123L) + val result = data.hillPlot(kMin = 50, kMax = 1000, step = 10) + + result.findStableRegion(windowSize = 5, threshold = 0.15) match + case Some((kStart, kEnd, meanEstimate)) => + // The stable region should give estimate close to true alpha + assertEqualsDouble(meanEstimate, alpha, alpha * 0.2) + assert(kStart < kEnd) + case None => + // It's okay if no stable region found with strict threshold + // Just verify the method runs without error + () + } + + test("Hill estimator is invariant to data order") { + val data = Array(5.0, 1.0, 10.0, 2.0, 20.0, 3.0) + val shuffled = data.clone() + scala.util.Random.shuffle(shuffled.toSeq).toArray + + val est1 = data.hillEstimator(2) + val est2 = shuffled.hillEstimator(2) + + // Both should give same result after internal sorting + assertEqualsDouble(est1, est2, 1e-10) + } + + test("Hill estimator with k=1 uses only largest value") { + val data = Array(1.0, 2.0, 4.0, 8.0) + // k=1: use only largest (8), threshold is second largest (4) + // estimate = 1 / ln(8/4) = 1/ln(2) + val expected = 1.0 / math.log(2.0) + val estimate = data.hillEstimator(1) + assertEqualsDouble(estimate, expected, 1e-10) + } + + test("Hill plot step parameter works correctly") { + val data = generatePareto(200, 2.0) + val result = data.hillPlot(kMin = 10, kMax = 50, step = 10) + + assertEquals(result.kValues.toSeq, Seq(10, 20, 30, 40, 50)) + assertEquals(result.estimates.length, 5) + } + +end HillEstimatorSuite diff --git a/vecxt_re/test/src/pickandsEstimator.test.scala 
b/vecxt_re/test/src/pickandsEstimator.test.scala new file mode 100644 index 00000000..02e6dce8 --- /dev/null +++ b/vecxt_re/test/src/pickandsEstimator.test.scala @@ -0,0 +1,145 @@ +package vecxt_re + +import PickandsEstimatorExtensions.* + +class PickandsEstimatorSuite extends munit.FunSuite: + + // Helper to generate Pareto samples using inverse transform + def generatePareto(n: Int, alpha: Double, xMin: Double = 1.0, seed: Long = 42L): Array[Double] = + val rng = new scala.util.Random(seed) + Array.fill(n) { + val u = rng.nextDouble() + xMin / math.pow(u, 1.0 / alpha) + } + + test("Pickands estimator basic formula check") { + // Construct a simple case where we know the order statistics + // Data: 1, 2, 3, 4, 5, 6, 7, 8, 9 (n=9) + // k=2: X_(n-k)=X_(7)=8, X_(n-2k)=X_(5)=6, X_(n-4k)=X_(1)=2 + // γ = ln((8-6)/(6-2)) / ln(2) = ln(2/4) / ln(2) = ln(0.5) / ln(2) = -1 + val data = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0) + val gamma = data.pickandsEstimator(2) + assertEqualsDouble(gamma, -1.0, 1e-10) + } + + test("Pickands estimator for Pareto(2.0) distribution converges") { + val alpha = 2.0 + val gamma = 1.0 / alpha // = 0.5 + val data = generatePareto(10000, alpha) + + // Pickands has higher variance - use multiple k values and average + val result = data.pickandsPlot(kMin = 100, kMax = 500, step = 10) + val validEstimates = result.gammaEstimates.filter(g => g > 0 && !g.isNaN) + val meanEstimate = validEstimates.sum / validEstimates.length + + // Pickands has higher variance than Hill, so allow 50% error + assertEqualsDouble(meanEstimate, gamma, gamma * 0.5) + } + + test("Pickands tail index for Pareto(2.0)") { + val alpha = 2.0 + val data = generatePareto(10000, alpha) + + // Average over a range of k values for more stable estimate + val result = data.pickandsPlot(kMin = 100, kMax = 500, step = 10) + val validEstimates = result.alphaEstimates.filter(a => a > 0 && !a.isNaN && a.isFinite) + val meanEstimate = validEstimates.sum / validEstimates.length + + 
// Should be close to 2, allow 50% error for Pickands + assertEqualsDouble(meanEstimate, alpha, alpha * 0.5) + } + + test("Pickands estimator for Pareto(1.5) distribution") { + val alpha = 1.5 + val gamma = 1.0 / alpha + val data = generatePareto(5000, alpha, seed = 123L) + val k = 40 + val estimate = data.pickandsEstimator(k) + + assertEqualsDouble(estimate, gamma, gamma * 0.35) + } + + test("Pickands estimator rejects invalid k values") { + val data = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0) // n=9 + + intercept[IllegalArgumentException] { + data.pickandsEstimator(0) // k must be >= 1 + } + + intercept[IllegalArgumentException] { + data.pickandsEstimator(3) // 4*3=12 > 9 + } + } + + test("Pickands estimator rejects small arrays") { + intercept[IllegalArgumentException] { + Array(1.0, 2.0, 3.0, 4.0).pickandsEstimator(1) + } + } + + test("Pickands plot produces valid output") { + val alpha = 2.0 + val data = generatePareto(1000, alpha) + val result = data.pickandsPlot(kMin = 5, kMax = 50, step = 5) + + assertEquals(result.kValues.length, result.gammaEstimates.length) + assertEquals(result.kValues.length, result.alphaEstimates.length) + assert(result.kValues.head == 5) + assert(result.kValues.last == 50) + } + + test("Pickands plot default kMax respects 4k < n constraint") { + val data = generatePareto(100, 2.0) + val result = data.pickandsPlot() + + // Max valid k = (100-1)/4 = 24 + assert(result.kValues.last <= 24) + } + + test("Pickands plot positive gamma implies positive alpha") { + val data = generatePareto(500, 2.0) + val result = data.pickandsPlot(kMin = 2, kMax = 20) + + result.gammaEstimates.zip(result.alphaEstimates).foreach { case (gamma, alpha) => + if gamma > 0 && !gamma.isNaN then + assert(alpha > 0, s"Expected positive alpha for gamma=$gamma") + assertEqualsDouble(alpha, 1.0 / gamma, 1e-10) + end if + } + } + + test("Pickands estimator is invariant to data order") { + val data = Array(9.0, 1.0, 5.0, 3.0, 7.0, 2.0, 8.0, 4.0, 6.0) + val 
shuffled = scala.util.Random.shuffle(data.toSeq).toArray + + val est1 = data.pickandsEstimator(2) + val est2 = shuffled.pickandsEstimator(2) + + assertEqualsDouble(est1, est2, 1e-10) + } + + test("Pickands findStableRegion identifies plateau") { + val alpha = 2.0 + val data = generatePareto(2000, alpha, seed = 456L) + val result = data.pickandsPlot(kMin = 10, kMax = 100, step = 2) + + result.findStableRegion(windowSize = 5, threshold = 0.3) match + case Some((kStart, kEnd, meanGamma, meanAlpha)) => + // Mean alpha should be close to true alpha + assertEqualsDouble(meanAlpha, alpha, alpha * 0.35) + assert(kStart < kEnd) + assertEqualsDouble(meanGamma, 1.0 / meanAlpha, 1e-10) + case None => + // Okay if no stable region found with strict threshold + () + } + + test("Pickands plot step parameter works correctly") { + val data = generatePareto(500, 2.0) + val result = data.pickandsPlot(kMin = 5, kMax = 25, step = 5) + + assertEquals(result.kValues.toSeq, Seq(5, 10, 15, 20, 25)) + assertEquals(result.gammaEstimates.length, 5) + } + +end PickandsEstimatorSuite From 29698c11fb59a41e90a817eb0d24ff9f5f0e25e3 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Thu, 29 Jan 2026 19:59:58 +0000 Subject: [PATCH 71/75] [autofix.ci] apply automated fixes --- vecxt_re/src/HillEstimator.scala | 14 ++++++++++---- vecxt_re/src/PickandsEstimator.scala | 11 ++++++++--- vecxt_re/test/src/hillEstimator.test.scala | 2 ++ vecxt_re/test/src/pickandsEstimator.test.scala | 2 ++ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/vecxt_re/src/HillEstimator.scala b/vecxt_re/src/HillEstimator.scala index 7fb6e670..e484d39e 100644 --- a/vecxt_re/src/HillEstimator.scala +++ b/vecxt_re/src/HillEstimator.scala @@ -3,8 +3,8 @@ package vecxt_re /** Hill estimator for Pareto tail index estimation. * * The Hill estimator is used to estimate the shape parameter (α) of a Pareto distribution from the upper tail of the - * data. 
For a Pareto distribution with survival function S(x) = (x_min/x)^α, the tail index α determines how heavy - * the tail is: + * data. For a Pareto distribution with survival function S(x) = (x_min/x)^α, the tail index α determines how heavy the + * tail is: * - α < 2: Infinite variance * - α < 1: Infinite mean * - Larger α means lighter tails @@ -67,7 +67,10 @@ object HillEstimator: * @return * Optional tuple of (start k, end k, mean estimate) for the most stable region */ - def findStableRegion(windowSize: Int = 10, threshold: Double = 0.1): Option[(bestStart: Int, bestEnd: Int, meanEstimate: Double)] = + def findStableRegion( + windowSize: Int = 10, + threshold: Double = 0.1 + ): Option[(bestStart: Int, bestEnd: Int, meanEstimate: Double)] = if kValues.length < windowSize then None else var bestVariance = Double.MaxValue @@ -97,8 +100,10 @@ object HillEstimator: i += 1 end while - if bestVariance < threshold then Some((bestStart = kValues(bestStart), bestEnd = kValues(bestStart + windowSize - 1), meanEstimate = bestMean)) + if bestVariance < threshold then + Some((bestStart = kValues(bestStart), bestEnd = kValues(bestStart + windowSize - 1), meanEstimate = bestMean)) else None + end if end if end findStableRegion end HillPlotResult @@ -203,5 +208,6 @@ object HillEstimatorExtensions: step: Int = 1 ): HillEstimator.HillPlotResult = HillEstimator.hillPlot(vec, kMin, kMax, step) + end extension end HillEstimatorExtensions diff --git a/vecxt_re/src/PickandsEstimator.scala b/vecxt_re/src/PickandsEstimator.scala index 5acb202d..5d62b1a9 100644 --- a/vecxt_re/src/PickandsEstimator.scala +++ b/vecxt_re/src/PickandsEstimator.scala @@ -1,8 +1,8 @@ package vecxt_re /** Pickands estimator for extreme value index (tail index) estimation. - * - * It is unlikely to be useful, given the amount of data typically available in reinsurance. + * + * It is unlikely to be useful, given the amount of data typically available in reinsurance. 
* * The Pickands estimator is a robust, non-parametric estimator for the extreme value index (EVI) of a distribution. * Unlike the Hill estimator which assumes a Pareto-type tail, the Pickands estimator works for all three domains of @@ -141,6 +141,7 @@ object PickandsEstimator: if bestVariance < threshold && bestMeanGamma > 0 then Some((kValues(bestStart), kValues(bestStart + windowSize - 1), bestMeanGamma, 1.0 / bestMeanGamma)) else None + end if end if end findStableRegion end PickandsPlotResult @@ -175,7 +176,10 @@ object PickandsEstimator: val actualKMax = if kMax < 0 then maxValidK else math.min(kMax, maxValidK) val actualKMin = math.max(kMin, 1) - require(actualKMin <= actualKMax, s"kMin ($actualKMin) must be <= kMax ($actualKMax), n=$n allows k up to $maxValidK") + require( + actualKMin <= actualKMax, + s"kMin ($actualKMin) must be <= kMax ($actualKMax), n=$n allows k up to $maxValidK" + ) // Calculate number of k values val numK = (actualKMax - actualKMin) / step + 1 @@ -248,5 +252,6 @@ object PickandsEstimatorExtensions: step: Int = 1 ): PickandsEstimator.PickandsPlotResult = PickandsEstimator.pickandsPlot(vec, kMin, kMax, step) + end extension end PickandsEstimatorExtensions diff --git a/vecxt_re/test/src/hillEstimator.test.scala b/vecxt_re/test/src/hillEstimator.test.scala index 2ce36447..ff688b8e 100644 --- a/vecxt_re/test/src/hillEstimator.test.scala +++ b/vecxt_re/test/src/hillEstimator.test.scala @@ -11,6 +11,7 @@ class HillEstimatorSuite extends munit.FunSuite: val u = rng.nextDouble() xMin / math.pow(u, 1.0 / alpha) } + end generatePareto test("Hill estimator basic sanity check") { // Simple case: known sorted data @@ -117,6 +118,7 @@ class HillEstimatorSuite extends munit.FunSuite: // It's okay if no stable region found with strict threshold // Just verify the method runs without error () + end match } test("Hill estimator is invariant to data order") { diff --git a/vecxt_re/test/src/pickandsEstimator.test.scala 
b/vecxt_re/test/src/pickandsEstimator.test.scala index 02e6dce8..95c53870 100644 --- a/vecxt_re/test/src/pickandsEstimator.test.scala +++ b/vecxt_re/test/src/pickandsEstimator.test.scala @@ -11,6 +11,7 @@ class PickandsEstimatorSuite extends munit.FunSuite: val u = rng.nextDouble() xMin / math.pow(u, 1.0 / alpha) } + end generatePareto test("Pickands estimator basic formula check") { // Construct a simple case where we know the order statistics @@ -132,6 +133,7 @@ class PickandsEstimatorSuite extends munit.FunSuite: case None => // Okay if no stable region found with strict threshold () + end match } test("Pickands plot step parameter works correctly") { From a5a82845e196b59e3690358c181cf2e28290c4c5 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 21:09:32 +0100 Subject: [PATCH 72/75] . --- vecxt/src-js-native/array.scala | 19 +++++++++++++++++++ vecxt/src-jvm/arrays.scala | 19 +++++++++++++++++++ vecxt/src/intarray.scala | 19 ------------------- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/vecxt/src-js-native/array.scala b/vecxt/src-js-native/array.scala index a771ec6e..9fd2b860 100644 --- a/vecxt/src-js-native/array.scala +++ b/vecxt/src-js-native/array.scala @@ -609,6 +609,25 @@ object JsNativeDoubleArrays: end while idx end logicalIdxArr + + inline def countsToIdx: Array[Int] = + var total = vec.sum + var i = 0 + val out = new Array[Int](total) + var j = 0 + while i < vec.length do + val count = vec(i) + val idx = i + 1 + var k = 0 + while k < count do + out(j) = idx + j += 1 + k += 1 + end while + i += 1 + end while + out + end countsToIdx end extension // extension [@specialized(Double, Int) A: Numeric](vec: Array[A]) diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index a61cc123..0962e80c 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -232,6 +232,25 @@ object arrays: end increments + inline def countsToIdx: Array[Int] = + var total = vec.sumSIMD + var i = 0 + val out = new 
Array[Int](total) + var j = 0 + while i < vec.length do + val count = vec(i) + val idx = i + 1 + var k = 0 + while k < count do + out(j) = idx + j += 1 + k += 1 + end while + i += 1 + end while + out + end countsToIdx + inline def sumSIMD: Int = var i: Int = 0 var acc = IntVector.zero(spi) diff --git a/vecxt/src/intarray.scala b/vecxt/src/intarray.scala index 533aec7c..5d925667 100644 --- a/vecxt/src/intarray.scala +++ b/vecxt/src/intarray.scala @@ -36,25 +36,6 @@ object IntArrays: out end select - inline def countsToIdx: Array[Int] = - var total = arr.sumSIMD - var i = 0 - val out = new Array[Int](total) - var j = 0 - while i < arr.length do - val count = arr(i) - val idx = i + 1 - var k = 0 - while k < count do - out(j) = idx - j += 1 - k += 1 - end while - i += 1 - end while - out - end countsToIdx - inline def contiguous: Boolean = var i = 1 var out = true From cef4c873ec628c53658f662be98929c60b643241 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 21:28:10 +0100 Subject: [PATCH 73/75] . --- site/{docs => TODO}/xPlatform.md | 0 site/docs/blog/2024-08-01-Motivation.md | 2 +- vecxt_re/src-jvm/PlatformReporting.scala | 2 +- vecxt_re/src/Layer.scala | 6 ++++++ 4 files changed, 8 insertions(+), 2 deletions(-) rename site/{docs => TODO}/xPlatform.md (100%) diff --git a/site/docs/xPlatform.md b/site/TODO/xPlatform.md similarity index 100% rename from site/docs/xPlatform.md rename to site/TODO/xPlatform.md diff --git a/site/docs/blog/2024-08-01-Motivation.md b/site/docs/blog/2024-08-01-Motivation.md index bfa3f86f..f8ec9e33 100644 --- a/site/docs/blog/2024-08-01-Motivation.md +++ b/site/docs/blog/2024-08-01-Motivation.md @@ -17,7 +17,7 @@ For example if your data acquisition is serverside, but do parts of a calculatio # JVM -[[vecxt]] is cross platform, this example runs on the JVM, see [Cross Platform](../xPlatform.md) for the same example running in scalaJS. 
+[[vecxt]] is cross platform, this example runs on the JVM, see (coming soon) cross platform docs for the same example running in scalaJS. ```scala mdoc diff --git a/vecxt_re/src-jvm/PlatformReporting.scala b/vecxt_re/src-jvm/PlatformReporting.scala index 8d041cd4..138bd207 100644 --- a/vecxt_re/src-jvm/PlatformReporting.scala +++ b/vecxt_re/src-jvm/PlatformReporting.scala @@ -89,7 +89,7 @@ object PlatformReporting: val exhaustProb = exhaustCount.toDouble / numIterations ( - name = calcd.layer.layerName.getOrElse(s"Layer ${calcd.layer.layerId}"), + name = calcd.layer.layerName.getOrElse(calcd.layer.autoName), limit = reportLimit, el = el / reportLimit, stdDev = stdDev / reportLimit, diff --git a/vecxt_re/src/Layer.scala b/vecxt_re/src/Layer.scala index 326cd2e2..1b1cd175 100644 --- a/vecxt_re/src/Layer.scala +++ b/vecxt_re/src/Layer.scala @@ -56,6 +56,12 @@ case class Layer( lazy val brokerageUnitString = brokerageUnit.map(_.toString) lazy val occLayer = Sublayer(occLimit, occRetention, LossCalc.Occ, occType) lazy val aggLayer = Sublayer(aggLimit, aggRetention, LossCalc.Agg, aggType) + lazy val autoName: String = + occLimit match + case Some(occLim) => s"$occLim xs ${occRetention.getOrElse(0.0)}" + case None => aggLimit match + case Some(aggLim) => s"$aggLim xs ${aggRetention.getOrElse(0.0)} agg" + case None => "Unlimited Layer" lazy val firstLimit = occLimit.orElse(aggLimit).getOrElse(Double.PositiveInfinity) From 02bbaefc7ae96bbc8e6b1ae424d3ce9f8a3861e5 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Thu, 29 Jan 2026 20:29:28 +0000 Subject: [PATCH 74/75] [autofix.ci] apply automated fixes --- vecxt_re/src/Layer.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vecxt_re/src/Layer.scala b/vecxt_re/src/Layer.scala index 1b1cd175..7c24cc78 100644 --- a/vecxt_re/src/Layer.scala +++ b/vecxt_re/src/Layer.scala @@ -59,9 +59,10 @@ case class Layer( lazy val autoName: String = occLimit 
match case Some(occLim) => s"$occLim xs ${occRetention.getOrElse(0.0)}" - case None => aggLimit match - case Some(aggLim) => s"$aggLim xs ${aggRetention.getOrElse(0.0)} agg" - case None => "Unlimited Layer" + case None => + aggLimit match + case Some(aggLim) => s"$aggLim xs ${aggRetention.getOrElse(0.0)} agg" + case None => "Unlimited Layer" lazy val firstLimit = occLimit.orElse(aggLimit).getOrElse(Double.PositiveInfinity) From 88c8ad1d8f72f9e64c9dd5e80e054bb4e6fcf798 Mon Sep 17 00:00:00 2001 From: Simon Parten Date: Thu, 29 Jan 2026 21:32:44 +0100 Subject: [PATCH 75/75] . --- vecxt/src/intarray.scala | 1 - vecxt_re/src-jvm/dist/NegativeBinomial.scala | 3 ++- vecxt_re/src-jvm/dist/Poisson.scala | 3 ++- vecxt_re/src-jvm/plots.scala | 7 ++++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/vecxt/src/intarray.scala b/vecxt/src/intarray.scala index 5d925667..1c1f64f0 100644 --- a/vecxt/src/intarray.scala +++ b/vecxt/src/intarray.scala @@ -5,7 +5,6 @@ import scala.util.control.Breaks.* import vecxt.BooleanArrays.trues import vecxt.BoundsCheck.BoundsCheck -import vecxt.arrays.sumSIMD object IntArrays: diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala index 063c6ac9..b9df1d11 100644 --- a/vecxt_re/src-jvm/dist/NegativeBinomial.scala +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -5,8 +5,9 @@ import org.apache.commons.rng.simple.RandomSource import org.apache.commons.statistics.distribution.GammaDistribution import org.apache.commons.statistics.distribution.PoissonDistribution -import io.circe.syntax.* import vecxt.all.* + +import io.circe.syntax.* import io.github.quafadas.plots.SetupVega.{*, given} /** Negative Binomial Distribution with alternative parameterization. 
diff --git a/vecxt_re/src-jvm/dist/Poisson.scala b/vecxt_re/src-jvm/dist/Poisson.scala index 8f215cd7..62815546 100644 --- a/vecxt_re/src-jvm/dist/Poisson.scala +++ b/vecxt_re/src-jvm/dist/Poisson.scala @@ -4,8 +4,9 @@ import org.apache.commons.numbers.gamma.LogGamma import org.apache.commons.rng.simple.RandomSource import org.apache.commons.statistics.distribution.PoissonDistribution -import io.circe.syntax.* import vecxt.all.* + +import io.circe.syntax.* import io.github.quafadas.plots.SetupVega.{*, given} /** Poisson Distribution. diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala index 35ccd016..b8bcef9c 100644 --- a/vecxt_re/src-jvm/plots.scala +++ b/vecxt_re/src-jvm/plots.scala @@ -1,10 +1,11 @@ package vecxt_re -import io.circe.syntax.* -import io.github.quafadas.plots.SetupVega.{*, given} import vecxt_re.HillEstimator.HillPlotResult import vecxt_re.PickandsEstimator.PickandsPlotResult +import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} + object Plots: // These must be private otherwise scaladoc get crazy. private lazy val timeline = VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate @@ -390,7 +391,7 @@ object Plots: val allData = theoreticalData ++ empiricalData logLogPlot.plot( - _.title(s"Mixed Distribution Log-Log Plot"), + _.title("Mixed Distribution Log-Log Plot"), _.data.values := allData.asJson, _.layer._0.encoding.x.scale.domainMin := threshold )