diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index ab628718..461f5779 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -45,6 +45,17 @@ vecxt/ │ ├── src-jvm/ # JVM-specific tests │ ├── src-js/ # Js-specific tests │ └── src-native/ # Scala Native-specific tests +├── vecxt_re/ # Domain specific library for reinsurance calculations +│ ├── src/ # Cross-platform shared source code +│ ├── src-jvm/ # JVM-specific implementations (SIMD Vector API) +│ ├── src-js/ # JavaScript-specific implementations +│ ├── src-js-native/ # JavaScript / native shared (DRY) implementations +│ ├── src-native/ # Scala Native-specific implementations +│ └── test/ # Cross-platform test suite (munit) +│ ├── src/ # Shared test source files +│ ├── src-jvm/ # JVM-specific tests +│ ├── src-js/ # Js-specific tests +│ └── src-native/ # Scala Native-specific tests ├── vecxt/ # Main source directory and core published module │ ├── src/ # Cross-platform shared source code │ ├── src-jvm/ # JVM-specific implementations (SIMD Vector API) @@ -81,4 +92,8 @@ Follow styleguide.md for coding conventions Use inline methods where possible to avoid dispatch overhead where possible. ## GitHub Actions CI -The project uses GitHub Actions for CI/CD \ No newline at end of file +The project uses GitHub Actions for CI/CD + +## Vecxt Re + +Contains a bunch of domain specific code for reinsurance calculations, structures, and various reinsurance contract types. It will often rely on Vecxt. You should view the principles as the same - correctness above all else - performance matters. It also aims to expose a consistent cross-platform API. 
\ No newline at end of file diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml new file mode 100644 index 00000000..83266889 --- /dev/null +++ b/.github/workflows/autofix.yml @@ -0,0 +1,19 @@ +name: 'autofix.ci' +on: + pull_request: +jobs: + autofix: + if: github.event.pull_request.draft == false + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 25 + + - name: Run autoformat + run: ./mill mill.scalalib.scalafmt.ScalafmtModule/reformatAll __.sources + + - uses: autofix-ci/action@ff86a557419858bb967097bfc916833f5647fa8c \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10d444a1..b1398a51 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,6 +68,9 @@ jobs: - name: Test run: ./mill vecxtensions.${{ matrix.project }}.test + - name: Test + run: ./mill vecxt_re.${{ matrix.project }}.test + - name: Laws Test if: matrix.project == 'jvm' run: ./mill laws.${{ matrix.project }}.test diff --git a/.gitignore b/.gitignore index f2596f11..fbb16a79 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,6 @@ weights2.csv biases1.csv biases2.csv .DS_Store + +.venv/ +experiments/src/bhd.scala diff --git a/.vscode/launch.json b/.vscode/launch.json index 3e8bca59..e3ca2737 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,49 +1,35 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. 
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ { "type": "scala", "request": "launch", - "name": "test Suite", - "buildTarget": "vecxt.jvm.test", - "testClass": "vecxt.LongArraysSuite", - "jvmOptions": [ - "--add-modules=jdk.incubator.vector" - ], - "args": [ - "-oD" - ] + "name": "plotIndex", + "mainClass": "experiments.plotIndex", + "buildTarget": "file:///Users/simon/Code/vecxt/experiments", + "args": [], + "jvmOptions": [], + "env": {} }, { "type": "scala", "request": "launch", - "name": "debug test", - "buildTarget": "vecxt.jvm.test", - "testClass": "vecxt.MatrixAdditionTest", - "jvmOptions": [ - "--add-modules=jdk.incubator.vector" - ], - "args": [ - "-oD" - ], - "env": {}, - "internalConsoleOptions": "openOnSessionStart", - "preLaunchTask": "", - "postDebugTask": "" + "name": "readXl", + "mainClass": "experiments.readXl", + "buildTarget": "file:///Users/simon/Code/vecxt/experiments", + "args": [], + "jvmOptions": [], + "env": {} }, { "type": "scala", - "request": "attach", - "name": "Attach debugger", - // name of the module that is being debugging - "buildTarget": "vecxt.jvm.test", - // Host of the jvm to connect to - "hostName": "localhost", - // Port to connect to - "port": 5005 + "request": "launch", + "name": "pricingFun", + "mainClass": "experiments.pricingFun", + "buildTarget": "file:///Users/simon/Code/vecxt/experiments", + "args": [], + "jvmOptions": [], + "env": {} } ] } \ No newline at end of file diff --git a/.vscode/mcp.json b/.vscode/mcp.json index 10703d90..9d2ba94f 100644 --- a/.vscode/mcp.json +++ b/.vscode/mcp.json @@ -1,8 +1,8 @@ { "servers": { "vecxt-metals": { - "url": "http://localhost:51891/sse", - "type": "sse" + "url": "http://localhost:51891/mcp", + "type": "http" } } } \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 46f8cb22..ad9b443e 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -4,7 +4,7 @@ { "label": 
"compiledClassesAndSemanticDbFiles", "type": "shell", - "command": "./mill __.compiledClassesAndSemanticDbFiles", + "command": "./mill __.jvm.compiledClassesAndSemanticDbFiles", "runOptions": { "runOn": "folderOpen" }, diff --git a/benchmark/package.mill b/benchmark/package.mill index b5570157..30db05e2 100644 --- a/benchmark/package.mill +++ b/benchmark/package.mill @@ -9,7 +9,7 @@ object `package` extends JmhModule with ScalaModule: def scalaVersion = build.vecxt.jvm.scalaVersion def jmhCoreVersion = "1.37" override def forkArgs: T[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag - override def moduleDeps: Seq[JavaModule] = Seq(build.vecxt.jvm, build.vecxtensions.jvm) + override def moduleDeps: Seq[JavaModule] = Seq(build.vecxt.jvm, build.vecxtensions.jvm, build.vecxt_re.jvm) def enableBsp = false // override def generateBenchmarkSources = T{ diff --git a/benchmark/src/LossReportBenchmark.scala b/benchmark/src/LossReportBenchmark.scala new file mode 100644 index 00000000..990dd1e4 --- /dev/null +++ b/benchmark/src/LossReportBenchmark.scala @@ -0,0 +1,94 @@ +package vecxt.benchmark + +import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.infra.Blackhole +import scala.compiletime.uninitialized +import scala.util.Random +import vecxt_re.* +import vecxt_re.ReReporting.* +import vecxt.all.* + +// ./mill benchmark.runJmh "vecxt.benchmark.LossReportBenchmark" -jvmArgs --add-modules=jdk.incubator.vector -rf json -wi 1 -i 3 -f 1 + +/** 231] Benchmark (numEventsStr) (numIterationsStr) Mode Cnt Score Error Units 231] LossReportBenchmark.lossReport_fast + * 10000 100 thrpt 3 177346.981 ± 24137.324 ops/s 231] LossReportBenchmark.lossReport_fast 10000 1000 thrpt 3 + * 180400.504 ± 8719.687 ops/s 231] LossReportBenchmark.lossReport_fast 100000 100 thrpt 3 11731.510 ± 1945.957 ops/s + * 231] LossReportBenchmark.lossReport_fast 100000 1000 thrpt 3 17443.246 ± 425.030 ops/s 231] + * LossReportBenchmark.lossReport_separate 10000 100 thrpt 3 46850.187 ± 7232.734 
ops/s 231] + * LossReportBenchmark.lossReport_separate 10000 1000 thrpt 3 49876.719 ± 5238.487 ops/s 231] + * LossReportBenchmark.lossReport_separate 100000 100 thrpt 3 3360.234 ± 326.993 ops/s 231] + * LossReportBenchmark.lossReport_separate 100000 1000 thrpt 3 4706.819 ± 615.832 ops/s + */ + +@State(Scope.Thread) +class LossReportBenchmark extends BLASBenchmark: + + @Param(Array("10000", "100000")) + var numEventsStr: String = uninitialized + + @Param(Array("100", "1000")) + var numIterationsStr: String = uninitialized + + var years: Array[Int] = uninitialized + var ceded: Array[Double] = uninitialized + var layerObj: Layer = uninitialized + + @Setup(Level.Trial) + def setup: Unit = + val rng = new Random(0) + val numEvents = numEventsStr.toInt + val numIterations = numIterationsStr.toInt + + val yrs = Array.ofDim[Int](numEvents) + var i = 0 + while i < numEvents do + yrs(i) = rng.nextInt(numIterations) + 1 // 1-based group indices + i += 1 + end while + + java.util.Arrays.sort(yrs) + + years = yrs + + ceded = Array.ofDim[Double](numEvents) + i = 0 + while i < numEvents do + // random loss values between 0 and 100 + ceded(i) = rng.nextDouble() * 100.0 + i += 1 + end while + + // Choose a layer with a moderate aggLimit to cause some exhaustion hits + layerObj = Layer(occLimit = Some(100.0), aggLimit = Some(50.0)) + () + end setup + + @Benchmark + def lossReport_fast(bh: Blackhole) = + val calcd = (layerObj, ceded) + val r = calcd.lossReport(numIterationsStr.toInt, years, ReportDenominator.FirstLimit) + // consume fields so JMH doesn't optimize away + bh.consume(r.el) + bh.consume(r.stdDev) + bh.consume(r.attachProb) + bh.consume(r.exhaustProb) + end lossReport_fast + + @Benchmark + def lossReport_separate(bh: Blackhole) = + val calcd = (layerObj, ceded) + val n = numIterationsStr.toInt + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layerObj) + + val el = calcd.expectedLoss(n) / reportLimit + val std = calcd.std(n, years) / reportLimit + val attach = 
calcd.attachmentProbability(n, years) + val exhaust = calcd.exhaustionProbability(n, years) + + bh.consume(el) + bh.consume(std) + bh.consume(attach) + bh.consume(exhaust) + end lossReport_separate + +end LossReportBenchmark diff --git a/benchmark/src/groupOpsBenchmark.scala b/benchmark/src/groupOpsBenchmark.scala index 0e3d35fb..98a9967e 100644 --- a/benchmark/src/groupOpsBenchmark.scala +++ b/benchmark/src/groupOpsBenchmark.scala @@ -9,9 +9,12 @@ import org.openjdk.jmh.annotations.* import org.openjdk.jmh.infra.Blackhole import scala.compiletime.uninitialized import vecxtensions.* +import vecxt_re.* import java.util.Random import java.util.concurrent.TimeUnit +// ./mill benchmark.runJmh "vecxt.benchmark.LossReportBenchmark" -jvmArgs --add-modules=jdk.incubator.vector -rf json -wi 1 -i 3 -f 1 + @State(Scope.Thread) @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.NANOSECONDS) @@ -91,11 +94,21 @@ class GroupOpsBenchmark: bh.consume(valuesCopy2) end benchGroupDiffInPlace - @Benchmark - def benchGroupSum(bh: Blackhole): Unit = - val (uniqueGroups, sums) = groupSum(groups, values) - bh.consume(uniqueGroups) - bh.consume(sums) - end benchGroupSum + // @Benchmark + // def benchGroupSum(bh: Blackhole): Unit = + // // groupSum in vecxt_re takes nitr; compute number of groups then call it + // var maxGroup = 0 + // var i = 0 + // while i < groups.length do + // if groups(i) > maxGroup then maxGroup = groups(i) + // end if + // i += 1 + // end while + // val numGroups = maxGroup + 1 + // val sums = groupSum(groups, values, numGroups) + // val uniqueGroups = Array.tabulate(numGroups)(identity) + // bh.consume(uniqueGroups) + // bh.consume(sums) + // end benchGroupSum end GroupOpsBenchmark diff --git a/benchmark/src/splitAmnt.scala b/benchmark/src/splitAmnt.scala index 207b5009..4ef551f3 100644 --- a/benchmark/src/splitAmnt.scala +++ b/benchmark/src/splitAmnt.scala @@ -9,12 +9,12 @@ import org.openjdk.jmh.infra.Blackhole import scala.compiletime.uninitialized 
import vecxt.all.* import vecxt.all.given -import vecxt.reinsurance.* +import vecxt_re.* import jdk.incubator.vector.VectorSpecies import jdk.incubator.vector.VectorOperators import jdk.incubator.vector.DoubleVector import java.util.Random -import vecxt.reinsurance.SplitLosses.splitAmntFast +import vecxt_re.SplitLosses.splitAmntFast // mill benchmark.runJmh vecxt.benchmark.SplitAmntBenchmark -jvmArgs --add-modules=jdk.incubator.vector -rf json @State(Scope.Thread) diff --git a/benchmark/src/variance.scala b/benchmark/src/variance.scala index c2605334..9f50cd70 100644 --- a/benchmark/src/variance.scala +++ b/benchmark/src/variance.scala @@ -10,10 +10,18 @@ import jdk.incubator.vector.VectorSpecies import jdk.incubator.vector.VectorOperators import jdk.incubator.vector.DoubleVector +// ./mill benchmark.runJmh "vecxt.benchmark.VarianceBenchmark" -jvmArgs --add-modules=jdk.incubator.vector -rf json -wi 2 -i 3 -f 1 + +/** 231] Benchmark (len) Mode Cnt Score Error Units 231] VarianceBenchmark.var_simd_twopass 1000 thrpt 3 1087302.435 ± + * 16013.286 ops/s 231] VarianceBenchmark.var_simd_twopass 100000 thrpt 3 9578.869 ± 334.606 ops/s 231] + * VarianceBenchmark.var_simd_welford 1000 thrpt 3 436244.559 ± 6158.585 ops/s 231] VarianceBenchmark.var_simd_welford + * 100000 thrpt 3 4187.715 ± 203.266 ops/s + */ + @State(Scope.Thread) class VarianceBenchmark extends BLASBenchmark: - @Param(Array("3", "128", "100000")) + @Param(Array("1000", "100000")) var len: String = uninitialized; var arr: Array[Double] = uninitialized @@ -32,20 +40,29 @@ class VarianceBenchmark extends BLASBenchmark: vec.map(i => (i - μ) * (i - μ)).sumSIMD / (vec.length - 1) end extension - + // @Benchmark + // def var_naive_twopass(bh: Blackhole) = + // val r = arr.variance2 + // bh.consume(r); + // end var_naive_twopass @Benchmark - def var_loop(bh: Blackhole) = - val r = arr.variance2 + def var_simd_twopass(bh: Blackhole) = + val r = arr.meanAndVarianceTwoPass(VarianceMode.Sample).variance bh.consume(r); 
- end var_loop + end var_simd_twopass + // @Benchmark + // def var_simd_welford(bh: Blackhole) = + // val r = arr.meanAndVarianceWelfordSIMD(VarianceMode.Sample).variance + // bh.consume(r); + // end var_simd_welford - @Benchmark - def var_vec(bh: Blackhole) = - val r = arr.variance - bh.consume(r); - end var_vec + // @Benchmark + // def var_default(bh: Blackhole) = + // val r = arr.variance(VarianceMode.Sample) + // bh.consume(r); + // end var_default end VarianceBenchmark diff --git a/build.mill b/build.mill index 21a2e083..845483d2 100644 --- a/build.mill +++ b/build.mill @@ -1,8 +1,8 @@ -//| mill-version: 1.1.0-RC4 +//| mill-version: 1.1.0 //| mill-jvm-version: 24 //| mill-jvm-opts: [ "--add-modules", "jdk.incubator.vector"] //| mvnDeps: -//| - io.github.quafadas:millSite_mill1_3.8:0.0.56 +//| - io.github.quafadas:millSite_mill1_3.8:0.0.57 //| - com.goyeau:mill-scalafix_mill1_3:0.6.0 //| - com.lihaoyi::mill-contrib-jmh:$MILL_VERSION @@ -37,6 +37,10 @@ object V: val munitVersion = "1.1.1" val blas: Dep = mvn"dev.ludovic.netlib:blas:3.0.4" val lapack: Dep = mvn"dev.ludovic.netlib:lapack:3.0.4" + val scalaJavaTime: Dep = mvn"io.github.cquiroz::scala-java-time::2.6.0" + val catsVersion = "2.13.0" + val disciplineVersion = "2.0.0" + val scalacheckVersion = "1.17.0" end V trait VecxtPublishModule extends PublishModule, ScalaModule, ScalafixModule: @@ -77,3 +81,7 @@ trait CommonNative extends ScalaNativeModule with VecxtPublishModule: ) def scalaNativeVersion: Simple[String] = "0.5.9" end CommonNative + +trait ShareCompileResources extends ScalaModule: + override def compileResources = super.compileResources() ++ resources() +end ShareCompileResources diff --git a/experiments/package.mill b/experiments/package.mill index eb1d7fcd..c270710f 100644 --- a/experiments/package.mill +++ b/experiments/package.mill @@ -15,11 +15,12 @@ object `package` extends ScalaModule: override def forkArgs = super.forkArgs() ++ build.vecIncubatorFlag // override def mainClass = 
Some("mnist") - override def moduleDeps = Seq(build.vecxt.jvm, build.vecxtensions.jvm) + override def moduleDeps = Seq(build.vecxt.jvm, build.vecxtensions.jvm, build.vecxt_re.jvm) override def mvnDeps = super.mvnDeps() ++ Seq( mvn"com.lihaoyi::os-lib::0.10.4", mvn"io.github.quafadas::scautable::0.0.35", - mvn"io.github.quafadas::dedav4s::0.10.3" + mvn"io.github.quafadas::dedav4s::0.10.4", + mvn"org.apache.logging.log4j:log4j-core:2.24.3" // Required by Apache POI for Excel ) end `package` diff --git a/experiments/resources/idx.csv b/experiments/resources/idx.csv new file mode 100644 index 00000000..f713c83b --- /dev/null +++ b/experiments/resources/idx.csv @@ -0,0 +1,7 @@ +year,idx +2020, 1.05 +2021, 1.03 +2022, 1.04 +2023, 1.02 +2024, 1.06 +2025, 1.03 diff --git a/experiments/resources/losses.csv b/experiments/resources/losses.csv new file mode 100644 index 00000000..db38f689 --- /dev/null +++ b/experiments/resources/losses.csv @@ -0,0 +1,11 @@ +year,day,amount +3,302,5.912378260806521E8 +2,60,9.862215041507638E7 +2,147,6.174601056303087E8 +5,49,7.371032155830323E8 +7,57,8.011450710400633E8 +8,81,7.835310794931588E8 +1,25,9.332911010018561E8 +4,139,6.391918434382262E8 +1,276,6.679874680098424E8 +2,93,7.796052636961774E8 \ No newline at end of file diff --git a/experiments/src/cheatsheet.scala b/experiments/src/cheatsheet.scala index 01ff037d..898c7247 100644 --- a/experiments/src/cheatsheet.scala +++ b/experiments/src/cheatsheet.scala @@ -254,7 +254,7 @@ object CheatsheetTest: not(boolArr2) // Boolean indexing - val filtered = a(a > 2.0) + val filtered = a.mask(a > 2.0) println(s"Filtered (>2): ${filtered.mkString(", ")}") val countTrues = boolArr.trues diff --git a/experiments/src/index.scala b/experiments/src/index.scala new file mode 100644 index 00000000..5b151093 --- /dev/null +++ b/experiments/src/index.scala @@ -0,0 +1,13 @@ +package experiments + +import io.github.quafadas.table.{*, given} +import io.github.quafadas.plots.SetupVegaBrowser.{*, given} 
+import experiments.RPT.* + +@main def plotIndex = + val idx = CSV.resource("idx.csv", CsvOpts(TypeInferrer.FromAllRows, ReadAs.Columns)) + val calYrIdx = vecxt_re.CalendarYearIndex(2025, idx.year, idx.idx) + println(calYrIdx) + calYrIdx.plotIndex(1.0) + println("finished") +end plotIndex diff --git a/experiments/src/pricing_fun.scala b/experiments/src/pricing_fun.scala new file mode 100644 index 00000000..4635fd16 --- /dev/null +++ b/experiments/src/pricing_fun.scala @@ -0,0 +1,47 @@ +package experiments + +import RPT.* +import cats.syntax.all.* +import io.github.quafadas.table.TypeInferrer +import vecxt.BoundsCheck.DoBoundsCheck.yes + +@main def pricingFun = + + val data = CSV.resource("losses.csv", CsvOpts(TypeInferrer.FromAllRows, ReadAs.Columns)) + + val scen = Scenarr + .withGeneratedIds( + iterations = data.year, + days = data.day, + amounts = data.amount, + numberIterations = 10, + threshold = 0.0 + ) + .sorted + + // val scen1 = scen.iteration(1).copy(numberIterations = 1) + + // println(scen1) + scen.itrDayAmount.ptbln + + val tower = Tower.singleShot(500e6, Array(150e6, 150e6, 100e6)) + val tower2 = Tower.singleShot(900e6, Array(100e6)) + + val (ceded, retained, splits) = tower.splitScenarioAmounts(scen) + val (ceded2, retained2, splits2) = tower2.splitScenarioAmounts(scen) + + // println(ceded.printArr) + // println(retained.printArr) + + // println() + + (splits ++ splits2).map(s => s.lossReport(scen.numberIterations, scen.iterations, ReportDenominator.FirstLimit)).ptbln + + // val (ceded10, retained10, splits10) = tower.splitScenarioAmounts(iter10)(using true) + + // println(ceded10.printArr) + + // splits10.map(_.cededToLayer).foreach(arr => println(arr.printArr)) + + // splits.map(s => s.lossReport(scen.numberIterations, scen.iterations, ReportDenominator.FirstLimit)).ptbln +end pricingFun diff --git a/experiments/src/rep_setup.scala b/experiments/src/rep_setup.scala new file mode 100644 index 00000000..f8199b0a --- /dev/null +++ 
b/experiments/src/rep_setup.scala @@ -0,0 +1,19 @@ +package experiments + +object RPT: + export vecxt.all.{*, given} + export io.github.quafadas.table.{*, given} + export io.github.quafadas.plots.SetupVega.{*, given} + export viz.PlotTargets.desktopBrowser + export vecxt_re.Plots.* + export vecxt_re.rpt.* + export vecxt_re.SplitLosses.* + export vecxt_re.SplitScenario.* + export vecxt_re.Scenario + export vecxt_re.Scenarr + export vecxt_re.Tower + export vecxt_re.Tower.* + export vecxt_re.ReReporting.* + export vecxt_re.ReportDenominator + +end RPT diff --git a/laws/package.mill b/laws/package.mill index 63298887..5b513ec3 100644 --- a/laws/package.mill +++ b/laws/package.mill @@ -7,13 +7,9 @@ import mill.api.Task.Simple object `package` extends Module: - val catsVersion = "2.10.0" - val disciplineVersion = "2.0.0" - val scalacheckVersion = "1.17.0" - trait LawsModule extends PlatformScalaModule with build.VecxtPublishModule: def mvnDeps = super.mvnDeps() ++ Seq( - mvn"org.typelevel::cats-kernel:$catsVersion" + mvn"org.typelevel::cats-kernel:${build.V.catsVersion}" ) end LawsModule @@ -27,9 +23,9 @@ object `package` extends Module: def mvnDeps = super.mvnDeps() ++ Seq( mvn"org.scalameta::munit::${build.V.munitVersion}", - mvn"org.typelevel::cats-kernel-laws:$catsVersion", - mvn"org.typelevel::discipline-munit:$disciplineVersion", - mvn"org.scalacheck::scalacheck:$scalacheckVersion" + mvn"org.typelevel::cats-kernel-laws:${build.V.catsVersion}", + mvn"org.typelevel::discipline-munit:${build.V.disciplineVersion}", + mvn"org.scalacheck::scalacheck:${build.V.scalacheckVersion}" ) override def forkArgs: Simple[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag diff --git a/laws/src/VectorCommutativeGroup.scala b/laws/src/VectorCommutativeGroup.scala index 15e1fadc..4bc85956 100644 --- a/laws/src/VectorCommutativeGroup.scala +++ b/laws/src/VectorCommutativeGroup.scala @@ -1,8 +1,10 @@ package vecxt.laws -import cats.kernel.{CommutativeGroup, Semigroup} import 
vecxt.BoundsCheck +import cats.kernel.CommutativeGroup +import cats.kernel.Semigroup + /** A CommutativeGroup for Array[A] scoped to a specific dimension. * * This trait extends both VectorMonoid and cats.kernel.CommutativeGroup, making it compatible with cats group laws diff --git a/laws/src/VectorCommutativeMonoid.scala b/laws/src/VectorCommutativeMonoid.scala index 4199bd7e..e520178d 100644 --- a/laws/src/VectorCommutativeMonoid.scala +++ b/laws/src/VectorCommutativeMonoid.scala @@ -1,8 +1,10 @@ package vecxt.laws -import cats.kernel.{CommutativeMonoid, Semigroup} import vecxt.BoundsCheck +import cats.kernel.CommutativeMonoid +import cats.kernel.Semigroup + /** A CommutativeMonoid for Array[A] scoped to a specific dimension. * * This trait extends both VectorMonoid and cats.kernel.CommutativeMonoid, making it compatible with cats commutative diff --git a/laws/src/VectorMonoid.scala b/laws/src/VectorMonoid.scala index 85c41859..f8bced93 100644 --- a/laws/src/VectorMonoid.scala +++ b/laws/src/VectorMonoid.scala @@ -1,8 +1,10 @@ package vecxt.laws -import cats.kernel.{Monoid, Semigroup} import vecxt.BoundsCheck +import cats.kernel.Monoid +import cats.kernel.Semigroup + /** A Monoid for Array[A] scoped to a specific dimension. * * This trait extends cats.kernel.Monoid, making it compatible with the entire cats laws testing infrastructure. 
diff --git a/laws/src/instances/DoubleInstances.scala b/laws/src/instances/DoubleInstances.scala index 568d34e5..571e2477 100644 --- a/laws/src/instances/DoubleInstances.scala +++ b/laws/src/instances/DoubleInstances.scala @@ -1,9 +1,12 @@ package vecxt.laws.instances -import cats.kernel.Semigroup -import vecxt.laws.{Dimension, VectorCommutativeGroup, VectorCommutativeMonoid} import vecxt.BoundsCheck -import vecxt.all.{given, *} +import vecxt.all.{*, given} +import vecxt.laws.Dimension +import vecxt.laws.VectorCommutativeGroup +import vecxt.laws.VectorCommutativeMonoid + +import cats.kernel.Semigroup object double: @@ -19,9 +22,7 @@ object double: import vecxt.BoundsCheck.DoBoundsCheck.yes x + y , - inverseFn = (a) => - import vecxt.BoundsCheck.DoBoundsCheck.yes - -a + inverseFn = (a) => -a ) end vectorAdditionGroup diff --git a/site/docs/xPlatform.md b/site/TODO/xPlatform.md similarity index 100% rename from site/docs/xPlatform.md rename to site/TODO/xPlatform.md diff --git a/site/docs/blog/2024-08-01-Motivation.md b/site/docs/blog/2024-08-01-Motivation.md index bfa3f86f..f8ec9e33 100644 --- a/site/docs/blog/2024-08-01-Motivation.md +++ b/site/docs/blog/2024-08-01-Motivation.md @@ -17,7 +17,7 @@ For example if your data acquisition is serverside, but do parts of a calculatio # JVM -[[vecxt]] is cross platform, this example runs on the JVM, see [Cross Platform](../xPlatform.md) for the same example running in scalaJS. +[[vecxt]] is cross platform, this example runs on the JVM, see (coming soon) cross platform docs for the same example running in scalaJS. ```scala mdoc diff --git a/site/docs/cheatsheet.md b/site/docs/cheatsheet.md index 0fdd3393..8f12966e 100644 --- a/site/docs/cheatsheet.md +++ b/site/docs/cheatsheet.md @@ -146,7 +146,7 @@ Is not supported in an "implicit" fashion. 
Look at the methods; | Element-wise equality | `a =:= b` | `a == b` | `a == b` | | Element-wise inequality | `a !:= b` | `a != b` | `a ~= b` | | Find indices where true | `idx.logicalIdx(...)` | `np.nonzero(a > 0.5)` | `find(a > 0.5)` | -| Boolean indexing | `a(a > 2.0)` | `a[a > 0.5]` | `a(a > 0.5)` | +| Boolean indexing | `a.mask(a > 2.0)` | `a[a > 0.5]` | `a(a > 0.5)` | | Count true values | `(a > 2.0).trues` | `np.sum(a > 0.5)` | `sum(a > 0.5)` | ## Array / Matrix Manipulation diff --git a/site/docs/examples.md b/site/docs/examples.md index cdd7d1a1..642ccf96 100644 --- a/site/docs/examples.md +++ b/site/docs/examples.md @@ -68,7 +68,7 @@ v1.cumsum.printArr (v1 < 2).printArr (v1 <= 2).printArr -(v1(v1 <= 2)).printArr +(v1.mask(v1 <= 2)).printArr (v1.outer(v2)).printMat @@ -121,7 +121,7 @@ v1.dot(v2) (v1 < 2).printArr (v1 <= 2).printArr -(v1(v1 <= 2)).printArr +(v1.mask(v1 <= 2)).printArr ``` diff --git a/vecxt/src-js-native/LongArrays.scala b/vecxt/src-js-native/LongArrays.scala index 2a693bf4..f85ce848 100644 --- a/vecxt/src-js-native/LongArrays.scala +++ b/vecxt/src-js-native/LongArrays.scala @@ -2,7 +2,19 @@ package vecxt object LongArrays: - extension (arr: Array[Long]) inline def sumSIMD: Long = ??? + extension (arr: Array[Long]) + inline def select(indicies: Array[Int]): Array[Long] = + val len = indicies.length + val out = Array.ofDim[Long](len) + var i = 0 + while i < len do + out(i) = arr(indicies(i)) + i += 1 + end while + out + end select + + inline def sumSIMD: Long = ??? 
end extension end LongArrays diff --git a/vecxt/src-js-native/array.scala b/vecxt/src-js-native/array.scala index 1820a772..9fd2b860 100644 --- a/vecxt/src-js-native/array.scala +++ b/vecxt/src-js-native/array.scala @@ -560,6 +560,12 @@ object JsNativeDoubleArrays: sum end dot + inline def =:=(nums: Array[Int]): Array[Boolean] = + logicalIdxArr(nums, (a, b) => a == b) + + inline def =:=(num: Int): Array[Boolean] = + logicalIdx((a, b) => a == b, num) + inline def <(num: Int): Array[Boolean] = logicalIdx((a, b) => a < b, num) @@ -587,6 +593,41 @@ object JsNativeDoubleArrays: end while idx end logicalIdx + + inline def logicalIdxArr( + compare: Array[Int], + inline op: (Int, Int) => Boolean + ): Array[Boolean] = + val n = vec.length + val idx = Array.fill(n)(false) + + var i = 0 + while i < n do + if op(vec(i), compare(i)) then idx(i) = true + end if + i = i + 1 + end while + idx + end logicalIdxArr + + inline def countsToIdx: Array[Int] = + var total = vec.sum + var i = 0 + val out = new Array[Int](total) + var j = 0 + while i < vec.length do + val count = vec(i) + val idx = i + 1 + var k = 0 + while k < count do + out(j) = idx + j += 1 + k += 1 + end while + i += 1 + end while + out + end countsToIdx end extension // extension [@specialized(Double, Int) A: Numeric](vec: Array[A]) diff --git a/vecxt/src-js-native/eig.scala b/vecxt/src-js-native/eig.scala index 0f4c13da..a4a86058 100644 --- a/vecxt/src-js-native/eig.scala +++ b/vecxt/src-js-native/eig.scala @@ -1,7 +1,7 @@ package vecxt -import all.* -import BoundsCheck.BoundsCheck +import vecxt.BoundsCheck.BoundsCheck +import vecxt.all.* object Eigenvalues: inline def eig(m: Matrix[Double])(using diff --git a/vecxt/src-js-native/solve.scala b/vecxt/src-js-native/solve.scala index b90546b8..d635f0db 100644 --- a/vecxt/src-js-native/solve.scala +++ b/vecxt/src-js-native/solve.scala @@ -1,7 +1,7 @@ package vecxt -import vecxt.matrix.Matrix import vecxt.BoundsCheck.BoundsCheck +import vecxt.matrix.Matrix /** Linear 
system solver placeholder for JS and Native platforms. * diff --git a/vecxt/src-js/array.scala b/vecxt/src-js/array.scala index 8469e5bb..b35c4a91 100644 --- a/vecxt/src-js/array.scala +++ b/vecxt/src-js/array.scala @@ -5,8 +5,8 @@ import scala.scalajs.js import scala.scalajs.js.typedarray.Float64Array import scala.util.chaining.* -import vecxt.BoundsCheck.BoundsCheck import vecxt.BooleanArrays.* +import vecxt.BoundsCheck.BoundsCheck object arrayUtil: extension [A](d: Array[A]) def printArr: String = d.mkString("[", ",", "]") @@ -79,6 +79,89 @@ object arrays: end for newVec end apply + + inline def mean: Double = + var sum = 0.0 + var i = 0 + while i < vec.length do + sum += vec(i) + i += 1 + end while + sum / vec.length + end mean + + inline def variance: Double = variance(VarianceMode.Population) + + inline def variance(mode: VarianceMode): Double = + vec.meanAndVariance(mode).variance + end variance + + inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + var mean = 0.0 + var m2 = 0.0 + var i = 0 + while i < vec.length do + val n = i + 1 + val delta = vec(i) - mean + mean += delta / n + val delta2 = vec(i) - mean + m2 += delta * delta2 + i += 1 + end while + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (mean, m2 / denom) + end meanAndVariance + + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def minSIMD: Int = + var i = 0 + var acc = Int.MaxValue + while i < vec.length do + val v = vec(i) + if v < acc then acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Int = + var i = 0 + var 
acc = Int.MinValue + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD + + inline def -=(scalar: Int): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) - scalar + i += 1 + end while + end -= + + inline def -(scalar: Int): Array[Int] = + vec.clone().tap(_ -= scalar) + end - end extension extension (vec: Array[Double]) @@ -97,6 +180,30 @@ object arrays: newVec end apply + inline def minSIMD: Double = + var i = 0 + var acc = Double.PositiveInfinity + while i < vec.length do + val v = vec(i) + if v < acc then acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Double = + var i = 0 + var acc = Double.NegativeInfinity + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD + def increments: Array[Double] = val out = Array.ofDim[Double](vec.length) out(0) = vec(0) @@ -108,12 +215,14 @@ object arrays: out end increments - inline def stdDev: Double = - // https://www.cuemath.com/data/standard-deviation/ - val mu = vec.mean - val diffs_2 = vec.map(num => (num - mu) * (num - mu)) - Math.sqrt(diffs_2.sum / (vec.length - 1)) - end stdDev + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) inline def mean: Double = vec.sumSIMD / vec.length @@ -137,12 +246,35 @@ object arrays: sum end product - def variance: Double = - // https://www.cuemath.com/sample-variance-formula/ - val μ = vec.mean - vec.map(i => (i - μ) * (i - μ)).sum / (vec.length - 1) + inline def variance: Double = variance(VarianceMode.Population) + + def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance end variance + inline def meanAndVariance: (mean: Double, variance: Double) = + 
meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + var mean = 0.0 + var m2 = 0.0 + var i = 0 + while i < vec.length do + val n = i + 1 + val delta = vec(i) - mean + mean += delta / n + val delta2 = vec(i) - mean + m2 += delta * delta2 + i += 1 + end while + + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (mean, m2 / denom) + end meanAndVariance + inline def unary_- : Array[Double] = val newVec = Array.ofDim[Double](vec.length) var i = 0 diff --git a/vecxt/src-js/dimCheck.scala b/vecxt/src-js/dimCheck.scala index 4bf2a8b2..7520572f 100644 --- a/vecxt/src-js/dimCheck.scala +++ b/vecxt/src-js/dimCheck.scala @@ -1,7 +1,5 @@ package vecxt -import scala.scalajs.js.typedarray.Float64Array - import vecxt.BoundsCheck.BoundsCheck case class VectorDimensionMismatch(givenDimension: Int, requiredDimension: Int) @@ -25,6 +23,9 @@ protected[vecxt] object dimCheck: inline def apply[A](a: Array[Double], b: scala.scalajs.js.Array[A])(using inline doCheck: BoundsCheck) = inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + inline def apply[A](a: Array[A], b: Array[Boolean])(using inline doCheck: BoundsCheck) = + inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + inline def apply(a: Array[Double], b: Array[Double])(using inline doCheck: BoundsCheck) = inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) diff --git a/vecxt/src-js/doublematrix.scala b/vecxt/src-js/doublematrix.scala index 138e64f7..91af1c9c 100644 --- a/vecxt/src-js/doublematrix.scala +++ b/vecxt/src-js/doublematrix.scala @@ -1,10 +1,10 @@ package vecxt +import scala.scalajs.js.JSConverters.* import scala.scalajs.js.typedarray.Float64Array import vecxt.BoundsCheck.BoundsCheck import vecxt.matrix.* 
-import scala.scalajs.js.JSConverters.* object JsDoubleMatrix: diff --git a/vecxt/src-jvm/LongArrays.scala b/vecxt/src-jvm/LongArrays.scala index e774f37a..802eb1f8 100644 --- a/vecxt/src-jvm/LongArrays.scala +++ b/vecxt/src-jvm/LongArrays.scala @@ -7,6 +7,17 @@ object LongArrays: final val length = spl.length() extension (arr: Array[Long]) + inline def select(indicies: Array[Int]): Array[Long] = + val len = indicies.length + val out = Array.ofDim[Long](len) + var i = 0 + while i < len do + out(i) = arr(indicies(i)) + i += 1 + end while + out + end select + /** Computes the sum of all elements in the array using SIMD (Single Instruction, Multiple Data) operations. * * This method leverages the Vector API to perform parallel addition operations on chunks of the array, improving @@ -33,6 +44,7 @@ object LongArrays: i += 1 end while total + end sumSIMD end extension end LongArrays diff --git a/vecxt/src-jvm/arrays.scala b/vecxt/src-jvm/arrays.scala index 3d8410e8..0962e80c 100644 --- a/vecxt/src-jvm/arrays.scala +++ b/vecxt/src-jvm/arrays.scala @@ -3,7 +3,6 @@ package vecxt import scala.reflect.ClassTag import scala.util.chaining.* -import vecxt.BooleanArrays.trues import vecxt.BoundsCheck.BoundsCheck import vecxt.matrix.Matrix @@ -233,6 +232,25 @@ object arrays: end increments + inline def countsToIdx: Array[Int] = + var total = vec.sumSIMD + var i = 0 + val out = new Array[Int](total) + var j = 0 + while i < vec.length do + val count = vec(i) + val idx = i + 1 + var k = 0 + while k < count do + out(j) = idx + j += 1 + k += 1 + end while + i += 1 + end while + out + end countsToIdx + inline def sumSIMD: Int = var i: Int = 0 var acc = IntVector.zero(spi) @@ -250,6 +268,72 @@ object arrays: temp end sumSIMD + inline def mean: Double = + sumSIMD / vec.length.toDouble + end mean + + inline def variance: Double = variance(VarianceMode.Population) + + inline def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance + + inline def meanAndVariance: (mean: 
Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + meanAndVarianceTwoPass(mode) + end meanAndVariance + + /** 231] Benchmark (len) Mode Cnt Score Error Units 231] VarianceBenchmark.var_simd_twopass 1000 thrpt 3 1087302.435 + * ± 16013.286 ops/s 231] VarianceBenchmark.var_simd_twopass 100000 thrpt 3 9578.869 ± 334.606 ops/s 231] + * VarianceBenchmark.var_simd_welford 1000 thrpt 3 436244.559 ± 6158.585 ops/s 231] + * VarianceBenchmark.var_simd_welford 100000 thrpt 3 4187.715 ± 203.266 ops/s + */ + inline def meanAndVarianceTwoPass(mode: VarianceMode): (mean: Double, variance: Double) = + val μ = vec.mean + val μVec = DoubleVector.broadcast(spd, μ) + + var i = 0 + var acc = DoubleVector.zero(spd) + val tmp = new Array[Double](spdl) + + while i < spd.loopBound(vec.length) do + var lane = 0 + while lane < spdl do + tmp(lane) = vec(i + lane).toDouble + lane += 1 + end while + + val v = DoubleVector.fromArray(spd, tmp, 0) + val diff = v.sub(μVec) + acc = diff.fma(diff, acc) + i += spdl + end while + + var sumSqDiff = acc.reduceLanes(VectorOperators.ADD) + + while i < vec.length do + val diff = vec(i).toDouble - μ + sumSqDiff = Math.fma(diff, diff, sumSqDiff) + i += 1 + end while + + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (μ, sumSqDiff / denom) + end meanAndVarianceTwoPass + + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + inline def dot(vec2: Array[Int])(using inline boundsCheck: BoundsCheck): Int = dimCheck(vec, vec2) val newVec = Array.ofDim[Int](vec.length) @@ -278,6 +362,29 @@ object arrays: vec.clone.tap(_ -= vec2) end - + inline def -=(scalar: Int): 
Unit = + + var i = 0 + + while i < spi.loopBound(vec.length) do + IntVector + .fromArray(spi, vec, i) + .sub(scalar) + .intoArray(vec, i) + i += spil + end while + + while i < vec.length do + vec(i) = vec(i) - scalar + i += 1 + end while + + end -= + + inline def -(scalar: Int): Array[Int] = + vec.clone().tap(_ -= scalar) + end - + inline def -=(vec2: Array[Int])(using inline boundsCheck: BoundsCheck): Unit = dimCheck(vec, vec2) var i = 0 @@ -357,22 +464,6 @@ object arrays: end extension - extension [@specialized(Double, Int) A](vec: Array[A])(using ClassTag[A]) - inline def apply(index: Array[Boolean])(using inline boundsCheck: BoundsCheck) = - dimCheck(vec, index) - val trues = index.trues - val newVec: Array[A] = new Array[A](trues) - var j = 0 - for i <- 0 until index.length do - // println(s"i: $i || j: $j || ${index(i)} ${vec(i)} ") - if index(i) then - newVec(j) = vec(i) - j = 1 + j - end for - newVec - end apply - end extension - extension (d: Double) inline def /(arr: Array[Double]) = val out = new Array[Double](arr.length) @@ -717,18 +808,106 @@ object arrays: Matrix(out, (n, m))(using BoundsCheck.DoBoundsCheck.no) end outer - def variance: Double = - meanAndVariance.variance + inline def variance: Double = variance(VarianceMode.Population) + + def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance end variance - inline def stdDev: Double = - // https://www.cuemath.com/data/standard-deviation/ - val mu = vec.mean - val diffs_2 = vec.map(num => Math.pow(num - mu, 2)) - Math.sqrt(diffs_2.sumSIMD / (vec.length - 1)) - end stdDev + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: 
VarianceMode): (mean: Double, variance: Double) = + meanAndVarianceTwoPass(mode) + end meanAndVariance + + /** True SIMD-optimized Welford's algorithm for computing mean and variance. + * + * Each SIMD lane maintains independent Welford accumulators (n, mean, M2). Lanes process strided elements: lane 0 + * gets [0,4,8,...], lane 1 gets [1,5,9,...], etc. At the end, all lanes are merged using the parallel Welford + * merge formula: + * + * δ = meanB - meanA n = nA + nB mean = meanA + δ * nB / n M2 = M2A + M2B + δ² * nA * nB / n + * + * This algorithm is crushed by the simple two pass SIMD version. + * + * 231] Benchmark (len) Mode Cnt Score Error Units 231] VarianceBenchmark.var_simd_twopass 1000 thrpt 3 1087302.435 + * ± 16013.286 ops/s 231] VarianceBenchmark.var_simd_twopass 100000 thrpt 3 9578.869 ± 334.606 ops/s 231] + * VarianceBenchmark.var_simd_welford 1000 thrpt 3 436244.559 ± 6158.585 ops/s 231] + * VarianceBenchmark.var_simd_welford 100000 thrpt 3 4187.715 ± 203.266 ops/s + */ + private inline def meanAndVarianceWelfordSIMD(mode: VarianceMode): (mean: Double, variance: Double) = + if vec.length == 0 then (0.0, 0.0) + else + // Per-lane accumulators + var laneMeans = DoubleVector.zero(spd) + var delta = DoubleVector.zero(spd) + var delta2 = DoubleVector.zero(spd) + var laneM2 = DoubleVector.zero(spd) + + var i = 0 + var j: Double = 1 + // All lanes will have processed J elements at the end of this loop + while i < spd.loopBound(vec.length) do + j = j + 1 + val values = DoubleVector.fromArray(spd, vec, i) + delta = values.sub(laneMeans) // Use current mean + laneMeans = laneMeans.add(delta.div(DoubleVector.broadcast(spd, j))) + delta2 = values.sub(laneMeans) // Use updated mean + laneM2 = laneM2.add(delta.mul(delta2)) + i += spdl + end while + + // val laneSumA = laneSum.toArray() + val laneMean = laneMeans.toArray() + val laneM2A = laneM2.toArray() + // Merge all lanes + var globalN = j + var globalMean = laneMean(0) + var globalM2 = laneM2A(0) + + var lane 
= 1 + while lane < spdl do + val delta = laneMean(lane) - globalMean + val newN = globalN + j + globalMean = globalMean + delta * j / newN + globalM2 = globalM2 + laneM2A(lane) + delta * delta * globalN * j / newN + globalN = newN + + lane += 1 + end while + + // Process tail elements + while i < vec.length do + val n = globalN + 1 + val delta = vec(i) - globalMean + globalMean += delta / n + val delta2 = vec(i) - globalMean + globalM2 += delta * delta2 + globalN = n + i += 1 + end while + + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (globalMean, globalM2 / denom) + end if + end meanAndVarianceWelfordSIMD + + /** Two-pass variance calculation (legacy, for comparison). First pass computes mean, second pass computes variance. + */ + inline def meanAndVarianceTwoPass(mode: VarianceMode): (mean: Double, variance: Double) = val μ = vec.mean val l = spd.length() var tmp = DoubleVector.zero(spd) @@ -737,7 +916,7 @@ object arrays: var i = 0 while i < spd.loopBound(vec.length) do val v = DoubleVector.fromArray(spd, vec, i) - val diff = v.sub(μVec) // Broadcast mean once, reuse + val diff = v.sub(μVec) tmp = diff.fma(diff, tmp) i += spdl end while @@ -750,9 +929,12 @@ object arrays: i += 1 end while - (μ, sumSqDiff * (1.0 / (vec.length - 1))) + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble - end meanAndVariance + (μ, sumSqDiff / denom) + end meanAndVarianceTwoPass inline def mean: Double = vec.sumSIMD / vec.length diff --git a/vecxt/src-jvm/cholesky.scala b/vecxt/src-jvm/cholesky.scala index 42e6a90a..6c56f809 100644 --- a/vecxt/src-jvm/cholesky.scala +++ b/vecxt/src-jvm/cholesky.scala @@ -1,13 +1,14 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix import vecxt.BoundsCheck.BoundsCheck import vecxt.MatrixHelper.zeros -import 
vecxt.all.update import vecxt.MatrixInstance.apply +import vecxt.all.update +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK // https://github.com/scalanlp/breeze/blob/fd73d09976a1a50d68b91a53e3896980502d335e/math/src/main/scala/breeze/linalg/functions/svd.scala#L13 object Cholesky: diff --git a/vecxt/src-jvm/dimCheck.scala b/vecxt/src-jvm/dimCheck.scala index dabb8ba1..f39c3847 100644 --- a/vecxt/src-jvm/dimCheck.scala +++ b/vecxt/src-jvm/dimCheck.scala @@ -11,6 +11,12 @@ protected[vecxt] object dimCheck: inline def apply[A, B](a: Array[A], b: Array[B])(using inline doCheck: BoundsCheck) = inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + inline def apply[A](a: Array[A], b: Array[Boolean])(using inline doCheck: BoundsCheck) = + inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + + inline def apply[A](a: Array[A], b: Array[A])(using inline doCheck: BoundsCheck) = + inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) + inline def apply(a: Array[Double], b: Array[Double])(using inline doCheck: BoundsCheck) = inline if doCheck then if a.length != b.length then throw VectorDimensionMismatch(a.length, b.length) end dimCheck diff --git a/vecxt/src-jvm/eig.scala b/vecxt/src-jvm/eig.scala index 5b362aed..aa7dcbe5 100644 --- a/vecxt/src-jvm/eig.scala +++ b/vecxt/src-jvm/eig.scala @@ -1,11 +1,13 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix -import vecxt.MatrixInstance.* -import vecxt.MatrixHelper.zeros + import vecxt.BoundsCheck.BoundsCheck +import vecxt.MatrixHelper.zeros +import vecxt.MatrixInstance.* +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK // https://github.com/scalanlp/breeze/blob/fd73d09976a1a50d68b91a53e3896980502d335e/math/src/main/scala/breeze/linalg/functions/eig.scala#L25 object 
Eigenvalues: diff --git a/vecxt/src-jvm/lu.scala b/vecxt/src-jvm/lu.scala index e65f5324..d1271974 100644 --- a/vecxt/src-jvm/lu.scala +++ b/vecxt/src-jvm/lu.scala @@ -1,11 +1,13 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix -import vecxt.MatrixInstance.* -import vecxt.MatrixHelper.zeros + import vecxt.BoundsCheck.BoundsCheck +import vecxt.MatrixHelper.zeros +import vecxt.MatrixInstance.* +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK /** LU decomposition with partial pivoting using LAPACK. * diff --git a/vecxt/src-jvm/qr.scala b/vecxt/src-jvm/qr.scala index dfeb5cb6..3637283c 100644 --- a/vecxt/src-jvm/qr.scala +++ b/vecxt/src-jvm/qr.scala @@ -1,11 +1,13 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix -import vecxt.MatrixInstance.* -import vecxt.MatrixHelper.zeros + import vecxt.BoundsCheck.BoundsCheck +import vecxt.MatrixHelper.zeros +import vecxt.MatrixInstance.* +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK object QR: private lazy final val lapack = JavaLAPACK.getInstance() diff --git a/vecxt/src-jvm/solve.scala b/vecxt/src-jvm/solve.scala index 8f588a1d..24aaa019 100644 --- a/vecxt/src-jvm/solve.scala +++ b/vecxt/src-jvm/solve.scala @@ -1,10 +1,12 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix -import vecxt.MatrixInstance.* + import vecxt.BoundsCheck.BoundsCheck +import vecxt.MatrixInstance.* +import vecxt.matrix.Matrix + +import dev.ludovic.netlib.lapack.JavaLAPACK /** Linear system solver using LAPACK. 
* diff --git a/vecxt/src-jvm/svd.scala b/vecxt/src-jvm/svd.scala index 3ae8dcee..6e13fce7 100644 --- a/vecxt/src-jvm/svd.scala +++ b/vecxt/src-jvm/svd.scala @@ -1,16 +1,18 @@ package vecxt -import dev.ludovic.netlib.lapack.JavaLAPACK import org.netlib.util.intW -import vecxt.matrix.Matrix + +import vecxt.BooleanArrays.trues +import vecxt.BoundsCheck.BoundsCheck +import vecxt.DoubleMatrix.matmul +import vecxt.MatrixHelper.zeros import vecxt.MatrixInstance.* -import vecxt.arrays.maxSIMD import vecxt.arrays.> -import vecxt.BooleanArrays.trues +import vecxt.arrays.maxSIMD +import vecxt.matrix.Matrix import vecxt.matrixUtil.transpose -import vecxt.MatrixHelper.zeros -import vecxt.DoubleMatrix.matmul -import vecxt.BoundsCheck.BoundsCheck + +import dev.ludovic.netlib.lapack.JavaLAPACK // https://github.com/scalanlp/breeze/blob/fd73d09976a1a50d68b91a53e3896980502d335e/math/src/main/scala/breeze/linalg/functions/svd.scala#L13 object Svd: diff --git a/vecxt/src-native/array.scala b/vecxt/src-native/array.scala index 867e81a8..019c5a1b 100644 --- a/vecxt/src-native/array.scala +++ b/vecxt/src-native/array.scala @@ -46,6 +46,92 @@ object arrays: // end copy end extension + extension (vec: Array[Int]) + + inline def mean: Double = + var sum = 0.0 + var i = 0 + while i < vec.length do + sum += vec(i) + i += 1 + end while + sum / vec.length + end mean + + inline def variance: Double = variance(VarianceMode.Population) + + inline def variance(mode: VarianceMode): Double = + vec.meanAndVariance(mode).variance + end variance + + inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + var mean = 0.0 + var m2 = 0.0 + var i = 0 + while i < vec.length do + val n = i + 1 + val delta = vec(i) - mean + mean += delta / n + val delta2 = vec(i) - mean + m2 += delta * delta2 + i += 1 + end while + val denom = mode match + case VarianceMode.Population => 
vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (mean, m2 / denom) + end meanAndVariance + + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def minSIMD: Int = + var i = 0 + var acc = Int.MaxValue + while i < vec.length do + val v = vec(i) + if v < acc then acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Int = + var i = 0 + var acc = Int.MinValue + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD + + inline def -=(scalar: Int): Unit = + var i = 0 + while i < vec.length do + vec(i) = vec(i) - scalar + i += 1 + end while + end -= + + inline def -(scalar: Int): Array[Int] = + vec.clone().tap(_ -= scalar) + end - + end extension + extension [A: ClassTag](vec: Array[A]) def apply(index: Array[Boolean]): Array[A] = @@ -77,6 +163,30 @@ object arrays: newVec end apply + inline def minSIMD: Double = + var i = 0 + var acc = Double.PositiveInfinity + while i < vec.length do + val v = vec(i) + if v < acc then acc = v + end if + i += 1 + end while + acc + end minSIMD + + inline def maxSIMD: Double = + var i = 0 + var acc = Double.NegativeInfinity + while i < vec.length do + val v = vec(i) + if v > acc then acc = v + end if + i += 1 + end while + acc + end maxSIMD + inline def product: Double = var sum = 1.0 var i = 0; @@ -167,18 +277,43 @@ object arrays: ranks end elementRanks - inline def variance: Double = - // https://www.cuemath.com/sample-variance-formula/ - val μ = vec.mean - vec.map(i => (i - μ) * (i - μ)).sum / (vec.length - 1) + inline def variance: Double = variance(VarianceMode.Population) + + inline def variance(mode: VarianceMode): Double = + meanAndVariance(mode).variance end variance - inline def stdDev: Double 
= - // https://www.cuemath.com/data/standard-deviation/ - val mu = vec.mean - val diffs_2 = vec.map(num => Math.pow(num - mu, 2)) - Math.sqrt(diffs_2.sum / (vec.length - 1)) - end stdDev + inline def std: Double = std(VarianceMode.Population) + + inline def std(mode: VarianceMode): Double = + Math.sqrt(vec.variance(mode)) + + inline def stdDev: Double = stdDev(VarianceMode.Population) + + inline def stdDev(mode: VarianceMode): Double = std(mode) + + inline def meanAndVariance: (mean: Double, variance: Double) = + meanAndVariance(VarianceMode.Population) + + inline def meanAndVariance(mode: VarianceMode): (mean: Double, variance: Double) = + var mean = 0.0 + var m2 = 0.0 + var i = 0 + while i < vec.length do + val n = i + 1 + val delta = vec(i) - mean + mean += delta / n + val delta2 = vec(i) - mean + m2 += delta * delta2 + i += 1 + end while + + val denom = mode match + case VarianceMode.Population => vec.length.toDouble + case VarianceMode.Sample => (vec.length - 1).toDouble + + (mean, m2 / denom) + end meanAndVariance inline def mean: Double = vec.sum / vec.length diff --git a/vecxt/src/MatrixHelper.scala b/vecxt/src/MatrixHelper.scala index 10cef0d4..8ef82bd9 100644 --- a/vecxt/src/MatrixHelper.scala +++ b/vecxt/src/MatrixHelper.scala @@ -3,8 +3,8 @@ package vecxt import scala.reflect.ClassTag import vecxt.BoundsCheck.BoundsCheck -import vecxt.matrix.* import vecxt.MatrixInstance.apply +import vecxt.matrix.* object MatrixHelper: extension (m: Matrix.type) diff --git a/vecxt/src/all.scala b/vecxt/src/all.scala index 1de60478..58a5bbd5 100644 --- a/vecxt/src/all.scala +++ b/vecxt/src/all.scala @@ -8,6 +8,7 @@ object all: export vecxt.arrayUtil.* export vecxt.arrays.* export vecxt.DoubleArrays.* + export vecxt.VarianceMode // export vecxt.JsNativeDoubleArrays.* export vecxt.BooleanArrays.* @@ -25,6 +26,7 @@ object all: export vecxt.JvmNativeDoubleMatrix.* export vecxt.dimensionExtender.DimensionExtender.* export vecxt.IntArrays.* + export vecxt.LongArrays.* export 
vecxt.Determinant.* // Import determinant implementations export vecxt.Svd.* // JS and native are stubs export vecxt.Cholesky.* // JS and native are stubs @@ -34,7 +36,4 @@ object all: export vecxt.QR.* // JS and native are stubs // Random export vecxt.cosineSimilarity - - // Longs - export vecxt.LongArrays.* end all diff --git a/vecxt/src/dimMatCheck.scala b/vecxt/src/dimMatCheck.scala index 3e76c6f4..f67753a0 100644 --- a/vecxt/src/dimMatCheck.scala +++ b/vecxt/src/dimMatCheck.scala @@ -1,8 +1,8 @@ package vecxt import vecxt.BoundsCheck.BoundsCheck -import vecxt.matrix.* import vecxt.MatrixInstance.* +import vecxt.matrix.* object dimMatCheck: inline def apply[A](a: Matrix[A], b: Matrix[A])(using inline doCheck: BoundsCheck) = diff --git a/vecxt/src/doublearray.scala b/vecxt/src/doublearray.scala index 70f60d8d..cc4835b0 100644 --- a/vecxt/src/doublearray.scala +++ b/vecxt/src/doublearray.scala @@ -2,6 +2,18 @@ package vecxt object DoubleArrays: extension (vec: Array[Double]) + // TODO benchmark. 
+ inline def select(indicies: Array[Int]): Array[Double] = + val len = indicies.length + val out = Array.ofDim[Double](len) + var i = 0 + while i < len do + out(i) = vec(indicies(i)) + i += 1 + end while + out + end select + inline def unique: Array[Double] = if vec.size == 0 then Array.empty[Double] else diff --git a/vecxt/src/intarray.scala b/vecxt/src/intarray.scala index a99eaf01..1c1f64f0 100644 --- a/vecxt/src/intarray.scala +++ b/vecxt/src/intarray.scala @@ -1,9 +1,40 @@ package vecxt +import scala.reflect.ClassTag import scala.util.control.Breaks.* +import vecxt.BooleanArrays.trues +import vecxt.BoundsCheck.BoundsCheck + object IntArrays: + + extension [A](vec: Array[A]) + inline def mask(index: Array[Boolean])(using inline boundsCheck: BoundsCheck, ct: ClassTag[A]) = + dimCheck(vec, index) + val trues = index.trues + val newVec: Array[A] = new Array[A](trues) + var j = 0 + for i <- 0 until index.length do + // println(s"i: $i || j: $j || ${index(i)} ${vec(i)} ") + if index(i) then + newVec(j) = vec(i) + j = 1 + j + end for + newVec + end mask + end extension extension (arr: Array[Int]) + inline def select(indicies: Array[Int]): Array[Int] = + val len = indicies.length + val out = Array.ofDim[Int](len) + var i = 0 + while i < len do + out(i) = arr(indicies(i)) + i += 1 + end while + out + end select + inline def contiguous: Boolean = var i = 1 var out = true @@ -17,6 +48,7 @@ object IntArrays: end while } out + end contiguous end extension end IntArrays diff --git a/vecxt/src/variance.scala b/vecxt/src/variance.scala new file mode 100644 index 00000000..be7dc2b1 --- /dev/null +++ b/vecxt/src/variance.scala @@ -0,0 +1,13 @@ +package vecxt + +enum VarianceMode: + case Population + case Sample +end VarianceMode + +object VarianceMode: + inline def denominator(length: Int, mode: VarianceMode): Double = + mode match + case VarianceMode.Population => length.toDouble + case VarianceMode.Sample => (length - 1).toDouble +end VarianceMode diff --git 
a/vecxt/test/src-jvm/lu.test.scala b/vecxt/test/src-jvm/lu.test.scala index 29631625..20faa7a1 100644 --- a/vecxt/test/src-jvm/lu.test.scala +++ b/vecxt/test/src-jvm/lu.test.scala @@ -3,6 +3,8 @@ package vecxt import munit.FunSuite import all.* import BoundsCheck.DoBoundsCheck.yes +import scala.util.boundary +import scala.util.boundary.break class LUSuite extends FunSuite: @@ -37,49 +39,58 @@ class LUSuite extends FunSuite: def isLowerUnitTriangular(m: Matrix[Double], tol: Double = epsilon): Boolean = if m.rows < m.cols then return false end if - - for i <- 0 until m.rows do - for j <- 0 until m.cols do - if i < j then - // Above diagonal should be zero - if math.abs(m(i, j)) > tol then return false - else if i == j then - // Diagonal should be one - if math.abs(m(i, j) - 1.0) > tol then return false - end if + var lt = true + boundary { + for i <- 0 until m.rows do + for j <- 0 until m.cols do + if i < j then + // Above diagonal should be zero + if math.abs(m(i, j)) > tol then lt = false; break() + else if i == j then + // Diagonal should be one + if math.abs(m(i, j) - 1.0) > tol then lt = false; break() + end if + end for end for - end for - true + } + lt end isLowerUnitTriangular /** Helper to verify U is upper triangular */ def isUpperTriangular(m: Matrix[Double], tol: Double = epsilon): Boolean = if m.rows > m.cols then return false end if + var ut = true + boundary { + for i <- 0 until m.rows do + for j <- 0 until m.cols do + + if i > j then + // Below diagonal should be zero + if math.abs(m(i, j)) > tol then ut = false; break() + end if + end for - for i <- 0 until m.rows do - for j <- 0 until m.cols do - if i > j then - // Below diagonal should be zero - if math.abs(m(i, j)) > tol then return false - end if end for - end for - true + } + ut end isUpperTriangular /** Helper to check if two matrices are approximately equal */ def matricesEqual(a: Matrix[Double], b: Matrix[Double], tol: Double = epsilon): Boolean = if a.rows != b.rows || a.cols != b.cols then 
return false end if + var me = true + boundary { + for i <- 0 until a.rows do + for j <- 0 until a.cols do + if math.abs(a(i, j) - b(i, j)) > tol then me = false; break() - for i <- 0 until a.rows do - for j <- 0 until a.cols do - if math.abs(a(i, j) - b(i, j)) > tol then return false - end if + end for end for - end for - true + + } + me end matricesEqual test("LU decomposition of identity matrix") { diff --git a/vecxt/test/src-jvm/IntArrays.test.scala b/vecxt/test/src/IntArrays.test.scala similarity index 100% rename from vecxt/test/src-jvm/IntArrays.test.scala rename to vecxt/test/src/IntArrays.test.scala diff --git a/vecxt/test/src/array.test.scala b/vecxt/test/src/array.test.scala index d44542a2..76a0c054 100644 --- a/vecxt/test/src/array.test.scala +++ b/vecxt/test/src/array.test.scala @@ -147,15 +147,16 @@ class ArrayExtensionSuite extends munit.FunSuite: // val afterIndex = v1(vIdx) // assertEqualsDouble(afterIndex(0), 1.0, 0.0001) // assertEqualsDouble(afterIndex(1), 3.0, 0.0001) + import vecxt.BoundsCheck.DoBoundsCheck.yes val v2 = Array[Double](1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0) val vIdx2 = Array[Boolean](true, false, true, true, false, true, false, true, false) - val afterIndex2 = v2(vIdx2) + val afterIndex2 = v2.mask(vIdx2) assertEqualsDouble(afterIndex2(4), 8.0, 0.0001) val v3 = Array[Int](1, 2, 3, 4, 5, 6, 7, 8, 9) val vIdx3 = Array[Boolean](true, false, true, true, false, true, false, true, false) - val afterIndex3 = v3(vIdx3) + val afterIndex3 = v3.mask(vIdx3) assertEquals(afterIndex3(4), 8) } @@ -398,7 +399,7 @@ class ArrayExtensionSuite extends munit.FunSuite: test("Array indexing") { val v1 = Array[Double](1.0, 2.0, 3.0) val vIdx = Array[Boolean](true, false, true) - val afterIndex = v1(vIdx) + val afterIndex = v1.mask(vIdx)(using true) assertEquals(afterIndex.length, 2) assertEqualsDouble(afterIndex.head, 1, 0.0001) @@ -409,7 +410,7 @@ class ArrayExtensionSuite extends munit.FunSuite: // 
https://www.storyofmathematics.com/sample-variance/#:~:text=7.%20Divide%20the%20number%20you%20get%20in%20step%206%20by example 3 val ages = Array[Double](26.0, 48.0, 67.0, 39.0, 25.0, 25.0, 36.0, 44.0, 44.0, 47.0, 53.0, 52.0, 52.0, 51.0, 52.0, 40.0, 77.0, 44.0, 40.0, 45.0, 48.0, 49.0, 19.0, 54.0, 82.0) - val variance = ages.variance + val variance = ages.variance(VarianceMode.Sample) assertEqualsDouble(variance, 216.82, 0.01) } @@ -432,6 +433,13 @@ class ArrayExtensionSuite extends munit.FunSuite: // assertEqualsDouble(v1.qdep(0.95, v3), 0.8, 0.0001) } + test("select picks elements by index order") { + val v = Array[Double](10.0, 20.0, 30.0, 40.0) + val idx = Array(3, 1, 0) + val out = v.select(idx) + assertEquals(out.toSeq, Seq(40.0, 20.0, 10.0)) + } + test("tvar index") { import vecxt.reinsurance.tVarIdx val v1 = Array.tabulate[Double](100)(_.toDouble) @@ -531,7 +539,7 @@ class ArrayExtensionSuite extends munit.FunSuite: assert(tvar(9)) assert(tvar(6)) - val v4 = v1(tvar) + val v4 = v1.mask(tvar) assertEquals(v4.length, 2) assertEquals(v4(0), 2.0) assertEquals(v4(1), 1.0) diff --git a/vecxt/test/src/booleanarray.test.scala b/vecxt/test/src/booleanarray.test.scala index 8d67e734..cd561db7 100644 --- a/vecxt/test/src/booleanarray.test.scala +++ b/vecxt/test/src/booleanarray.test.scala @@ -2,7 +2,7 @@ package vecxt import vecxt.all.* -class BooleaArrayExtensionSuite extends munit.FunSuite: +class BooleanArrayExtensionSuite extends munit.FunSuite: test("all") { val v1 = Array[Boolean](true, true, true) @@ -154,4 +154,18 @@ class BooleaArrayExtensionSuite extends munit.FunSuite: } } -end BooleaArrayExtensionSuite + test("Indexing into via select") { + val v1 = Array[Boolean](true, false, true, false, true) + + val v2 = Array[Int](0, 1, 2, 3, 4) + val indexed = v2.mask(v1)(using true) + + assertEquals(indexed.length, v1.trues) + + assertEquals(indexed(0), 0) + assertEquals(indexed(1), 2) + assertEquals(indexed(2), 4) + + } + +end BooleanArrayExtensionSuite diff --git 
a/vecxt/test/src/intScalar.test.scala b/vecxt/test/src/intScalar.test.scala new file mode 100644 index 00000000..ff61a31b --- /dev/null +++ b/vecxt/test/src/intScalar.test.scala @@ -0,0 +1,33 @@ +package vecxt + +import all.* + +class IntScalarOpsSuite extends munit.FunSuite: + + test("in-place subtraction -= scalar works and mutates array"): + val arr = Array(5, 3, 8) + arr -= 2 + assertEquals(arr.toSeq, Seq(3, 1, 6)) + + test("non-mutating - scalar returns new array and leaves original unchanged"): + val orig = Array(10, 0, -5) + val out = orig - 3 + assertEquals(out.toSeq, Seq(7, -3, -8)) + assertEquals(orig.toSeq, Seq(10, 0, -5)) + + test("subtracting zero does nothing"): + val a = Array(1, 2, 3) + val b = a.clone() + a -= 0 + assertEquals(a.toSeq, b.toSeq) + val c = b - 0 + assertEquals(c.toSeq, b.toSeq) + + test("works on empty arrays"): + val e = Array.empty[Int] + e -= 5 + assertEquals(e.toSeq, Seq()) + val e2 = e - 5 + assertEquals(e2.toSeq, Seq()) + +end IntScalarOpsSuite diff --git a/vecxt/test/src/intarray.test.scala b/vecxt/test/src/intarray.test.scala index d594a297..2700b903 100644 --- a/vecxt/test/src/intarray.test.scala +++ b/vecxt/test/src/intarray.test.scala @@ -31,6 +31,21 @@ class IntArrayExtensionSuite extends munit.FunSuite: assertEquals(v1.sum, 45) } + test("array eq") { + val v1 = Array(1, 2, 3, 4, 5) + + val compared = v1 =:= v1.reverse + + val compared2 = v1 =:= 2 + + assertEquals(compared.trues, 1) + assert(compared(2)) + + assertEquals(compared.trues, 1) + assert(compared(2)) + + } + test("increments") { val v1 = Array[Int](1, 2, 3, 4, 5, 6, 7, 8, 9, 10) @@ -65,4 +80,84 @@ class IntArrayExtensionSuite extends munit.FunSuite: assert(!v2.contiguous) } + test("mean arithmetic progression") { + val v = Array.tabulate[Int](10)(identity) + println(v.printArr) + assertEqualsDouble(v.mean, 4.5d, 1e-12) + } + + test("variance/std zero spread") { + val v = Array.fill[Int](6)(7) + assertEqualsDouble(v.mean, 7d, 0.0, 1e-12) + 
assertEqualsDouble(v.variance, 0.0, 1e-12) + assertEqualsDouble(v.std, 0.0, 1e-12) + } + + test("variance/std arithmetic progression") { + val v = Array.tabulate[Int](10)(identity) + val expectedVar = 8.25d + assertEqualsDouble(v.variance, expectedVar, 1e-9) + assertEqualsDouble(v.std, math.sqrt(expectedVar), 1e-9) + } + + test("meanAndVariance zero spread") { + val v = Array.fill[Int](6)(7) + val stats = v.meanAndVariance + assertEqualsDouble(stats.mean, 7d, 1e-12) + assertEqualsDouble(stats.variance, 0.0, 1e-12) + } + + test("meanAndVariance arithmetic progression") { + val v = Array.tabulate[Int](10)(identity) + val stats = v.meanAndVariance + val expectedVar = 8.25d + assertEqualsDouble(stats.mean, 4.5d, 1e-12) + assertEqualsDouble(stats.variance, expectedVar, 1e-9) + } + + test("select picks indices in order") { + val base = Array.tabulate[Int](10)(identity) + val idx = Array(0, 3, 5, 9) + assertVecEquals(base.select(idx), Array(0, 3, 5, 9)) + } + + test("select handles duplicates and unsorted indices") { + val base = Array.tabulate[Int](6)(identity) + val idx = Array(5, 2, 5, 0) + assertVecEquals(base.select(idx), Array(5, 2, 5, 0)) + } + + test("select with empty index array") { + val base = Array.tabulate[Int](4)(identity) + val idx = Array.emptyIntArray + assertVecEquals(base.select(idx), Array.emptyIntArray) + } + + test("countsToIdx basic") { + val counts = Array(2, 3, 1) + // 2 ones, 3 twos, 1 three => [1, 1, 2, 2, 2, 3] + assertVecEquals(counts.countsToIdx, Array(1, 1, 2, 2, 2, 3)) + } + + test("countsToIdx with zeros") { + val counts = Array(1, 0, 2, 0, 1) + // 1 one, 0 twos, 2 threes, 0 fours, 1 five => [1, 3, 3, 5] + assertVecEquals(counts.countsToIdx, Array(1, 3, 3, 5)) + } + + test("countsToIdx empty array") { + val counts = Array.emptyIntArray + assertVecEquals(counts.countsToIdx, Array.emptyIntArray) + } + + test("countsToIdx all zeros") { + val counts = Array(0, 0, 0) + assertVecEquals(counts.countsToIdx, Array.emptyIntArray) + } + + 
test("countsToIdx single element") { + val counts = Array(5) + assertVecEquals(counts.countsToIdx, Array(1, 1, 1, 1, 1)) + } + end IntArrayExtensionSuite diff --git a/vecxt/test/src/simple.stats.scala b/vecxt/test/src/simple.stats.scala index 2922c56c..27455fd7 100644 --- a/vecxt/test/src/simple.stats.scala +++ b/vecxt/test/src/simple.stats.scala @@ -26,8 +26,8 @@ class StatsSuite extends munit.FunSuite: test("sample variance and std") { val v = Array[Double](2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0) - assertEqualsDouble(v.variance, 4.571429, 0.00001) - assertEqualsDouble(v.stdDev, 2.13809, 0.00001) + assertEqualsDouble(v.variance(VarianceMode.Sample), 4.571429, 0.00001) + assertEqualsDouble(v.stdDev(VarianceMode.Sample), 2.13809, 0.00001) } test("elementRanks") { diff --git a/vecxt_re/Readme.md b/vecxt_re/Readme.md new file mode 100644 index 00000000..62122f20 --- /dev/null +++ b/vecxt_re/Readme.md @@ -0,0 +1,4 @@ +# Vecxt Re + +A very domain specific set of experiments in computational reinsurance. 
+ diff --git a/vecxt_re/package.mill b/vecxt_re/package.mill new file mode 100644 index 00000000..a37a2c5f --- /dev/null +++ b/vecxt_re/package.mill @@ -0,0 +1,85 @@ +package build.vecxt_re + +import mill.*, scalalib.*, scalajslib.*, publish.* +import mill.scalajslib.api.ModuleKind +import mill.api.Task.Simple + +object `package` extends Module: + trait VexctReModule extends PlatformScalaModule with build.VecxtPublishModule with build.ShareCompileResources: + def mvnDeps = super.mvnDeps() ++ Seq() + + trait VexctReTest extends ScalaTests, TestModule.Munit: + def mvnDeps = super.mvnDeps() ++ Seq( + mvn"org.scalameta::munit::${build.V.munitVersion}" + ) + override def forkArgs: Simple[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag + end VexctReTest + end VexctReModule + + private def jsNativeSharedSources = Task.Sources { + os.sub / "src-js-native" + } + + private def jvmNativeSharedSources = Task.Sources { + os.sub / "src-jvm-native" + } + + object jvm extends VexctReModule: + def moduleDeps = Seq(build.vecxt.jvm) + override def scalaVersion = build.V.scalaVersion + override def forkArgs = super.forkArgs() ++ build.vecIncubatorFlag + // Ensure macro resource lookups (e.g. 
VegaPlot.fromResource) can see this module's resources during compilation + def sources = Task(super.sources() ++ jvmNativeSharedSources()) + + override def mvnDeps = super.mvnDeps() ++ Seq( + mvn"io.github.quafadas::scautable:0.0.35", + mvn"io.github.quafadas::dedav4s:0.10.5", + mvn"org.apache.commons:commons-math4-core:4.0-beta1", + mvn"org.apache.commons:commons-statistics-distribution:1.1", + mvn"org.apache.commons:commons-rng-simple:1.6", + mvn"org.typelevel::cats-kernel:${build.V.catsVersion}" + ) + + object test extends VexctReTest, ScalaTests: + def moduleDeps = Seq(jvm) + override def mvnDeps = super.mvnDeps() ++ Seq( + mvn"org.typelevel::cats-kernel-laws:${build.V.catsVersion}", + mvn"org.typelevel::discipline-munit:${build.V.disciplineVersion}", + mvn"org.scalacheck::scalacheck:${build.V.scalacheckVersion}" + ) + override def forkArgs: Simple[Seq[String]] = super.forkArgs() ++ build.vecIncubatorFlag + end test + end jvm + + object js extends VexctReModule with build.CommonJS: + def moduleDeps = Seq(build.vecxt.js) + override def mvnDeps = super.mvnDeps() ++ Seq( + build.V.scalaJavaTime, + mvn"org.typelevel::cats-kernel::${build.V.catsVersion}" + ) + def sources = Task(super.sources() ++ jsNativeSharedSources()) + def moduleKind = ModuleKind.ESModule + def enableBsp = false + + object test extends VexctReTest, ScalaJSTests: + def moduleDeps = Seq(js) + def moduleKind = ModuleKind.CommonJSModule + override def enableBsp = false + end test + end js + + object native extends VexctReModule with build.CommonNative: + def moduleDeps = Seq(build.vecxt.native) + override def mvnDeps = super.mvnDeps() ++ Seq( + build.V.scalaJavaTime, + mvn"org.typelevel::cats-kernel::${build.V.catsVersion}" + ) + def sources = Task(super.sources() ++ jsNativeSharedSources() ++ jvmNativeSharedSources()) + override def enableBsp = false + + object test extends ScalaNativeTests, VexctReTest: + override def moduleDeps = Seq(native) + override def enableBsp = false + end test + end 
native +end `package` diff --git a/vecxt_re/resources/digraph.vg.json b/vecxt_re/resources/digraph.vg.json new file mode 100644 index 00000000..12089701 --- /dev/null +++ b/vecxt_re/resources/digraph.vg.json @@ -0,0 +1,103 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v6.json", + "description": "A node-link diagram with force-directed layout, depicting character co-occurrence in the novel Les Misérables.", + "width": 700, + "height": 500, + "padding": 0, + "autosize": "none", + + "signals": [ + { "name": "cx", "update": "width / 2" }, + { "name": "cy", "update": "height / 2" }, + { "name": "nodeRadius", "value": 20 } + ], + + "data": [ + { + "name": "node-data", + "values": [ + { "id": "A", "x": 100, "y": 100 }, + { "id": "B", "x": 300, "y": 120 }, + { "id": "C", "x": 200, "y": 300 } + ] + }, + { + "name": "link-data", + "values": [ + { "source": "A", "target": "B" }, + { "source": "B", "target": "C" }, + { "source": "A", "target": "C" } + ], + "transform": [ + { + "type": "lookup", + "from": "node-data", + "key": "id", + "fields": ["source"], + "as": ["sourceNode"] + }, + { + "type": "lookup", + "from": "node-data", + "key": "id", + "fields": ["target"], + "as": ["targetNode"] + } + ] + } + ], + + "scales": [ + { + "name": "color", + "type": "ordinal", + "domain": { "data": "node-data", "field": "group" }, + "range": { "scheme": "category20c" } + } + ], + + "marks": [ + { + "name": "nodes", + "type": "symbol", + "zindex": 1, + + "from": { "data": "node-data" }, + + "encode": { + "enter": { + "x": { "field": "x" }, + "y": { "field": "y" }, + "fill": { "scale": "color", "field": "group" }, + "stroke": { "value": "white" } + }, + "update": { + "size": { "signal": "2 * nodeRadius * nodeRadius" }, + "cursor": { "value": "pointer" } + } + } + }, + { + "type": "path", + "from": { "data": "link-data" }, + "interactive": false, + "encode": { + "update": { + "stroke": { "value": "#ccc" }, + "strokeWidth": { "value": 0.5 } + } + }, + "transform": [ + { + "type": 
"linkpath", + "shape": "diagonal", + "orient": "vertical", + "sourceX": "datum.sourceNode.x", + "sourceY": "datum.sourceNode.y", + "targetX": "datum.targetNode.x", + "targetY": "datum.targetNode.y" + } + ] + } + ] +} diff --git a/vecxt_re/resources/distDensity.vg.json b/vecxt_re/resources/distDensity.vg.json new file mode 100644 index 00000000..1af950a0 --- /dev/null +++ b/vecxt_re/resources/distDensity.vg.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "Based on https://vega.github.io/vega/examples/probability-density.vg.json", + "data": [ + { + "name": "points", + "values": [ + { + "amount": 1.1, + "probability": 0.1, + "c": "first", + "offset": 0.5 + }, + { + "amount": 1.01, + "probability": 0.2, + "c": "first", + "offset": 0.5 + }, + { + "amount": 0.99, + "probability": 0.3, + "c": "first", + "offset": 0.5 + }, + { + "amount": 0.5, + "probability": 0.5, + "c": "first", + "offset": 0.5 + }, + { + "amount": 0.1, + "probability": 0.99, + "c": "first", + "offset": 0.5 + } + ] + }, + { + "name": "summary", + "source": "points", + "transform": [ + { + "type": "aggregate", + "fields": [ + "amount", + "offset" + ], + "groupby": [ + "c" + ], + "ops": [ + "mean", + "mean" + ], + "as": [ + "mean", + "meanOffset" + ] + } + ] + } + ], + "scales": [ + { + "name": "color", + "type": "ordinal", + "domain": { + "data": "points", + "field": "c" + }, + "range": { + "scheme": "category20" + } + }, + { + "name": "xscale", + "range": "width", + "domain": { + "data": "points", + "field": "probability" + }, + "type": "log" + }, + { + "name": "yscale", + "type": "linear", + "range": "height", + "domain": { + "fields": [ + { + "data": "points", + "field": "amount" + } + ] + } + } + ], + "legends": [ + { + "orient": "top-right", + "fill": "color", + "offset": 0, + "zindex": 1 + } + ], + "marks": [ + { + "type": "group", + "from": { + "facet": { + "name": "series", + "data": "points", + "groupby": "c" + } + }, + "marks": [ + { + 
"type": "line", + "from": { + "data": "series" + }, + "encode": { + "enter": { + "x": { + "scale": "xscale", + "field": "probability" + }, + "y": { + "scale": "yscale", + "field": "amount" + }, + "stroke": { + "scale": "color", + "field": "c" + }, + "tooltip": { + "signal": "{amount : format(datum.amount*100, \".3f\")+\"%\" , probAmountSmaller: format(datum.probability*100,\".3f\")+\"%\" }" + } + }, + "update": { + "interpolate": "monotone", + "strokeOpacity": { + "value": 1 + } + }, + "hover": { + "strokeOpacity": { + "value": 0.5 + } + } + } + } + ] + }, + { + "type": "rect", + "from": { + "data": "points" + }, + "interactive": true, + "encode": { + "enter": { + "y": { + "scale": "yscale", + "field": "amount" + }, + "height": { + "value": 2 + }, + "x": { + "value": 25, + "offset": { + "signal": "width" + }, + "mult": { + "field": "offset" + } + }, + "tooltip": { + "signal": "{amount : format(datum.amount*100, \".3f\")+\"%\" , probAmountSmaller: format(datum.probability*100,\".3f\")+\"%\" }" + }, + "width": { + "value": 5 + }, + "fill": { + "scale": "color", + "field": "c" + }, + "fillOpacity": { + "value": 0.4 + } + } + } + }, + { + "type": "rect", + "from": { + "data": "summary" + }, + "interactive": true, + "zindex": 1, + "encode": { + "enter": { + "y": { + "scale": "yscale", + "field": "mean" + }, + "height": { + "value": 2 + }, + "x": { + "value": 25, + "offset": { + "signal": "width" + }, + "mult": { + "field": "meanOffset" + } + }, + "fill": { + "value": "black" + }, + "width": { + "value": 5 + }, + "fillOpacity": { + "value": 1 + } + } + } + } + ], + "axes": [ + { + "orient": "bottom", + "scale": "xscale", + "zindex": 0, + "grid": true, + "title": "CDF", + "titleAnchor": "middle", + "gridOpacity": 0.5 + }, + { + "orient": "left", + "scale": "yscale", + "zindex": 0, + "grid": true, + "title": "AMOUNT", + "titleAnchor": "middle", + "gridOpacity": 0.5, + "domain": false + } + ] +} \ No newline at end of file diff --git a/vecxt_re/resources/ecdfVsCdf.vl.json 
b/vecxt_re/resources/ecdfVsCdf.vl.json new file mode 100644 index 00000000..50fafe39 --- /dev/null +++ b/vecxt_re/resources/ecdfVsCdf.vl.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "ECDF vs Theoretical CDF comparison for count data", + "width": "container", + "height": "container", + "title": "ECDF vs Theoretical CDF", + "layer": [ + { + "data": { + "values": [ + {"value": 0, "prob": 0.1}, + {"value": 1, "prob": 0.3}, + {"value": 2, "prob": 0.6}, + {"value": 3, "prob": 0.85}, + {"value": 4, "prob": 0.95} + ] + }, + "mark": { + "type": "line", + "interpolate": "step-after", + "strokeWidth": 2, + "color": "steelblue", + "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative", "title": "k"}, + "y": {"field": "prob", "type": "quantitative", "title": "Cumulative Probability"} + } + }, + { + "data": { + "values": [ + {"value": 0, "prob": 0.12}, + {"value": 1, "prob": 0.32}, + {"value": 2, "prob": 0.58}, + {"value": 3, "prob": 0.82}, + {"value": 4, "prob": 0.97} + ] + }, + "mark": { + "type": "line", + "interpolate": "step-after", + "strokeWidth": 2, + "strokeDash": [4, 4], + "color": "orange", + "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative", "title": "k"}, + "y": {"field": "prob", "type": "quantitative", "title": "Cumulative Probability"} + } + } + ] +} diff --git a/vecxt_re/resources/empiricalCdf.vl.json b/vecxt_re/resources/empiricalCdf.vl.json new file mode 100644 index 00000000..10f01943 --- /dev/null +++ b/vecxt_re/resources/empiricalCdf.vl.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Empirical CDF (optionally weighted)", + "width": "container", + "height": "container", + "data": { "values": [ { "x": 0.0, "cdf": 0.0 } ] }, + "mark": { "type": "line", "interpolate": "step-after" }, + "encoding": { + "x": { "field": "x", "type": "quantitative", "title": "x" }, + "y": { "field": 
"cdf", "type": "quantitative", "title": "CDF" } + } +} diff --git a/vecxt_re/resources/empiricalPdf.vl.json b/vecxt_re/resources/empiricalPdf.vl.json new file mode 100644 index 00000000..82c6d343 --- /dev/null +++ b/vecxt_re/resources/empiricalPdf.vl.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Empirical distribution PDF via (optionally weighted) histogram", + "width": "container", + "height": "container", + "data": { "values": [ { "x": 0.0, "w": 1.0 } ] }, + "transform": [ + { "bin": { "maxbins": 60 }, "field": "x", "as": ["x0", "x1"] }, + { "aggregate": [ { "op": "sum", "field": "w", "as": "binWeight" } ], "groupby": ["x0", "x1"] }, + { "joinaggregate": [ { "op": "sum", "field": "binWeight", "as": "W" } ] }, + { "calculate": "datum.binWeight / (datum.W * (datum.x1 - datum.x0))", "as": "density" } + ], + "mark": { "type": "bar", "opacity": 0.65 }, + "encoding": { + "x": { "field": "x0", "type": "quantitative", "title": "x" }, + "x2": { "field": "x1" }, + "y": { "field": "density", "type": "quantitative", "title": "PDF" }, + "tooltip": [ + { "field": "binWeight", "type": "quantitative", "title": "Bin weight" }, + { "field": "density", "type": "quantitative", "title": "Density" } + ] + } +} diff --git a/vecxt_re/resources/hillPlot.vl.json b/vecxt_re/resources/hillPlot.vl.json new file mode 100644 index 00000000..cc8987ad --- /dev/null +++ b/vecxt_re/resources/hillPlot.vl.json @@ -0,0 +1,33 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "title": "Hill Plot", + "width": "container", + "height": "container", + "data": { + "values": [ + { "k": 10, "estimate": 2.1 }, + { "k": 20, "estimate": 2.05 }, + { "k": 30, "estimate": 1.98 }, + { "k": 40, "estimate": 2.02 }, + { "k": 50, "estimate": 1.95 } + ] + }, + "mark": { + "type": "line", + "point": { "filled": true, "size": 40 }, + "tooltip": { "content": "data" } + }, + "encoding": { + "x": { + "field": "k", + "type": "quantitative", + 
"title": "k (number of upper order statistics)" + }, + "y": { + "field": "estimate", + "type": "quantitative", + "title": "Tail Index Estimate α̂(k)" + }, + "color": { "value": "steelblue" } + } +} diff --git a/vecxt_re/resources/hist.vg.json b/vecxt_re/resources/hist.vg.json new file mode 100644 index 00000000..2485e709 --- /dev/null +++ b/vecxt_re/resources/hist.vg.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "An interactive histogram for visualizing a univariate distribution.", + "width": 500, + "height": 100, + "padding": 5, + + "signals": [ + { "name": "binOffset", "value": 0, + "bind": {"input": "range", "min": 1, "max": 10} }, + { "name": "binStep", "value": 1, + "bind": {"input": "range", "min": 0.001, "max":0, "step": 1} } + ], + + "data": [ + { + "name": "points", + "values": [ + {"u": 0}, {"u": 1}, {"u": 2}, {"u": 3}, {"u": 4}, + {"u": 5}, {"u": 6}, {"u": 7}, {"u": 8}, {"u": 9}, {"u": 10} + + ] + }, + { + "name": "binned", + "source": "points", + "transform": [ + { + "type": "bin", "field": "u", + "extent": [0, 10], + "anchor": {"signal": "binOffset"}, + "step": {"signal": "binStep"}, + "nice": false + }, + { + "type": "aggregate", + "key": "bin0", "groupby": ["bin0", "bin1"], + "fields": ["bin0"], "ops": ["count"], "as": ["count"] + } + ] + } + ], + + "scales": [ + { + "name": "xscale", + "type": "linear", + "range": "width", + "domain": [0, 10] + }, + { + "name": "yscale", + "type": "linear", + "range": "height", "round": true, + "domain": {"data": "binned", "field": "count"}, + "zero": true, "nice": true + } + ], + + "axes": [ + {"orient": "bottom", "scale": "xscale", "zindex": 1}, + {"orient": "left", "scale": "yscale", "tickCount": 5, "zindex": 1} + ], + + "marks": [ + { + "type": "rect", + "from": {"data": "binned"}, + "encode": { + "update": { + "x": {"scale": "xscale", "field": "bin0"}, + "x2": {"scale": "xscale", "field": "bin1", + "offset": {"signal": "binStep > 0.02 ? 
-0.5 : 0"}}, + "y": {"scale": "yscale", "field": "count"}, + "y2": {"scale": "yscale", "value": 0}, + "fill": {"value": "steelblue"} + }, + "hover": { "fill": {"value": "firebrick"} } + } + }, + { + "type": "rect", + "from": {"data": "points"}, + "encode": { + "enter": { + "x": {"scale": "xscale", "field": "u"}, + "width": {"value": 1}, + "y": {"value": 25, "offset": {"signal": "height"}}, + "height": {"value": 5}, + "fill": {"value": "steelblue"}, + "fillOpacity": {"value": 0.4} + } + } + } + ] +} \ No newline at end of file diff --git a/vecxt_re/resources/index.vl.json b/vecxt_re/resources/index.vl.json new file mode 100644 index 00000000..16d4e69a --- /dev/null +++ b/vecxt_re/resources/index.vl.json @@ -0,0 +1,95 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Incremental index and cumulative index over time.", + "width":"container", + "height":"container", + "data": { + "values": [ + {"year": 2024, "index": 1.05, "cumulative": 0.9, "threshold": 1.1, "missing": 0.9}, + {"year": 2025, "index": 1.05, "cumulative": 0.975, "threshold": 1.1, "missing": 0.975}, + {"year": 2026, "index": 1.05, "cumulative": 1.0, "threshold": 1.1, "missing": 1.0} + ] + }, + "layer": [ + { + "mark": { + "type": "area", + "color": "red", + "opacity": 0.2, + "tooltip": true + }, + "encoding": { + "x": { + "field": "year", + "type": "ordinal", + "title": "Year" + }, + "y": { + "field": "missing", + "type": "quantitative", + "scale": { "zero": false }, + "title": "Index" + }, + "y2": { + "datum": 1.0 + } + } + }, + + { + "mark": { + "type": "point", + "filled": true, + "size": 80, + "tooltip": true + }, + "encoding": { + "x": { + "field": "year", + "type": "ordinal" + }, + "y": { + "field": "index", + "type": "quantitative" + } + } + }, + { + "mark": { + "type": "rule", + "tooltip": true + }, + "encoding": { + "x": { + "field": "year", + "type": "ordinal" + }, + "y": { + "field": "index", + "type": "quantitative" + }, + "y2": { + "datum": 1.0 + } + } + 
}, + { + "mark": { + "type": "line", + "tooltip": true, + "strokeDash": [4, 4] + }, + "encoding": { + "x": { + "field": "year", + "type": "ordinal" + }, + "y": { + "field": "threshold", + "type": "quantitative" + } + } + } + ] + +} diff --git a/vecxt_re/resources/loglogCdf.vl.json b/vecxt_re/resources/loglogCdf.vl.json new file mode 100644 index 00000000..f92be3f7 --- /dev/null +++ b/vecxt_re/resources/loglogCdf.vl.json @@ -0,0 +1,56 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "title": "Log-Log CDF Plot", + "width": "container", + "height": "container", + "data": { + "values": [ + { "x": 1, "y": 0.9, "source": "model" }, + { "x": 10, "y": 0.5, "source": "model" }, + { "x": 100, "y": 0.1, "source": "model" }, + { "x": 1000, "y": 0.01, "source": "model" }, + { "x": 2, "y": 0.85, "source": "empirical" }, + { "x": 15, "y": 0.55, "source": "empirical" }, + { "x": 80, "y": 0.15, "source": "empirical" }, + { "x": 500, "y": 0.05, "source": "empirical" } + ] + }, + "layer": [ + { + "transform": [{ "filter": "datum.source === 'model'" }], + "mark": "line", + "encoding": { + "x": { + "field": "x", + "type": "quantitative", + "scale": { "type": "log", "domainMin": 1 }, + "title": "X (log scale)" + }, + "y": { + "field": "y", + "type": "quantitative", + "scale": { "type": "log" }, + "title": "Survival Probability S(x)" + }, + "color": { "value": "steelblue" } + } + }, + { + "transform": [{ "filter": "datum.source === 'empirical'" }], + "mark": { "type": "point", "shape": "cross", "size": 100, "tooltip": {"content": "data"} }, + "encoding": { + "x": { + "field": "x", + "type": "quantitative", + "scale": { "type": "log", "domainMin": 1 } + }, + "y": { + "field": "y", + "type": "quantitative", + "scale": { "type": "log" } + }, + "color": { "value": "red" } + } + } + ] +} diff --git a/vecxt_re/resources/mixedCdf.vl.json b/vecxt_re/resources/mixedCdf.vl.json new file mode 100644 index 00000000..8c9f5e95 --- /dev/null +++ 
b/vecxt_re/resources/mixedCdf.vl.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Mixed distribution CDF (empirical body + Pareto tail)", + "width": "container", + "height": "container", + "data": { "values": [ { "x": 0.0, "cdf": 0.0 } ] }, + "mark": { "type": "line" }, + "encoding": { + "x": { "field": "x", "type": "quantitative", "title": "x" }, + "y": { "field": "cdf", "type": "quantitative", "title": "CDF" } + } +} diff --git a/vecxt_re/resources/mixedPdf.vl.json b/vecxt_re/resources/mixedPdf.vl.json new file mode 100644 index 00000000..b40bfb1d --- /dev/null +++ b/vecxt_re/resources/mixedPdf.vl.json @@ -0,0 +1,23 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Mixed distribution PDF (empirical body + Pareto tail) via histogram of samples", + "width": "container", + "height": "container", + "data": { "values": [ { "x": 0.0 } ] }, + "transform": [ + { "bin": { "maxbins": 80 }, "field": "x", "as": ["x0", "x1"] }, + { "aggregate": [ { "op": "count", "as": "binCount" } ], "groupby": ["x0", "x1"] }, + { "joinaggregate": [ { "op": "sum", "field": "binCount", "as": "N" } ] }, + { "calculate": "datum.binCount / (datum.N * (datum.x1 - datum.x0))", "as": "density" } + ], + "mark": { "type": "bar", "opacity": 0.65 }, + "encoding": { + "x": { "field": "x0", "type": "quantitative", "title": "x" }, + "x2": { "field": "x1" }, + "y": { "field": "density", "type": "quantitative", "title": "PDF" }, + "tooltip": [ + { "field": "binCount", "type": "quantitative", "title": "Bin count" }, + { "field": "density", "type": "quantitative", "title": "Density" } + ] + } +} diff --git a/vecxt_re/resources/negBinCumul.vl.json b/vecxt_re/resources/negBinCumul.vl.json new file mode 100644 index 00000000..618b9eb9 --- /dev/null +++ b/vecxt_re/resources/negBinCumul.vl.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Negative Binomial 
distribution cumulative probabilities.", "data": {"values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.18} + ]}, + "mark": { + "type": "line", "interpolate": "step-after", "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative"}, + "y": {"field": "prob", "type": "quantitative"} + }, + "width": "container", + "height": "container" +} \ No newline at end of file diff --git a/vecxt_re/resources/negBinCumul_vsSample.vl.json b/vecxt_re/resources/negBinCumul_vsSample.vl.json new file mode 100644 index 00000000..c5f95805 --- /dev/null +++ b/vecxt_re/resources/negBinCumul_vsSample.vl.json @@ -0,0 +1,51 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Negative Binomial CDF with samples overlaid", + "width": "container", + "height": "container", + "title": "Negative Binomial CDF with Sample Points", + "layer": [ + { + "data": { + "values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.18}, + {"value": 3, "prob": 0.30}, + {"value": 4, "prob": 0.45} + ] + }, + "mark": { + "type": "line", + "interpolate": "step-after", + "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative", "title": "k"}, + "y": {"field": "prob", "type": "quantitative", "title": "CDF"} + } + }, + + { + "data": { + "values": [ + {"value": 1, "prob": 0.125}, + {"value": 2.0, "prob": 0.35}, + {"value": 3, "prob": 0.55}, + {"value": 4, "prob": 0.85} + ] + }, + "mark": { + "type": "point", + "shape": "cross", + "color": "red", + "size": 100, + "strokeWidth": 2 + }, + "encoding": { + "x": {"field": "value", "type": "quantitative", "title": "k"}, + "y": {"field": "prob", "type": "quantitative", "title": "CDF"} + } + } + ] +} diff --git a/vecxt_re/resources/negBinProb.vl.json b/vecxt_re/resources/negBinProb.vl.json new file mode 100644 index 00000000..a2e14071 --- /dev/null +++ b/vecxt_re/resources/negBinProb.vl.json 
@@ -0,0 +1,18 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Negative Binomial distribution probabilities.", + "data": {"values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.18} + ]}, + "mark": { + "type": "bar", "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative"}, + "y": {"field": "prob", "type": "quantitative"} + }, + "width": "container", + "height": "container" +} \ No newline at end of file diff --git a/vecxt_re/resources/paretoCdf.vl.json b/vecxt_re/resources/paretoCdf.vl.json new file mode 100644 index 00000000..312a6298 --- /dev/null +++ b/vecxt_re/resources/paretoCdf.vl.json @@ -0,0 +1,43 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "CDF of a Pareto distribution", + "width": "container", + "height": "container", + + "data": { + "sequence": { + "start": 1, + "stop": 20, + "step": 0.05 + } + }, + + "transform": [ + { + "calculate": "1 - pow(1 / datum.data, 2)", + "as": "cdf" + } + ], + + "mark": { + "type": "line", + "interpolate": "monotone", + "strokeWidth": 2, + "tooltip": true + }, + + "encoding": { + "x": { + "field": "data", + "type": "quantitative", + "title": "x", + "scale": { "zero": false } + }, + "y": { + "field": "cdf", + "type": "quantitative", + "title": "F(x)", + "scale": { "domain": [0, 1] } + } + } +} \ No newline at end of file diff --git a/vecxt_re/resources/paretoPdf.vl.json b/vecxt_re/resources/paretoPdf.vl.json new file mode 100644 index 00000000..3584bd6a --- /dev/null +++ b/vecxt_re/resources/paretoPdf.vl.json @@ -0,0 +1,145 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Pareto PDF with correctly normalized log-binned histogram and analytic overlay", + "width": "container", + "height": "container", + "resolve": { + "scale": { + "x": "shared" + } + }, + "layer": [ + { + "data": { + "values": [ + { + "x": 1.02 + }, + { + "x": 1.05 + } + ] + 
}, + "transform": [ + { + "calculate": "log(datum.x)", + "as": "logx" + }, + { + "bin": { + "maxbins": 50 + }, + "field": "logx", + "as": [ + "logx0", + "logx1" + ] + }, + { + "aggregate": [ + { + "op": "count", + "as": "count" + } + ], + "groupby": [ + "logx0", + "logx1" + ] + }, + { + "joinaggregate": [ + { + "op": "sum", + "field": "count", + "as": "N" + } + ] + }, + { + "calculate": "exp(datum.logx0)", + "as": "x0" + }, + { + "calculate": "exp(datum.logx1)", + "as": "x1" + }, + { + "calculate": "datum.count / (datum.N * (datum.x1 - datum.x0))", + "as": "density" + } + ], + "mark": { + "type": "bar", + "opacity": 0.6 + }, + "encoding": { + "x": { + "field": "x0", + "type": "quantitative", + "title": "x", + "scale": { + "nice": false, + "domain": [ + 1, + 20 + ] + } + }, + "x2": { + "field": "x1" + }, + "y": { + "field": "density", + "type": "quantitative", + "title": "PDF" + }, + "tooltip": [ + { + "field": "count", + "type": "quantitative", + "title": "Count" + }, + { + "field": "density", + "type": "quantitative", + "title": "Density" + } + ] + } + }, + { + "data": { + "sequence": { + "start": 1, + "stop": 20, + "step": 0.05 + } + }, + "transform": [ + { + "calculate": "pow(datum.data, -2)", + "as": "pdf" + } + ], + "mark": { + "type": "line", + "strokeWidth": 2 + }, + "encoding": { + "x": { + "field": "data", + "type": "quantitative", + "scale": { + "nice": false, + "type": "log" + } + }, + "y": { + "field": "pdf", + "type": "quantitative" + } + } + } + ] +} \ No newline at end of file diff --git a/vecxt_re/resources/paretoPdfFixed.vl.json b/vecxt_re/resources/paretoPdfFixed.vl.json new file mode 100644 index 00000000..fb5fc58d --- /dev/null +++ b/vecxt_re/resources/paretoPdfFixed.vl.json @@ -0,0 +1,40 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v5.json", + "description": "Pareto PDF with log-binned histogram and analytic overlay", + "width": "container", + "height": "container", + "resolve": { + "scale": { + "x": "shared" + } + }, + "layer": 
[ + { + "data": { "values": [ { "x": 1.02 }, { "x": 1.05 } ] }, + "transform": [ + { "calculate": "log(datum.x)", "as": "logx" }, + { "bin": { "maxbins": 50 }, "field": "logx", "as": ["logx0", "logx1"] }, + { "aggregate": [ { "op": "count", "as": "count" } ], "groupby": ["logx0", "logx1"] }, + { "joinaggregate": [ { "op": "sum", "field": "count", "as": "N" } ] }, + { "calculate": "exp(datum.logx0)", "as": "x0" }, + { "calculate": "exp(datum.logx1)", "as": "x1" }, + { "calculate": "datum.count / (datum.N * (datum.x1 - datum.x0))", "as": "density" } + ], + "mark": { "type": "bar", "opacity": 0.6 }, + "encoding": { + "x": { "field": "x0", "type": "quantitative", "scale": { "type": "log" }, "title": "x" }, + "x2": { "field": "x1" }, + "y": { "field": "density", "type": "quantitative", "title": "PDF" } + } + }, + { + "data": { "sequence": { "start": 1, "stop": 20, "step": 0.05 } }, + "transform": [ { "calculate": "pow(datum.data, -2)", "as": "pdf" } ], + "mark": { "type": "line", "strokeWidth": 2 }, + "encoding": { + "x": { "field": "data", "type": "quantitative", "scale": { "type": "log" } }, + "y": { "field": "pdf", "type": "quantitative" } + } + } + ] +} diff --git a/vecxt_re/resources/pearsonResiduals.vl.json b/vecxt_re/resources/pearsonResiduals.vl.json new file mode 100644 index 00000000..666570fa --- /dev/null +++ b/vecxt_re/resources/pearsonResiduals.vl.json @@ -0,0 +1,43 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Pearson residuals plot for count data", + "width": "container", + "height": "container", + "title": "Pearson Residuals", + "data": { + "values": [ + {"k": 0, "residual": 0.5}, + {"k": 1, "residual": -1.2}, + {"k": 2, "residual": 0.8}, + {"k": 3, "residual": -0.3} + ] + }, + "layer": [ + { + "mark": {"type": "bar", "color": "steelblue"}, + "encoding": { + "x": {"field": "k", "type": "ordinal", "title": "Count (k)"}, + "y": {"field": "residual", "type": "quantitative", "title": "Pearson Residual"}, + "color": 
{ + "condition": { + "test": "abs(datum.residual) > 2", + "value": "red" + }, + "value": "steelblue" + } + } + }, + { + "mark": {"type": "rule", "color": "black", "strokeDash": [4, 4]}, + "encoding": {"y": {"datum": 2}} + }, + { + "mark": {"type": "rule", "color": "black", "strokeDash": [4, 4]}, + "encoding": {"y": {"datum": -2}} + }, + { + "mark": {"type": "rule", "color": "gray"}, + "encoding": {"y": {"datum": 0}} + } + ] +} diff --git a/vecxt_re/resources/poissonCumul.vl.json b/vecxt_re/resources/poissonCumul.vl.json new file mode 100644 index 00000000..2ea62753 --- /dev/null +++ b/vecxt_re/resources/poissonCumul.vl.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Poisson distribution cumulative probabilities.", + "data": {"values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.28} + ]}, + "mark": { + "type": "line", "tooltip": true, "point": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative"}, + "y": {"field": "prob", "type": "quantitative"} + }, + "width": "container", + "height": "container" +} diff --git a/vecxt_re/resources/poissonProb.vl.json b/vecxt_re/resources/poissonProb.vl.json new file mode 100644 index 00000000..42b12a9e --- /dev/null +++ b/vecxt_re/resources/poissonProb.vl.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Poisson distribution probabilities.", + "data": {"values": [ + {"value": 0, "prob": 0.0}, + {"value": 1, "prob": 0.1}, + {"value": 2, "prob": 0.18} + ]}, + "mark": { + "type": "bar", "tooltip": true + }, + "encoding": { + "x": {"field": "value", "type": "quantitative"}, + "y": {"field": "prob", "type": "quantitative"} + }, + "width": "container", + "height": "container" +} diff --git a/vecxt_re/resources/poissonTrend.vl.json b/vecxt_re/resources/poissonTrend.vl.json new file mode 100644 index 00000000..de54bdf6 --- /dev/null +++ 
b/vecxt_re/resources/poissonTrend.vl.json @@ -0,0 +1,111 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Poisson GLM trend with confidence intervals", + "width": 600, + "height": 600, + "title": "Frequency Trend", + "layer": [ + { + "data": { + "values": [{"year": 2000, "lower": 0.3, "upper": 1.2}] + }, + "transform": [{"calculate": "'CI @ 95%'", "as": "legend"}], + "mark": { + "type": "area", + "opacity": 0.15 + }, + "encoding": { + "x": {"field": "year", "type": "quantitative", "title": "Year", "scale": {"zero": false}}, + "y": {"field": "lower", "type": "quantitative", "title": "Count"}, + "y2": {"field": "upper"}, + "color": { + "field": "legend", + "type": "nominal", + "scale": {"range": ["red"]}, + "legend": {"symbolType": "square", "symbolOpacity": 0.15, "title": null} + } + } + }, + { + "data": { + "values": [{"year": 2000, "lower": 0.3}] + }, + "mark": { + "type": "line", + "strokeWidth": 2, + "strokeDash": [6, 4], + "color": "red" + }, + "encoding": { + "x": {"field": "year", "type": "quantitative"}, + "y": {"field": "lower", "type": "quantitative"} + } + }, + { + "data": { + "values": [{"year": 2000, "upper": 1.2}] + }, + "mark": { + "type": "line", + "strokeWidth": 2, + "strokeDash": [6, 4], + "color": "red" + }, + "encoding": { + "x": {"field": "year", "type": "quantitative"}, + "y": {"field": "upper", "type": "quantitative", "title": "Count"} + } + }, + { + "data": { + "values": [{"year": 2000, "fit": 0.7}] + }, + "transform": [{"calculate": "'Fit'", "as": "legend"}], + "mark": { + "type": "line", + "strokeWidth": 2.5, + "tooltip": true + }, + "encoding": { + "x": {"field": "year", "type": "quantitative"}, + "y": {"field": "fit", "type": "quantitative"}, + "stroke": { + "field": "legend", + "type": "nominal", + "scale": {"range": ["red"]}, + "legend": {"symbolType": "stroke", "symbolStrokeWidth": 2.5, "title": null} + } + } + }, + { + "data": { + "values": [{"year": 2000, "count": 1}] + }, + "transform": 
[{"calculate": "'Observation'", "as": "obsLegend"}], + "mark": { + "type": "point", + "shape": "cross", + "size": 80, + "strokeWidth": 2, + "tooltip": true, + "stroke": "steelblue" + }, + "encoding": { + "x": {"field": "year", "type": "quantitative"}, + "y": {"field": "count", "type": "quantitative"}, + "fill": { + "field": "obsLegend", + "type": "nominal", + "scale": {"range": ["steelblue"]}, + "legend": {"symbolType": "cross", "symbolStrokeColor": "steelblue", "title": null} + } + } + } + ], + "config": { + "legend": { + "orient": "top-right", + "offset": 5 + } + } +} diff --git a/vecxt_re/resources/rootogram.vl.json b/vecxt_re/resources/rootogram.vl.json new file mode 100644 index 00000000..e290d945 --- /dev/null +++ b/vecxt_re/resources/rootogram.vl.json @@ -0,0 +1,38 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Hanging rootogram for count data diagnostics", + "width": "container", + "height": "container", + "title": "Hanging Rootogram", + "data": { + "values": [ + {"k": 0, "sqrtExpected": 2.0, "sqrtObserved": 1.8, "hanging": 0.2}, + {"k": 1, "sqrtExpected": 3.0, "sqrtObserved": 3.2, "hanging": -0.2}, + {"k": 2, "sqrtExpected": 2.5, "sqrtObserved": 2.3, "hanging": 0.2} + ] + }, + "encoding": { + "x": {"field": "k", "type": "ordinal", "title": "Count (k)"} + }, + "layer": [ + { + "mark": {"type": "bar", "color": "steelblue", "opacity": 0.7}, + "encoding": { + "y": {"field": "sqrtExpected", "type": "quantitative", "title": "√Frequency"}, + "y2": {"field": "hanging", "type": "quantitative"} + } + }, + { + "mark": {"type": "line", "color": "red", "strokeWidth": 2, "point": true}, + "encoding": { + "y": {"field": "sqrtExpected", "type": "quantitative"} + } + }, + { + "mark": {"type": "rule", "color": "black", "strokeDash": [4, 4]}, + "encoding": { + "y": {"datum": 0} + } + } + ] +} diff --git a/vecxt_re/resources/seasonality.vg.json b/vecxt_re/resources/seasonality.vg.json new file mode 100644 index 00000000..57a52acf --- 
/dev/null +++ b/vecxt_re/resources/seasonality.vg.json @@ -0,0 +1,39 @@ + +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "title": "Seasonality", + "autosize": {"type": "fit", "contains": "padding", "resize": true}, + "width":"container", + "height":"container", + "data": { + "values": [ + {"category": "Jan 2025", "amount": 100, "color": false}, + {"category": "Feb 2025", "amount": 200, "color": false} + ] + } + , + + "mark": {"type": "bar", "tooltip": true}, + + "encoding": { + "x": { + "field": "category", + "type": "temporal", + "timeUnit": "yearmonth", + "bandPosition": 0, + "axis": {"labelAngle": -45, "grid": false, "tickCount": "month"} + + }, + "y": { + "aggregate": "sum", + "field": "amount", + "type": "quantitative" + }, + "color": { + "field": "color", + "type": "nominal", + "scale": {"range": ["steelblue", "green"]}, + "legend": null + } + } +} diff --git a/vecxt_re/resources/squareDiag.vg.json b/vecxt_re/resources/squareDiag.vg.json new file mode 100644 index 00000000..0bbfc78c --- /dev/null +++ b/vecxt_re/resources/squareDiag.vg.json @@ -0,0 +1,161 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v6.json", + "description": "Treemap of transactions sized by 'size'. 
Grouped by transaction → company.", + "width": 700, + "height": 500, + "padding": 2.5, + "autosize": "none", + + "signals": [ + { + "name": "layout", "value": "squarify", + "bind": { + "input": "select", + "options": ["squarify", "binary", "slicedice"] + } + }, + { + "name": "aspectRatio", "value": 1.6, + "bind": { "input": "range", "min": 1, "max": 5, "step": 0.1 } + } + ], + + "data": [ + { + "name": "transactions", + "values": [ + { "company": "A", "size": 40, "transaction": "Yey" }, + { "company": "A", "size": 23, "transaction": "B" }, + { "company": "B", "size": 29, "transaction": "cat" }, + { "company": "C", "size": 17, "transaction": "dog" } + ] + }, + + { + "name": "tree", + "source": "transactions", + "transform": [ + { "type": "nest", "keys": ["transaction", "company"] }, + { + "type": "treemap", + "field": "size", + "sort": { "field": "value" }, + "round": true, + "method": { "signal": "layout" }, + "ratio": { "signal": "aspectRatio" }, + "size": [{ "signal": "width" }, { "signal": "height" }] + } + ] + }, + { + "name": "nodes", + "source": "tree", + "transform": [{ "type": "filter", "expr": "datum.children" }] + }, + { + "name": "leaves", + "source": "tree", + "transform": [{ "type": "filter", "expr": "!datum.children" }] + } + ], + + "scales": [ + { + "name": "color_companies", + "type": "ordinal", + "domain": { "data": "leaves", "field": "company" }, + "range": {"scheme": "dark2"} + }, + { + "name": "labelSize", + "type": "ordinal", + "domain": [0, 1, 2, 3], + "range": [256, 28, 20, 14] + }, + { + "name": "labelOpacity", + "type": "ordinal", + "domain": [0, 1, 2, 3], + "range": [0.15, 0.5, 0.8, 1.0] + } + ], + + "marks": [ + + { + "type": "rect", + "from": { "data": "nodes" }, + "interactive": false, + "encode": { + "enter": { + "fill": { "value": "#f3f3f3" } + }, + "update": { + "x": { "field": "x0" }, + "y": { "field": "y0" }, + "x2": { "field": "x1" }, + "y2": { "field": "y1" } + } + } + }, + + + { + "type": "rect", + "from": { "data": "leaves" }, 
+ "encode": { + "enter": { + "stroke": { "value": "#fff" }, + "fill": { "scale": "color_companies", "field": "company" }, + "tooltip": { + "signal": "{'Transaction': datum.transaction, 'Company': datum.company, 'Size': datum.size}" + } + }, + "update": { + "x": { "field": "x0" }, + "y": { "field": "y0" }, + "x2": { "field": "x1" }, + "y2": { "field": "y1" } + }, + "hover": { + "stroke": { "value": "#000" } + } + } + }, + + + { + "type": "text", + "from": { "data": "leaves" }, + "interactive": false, + "encode": { + "enter": { + "font": { "value": "Helvetica Neue, Arial" }, + "align": { "value": "center" }, + "baseline": { "value": "middle" }, + "fill": { "value": "#000" }, + "fontWeight": { "value": "bold" } + }, + "update": { + "x": { "signal": "(datum.x0 + datum.x1) / 2" }, + "y": { "signal": "(datum.y0 + datum.y1) / 2" }, + + "text": { + "signal": + "[datum.company, datum.transaction, datum.size + 'mUSD']" + }, + + "fontSize": { + "signal": + "15" + }, + + "fillOpacity": { + "signal": + "((datum.x1-datum.x0) * (datum.y1-datum.y0)) < 2500 ? 
0 : 1" + } + } + } +} + ] +} diff --git a/vecxt_re/resources/stemChartWithRollingAverage.vl.json b/vecxt_re/resources/stemChartWithRollingAverage.vl.json new file mode 100644 index 00000000..a656fa15 --- /dev/null +++ b/vecxt_re/resources/stemChartWithRollingAverage.vl.json @@ -0,0 +1,108 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Stem plot with daily time unit showing amounts", + "title": "Stem Plot", + "width": "container", + "height": "container", + "datasets": { + "stemData": [ + { "date": "2022-01-01", "amount": 10, "normalisedAmount": 12 }, + { "date": "2023-01-02", "amount": 5, "normalisedAmount": 7 }, + { "date": "2024-01-03", "amount": 15, "normalisedAmount": 20 }, + { "date": "2022-06-04", "amount": 10, "normalisedAmount": 12 }, + { "date": "2023-03-05", "amount": 20, "normalisedAmount": 22 } + ], + "rollingAverageData": [ + { "endYear": "2022", "rollingAverage": 8 }, + { "endYear": "2023", "rollingAverage": 9 }, + { "endYear": "2024", "rollingAverage": 10 } + ] + }, + "data": { "name": "stemData" }, + "encoding": { + "x": { + "field": "date", + "type": "temporal", + "timeUnit": "yearmonthdate", + "title": "Date" + } + }, + "layer": [ + { + "mark": { "type": "rule" }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative", + "title": "Amount" + }, + "y2": { "datum": 0 }, + "color": { "datum": "Amount", "legend": { "title": "Series" } } + } + }, + { + "mark": { "type": "rule" }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative" + }, + "y2": { "field": "normalisedAmount" }, + "color": { "datum": "Normalised Amount" } + } + }, + { + "mark": { + "type": "circle", + "size": 100, + "tooltip": { "content": "data" } + }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative" + }, + "color": { "datum": "Amount" } + } + }, + { + "mark": { + "type": "circle", + "size": 100, + "tooltip": { "content": "data" } + }, + "encoding": { + "y": { + "field": "normalisedAmount", + 
"type": "quantitative" + }, + "color": { "datum": "Normalised Amount" } + } + }, + { + "data": { "name": "rollingAverageData" }, + "mark": { + "type": "line", + "strokeWidth": 2, + "tooltip": { "content": "data" } + }, + "encoding": { + "x": { + "field": "endYear", + "type": "temporal", + "timeUnit": "year" + }, + "y": { + "field": "rollingAverage", + "type": "quantitative" + }, + "color": { "datum": "Rolling Average" } + } + } + ], + "config": { + "range": { + "category": ["steelblue", "orange", "green"] + } + } +} diff --git a/vecxt_re/resources/stemPlot.vl.json b/vecxt_re/resources/stemPlot.vl.json new file mode 100644 index 00000000..7fa3b54d --- /dev/null +++ b/vecxt_re/resources/stemPlot.vl.json @@ -0,0 +1,74 @@ +{ + "$schema": "https://vega.github.io/schema/vega-lite/v6.json", + "description": "Stem plot with daily time unit showing amounts", + "title": "Stem Plot", + "width": "container", + "height": "container", + "data": { + "values": [ + { "date": "2023-01-01", "amount": 10, "normalisedAmount": 12 }, + { "date": "2023-01-02", "amount": 5, "normalisedAmount": 7 }, + { "date": "2023-01-03", "amount": 15, "normalisedAmount": 20 }, + { "date": "2023-01-04", "amount": 10, "normalisedAmount": 12 }, + { "date": "2023-01-05", "amount": 20, "normalisedAmount": 22 } + ] + }, + "encoding": { + "x": { + "field": "date", + "type": "temporal", + "timeUnit": "yearmonthdate", + "title": "Date" + } + }, + "layer": [ + { + "mark": { "type": "rule", "color": "steelblue" }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative", + "title": "Amount" + }, + "y2": { "datum": 0 } + } + }, + { + "mark": { "type": "rule", "color": "orange" }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative" + }, + "y2": { "field": "normalisedAmount" } + } + }, + { + "mark": { + "type": "circle", + "size": 100, + "color": "steelblue", + "tooltip": true + }, + "encoding": { + "y": { + "field": "amount", + "type": "quantitative" + } + } + },{ + "mark": { + 
"type": "circle", + "size": 100, + "color": "orange", + "tooltip": "datum" + }, + "encoding": { + "y": { + "field": "normalisedAmount", + "type": "quantitative" + } + } + } + ] +} diff --git a/vecxt_re/resources/timeline.vl.json b/vecxt_re/resources/timeline.vl.json new file mode 100644 index 00000000..61b1736a --- /dev/null +++ b/vecxt_re/resources/timeline.vl.json @@ -0,0 +1,171 @@ +{ + "$schema": "https://vega.github.io/schema/vega/v5.json", + "description": "A timeline visualization of contracts going on and off risk.", + "width": 500, + "height": 80, + "padding": 5, + "data": [ + { + "name": "sections", + "values": [ + { + "SectionID": 382, + "SectionName": "Something", + "RiskInceptionDate": 1333576800000, + "RiskExpiryDate": 1459893600000, + "ScheduledMaturityDate": 1460498400000, + "ExtensionPeriod": null + }, + { + "SectionID": 3, + "SectionName": "else", + "RiskInceptionDate": 1333576800000, + "RiskExpiryDate": 1459893600000, + "ScheduledMaturityDate": 1460498400000, + "ExtensionPeriod": null + } + ], + "format": { + "parse": { + "RiskInceptionDate": "date", + "RiskExpiryDate": "date" + } + }, + "transform": [ + { + "type": "collect", + "sort": { + "field": "RiskInceptionDate" + } + } + ] + } + ], + "scales": [ + { + "name": "yscale", + "type": "band", + "range": [ + 0, + { + "signal": "height" + } + ], + "domain": { + "data": "sections", + "field": "SectionName" + } + }, + { + "name": "xscale2", + "type": "time", + "range": "width", + "domain": { + "fields": [ + { + "data": "sections", + "field": "RiskInceptionDate" + }, + { + "data": "sections", + "field": "RiskExpiryDate" + } + ] + } + }, + { + "name": "xscale", + "type": "time", + "range": "width", + "domain": { + "data": "sections", + "fields": [ + "RiskInceptionDate", + "RiskExpiryDate" + ] + } + }, + { + "name": "color", + "type": "ordinal", + "range": { + "scheme": "tableau20" + }, + "domain": { + "data": "sections", + "field": "cedent" + } + } + ], + "axes": [ + { + "orient": "bottom", + "scale": 
"xscale" + }, + { + "orient": "top", + "scale": "xscale", + "offset": 20 + } + ], + "marks": [ + { + "type": "text", + "from": { + "data": "sections" + }, + "encode": { + "enter": { + "x": { + "scale": "xscale", + "field": "RiskInceptionDate" + }, + "y": { + "scale": "yscale", + "field": "SectionName", + "offset": 17 + }, + "fill": { + "value": "#000" + }, + "text": { + "field": "SectionName" + }, + "fontSize": { + "value": 20 + } + } + } + }, + { + "type": "rect", + "from": { + "data": "sections" + }, + "encode": { + "enter": { + "x": { + "scale": "xscale", + "field": "RiskInceptionDate" + }, + "x2": { + "scale": "xscale", + "field": "RiskExpiryDate" + }, + "y": { + "scale": "yscale", + "field": "SectionName", + "offset": 20 + }, + "height": { + "value": 4 + }, + "fill": { + "scale": "color", + "field": "cedent" + } + } + } + } + ] +} \ No newline at end of file diff --git a/vecxt_re/src-js-native/PlatformReporting.scala b/vecxt_re/src-js-native/PlatformReporting.scala new file mode 100644 index 00000000..9739d87a --- /dev/null +++ b/vecxt_re/src-js-native/PlatformReporting.scala @@ -0,0 +1,98 @@ +package vecxt_re + +/** Platform-specific reporting implementations for JS and Native. + * + * Uses a streaming single-pass algorithm to compute all loss metrics efficiently. + */ +object PlatformReporting: + + /** Computes loss report metrics in a single pass using Welford's online algorithm. 
+ * + * Instead of calling groupSum multiple times (for attachment, exhaustion, std, and EL), this method iterates through + * the grouped sums once, accumulating all intermediate results: + * - Sum for expected loss (EL) + * - Count of attached iterations (groupSum > 0) + * - Count of exhausted iterations (groupSum >= exhaust threshold) + * - Running mean and M2 for Welford's variance algorithm + * + * @param calcd + * Tuple of (layer, cededToLayer array) + * @param numIterations + * Number of iterations/years + * @param years + * Sorted array of 1-based iteration indices + * @param limit + * Report denominator for normalizing results + * @return + * Named tuple with all loss report metrics + */ + inline def lossReportFast( + calcd: (layer: Layer, cededToLayer: Array[Double]), + numIterations: Int, + years: Array[Int], + limit: ReportDenominator + ): (name: String, limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + val reportLimit = limit.fromlayer(calcd.layer) + val exhaust = calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) - 0.01 + val values = calcd.cededToLayer + val l = years.length + + // Accumulators + var totalSum = 0.0 // For expected loss + var attachCount = 0 // Count of iterations with loss > 0 + var exhaustCount = 0 // Count of iterations at exhaustion + // Welford's online algorithm accumulators + var mean = 0.0 + var m2 = 0.0 + var n = 0 // Count for Welford (should equal numIterations at end) + + // Single pass through groups (similar to groupSum but computing all metrics) + var i = 0 + var currentGroup = 1 + while currentGroup <= numIterations do + var groupSum = 0.0 + + // Sum all values in this group + while i < l && years(i) == currentGroup do + groupSum += values(i) + i += 1 + end while + + // Update total sum for EL + totalSum += groupSum + + // Update attachment count (any positive loss) + if groupSum > 0 then attachCount += 1 + end if + + // Update exhaustion count + if groupSum > exhaust then 
exhaustCount += 1 + end if + + // Welford's online algorithm for variance + n += 1 + val delta = groupSum - mean + mean += delta / n + val delta2 = groupSum - mean + m2 += delta * delta2 + + currentGroup += 1 + end while + + // Compute final statistics + val el = totalSum / numIterations + val variance = if n > 0 then m2 / n else 0.0 + val stdDev = Math.sqrt(variance) + val attachProb = attachCount.toDouble / numIterations + val exhaustProb = exhaustCount.toDouble / numIterations + + ( + name = calcd.layer.layerName.getOrElse(s"Layer ${calcd.layer.layerId}"), + limit = reportLimit, + el = el / reportLimit, + stdDev = stdDev / reportLimit, + attachProb = attachProb, + exhaustProb = exhaustProb + ) + end lossReportFast +end PlatformReporting diff --git a/vecxtensions/src-js-native/SplitLosses.scala b/vecxt_re/src-js-native/SplitLosses.scala similarity index 97% rename from vecxtensions/src-js-native/SplitLosses.scala rename to vecxt_re/src-js-native/SplitLosses.scala index d1508a8a..61631fb9 100644 --- a/vecxtensions/src-js-native/SplitLosses.scala +++ b/vecxt_re/src-js-native/SplitLosses.scala @@ -1,4 +1,4 @@ -package vecxt.reinsurance +package vecxt_re import vecxt.BoundsCheck.BoundsCheck @@ -15,7 +15,11 @@ object SplitLosses: */ inline def splitAmntFast(years: Array[Int], losses: Array[Double])(using inline bc: BoundsCheck - ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(Layer, Array[Double])]) = + ): ( + ceded: Array[Double], + retained: Array[Double], + splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])] + ) = inline if bc then assert(years.length == losses.length) end if if losses.isEmpty then (Array.empty[Double], Array.empty[Double], tower.layers.map(_ -> Array.empty[Double])) diff --git a/vecxt_re/src-js-native/plots.scala b/vecxt_re/src-js-native/plots.scala new file mode 100644 index 00000000..971a5222 --- /dev/null +++ b/vecxt_re/src-js-native/plots.scala @@ -0,0 +1,5 @@ +package vecxt_re + +object Plots: + +end Plots 
diff --git a/vecxtensions/src-js/rpt.scala b/vecxt_re/src-js/rpt.scala similarity index 97% rename from vecxtensions/src-js/rpt.scala rename to vecxt_re/src-js/rpt.scala index 26213a55..976f5fc9 100644 --- a/vecxtensions/src-js/rpt.scala +++ b/vecxt_re/src-js/rpt.scala @@ -1,7 +1,7 @@ -package vecxt.reinsurance +package vecxt_re -import vecxt.reinsurance.Limits.Limit -import vecxt.reinsurance.Retentions.Retention +import vecxt_re.Limits.Limit +import vecxt_re.Retentions.Retention /* diff --git a/vecxt_re/src-jvm-native/.keep b/vecxt_re/src-jvm-native/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxt_re/src-jvm/PlatformReporting.scala b/vecxt_re/src-jvm/PlatformReporting.scala new file mode 100644 index 00000000..138bd207 --- /dev/null +++ b/vecxt_re/src-jvm/PlatformReporting.scala @@ -0,0 +1,100 @@ +package vecxt_re + +/** Platform-specific reporting implementations for JVM. + * + * Uses a streaming single-pass algorithm to compute all loss metrics efficiently. The algorithm processes groups + * inline (avoiding the allocation of an intermediate array) and uses Welford's online algorithm for numerically stable + * variance computation. + */ +object PlatformReporting: + + /** Computes loss report metrics in a single pass using Welford's online algorithm. 
+ * + * Instead of calling groupSum multiple times (for attachment, exhaustion, std, and EL), this method iterates through + * the grouped sums once, accumulating all intermediate results: + * - Sum for expected loss (EL) + * - Count of attached iterations (groupSum > 0) + * - Count of exhausted iterations (groupSum >= exhaust threshold) + * - Running mean and M2 for Welford's variance algorithm + * + * @param calcd + * Tuple of (layer, cededToLayer array) + * @param numIterations + * Number of iterations/years + * @param years + * Sorted array of 1-based iteration indices + * @param limit + * Report denominator for normalizing results + * @return + * Named tuple with all loss report metrics + */ + inline def lossReportFast( + calcd: (layer: Layer, cededToLayer: Array[Double]), + numIterations: Int, + years: Array[Int], + limit: ReportDenominator + ): (name: String, limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + val reportLimit = limit.fromlayer(calcd.layer) + val exhaust = calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) - 0.01 + val values = calcd.cededToLayer + val l = years.length + + // Accumulators + var totalSum = 0.0 // For expected loss + var attachCount = 0 // Count of iterations with loss > 0 + var exhaustCount = 0 // Count of iterations at exhaustion + // Welford's online algorithm accumulators + var mean = 0.0 + var m2 = 0.0 + var n = 0 // Count for Welford (should equal numIterations at end) + + // Single pass through groups (similar to groupSum but computing all metrics inline) + var i = 0 + var currentGroup = 1 + while currentGroup <= numIterations do + var groupSum = 0.0 + + // Sum all values in this group - use scalar loop since group sizes are typically small + while i < l && years(i) == currentGroup do + groupSum += values(i) + i += 1 + end while + + // Update total sum for EL + totalSum += groupSum + + // Update attachment count (any positive loss) + if groupSum > 0 then attachCount += 1 + end if 
+ + // Update exhaustion count + if groupSum > exhaust then exhaustCount += 1 + end if + + // Welford's online algorithm for variance + n += 1 + val delta = groupSum - mean + mean += delta / n + val delta2 = groupSum - mean + m2 += delta * delta2 + + currentGroup += 1 + end while + + // Compute final statistics + val el = totalSum / numIterations + val variance = if n > 0 then m2 / n else 0.0 + val stdDev = Math.sqrt(variance) + val attachProb = attachCount.toDouble / numIterations + val exhaustProb = exhaustCount.toDouble / numIterations + + ( + name = calcd.layer.layerName.getOrElse(calcd.layer.autoName), + limit = reportLimit, + el = el / reportLimit, + stdDev = stdDev / reportLimit, + attachProb = attachProb, + exhaustProb = exhaustProb + ) + end lossReportFast +end PlatformReporting diff --git a/vecxtensions/src-jvm/SplitLosses.scala b/vecxt_re/src-jvm/SplitLosses.scala similarity index 97% rename from vecxtensions/src-jvm/SplitLosses.scala rename to vecxt_re/src-jvm/SplitLosses.scala index 9182619f..a194e5bb 100644 --- a/vecxtensions/src-jvm/SplitLosses.scala +++ b/vecxt_re/src-jvm/SplitLosses.scala @@ -1,10 +1,12 @@ -package vecxt.reinsurance +package vecxt_re import java.util.concurrent.Executors -import jdk.incubator.vector.{DoubleVector, VectorOperators, VectorSpecies} import vecxt.BoundsCheck.BoundsCheck -import scala.annotation.nowarn + +import jdk.incubator.vector.DoubleVector +import jdk.incubator.vector.VectorOperators +import jdk.incubator.vector.VectorSpecies object SplitLosses: extension (tower: Tower) @@ -19,7 +21,11 @@ object SplitLosses: */ inline def splitAmntFast(years: Array[Int], losses: Array[Double])(using inline bc: BoundsCheck - ): (ceded: Array[Double], retained: Array[Double], splits: IndexedSeq[(Layer, Array[Double])]) = + ): ( + ceded: Array[Double], + retained: Array[Double], + splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])] + ) = inline if bc then assert(years.length == losses.length) end if if losses.isEmpty then 
(Array.empty[Double], Array.empty[Double], tower.layers.map(_ -> Array.empty[Double])) diff --git a/vecxt_re/src-jvm/TrendAnalysis.scala b/vecxt_re/src-jvm/TrendAnalysis.scala new file mode 100644 index 00000000..72626bee --- /dev/null +++ b/vecxt_re/src-jvm/TrendAnalysis.scala @@ -0,0 +1,449 @@ +package vecxt_re + +import org.apache.commons.math3.special.Gamma.logGamma + +/** Result of fitting a GLM trend model: log(μ) = β₀ + β₁·year + * + * Contains coefficient estimates, standard errors, test statistics, and goodness-of-fit measures. + * + * @param nObs + * Number of observations + * @param dfResidual + * Residual degrees of freedom (n - 2) + * @param intercept + * Estimated intercept (β₀) + * @param slope + * Estimated year coefficient (β₁) + * @param seIntercept + * Standard error of intercept + * @param seSlope + * Standard error of slope + * @param zIntercept + * z-statistic for intercept (β₀ / SE(β₀)) + * @param zSlope + * z-statistic for slope (β₁ / SE(β₁)) + * @param pValueIntercept + * Two-tailed p-value for intercept (H₀: β₀ = 0) + * @param pValueSlope + * Two-tailed p-value for slope (H₀: β₁ = 0) - this tests for significant trend + * @param nullDeviance + * Deviance of intercept-only model + * @param residualDeviance + * Deviance of full model + * @param dispersion + * Estimated dispersion parameter (1.0 for Poisson, estimated for NegBin) + * @param fStatistic + * F-statistic for model vs intercept-only (using dispersion) + * @param fPValue + * p-value for F-statistic + * @param aic + * Akaike Information Criterion + * @param logLikelihood + * Log-likelihood of the fitted model + */ +case class TrendFitResult( + nObs: Int, + dfResidual: Int, + intercept: Double, + slope: Double, + seIntercept: Double, + seSlope: Double, + zIntercept: Double, + zSlope: Double, + pValueIntercept: Double, + pValueSlope: Double, + nullDeviance: Double, + residualDeviance: Double, + dispersion: Double, + fStatistic: Double, + fPValue: Double, + aic: Double, + 
logLikelihood: Double +): + + /** Test whether there is a statistically significant trend at the given alpha level */ + def hasSignificantTrend(alpha: Double = 0.05): Boolean = pValueSlope < alpha + + /** Nicely formatted summary string, similar to R's glm summary output */ + def summary: String = + val sb = new StringBuilder + sb.append("Generalized Linear Model: log(Count) ~ 1 + Year\n") + sb.append("=" * 60 + "\n\n") + + sb.append("Coefficients:\n") + sb.append(f"${""}%-15s ${"Estimate"}%12s ${"Std. Error"}%12s ${"z value"}%10s ${"Pr(>|z|)"}%12s\n") + sb.append("-" * 60 + "\n") + sb.append( + f"(Intercept)${" "}%-4s $intercept%12.5f $seIntercept%12.5f $zIntercept%10.3f $pValueIntercept%12.6f${significanceCode(pValueIntercept)}%s\n" + ) + sb.append( + f"Year${" "}%-11s $slope%12.7f $seSlope%12.7f $zSlope%10.3f $pValueSlope%12.6f${significanceCode(pValueSlope)}%s\n" + ) + sb.append("-" * 60 + "\n") + sb.append("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n\n") + + sb.append(f"$nObs observations, $dfResidual residual degrees of freedom\n") + sb.append(f"Estimated Dispersion: $dispersion%.3f\n") + sb.append(f"Null Deviance: $nullDeviance%.3f\n") + sb.append(f"Residual Deviance: $residualDeviance%.3f\n") + sb.append(f"AIC: $aic%.3f\n") + sb.append(f"Log-Likelihood: $logLikelihood%.3f\n\n") + + sb.append(f"F-statistic vs. constant model: $fStatistic%.3f, p-value = $fPValue%.6f\n") + + sb.toString + end summary + + private def significanceCode(p: Double): String = + if p < 0.001 then " ***" + else if p < 0.01 then " **" + else if p < 0.05 then " *" + else if p < 0.1 then " ." 
+ else "" +end TrendFitResult + +object TrendAnalysis: + private val normDist = org.apache.commons.math3.distribution.NormalDistribution(0.0, 1.0) + + /** Two-tailed p-value from z-statistic using normal approximation */ + private inline def pValueFromZ(z: Double): Double = + if z.isNaN || z.isInfinite then Double.NaN + else 2.0 * (1.0 - normalCdf(math.abs(z))) + + /** Standard normal CDF using Apache Commons Math */ + private inline def normalCdf(x: Double): Double = + normDist.cumulativeProbability(x) + + /** F-distribution p-value: P(F > f) for right-tailed test */ + private inline def fDistPValue(f: Double, df1: Int, df2: Int): Double = + if f <= 0 || df1 <= 0 || df2 <= 0 then 1.0 + else + val fDist = new org.apache.commons.math3.distribution.FDistribution(df1.toDouble, df2.toDouble) + 1.0 - fDist.cumulativeProbability(f) + + /** Regularized incomplete beta function using Apache Commons Math */ + private inline def incompleteBeta(a: Double, b: Double, x: Double): Double = + org.apache.commons.math3.special.Beta.regularizedBeta(x, a, b) + + extension (p: Poisson) + /** Fit a Poisson GLM trend model: log(μ) = β₀ + β₁·year + * + * Uses IRLS to fit the model and computes test statistics for assessing whether there is a statistically + * significant trend over time. 
+ * + * @param years + * the year for each observation + * @param counts + * the count for each observation (same length as years) + * @return + * TrendFitResult containing coefficients, standard errors, p-values, and goodness-of-fit statistics + */ + def fitTrend(years: IndexedSeq[Int], counts: IndexedSeq[Int]): TrendFitResult = + require(years.length == counts.length, "years and counts must have the same length") + require(years.length >= 3, "need at least 3 observations to fit a trend") + + val n = years.length + val yearsD = years.map(_.toDouble) + val countsD = counts.map(_.toDouble) + + // Fit full model: log(μ) = β₀ + β₁·year via IRLS + val meanY = countsD.sum / n + var beta0 = math.log(math.max(meanY, 0.1)) + var beta1 = 0.0 + + for _ <- 0 until 25 do + val mu = yearsD.map(y => math.exp(beta0 + beta1 * y)) + val z = (0 until n).map { i => + val eta = beta0 + beta1 * yearsD(i) + eta + (countsD(i) - mu(i)) / math.max(mu(i), 1e-10) + } + val w = mu.map(m => math.max(m, 1e-10)) + + var xtwx00, xtwx01, xtwx11 = 0.0 + var xtwz0, xtwz1 = 0.0 + var i = 0 + while i < n do + val wi = w(i) + val yi = yearsD(i) + val zi = z(i) + xtwx00 += wi + xtwx01 += wi * yi + xtwx11 += wi * yi * yi + xtwz0 += wi * zi + xtwz1 += wi * yi * zi + i += 1 + end while + + val det = xtwx00 * xtwx11 - xtwx01 * xtwx01 + if math.abs(det) > 1e-15 then + beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det + beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + end if + end for + + // Fit null model: log(μ) = β₀ only + val nullBeta0 = math.log(meanY) + val muNull = Array.fill(n)(meanY) + + // Compute deviances + // Poisson deviance: 2 * Σ[yᵢ·log(yᵢ/μᵢ) - (yᵢ - μᵢ)] + def poissonDeviance(observed: IndexedSeq[Double], fitted: IndexedSeq[Double]): Double = + var dev = 0.0 + var i = 0 + while i < n do + val y = observed(i) + val mu = fitted(i) + if y > 0 then dev += y * math.log(y / mu) + end if + dev -= (y - mu) + i += 1 + end while + 2.0 * dev + end poissonDeviance + + val muFull = yearsD.map(y => 
math.exp(beta0 + beta1 * y)) + val nullDeviance = poissonDeviance(countsD, muNull.toIndexedSeq) + val residualDeviance = poissonDeviance(countsD, muFull) + + // Fisher information and standard errors + var i00, i01, i11 = 0.0 + var j = 0 + while j < n do + val mi = muFull(j) + val yi = yearsD(j) + i00 += mi + i01 += mi * yi + i11 += mi * yi * yi + j += 1 + end while + + val detI = i00 * i11 - i01 * i01 + val seBeta0 = if detI > 1e-15 then math.sqrt(i11 / detI) else Double.NaN + val seBeta1 = if detI > 1e-15 then math.sqrt(i00 / detI) else Double.NaN + + // z-statistics and p-values + val zBeta0 = beta0 / seBeta0 + val zBeta1 = beta1 / seBeta1 + val pBeta0 = pValueFromZ(zBeta0) + val pBeta1 = pValueFromZ(zBeta1) + + // For Poisson, dispersion = 1 by assumption + val dispersion = 1.0 + + // Pearson dispersion estimate (for diagnostics) + var pearsonChi2 = 0.0 + var k = 0 + while k < n do + val y = countsD(k) + val mu = muFull(k) + pearsonChi2 += (y - mu) * (y - mu) / math.max(mu, 1e-10) + k += 1 + end while + val estimatedDispersion = pearsonChi2 / (n - 2) + + // F-statistic: (null deviance - residual deviance) / dispersion + val fStat = (nullDeviance - residualDeviance) / dispersion + val fPVal = fDistPValue(fStat, 1, n - 2) + + // Log-likelihood + def poissonLogLik(observed: IndexedSeq[Double], fitted: IndexedSeq[Double]): Double = + var ll = 0.0 + var i = 0 + while i < n do + val y = observed(i).toInt + val mu = fitted(i) + ll += y * math.log(mu) - mu - logGamma(y + 1) + i += 1 + end while + ll + end poissonLogLik + + val logLik = poissonLogLik(countsD, muFull) + val aic = -2 * logLik + 2 * 2 // 2 parameters + + TrendFitResult( + nObs = n, + dfResidual = n - 2, + intercept = beta0, + slope = beta1, + seIntercept = seBeta0, + seSlope = seBeta1, + zIntercept = zBeta0, + zSlope = zBeta1, + pValueIntercept = pBeta0, + pValueSlope = pBeta1, + nullDeviance = nullDeviance, + residualDeviance = residualDeviance, + dispersion = estimatedDispersion, + fStatistic = fStat, + 
fPValue = fPVal, + aic = aic, + logLikelihood = logLik + ) + end extension + + extension (nb: NegativeBinomial) + /** Fit a Negative Binomial GLM trend model: log(μ) = β₀ + β₁·year + * + * Uses IRLS with the NB2 variance function (Var = μ + μ²/θ where θ = a). This accounts for overdispersion in count + * data. + * + * @param years + * the year for each observation + * @param counts + * the count for each observation (same length as years) + * @return + * TrendFitResult containing coefficients, standard errors, p-values, and goodness-of-fit statistics + */ + def fitTrend(years: IndexedSeq[Int], counts: IndexedSeq[Int]): TrendFitResult = + require(years.length == counts.length, "years and counts must have the same length") + require(years.length >= 3, "need at least 3 observations to fit a trend") + + val n = years.length + val yearsD = years.map(_.toDouble) + val countsD = counts.map(_.toDouble) + val theta = nb.a // overdispersion parameter + + // Fit full model via IRLS with NB variance function + val meanY = countsD.sum / n + var beta0 = math.log(math.max(meanY, 0.1)) + var beta1 = 0.0 + + for _ <- 0 until 25 do + val mu = yearsD.map(y => math.exp(beta0 + beta1 * y)) + + // NB2 variance: Var = μ + μ²/θ, so weight = μ / (1 + μ/θ) + val w = mu.map { m => + val v = m + m * m / theta + math.max(m * m / v, 1e-10) + } + + val z = (0 until n).map { i => + val eta = beta0 + beta1 * yearsD(i) + eta + (countsD(i) - mu(i)) / math.max(mu(i), 1e-10) + } + + var xtwx00, xtwx01, xtwx11 = 0.0 + var xtwz0, xtwz1 = 0.0 + var i = 0 + while i < n do + val wi = w(i) + val yi = yearsD(i) + val zi = z(i) + xtwx00 += wi + xtwx01 += wi * yi + xtwx11 += wi * yi * yi + xtwz0 += wi * zi + xtwz1 += wi * yi * zi + i += 1 + end while + + val det = xtwx00 * xtwx11 - xtwx01 * xtwx01 + if math.abs(det) > 1e-15 then + beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det + beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + end if + end for + + // Null model + val nullBeta0 = math.log(meanY) + + // 
Negative binomial deviance: 2 * Σ[yᵢ·log(yᵢ/μᵢ) - (yᵢ + θ)·log((yᵢ + θ)/(μᵢ + θ))] + def nbDeviance(observed: IndexedSeq[Double], fitted: IndexedSeq[Double]): Double = + var dev = 0.0 + var i = 0 + while i < n do + val y = observed(i) + val mu = fitted(i) + if y > 0 then dev += y * math.log(y / mu) + end if + dev -= (y + theta) * math.log((y + theta) / (mu + theta)) + i += 1 + end while + 2.0 * dev + end nbDeviance + + val muFull = yearsD.map(y => math.exp(beta0 + beta1 * y)) + val muNull = IndexedSeq.fill(n)(meanY) + val nullDeviance = nbDeviance(countsD, muNull) + val residualDeviance = nbDeviance(countsD, muFull) + + // Fisher information with NB variance + var i00, i01, i11 = 0.0 + var j = 0 + while j < n do + val mi = muFull(j) + val yi = yearsD(j) + val wi = mi * mi / (mi + mi * mi / theta) + i00 += wi + i01 += wi * yi + i11 += wi * yi * yi + j += 1 + end while + + val detI = i00 * i11 - i01 * i01 + val seBeta0 = if detI > 1e-15 then math.sqrt(i11 / detI) else Double.NaN + val seBeta1 = if detI > 1e-15 then math.sqrt(i00 / detI) else Double.NaN + + val zBeta0 = beta0 / seBeta0 + val zBeta1 = beta1 / seBeta1 + val pBeta0 = pValueFromZ(zBeta0) + val pBeta1 = pValueFromZ(zBeta1) + + // Estimated dispersion (Pearson) + var pearsonChi2 = 0.0 + var k = 0 + while k < n do + val y = countsD(k) + val mu = muFull(k) + val v = mu + mu * mu / theta + pearsonChi2 += (y - mu) * (y - mu) / v + k += 1 + end while + val dispersion = pearsonChi2 / (n - 2) + + // F-statistic + val fStat = (nullDeviance - residualDeviance) / dispersion + val fPVal = fDistPValue(fStat, 1, n - 2) + + // NB log-likelihood + def nbLogLik(observed: IndexedSeq[Double], fitted: IndexedSeq[Double]): Double = + var ll = 0.0 + var i = 0 + while i < n do + val y = observed(i).toInt + val mu = fitted(i) + // log P(Y=y) = log Γ(y+θ) - log Γ(θ) - log(y!) 
+ θ·log(θ/(θ+μ)) + y·log(μ/(θ+μ)) + ll += logGamma(y + theta) - logGamma(theta) - logGamma(y + 1) + ll += theta * math.log(theta / (theta + mu)) + ll += y * math.log(mu / (theta + mu)) + i += 1 + end while + ll + end nbLogLik + + val logLik = nbLogLik(countsD, muFull) + val aic = -2 * logLik + 2 * 2 // 2 parameters (not counting θ as estimated here) + + TrendFitResult( + nObs = n, + dfResidual = n - 2, + intercept = beta0, + slope = beta1, + seIntercept = seBeta0, + seSlope = seBeta1, + zIntercept = zBeta0, + zSlope = zBeta1, + pValueIntercept = pBeta0, + pValueSlope = pBeta1, + nullDeviance = nullDeviance, + residualDeviance = residualDeviance, + dispersion = dispersion, + fStatistic = fStat, + fPValue = fPVal, + aic = aic, + logLikelihood = logLik + ) + end extension + +end TrendAnalysis diff --git a/vecxt_re/src-jvm/all.scala b/vecxt_re/src-jvm/all.scala new file mode 100644 index 00000000..0bd4a108 --- /dev/null +++ b/vecxt_re/src-jvm/all.scala @@ -0,0 +1,24 @@ +package vecxt_re + +object all: + export vecxt_re.Scenario + export vecxt_re.Scenarr + export vecxt_re.Tower + export vecxt_re.Plots.* + export vecxt_re.TrendAnalysis.* + export vecxt_re.TrendFitResult + export vecxt_re.SplitLosses.* + export vecxt_re.SplitScenario.* + export vecxt_re.DeductibleType.* + export vecxt_re.ReReporting.* + export vecxt_re.IndexPerPeriod + export vecxt_re.CalendarYearIndex + export vecxt_re.NegativeBinomial + export vecxt_re.Poisson + export vecxt_re.Empirical + export vecxt_re.Pareto + export vecxt_re.Mixed + export vecxt_re.HillEstimator + export vecxt_re.PickandsEstimator + +end all diff --git a/vecxt_re/src-jvm/dist/Dist.scala b/vecxt_re/src-jvm/dist/Dist.scala new file mode 100644 index 00000000..fa26382b --- /dev/null +++ b/vecxt_re/src-jvm/dist/Dist.scala @@ -0,0 +1,68 @@ +package vecxt_re + +trait Density[T]: + + /** Returns the unnormalized value of the measure */ + def apply(x: T): Double + + /** Returns the log unnormalized value of the measure */ + def logApply(x: 
T): Double = math.log(apply(x)) +end Density + +/** Represents a continuous Distribution. + */ +trait ContinuousDistr[T] extends Density[T] with Rand[T]: + + /** Returns the probability density function at that point. */ + def pdf(x: T): Double = math.exp(logPdf(x)) + def logPdf(x: T): Double = unnormalizedLogPdf(x) - logNormalizer + + /** Returns the probability density function up to a constant at that point. */ + def unnormalizedPdf(x: T): Double = math.exp(unnormalizedLogPdf(x)) + + def unnormalizedLogPdf(x: T): Double + def logNormalizer: Double + + // 1/Z where Z = exp(logNormalizer) + lazy val normalizer: Double = math.exp(-logNormalizer) + + def apply(x: T) = unnormalizedPdf(x) + override def logApply(x: T) = unnormalizedLogPdf(x) +end ContinuousDistr + +trait HasCdf[T]: + def probability(x: T, y: T): Double // Probability that P(x < X <= y) + def cdf(x: T): Double + + // experimental plotting support + def plot(using viz.LowPriorityPlotTarget): viz.VizReturn + def plotCdf(using viz.LowPriorityPlotTarget): viz.VizReturn +end HasCdf + +trait HasInverseCdf: + def inverseCdf(p: Double): Double // Compute the quantile of p +end HasInverseCdf + +/** Represents a discrete Distribution + */ +trait DiscreteDistr[T] extends Density[T] with Rand[T]: + + /** Returns the probability of that draw. 
*/ + def probabilityOf(x: T): Double + def logProbabilityOf(x: T): Double = math.log(probabilityOf(x)) + + /** Returns the probability of that draw up to a constant */ + def unnormalizedProbabilityOf(x: T): Double = probabilityOf(x) + def unnormalizedLogProbabilityOf(x: T): Double = math.log(unnormalizedProbabilityOf(x)) + + def apply(x: T) = unnormalizedProbabilityOf(x) + override def logApply(x: T) = unnormalizedLogProbabilityOf(x) +end DiscreteDistr + +trait HasMean[T]: + def mean: T +end HasMean + +trait HasVariance[T]: + def variance: T +end HasVariance diff --git a/vecxt_re/src-jvm/dist/Empirical.scala b/vecxt_re/src-jvm/dist/Empirical.scala new file mode 100644 index 00000000..bd59d3ef --- /dev/null +++ b/vecxt_re/src-jvm/dist/Empirical.scala @@ -0,0 +1,198 @@ +package vecxt_re + +import org.apache.commons.rng.simple.RandomSource + +import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} + +/** Empirical distribution (JVM only). + * + * This is a nonparametric distribution built directly from observed samples. It supports positive weights $w_i$. + * + * The distribution is represented as a discrete measure on the (possibly repeated) sample values: $$\mathbb{P}(X = x) = + * \sum_{i: x_i = x} \frac{w_i}{\sum_k w_k}.$$ + * + * Consequently, the CDF is a right-continuous step function $$F(t) = \mathbb{P}(X \le t) = \sum_{x \le t} + * \mathbb{P}(X=x).$$ + * + * Sampling is performed by inverse-transform sampling on the cumulative weights. + * + * Note: Since this is an empirical (atomic) distribution, a density/PDF in the usual continuous sense is not defined. + * The `plot` method instead displays a (weighted) histogram density estimate. 
+ */ +case class Empirical(values: IArray[Double], weights: IArray[Double]) + extends DiscreteDistr[Double] + with HasMean[Double] + with HasVariance[Double] + with HasCdf[Double] + with HasInverseCdf: + + require(values.nonEmpty, "values must not be empty") + require(values.forall(_.isFinite), "all values must be finite") + require(weights.length == values.length, "weights must be the same length as values") + require(weights.forall(w => w > 0 && w.isFinite), "weights must be positive and finite") + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + + private val n = values.length + + private val pairs: Array[(Double, Double)] = + val out = new Array[(Double, Double)](n) + var j = 0 + while j < n do + out(j) = (values(j), weights(j)) + j += 1 + end while + out + end pairs + + scala.util.Sorting.stableSort(pairs, (a: (Double, Double), b: (Double, Double)) => a._1 < b._1) + + // Compress duplicates so we have unique support points. + private val xsBuf = scala.collection.mutable.ArrayBuffer.empty[Double] + private val wBuf = scala.collection.mutable.ArrayBuffer.empty[Double] + + private var totalWeight = 0.0 + private var i = 0 + while i < pairs.length do + val x = pairs(i)._1 + var wSum = 0.0 + while i < pairs.length && pairs(i)._1 == x do + val w = pairs(i)._2 + wSum += w + totalWeight += w + i += 1 + end while + xsBuf += x + wBuf += wSum + end while + + private val xs: Array[Double] = xsBuf.toArray + private val probs: Array[Double] = wBuf.toArray.map(_ / totalWeight) + + private val cdfVals: Array[Double] = + val out = new Array[Double](probs.length) + var acc = 0.0 + var j = 0 + while j < probs.length do + acc += probs(j) + out(j) = acc + j += 1 + end while + out + end cdfVals + + private val meanVal: Double = + var s = 0.0 + var j = 0 + while j < xs.length do + s += xs(j) * probs(j) + j += 1 + end while + s + end meanVal + + private val varVal: Double = + var s2 = 0.0 + var j = 0 + while j < xs.length do + val d = xs(j) - meanVal + s2 += probs(j) * 
d * d + j += 1 + end while + s2 + end varVal + + def mean: Double = meanVal + + def variance: Double = varVal + + /** Probability mass at exactly `x` (sums weights for duplicates). */ + def probabilityOf(x: Double): Double = + val idx = java.util.Arrays.binarySearch(xs, x) + if idx >= 0 then probs(idx) else 0.0 + end if + end probabilityOf + + /** Draw a sample using inverse CDF sampling over the atomic masses. */ + def draw: Double = + val u = rng.nextDouble() + inverseCdf(u) + end draw + + /** CDF $F(t)=P(X\le t)$ (right-continuous). */ + def cdf(x: Double): Double = + if x < xs(0) then 0.0 + else if x >= xs(xs.length - 1) then 1.0 + else + // Find the last index with xs(idx) <= x + val ip = java.util.Arrays.binarySearch(xs, x) + val idx = if ip >= 0 then ip else -ip - 2 + cdfVals(idx) + + /** Probability that $x < X \le y$. */ + def probability(x: Double, y: Double): Double = + if y <= x then 0.0 + else cdf(y) - cdf(x) + + /** Inverse CDF (quantile function): returns the smallest `x` with `F(x) >= p`. */ + def inverseCdf(p: Double): Double = + require(p >= 0.0 && p <= 1.0, "p must be in [0,1]") + if p <= 0.0 then xs(0) + else + val ip = java.util.Arrays.binarySearch(cdfVals, p) + val idx = if ip >= 0 then ip else -ip - 1 + xs(math.min(idx, xs.length - 1)) + end if + end inverseCdf + + /** Plot a (weighted) histogram density estimate. */ + def plot(using viz.LowPriorityPlotTarget) = + val plot = VegaPlot.fromResource("empiricalPdf.vl.json") + val data = (0 until n).map(i => (x = values(i), w = weights(i))) + plot.plot( + _.data.values := data.asJson, + _ += (title = s"Empirical Distribution (n=$n)").asJson + ) + end plot + + /** Plot the empirical CDF (step function). */ + def plotCdf(using viz.LowPriorityPlotTarget) = + val plot = VegaPlot.fromResource("empiricalCdf.vl.json") + + // Add an initial point at (min, 0) so the step is visible from the left. 
+ val points = + val pts = scala.collection.mutable.ArrayBuffer.empty[(x: Double, cdf: Double)] + pts += ((x = xs(0), cdf = 0.0)) + var j = 0 + while j < xs.length do + pts += ((x = xs(j), cdf = cdfVals(j))) + j += 1 + end while + pts.toVector + end points + + plot.plot( + _.data.values := points.asJson, + _ += (title = s"Empirical CDF (n=$n)").asJson + ) + end plotCdf + +end Empirical + +object Empirical: + /** Construct an unweighted empirical distribution (all weights equal to $1$). + * + * Note: We intentionally avoid `apply` overloads here because `IArray[Double]` erases to `Array[Double]` on the JVM, + * which can create signature collisions with the case class companion methods. + */ + inline def equalWeights(values: Array[Double]): Empirical = + Empirical( + IArray.from(values), + IArray.from(Array.fill(values.length)(1.0)) + ) + + /** Construct a weighted empirical distribution from arrays. */ + inline def weighted(values: Array[Double], weights: Array[Double]): Empirical = + Empirical(IArray.from(values), IArray.from(weights)) +end Empirical diff --git a/vecxt_re/src-jvm/dist/Mixed.scala b/vecxt_re/src-jvm/dist/Mixed.scala new file mode 100644 index 00000000..6f539915 --- /dev/null +++ b/vecxt_re/src-jvm/dist/Mixed.scala @@ -0,0 +1,284 @@ +package vecxt_re + +import org.apache.commons.rng.simple.RandomSource + +import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} + +/** Mixed distribution: Empirical body with Pareto tail. + * + * This distribution combines an empirical distribution for the body (values below the mixing point) with a Pareto + * distribution for the tail (values at or above the mixing point). + * + * The distribution is parameterized by: + * - An empirical distribution of observed values + * - A mixing point $m$ (threshold between body and tail) + * - A Pareto shape parameter $\alpha$ (for the tail) + * + * The CDF is continuous at the mixing point. 
Let $p_m = F_{\text{emp}}(m^-)$ be the empirical CDF just below the + * mixing point. Then: + * - For $x < m$: $F(x) = F_{\text{emp}}(x)$ + * - For $x \ge m$: $F(x) = p_m + (1 - p_m) \cdot F_{\text{Pareto}}(x)$ + * + * where the Pareto distribution has scale = $m$ and shape = $\alpha$. + * + * @param empirical + * The empirical distribution for the body + * @param mixingPoint + * The threshold where we switch from empirical to Pareto tail + * @param paretoShape + * The shape parameter (α) for the Pareto tail + */ +case class Mixed(empirical: Empirical, mixingPoint: Double, paretoShape: Double) + extends ContinuousDistr[Double] + with HasMean[Double] + with HasVariance[Double] + with HasCdf[Double] + with HasInverseCdf: + + require(mixingPoint > 0, "mixing point must be positive") + require(paretoShape > 0, "Pareto shape must be positive") + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + + // The Pareto tail with scale = mixing point + private val paretoTail = Pareto(mixingPoint, paretoShape) + + // Probability mass in the empirical body (CDF at the mixing point) + // We want P(X < mixingPoint) from the empirical, which is the sum of all mass points strictly below mixingPoint + private val bodyWeight: Double = + // The empirical CDF is right-continuous, so cdf(m) includes P(X <= m). + // We want mass strictly below m for the body, and P(X >= m) goes to the tail. + // However, for simplicity and continuity, we use cdf(m-epsilon) conceptually. + // In practice, we compute the probability of all empirical points strictly below the mixing point. 
+ var w = 0.0 + val vals = empirical.values + val weights = empirical.weights + var totalW = 0.0 + var i = 0 + while i < vals.length do + totalW += weights(i) + if vals(i) < mixingPoint then w += weights(i) + end if + i += 1 + end while + w / totalW + end bodyWeight + + // Tail weight is the complement + private val tailWeight: Double = 1.0 - bodyWeight + + /** Draw a random sample from the mixed distribution */ + def draw: Double = + val u = rng.nextDouble() + inverseCdf(u) + end draw + + /** Unnormalized log PDF. + * + * For the body (empirical), this is technically undefined in the continuous sense since the empirical distribution + * is discrete. We return the log of the weighted probability mass if x exactly matches an empirical point, otherwise + * negative infinity. + * + * For the tail (Pareto), we return the properly weighted log PDF. + */ + def unnormalizedLogPdf(x: Double): Double = + if x < mixingPoint then + // Discrete mass in the body + val prob = empirical.probabilityOf(x) + if prob > 0 then math.log(prob) else Double.NegativeInfinity + end if + else + // Continuous Pareto tail, scaled by tail weight + if tailWeight > 0 then paretoTail.unnormalizedLogPdf(x) + math.log(tailWeight) + else Double.NegativeInfinity + end if + end unnormalizedLogPdf + + /** Log normalizer (distribution is already normalized) */ + def logNormalizer: Double = 0.0 + + /** Probability that x < X <= y */ + def probability(x: Double, y: Double): Double = + if y <= x then 0.0 + else cdf(y) - cdf(x) + + /** Cumulative distribution function. 
+ * + * For x < mixingPoint: F(x) = bodyWeight * (empirical CDF normalized to body) For x >= mixingPoint: F(x) = + * bodyWeight + tailWeight * F_Pareto(x) + */ + def cdf(x: Double): Double = + if x < mixingPoint then + // Use empirical CDF, but only count points below mixing point + // The CDF here is P(X <= x) for X in the body region, scaled by bodyWeight + val empCdfAtX = empirical.cdf(x) + // Scale: empirical CDF goes up to 1, but we only want it to contribute bodyWeight + math.min(empCdfAtX, bodyWeight) // Cap at bodyWeight since empirical points >= mixingPoint don't count + else + // In the tail region + bodyWeight + tailWeight * paretoTail.cdf(x) + end if + end cdf + + /** Inverse CDF (quantile function) */ + def inverseCdf(p: Double): Double = + require(p >= 0.0 && p <= 1.0, "p must be in [0,1]") + if p <= 0.0 then + // Return minimum of empirical or mixing point + empirical.inverseCdf(0.0) + else if p <= bodyWeight then + // In the body region - use empirical inverse CDF + // Scale p to [0, 1] within the body + val scaledP = p / bodyWeight + val q = empirical.inverseCdf(math.min(scaledP, 1.0)) + // Ensure we don't exceed mixing point + math.min(q, mixingPoint - Double.MinPositiveValue) + else + // In the tail region - use Pareto inverse CDF + val tailP = (p - bodyWeight) / tailWeight + paretoTail.inverseCdf(tailP) + end if + end inverseCdf + + /** Mean of the mixed distribution. + * + * E[X] = bodyWeight * E[X | X < m] + tailWeight * E[X_Pareto] + * + * Note: For Pareto, mean is only defined when shape > 1. 
+ */ + def mean: Double = + // Compute conditional mean of empirical given X < mixingPoint + var empMean = 0.0 + var empWeight = 0.0 + val vals = empirical.values + val weights = empirical.weights + var totalW = 0.0 + var i = 0 + while i < vals.length do + totalW += weights(i) + i += 1 + end while + i = 0 + while i < vals.length do + if vals(i) < mixingPoint then + val w = weights(i) / totalW + empMean += vals(i) * w + empWeight += w + end if + i += 1 + end while + val condEmpMean = if empWeight > 0 then empMean / empWeight else 0.0 + + bodyWeight * condEmpMean + tailWeight * paretoTail.mean + end mean + + /** Variance of the mixed distribution. + * + * Uses the law of total variance. + * + * Note: For Pareto, variance is only defined when shape > 2. + */ + def variance: Double = + val m = mean + // Compute E[X^2] for the body + var empSecondMoment = 0.0 + var empWeight = 0.0 + val vals = empirical.values + val weights = empirical.weights + var totalW = 0.0 + var i = 0 + while i < vals.length do + totalW += weights(i) + i += 1 + end while + i = 0 + while i < vals.length do + if vals(i) < mixingPoint then + val w = weights(i) / totalW + empSecondMoment += vals(i) * vals(i) * w + empWeight += w + end if + i += 1 + end while + val condEmpSecondMoment = if empWeight > 0 then empSecondMoment / empWeight else 0.0 + + // E[X^2] for Pareto + val paretoSecondMoment = paretoTail.variance + paretoTail.mean * paretoTail.mean + + // Total E[X^2] + val totalSecondMoment = bodyWeight * condEmpSecondMoment + tailWeight * paretoSecondMoment + + // Var(X) = E[X^2] - E[X]^2 + totalSecondMoment - m * m + end variance + + /** Plot the mixed distribution PDF/histogram. 
*/ + def plot(using viz.LowPriorityPlotTarget) = + val plot = VegaPlot.fromResource("mixedPdf.vl.json") + val numSamples = 10000 + val samples = (0 until numSamples).map(_ => (x = draw)) + + plot.plot( + _.data.values := samples.asJson, + _ += (title = s"Mixed Distribution (mixingPoint=$mixingPoint, paretoShape=$paretoShape)").asJson + ) + end plot + + /** Plot the mixed CDF. */ + def plotCdf(using viz.LowPriorityPlotTarget) = + val plot = VegaPlot.fromResource("mixedCdf.vl.json") + + // Generate points for the CDF + val minX = empirical.inverseCdf(0.0) + val maxX = paretoTail.inverseCdf(0.99) // 99th percentile of tail + val numPoints = 500 + val step = (maxX - minX) / numPoints + + val points = (0 to numPoints).map { i => + val x = minX + i * step + (x = x, cdf = cdf(x)) + } + + plot.plot( + _.data.values := points.asJson, + _ += (title = s"Mixed Distribution CDF (mixingPoint=$mixingPoint, paretoShape=$paretoShape)").asJson + ) + end plotCdf + +end Mixed + +object Mixed: + + /** Create a mixed distribution from raw empirical data. + * + * @param values + * The empirical sample values + * @param mixingPoint + * The threshold between body and tail + * @param paretoShape + * The Pareto shape parameter for the tail + */ + inline def fromValues(values: Array[Double], mixingPoint: Double, paretoShape: Double): Mixed = + Mixed(Empirical.equalWeights(values), mixingPoint, paretoShape) + + /** Create a mixed distribution from weighted empirical data. 
+ * + * @param values + * The empirical sample values + * @param weights + * The weights for each sample value + * @param mixingPoint + * The threshold between body and tail + * @param paretoShape + * The Pareto shape parameter for the tail + */ + inline def fromWeightedValues( + values: Array[Double], + weights: Array[Double], + mixingPoint: Double, + paretoShape: Double + ): Mixed = + Mixed(Empirical.weighted(values, weights), mixingPoint, paretoShape) + +end Mixed diff --git a/vecxt_re/src-jvm/dist/NegativeBinomial.scala b/vecxt_re/src-jvm/dist/NegativeBinomial.scala new file mode 100644 index 00000000..b9df1d11 --- /dev/null +++ b/vecxt_re/src-jvm/dist/NegativeBinomial.scala @@ -0,0 +1,392 @@ +package vecxt_re + +import org.apache.commons.numbers.gamma.LogGamma +import org.apache.commons.rng.simple.RandomSource +import org.apache.commons.statistics.distribution.GammaDistribution +import org.apache.commons.statistics.distribution.PoissonDistribution + +import vecxt.all.* + +import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} + +/** Negative Binomial Distribution with alternative parameterization. + * + * Uses the parameterization: + * - r = a (number of successes, can be any positive real) + * - p = 1 / (1 + b) (probability of success) + * + * Which gives: + * - mean = a * b + * - variance = a * b * (1 + b) + * + * Under this parameterisation, as b -> 0, the distribution will converge to Poisson(ab). The parameter b is therefore + * a measure of overdispersion. + * + * Implementation uses the gamma-Poisson mixture representation, which allows non-integer a: If λ ~ Gamma(a, b) and X | + * λ ~ Poisson(λ), then X ~ NegativeBinomial(a, b) + * + * @param a + * shape parameter (must be positive, can be non-integer) + * @param b + * scale/dispersion parameter (must be positive) + */ + +//TODO: JS, facade to Stdlib gamma, poisson etc. 
case class NegativeBinomial(a: Double, b: Double)
    extends DiscreteDistr[Int]
    with HasMean[Double]
    with HasCdf[Int]
    with HasVariance[Double]:
  require(a > 0, "a must be positive")
  require(b > 0, "b must be positive")
  require(a.isFinite, "a must be finite")
  require(b.isFinite, "b must be finite")

  // Success probability under the classical (r, p) parameterization.
  private val p: Double = 1.0 / (1.0 + b)
  private val logP: Double = math.log(p)
  private val log1MinusP: Double = math.log1p(-p) // log(1-p) = log(b/(1+b))

  private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create()

  // Gamma(shape = a, scale = b) mixing distribution for the gamma-Poisson
  // representation; supports non-integer a.
  private val gammaDistribution = GammaDistribution.of(a, b)
  private val gammaSampler = gammaDistribution.createSampler(rng)

  /** Draw using gamma-Poisson mixture: λ ~ Gamma(a, b), X | λ ~ Poisson(λ) */
  def draw: Int =
    val lambda = gammaSampler.sample()
    if lambda <= 0 then 0
    else PoissonDistribution.of(lambda).createSampler(rng).sample()
    end if
  end draw

  /** PMF: P(X = k) = Γ(a + k) / (Γ(a) * k!) * p^a * (1-p)^k
    */
  def probabilityOf(x: Int): Double =
    if x < 0 then 0.0
    else math.exp(logProbabilityOf(x))

  /** Log PMF: log P(X = k) = logΓ(a + k) - logΓ(a) - logΓ(k + 1) + a*log(p) + k*log(1-p)
    */
  override def logProbabilityOf(x: Int): Double =
    if x < 0 then Double.NegativeInfinity
    else
      LogGamma.value(a + x) - LogGamma.value(a) - LogGamma.value(x + 1) +
        a * logP + x * log1MinusP

  def mean: Double = a * b

  def variance: Double = a * b * (1.0 + b)

  override def probability(x: Int, y: Int): Double =
    if x >= y then 0.0
    else cdf(y) - cdf(x)

  override def cdf(x: Int): Double =
    if x < 0 then 0.0
    else
      // CDF of NegBin(r, p) at k is I_p(r, k+1), the regularized incomplete
      // beta function, with success probability p = 1/(1+b).
      org.apache.commons.numbers.gamma.RegularizedBeta.value(p, a, x.toDouble + 1.0)

  /** Plot the marginal probabilities out to mean + 4 standard deviations. */
  def plot(using viz.LowPriorityPlotTarget) =
    val linePlot = VegaPlot.fromResource("negBinProb.vl.json")
    val maxX = (mean + 4 * math.sqrt(variance)).toInt
    val data = (0 to maxX).map { k =>
      (value = k, prob = probabilityOf(k))
    }
    linePlot.plot(
      _.data.values := data.asJson,
      _ += (title = s"Negative Binomial Distribution Marginal Probabilities (a=$a, b=$b)").asJson
    )
  end plot

  /** Plot the cumulative probabilities out to mean + 4 standard deviations. */
  def plotCdf(using viz.LowPriorityPlotTarget) =
    val linePlot = VegaPlot.fromResource("negBinCumul.vl.json")
    val maxX = (mean + 4 * math.sqrt(variance)).toInt
    var cumProb = 0.0
    val data = (0 to maxX).map { k =>
      cumProb += probabilityOf(k)
      (value = k, prob = cumProb)
    }
    linePlot.plot(
      _.data.values := data.asJson,
      _ += (title = s"Negative Binomial Distribution Cumulative Probabilities (a=$a, b=$b)").asJson
    )
  end plotCdf
end NegativeBinomial

object NegativeBinomial:
  /** Construct from mean μ and dispersion b, so that a = μ/b. */
  inline def fromMeanDispersion(mu: Double, b: Double): NegativeBinomial =
    NegativeBinomial(mu / b, b)

  /** A near-Poisson limit: b → 0 with a·b = μ held fixed. */
  inline def poisson(mu: Double): NegativeBinomial =
    NegativeBinomial(mu / 1e-12, 1e-12)

  /** Maximum likelihood estimation for Negative Binomial parameters.
    *
    * Uses Newton-Raphson iteration on the profile likelihood for 'a', with method of moments as the initial estimate.
    *
    * For parameterization p = 1/(1+b), mean = a*b, with b = mean/a:
    *   - Score: S(a) = Σᵢ [ψ(a + xᵢ) - ψ(a)] + n·log(a/(a + x̄))
    *   - Hessian: H(a) = Σᵢ [ψ'(a + xᵢ) - ψ'(a)] + n·x̄/(a·(a + x̄))
    *
    * @param observations
    *   array of non-negative integer observations
    * @param maxIter
    *   maximum number of Newton-Raphson iterations
    * @param tol
    *   convergence tolerance for parameter 'a'
    * @return
    *   Named tuple with `dist`: the fitted distribution (Poisson if no overdispersion, otherwise NegativeBinomial), and
    *   `converged`: whether the optimizer converged within maxIter
    */
  def mle(
      observations: Array[Int],
      maxIter: Int = 500,
      tol: Double = 1e-8
  ): (dist: Poisson | NegativeBinomial, converged: Boolean) =
    require(observations.nonEmpty, "observations must not be empty")
    require(observations.forall(_ >= 0), "all observations must be non-negative")

    val n = observations.length.toDouble
    val (xbar, variance) = observations.meanAndVariance
    val sumX = observations.sumSIMD.toDouble

    require(xbar > 0, "mean must be positive for NB fitting")

    // Constant term in the log-likelihood: -∑ log Γ(x_i+1)
    var sumLogFact = 0.0
    var _i = 0
    while _i < observations.length do
      sumLogFact += LogGamma.value(observations(_i) + 1)
      _i += 1
    end while

    // Profile log-likelihood with b = xbar/a (equivalently p = a/(a+xbar))
    inline def profileLogLik(a: Double): Double =
      if a <= 0 || !a.isFinite then Double.NegativeInfinity
      else
        val p = a / (a + xbar)
        val logP = math.log(p)
        val log1MinusP = math.log1p(-p)
        var ll = n * a * logP + sumX * log1MinusP - sumLogFact - n * LogGamma.value(a)
        var k = 0
        while k < observations.length do
          ll += LogGamma.value(a + observations(k))
          k += 1
        end while
        ll

    // If variance <= mean, the data is underdispersed relative to Poisson:
    // return a plain Poisson fit.
    if variance <= xbar then (Poisson(xbar), true)
    else
      // Method of moments initial estimates:
      //   b = variance/mean - 1
      //   a = mean/b = mean^2 / (variance - mean)
      val bMom = (variance / xbar) - 1.0
      val aMom = xbar / bMom

      // Newton-Raphson iteration on the profile score equation for 'a'.
      var a = aMom
      var iter = 0
      var converged = false

      while iter < maxIter && !converged do
        // ψ(a) and ψ'(a) are loop invariants of the per-observation sum below;
        // evaluate them once per Newton step instead of once per observation.
        val digammaA = org.apache.commons.numbers.gamma.Digamma.value(a)
        val trigammaA = org.apache.commons.numbers.gamma.Trigamma.value(a)

        // Score: S(a) = Σᵢ [ψ(a + xᵢ) - ψ(a)] + n·log(a/(a + xbar))
        var score = n * math.log(a / (a + xbar))

        // Hessian (negative): -H(a) = Σᵢ [ψ'(a) - ψ'(a + xᵢ)] + n·xbar/(a·(a + xbar))
        var negHessian = n * xbar / (a * (a + xbar))

        var i = 0
        while i < observations.length do
          val x = observations(i)
          score += org.apache.commons.numbers.gamma.Digamma.value(a + x) - digammaA
          negHessian += trigammaA - org.apache.commons.numbers.gamma.Trigamma.value(a + x)
          i += 1
        end while

        val delta = score / negHessian

        // Backtracking line search on the profile log-likelihood to improve robustness.
        val llCur = profileLogLik(a)
        var step = 1.0
        var aNew = Math.fma(step, delta, a)
        var llNew = profileLogLik(aNew)
        while step > 1e-6 && llNew < llCur do
          step *= 0.5
          aNew = Math.fma(step, delta, a)
          llNew = profileLogLik(aNew)
        end while

        if aNew <= 0 || !aNew.isFinite then a = a / 2.0
        else a = aNew
        end if

        converged = math.abs(step * delta) < tol * math.abs(a)
        iter += 1
      end while

      val bFinal = xbar / a
      (NegativeBinomial(a, bFinal), converged)
    end if
  end mle

  /** Maximum likelihood estimation for the volume-adjusted Negative Binomial.
+ * + * We observe pairs $(n_j, v_j)$ where $n_j$ is the count and $v_j$ is the volume ratio (historical volume / modeled + * volume). With parameters $(r, \beta)$ and $p = 1/(1+\beta v_j)$ the likelihood is $$ L(r,\beta) = \prod_j + * \frac{\Gamma(r+n_j)}{\Gamma(r)\,\Gamma(n_j+1)} \left(\frac{\beta v_j}{1+\beta v_j}\right)^{n_j} + * \left(\frac{1}{1+\beta v_j}\right)^r. $$ The log-likelihood is $$ \ell(r,\beta) = \sum_j \big[\log\Gamma(r+n_j) - + * \log\Gamma(r) - \log\Gamma(n_j+1) + n_j(\log(\beta v_j) - \log(1+\beta v_j)) - r\,\log(1+\beta v_j)\big]. $$ + * Gradient components: $$\partial_\beta \ell = \sum_j \Big( \frac{n_j}{\beta(1+\beta v_j)} - \frac{r v_j}{1+\beta + * v_j} \Big),\quad \partial_r \ell = \sum_j \big[\psi(r+n_j) - \psi(r) - \log(1+\beta v_j)\big],$$ and Hessian + * entries: $$\partial^2_{\beta\beta} \ell = \sum_j \Big( \frac{r v_j}{(1+\beta v_j)^2} - \frac{n_j(1+2\beta + * v_j)}{\beta^2(1+\beta v_j)^2} \Big),$$ $$\partial^2_{rr} \ell = \sum_j \big[\psi'(r+n_j) - \psi'(r)\big],\quad + * \partial^2_{\beta r} \ell = -\sum_j \frac{v_j}{1+\beta v_j}.$$ + * + * Implementation details: + * - Initialize from method of moments on rates $n_j / v_j$; if underdispersed, start at a small $\beta$. + * - Newton updates solve the $2\times2$ system from the gradient/Hessian; a tiny ridge is added to keep the + * Hessian invertible. + * - Step halving is applied to enforce positivity of $r$ and $\beta$. 
+ * + * @param observations + * non-negative counts $n_j$ + * @param volumes + * positive volume ratios $v_j$ (same units as modeled period) + * @param maxIter + * maximum Newton steps + * @param tol + * relative tolerance on both parameters + * @return + * tuple of fitted distribution (Poisson if no overdispersion, otherwise NegativeBinomial) and a convergence flag + */ + def volweightedMle( + observations: Array[Int], + volumes: Array[Double], + maxIter: Int = 500, + tol: Double = 1e-8 + ): (dist: Poisson | NegativeBinomial, converged: Boolean) = + require(observations.nonEmpty, "observations must not be empty") + require(observations.length == volumes.length, "observations and volumes must have the same length") + require(observations.forall(_ >= 0), "all observations must be non-negative") + require(volumes.forall(v => v > 0 && v.isFinite), "volumes must be positive and finite") + + val nObs = observations.length + + var i = 0 + var sumRate = 0.0 + while i < nObs do + sumRate += observations(i) / volumes(i) + i += 1 + end while + + val meanRate = sumRate / nObs + require(meanRate > 0, "mean per unit volume must be positive for NB fitting") + + var varRate = 0.0 + i = 0 + while i < nObs do + val rate = observations(i) / volumes(i) + val diff = rate - meanRate + varRate += diff * diff + i += 1 + end while + varRate /= nObs.toDouble + + // If variance <= mean, data is underdispersed relative to Poisson + // Return Poisson distribution with rate = sum(n) / sum(v) + if varRate <= meanRate then + val sumN = observations.sumSIMD.toDouble + val sumV = volumes.sum + return (Poisson(sumN / sumV), true) + end if + + val betaFloor = 1e-6 + var beta = math.max((varRate / meanRate) - 1.0, betaFloor) + var r = meanRate / beta + + var iter = 0 + var converged = false + val ridge = 1e-12 + + while iter < maxIter && !converged do + var gBeta = 0.0 + var gR = 0.0 + var hbb = 0.0 + var hrr = 0.0 + var hbr = 0.0 + + i = 0 + while i < nObs do + val n = observations(i).toDouble + val 
 v = volumes(i) + val betaV = beta * v + val denom = 1.0 + betaV + val invDenom = 1.0 / denom + val invDenom2 = invDenom * invDenom + val invBeta = 1.0 / beta + + gBeta += n * invBeta * invDenom - r * v * invDenom + gR += org.apache.commons.numbers.gamma.Digamma.value(r + n) - + org.apache.commons.numbers.gamma.Digamma.value(r) - + math.log(denom) + + hbb += r * v * v * invDenom2 - n * (1.0 + 2.0 * betaV) * invBeta * invBeta * invDenom2 + hrr += org.apache.commons.numbers.gamma.Trigamma.value(r + n) - + org.apache.commons.numbers.gamma.Trigamma.value(r) + hbr -= v * invDenom + i += 1 + end while + + val hbbAdj = hbb + ridge + val hrrAdj = hrr + ridge + val det = hbbAdj * hrrAdj - hbr * hbr + + if det.isNaN || det.isInfinite || math.abs(det) < 1e-18 then iter = maxIter + else + val deltaBeta = (gBeta * hrrAdj - gR * hbr) / det + val deltaR = (hbbAdj * gR - hbr * gBeta) / det + + var step = 1.0 + var newBeta = beta - step * deltaBeta + var newR = r - step * deltaR + + while step > 1e-3 && (newBeta <= 0 || newR <= 0 || newBeta.isNaN || newR.isNaN) do + step *= 0.5 + newBeta = beta - step * deltaBeta + newR = r - step * deltaR + end while + + if newBeta > 0 && newR > 0 && newBeta.isFinite && newR.isFinite then + beta = newBeta + r = newR + converged = math.abs(step * deltaBeta) <= tol * math.abs(beta) && + math.abs(step * deltaR) <= tol * math.abs(r) + else iter = maxIter + end if + end if + + iter += 1 + end while + + (NegativeBinomial(r, beta), converged) + end volweightedMle + + inline def mleVolumeWeighted( + observations: Array[Int], + volumes: Array[Double], + maxIter: Int = 500, + tol: Double = 1e-8 + ): (dist: Poisson | NegativeBinomial, converged: Boolean) = volweightedMle(observations, volumes, maxIter, tol) + +end NegativeBinomial diff --git a/vecxt_re/src-jvm/dist/Pareto.scala b/vecxt_re/src-jvm/dist/Pareto.scala new file mode 100644 index 00000000..b99f3555 --- /dev/null +++ b/vecxt_re/src-jvm/dist/Pareto.scala @@ -0,0 +1,114 @@ +package vecxt_re + +import 
org.apache.commons.rng.simple.RandomSource +import org.apache.commons.statistics.distribution.ParetoDistribution + +import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} + +/** Pareto Type I Distribution. + * + * The Pareto distribution is a power-law probability distribution commonly used to model the distribution of wealth, + * insurance losses, and other phenomena where small values are common and large values are rare but possible. + * + * For scale parameter k (minimum possible value) and shape parameter α (Pareto index): + * - PDF: f(x) = α * k^α / x^(α+1) for x >= k + * - CDF: F(x) = 1 - (k/x)^α for x >= k + * - Mean: k * α / (α - 1) for α > 1, otherwise infinite + * - Variance: k² * α / ((α-1)² * (α-2)) for α > 2, otherwise infinite + * + * @param scale + * Scale parameter k (minimum possible value of X, must be positive) + * @param shape + * Shape parameter α (Pareto index, must be positive) + */ +case class Pareto(scale: Double, shape: Double) + extends ContinuousDistr[Double] + with HasMean[Double] + with HasVariance[Double] + with HasCdf[Double] + with HasInverseCdf: + + require(scale > 0, "scale must be positive") + require(shape > 0, "shape must be positive") + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + private val distribution = ParetoDistribution.of(scale, shape) + private val sampler = distribution.createSampler(rng) + + /** Draw a random sample from the Pareto distribution */ + def draw: Double = sampler.sample() + + /** Unnormalized log PDF */ + def unnormalizedLogPdf(x: Double): Double = + if x < scale then Double.NegativeInfinity + else distribution.logDensity(x) + + /** Log normalizer (Pareto is already normalized, so this is 0) */ + def logNormalizer: Double = 0.0 + + /** Probability that x < X <= y */ + def probability(x: Double, y: Double): Double = distribution.probability(x, y) + + /** Cumulative distribution function */ + def cdf(x: Double): Double = distribution.cumulativeProbability(x) + + 
/** Inverse CDF (quantile function) */ + def inverseCdf(p: Double): Double = distribution.inverseCumulativeProbability(p) + + /** Survival function P(X > x) */ + def survivalProbability(x: Double): Double = distribution.survivalProbability(x) + + /** Inverse survival probability */ + def inverseSurvivalProbability(p: Double): Double = distribution.inverseSurvivalProbability(p) + + def mean: Double = distribution.getMean() + + def variance: Double = distribution.getVariance() + + private def guessMaxXForPlot = shape match + case s if s > 2 => mean + 4 * math.sqrt(variance) // mean and variance are defined + case s if s > 1 => mean + 20 * scale // no well defined variance + case _ => scale * 10 // no well defined mean + + def plot(using viz.LowPriorityPlotTarget) = + + val linePlot2 = VegaPlot.fromResource("paretoPdf.vl.json") + val maxX = guessMaxXForPlot * 5 + val numPoints = 10000 + val data = (0 until numPoints).map { _ => + (x = Math.min(maxX, draw)) + } + + // Analytic Pareto( scale=k, shape=α ) PDF: f(x) = α k^α / x^(α+1) for x >= k + // The Vega template contains a placeholder formula; we inject the parameterized one here. 
+ val pdfExpr = s"$shape * pow($scale, $shape) * pow(datum.data, -(${shape + 1.0}))" + + linePlot2.plot( + _.layer.head.data.values := data.asJson, + _.layer.head.encoding.x.scale.domain := List(scale, maxX).asJson, + _.layer.head.encoding.x.scale.nice := false, + _.layer.head.encoding.x.scale.domain := List(scale, maxX).asJson, + _.layer._1.encoding.x.scale.nice := false, + _.layer._1.data.sequence.start := scale, + _.layer._1.data.sequence.stop := maxX, + _.layer._1.data.sequence.step := (maxX - scale) / 200, + _.layer._1.transform.head.calculate := pdfExpr, + _ += (title = s"Pareto Distribution PDF (scale=$scale, shape=$shape)").asJson + ) + end plot + + def plotCdf(using viz.LowPriorityPlotTarget) = + val linePlot = VegaPlot.fromResource("paretoCdf.vl.json") + val maxX = guessMaxXForPlot + + linePlot.plot( + _.data.sequence.start := scale, + _.data.sequence.stop := maxX, + _.data.sequence.step := (maxX - scale) / 200, + _.transform.head.calculate := s"1 - pow($scale / datum.data, $shape)", + _ += (title = s"Pareto Distribution CDF (scale=$scale, shape=$shape)").asJson + ) + end plotCdf + +end Pareto diff --git a/vecxt_re/src-jvm/dist/Poisson.scala b/vecxt_re/src-jvm/dist/Poisson.scala new file mode 100644 index 00000000..62815546 --- /dev/null +++ b/vecxt_re/src-jvm/dist/Poisson.scala @@ -0,0 +1,247 @@ +package vecxt_re + +import org.apache.commons.numbers.gamma.LogGamma +import org.apache.commons.rng.simple.RandomSource +import org.apache.commons.statistics.distribution.PoissonDistribution + +import vecxt.all.* + +import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} + +/** Poisson Distribution. + * + * The Poisson distribution models the number of events occurring in a fixed interval of time or space, given that + * these events occur with a known constant mean rate and independently of the time since the last event. + * + * Parameterization: + * - λ (lambda) = mean = variance + * + * PMF: P(X = k) = λ^k * e^(-λ) / k! 
+ * + * The Poisson distribution is a limiting case of the Negative Binomial distribution as the dispersion parameter b → 0. + * + * @param lambda + * the rate parameter (must be positive) + */ +case class Poisson(lambda: Double) + extends DiscreteDistr[Int] + with HasMean[Double] + with HasCdf[Int] + with HasVariance[Double]: + require(lambda > 0, "lambda must be positive") + require(lambda.isFinite, "lambda must be finite") + + private val logLambda: Double = math.log(lambda) + + private val rng = RandomSource.XO_RO_SHI_RO_128_PP.create() + private val poissonDistribution = PoissonDistribution.of(lambda) + private val poissonSampler = poissonDistribution.createSampler(rng) + + /** Draw a sample from the Poisson distribution */ + inline def draw: Int = poissonSampler.sample() + + /** PMF: P(X = k) = λ^k * e^(-λ) / k! */ + def probabilityOf(x: Int): Double = + if x < 0 then 0.0 + else math.exp(logProbabilityOf(x)) + + /** Log PMF: log P(X = k) = k*log(λ) - λ - log(k!) */ + override def logProbabilityOf(x: Int): Double = + if x < 0 then Double.NegativeInfinity + else x * logLambda - lambda - LogGamma.value(x + 1) + + inline def mean: Double = lambda + + inline def variance: Double = lambda + + override def probability(x: Int, y: Int): Double = + if x >= y then 0.0 + else cdf(y) - cdf(x) + + override def cdf(x: Int): Double = + if x < 0 then 0.0 + else + // CDF using regularized incomplete gamma function + // P(X <= k) = Q(k+1, λ) = Γ(k+1, λ) / Γ(k+1) + // which is the upper regularized gamma function + org.apache.commons.numbers.gamma.RegularizedGamma.Q.value(x.toDouble + 1.0, lambda) + + def plot(using viz.LowPriorityPlotTarget) = + val linePlot = VegaPlot.fromResource("poissonProb.vl.json") + val maxX = (mean + 4 * math.sqrt(variance)).toInt + val data = (0 to maxX).map { k => + (value = k, prob = probabilityOf(k)) + } + linePlot.plot( + _.data.values := data.asJson, + _ += (title = s"Poisson Distribution Marginal Probabilities (λ=$lambda)").asJson + ) + end plot 
+ + def plotCdf(using viz.LowPriorityPlotTarget) = + val linePlot = VegaPlot.fromResource("poissonCumul.vl.json") + val maxX = (mean + 4 * math.sqrt(variance)).toInt + var cumProb = 0.0 + val data = (0 to maxX).map { k => + cumProb += probabilityOf(k) + (value = k, prob = cumProb) + } + linePlot.plot( + _.data.values := data.asJson, + _ += (title = s"Poisson Distribution Cumulative Probabilities (λ=$lambda)").asJson + ) + end plotCdf +end Poisson + +object Poisson: + /** Create a Poisson distribution from the mean. + * + * @param mu + * the mean (rate) parameter + * @return + * a Poisson distribution with the given mean + */ + inline def fromMean(mu: Double): Poisson = Poisson(mu) + + /** Maximum likelihood estimation for Poisson parameter. + * + * For Poisson, the MLE of λ is simply the sample mean. This is exact and always converges in one step. + * + * @param observations + * array of non-negative integer observations + * @return + * Named tuple with `dist`: the fitted Poisson distribution, and `converged`: always true for Poisson MLE + */ + def mle(observations: Array[Int]): (dist: Poisson, converged: Boolean) = + require(observations.nonEmpty, "observations must not be empty") + require(observations.forall(_ >= 0), "all observations must be non-negative") + + val lambdaHat = observations.mean + require(lambdaHat > 0, "mean must be positive for Poisson fitting") + + (Poisson(lambdaHat), true) + end mle + + /** Maximum likelihood estimation for volume-adjusted Poisson. + * + * For observations $n_j$ with corresponding volumes $v_j$, the Poisson model assumes $n_j \sim + * \text{Poisson}(\lambda v_j)$. 
+ * + * The MLE for $\lambda$ is: $$ \hat{\lambda} = \frac{\sum_j n_j}{\sum_j v_j} $$ + * + * @param observations + * non-negative counts $n_j$ + * @param volumes + * positive volume ratios $v_j$ (same units as modeled period) + * @return + * tuple of fitted `Poisson(lambda)` and a convergence flag (always true for Poisson) + */ + def volweightedMle( + observations: Array[Int], + volumes: Array[Double] + ): (dist: Poisson, converged: Boolean) = + require(observations.nonEmpty, "observations must not be empty") + require(observations.length == volumes.length, "observations and volumes must have the same length") + require(observations.forall(_ >= 0), "all observations must be non-negative") + require(volumes.forall(v => v > 0 && v.isFinite), "volumes must be positive and finite") + + val sumN = observations.sumSIMD.toDouble + val sumV = volumes.sum + val lambdaHat = sumN / sumV + + require(lambdaHat > 0, "rate must be positive for Poisson fitting") + + (Poisson(lambdaHat), true) + end volweightedMle + + inline def mleVolumeWeighted( + observations: Array[Int], + volumes: Array[Double] + ): (dist: Poisson, converged: Boolean) = volweightedMle(observations, volumes) + + /** Perform a chi-squared goodness-of-fit test to assess whether the observed data follows a Poisson distribution. + * + * Groups observations into bins and computes the chi-squared statistic comparing observed to expected frequencies. 
+ * + * @param observations + * array of non-negative integer observations + * @param lambda + * the Poisson rate parameter (if None, uses MLE from data) + * @param minExpected + * minimum expected frequency per bin (bins are combined to meet this threshold) + * @return + * Named tuple with `statistic`: the chi-squared test statistic, `degreesOfFreedom`: the degrees of freedom, and + * `pValue`: the p-value of the test + */ + def goodnessOfFit( + observations: Array[Int], + lambda: Option[Double] = None, + minExpected: Double = 5.0 + ): (statistic: Double, degreesOfFreedom: Int, pValue: Double) = + require(observations.nonEmpty, "observations must not be empty") + require(minExpected > 0, "minExpected must be positive") + + val n = observations.length.toDouble + val lambdaEst = lambda.getOrElse(observations.sumSIMD.toDouble / n) + val poisson = Poisson(lambdaEst) + + // Find the max observation to determine bin range + var maxObs = observations.maxSIMD + + // Count observations in each bin + val counts = new Array[Int](maxObs + 2) // +1 for the "maxObs or more" bin + var i = 0 + while i < observations.length do + val obs = observations(i) + if obs >= counts.length - 1 then counts(counts.length - 1) += 1 + else counts(obs) += 1 + end if + i += 1 + end while + + // Compute expected frequencies + val expected = new Array[Double](counts.length) + i = 0 + while i < expected.length - 1 do + expected(i) = n * poisson.probabilityOf(i) + i += 1 + end while + // Last bin is "maxObs or more" + expected(expected.length - 1) = n * (1.0 - poisson.cdf(expected.length - 2)) + + // Combine bins with expected < minExpected + var chiSq = 0.0 + var df = -1 // Start at -1 because we estimated lambda + var obsAccum = 0 + var expAccum = 0.0 + + i = 0 + while i < counts.length do + obsAccum += counts(i) + expAccum += expected(i) + if expAccum >= minExpected then + chiSq += (obsAccum - expAccum) * (obsAccum - expAccum) / expAccum + df += 1 + obsAccum = 0 + expAccum = 0.0 + end if + i += 1 
+ end while + + // Handle remaining accumulated values + if expAccum > 0 then + // Add to previous bin's chi-squared contribution + chiSq += (obsAccum - expAccum) * (obsAccum - expAccum) / expAccum + df += 1 + end if + + // Compute p-value using chi-squared distribution + val pValue = + if df <= 0 then 1.0 + else 1.0 - org.apache.commons.numbers.gamma.RegularizedGamma.P.value(df.toDouble / 2.0, chiSq / 2.0) + + (chiSq, df, pValue) + end goodnessOfFit + +end Poisson diff --git a/vecxt_re/src-jvm/dist/Rand.scala b/vecxt_re/src-jvm/dist/Rand.scala new file mode 100644 index 00000000..6e9ec7b3 --- /dev/null +++ b/vecxt_re/src-jvm/dist/Rand.scala @@ -0,0 +1,89 @@ +package vecxt_re + +/** A trait for monadic distributions. Provides support for use in for-comprehensions + */ +trait Rand[T]: + outer => + + /** Gets one sample from the distribution. Equivalent to sample + */ + def draw: T + + inline def get = draw + + /** Overridden by filter/map/flatmap for monadic invocations. Basically, rejection samplers will return None here */ + def drawOpt: Option[T] = Some(draw) + + /** Gets one sample from the distribution. Equivalent to get + */ + inline def sample = get + + /** Gets n samples from the distribution. + */ + inline def sample(n: Int): IndexedSeq[T] = IndexedSeq.fill(n)(draw) + + /** Gets n samples from the distribution into a specified collection type. + */ + inline def sampleTo[C](n: Int)(using factory: scala.collection.Factory[T, C]): C = + val builder = factory.newBuilder + builder.sizeHint(n) + var i = 0 + while i < n do + builder += draw + i += 1 + end while + builder.result() + end sampleTo + + /** An infinitely long iterator that samples repeatedly from the Rand + * @return + * an iterator that repeatedly samples + */ + inline def samples: Iterator[T] = Iterator.continually(draw) + + /** Converts a random sampler of one type to a random sampler of another type. 
Examples: uniform.map(_*2) gives a + * Rand[Double] in the range [0,2] Equivalently, for(x <- uniform) yield 2*x + * + * @param f + * the transform to apply to the sampled value. + */ + def map[E](f: T => E): Rand[E] = MappedRand(outer, f) + + def flatMap[E](f: T => Rand[E]): Rand[E] = FlatMappedRand(outer, f) + + def withFilter(p: T => Boolean): Rand[T] = FilteredRand(outer, p) +end Rand + +private final case class MappedRand[@specialized(Int, Double) T, @specialized(Int, Double) U]( + rand: Rand[T], + func: T => U +) extends Rand[U]: + override def draw: U = func(rand.draw) + override def drawOpt: Option[U] = rand.drawOpt.map(func) + override def map[E](f: U => E): Rand[E] = MappedRand(rand, (x: T) => f(func(x))) +end MappedRand + +private final case class FlatMappedRand[@specialized(Int, Double) T, @specialized(Int, Double) U]( + rand: Rand[T], + func: T => Rand[U] +) extends Rand[U]: + override def draw: U = func(rand.draw).draw + override def drawOpt: Option[U] = rand.drawOpt.flatMap(x => func(x).drawOpt) + override def flatMap[E](f: U => Rand[E]): Rand[E] = FlatMappedRand(rand, (x: T) => func(x).flatMap(f)) +end FlatMappedRand + +private final case class FilteredRand[@specialized(Int, Double) T](rand: Rand[T], predicate: T => Boolean) + extends Rand[T]: + override def draw: T = + var result = rand.draw + var attempts = 0 + while !predicate(result) do + attempts += 1 + if attempts > 100000 then throw new RuntimeException("Rejection sampling exceeded max attempts") + end if + result = rand.draw + end while + result + end draw + override def drawOpt: Option[T] = rand.drawOpt.filter(predicate) +end FilteredRand diff --git a/vecxt_re/src-jvm/imposeClustering.scala b/vecxt_re/src-jvm/imposeClustering.scala new file mode 100644 index 00000000..81b5fef4 --- /dev/null +++ b/vecxt_re/src-jvm/imposeClustering.scala @@ -0,0 +1,88 @@ +// package vecxt_re + +// import vecxt.all.* + +// extension (scenario: Scenario) +// def imposeClustering(newCoeff: Double): Scenario = { 
+// // expectation and variance of new scenario +// val numItrs = scenario.numberIterations +// val frequency = scenario.freq +// val e = frequency.mean +// val v = newCoeff * Math.pow(e, 2) + e + +// // in (r,p) form +// val p = e / v +// val r = e * p / (1 - p) + +// val newDist: DiscreteDistr[Int] with Product = if (newCoeff > 0) { +// breeze.stats.distributions.NegativeBinomial(r, 1 - p) // different parameterisation to matlab +// } else { +// breeze.stats.distributions.Poisson(e) +// } + +// var newFreq: IndexedSeq[Int] = newDist.sample(numberIterations) +// val maxSteps = 10 +// val sumEvents = scenario.events.length +// var step = 0 + +// def matchMean( +// inFreq: IndexedSeq[Int], +// sumEvents: Int, +// newCoeff: Double +// ): IndexedSeq[Int] = { +// val delta = sumEvents - inFreq.sum; +// val anz = math.min(numItrs, Math.abs(delta)) +// val asVector: Array[Int] = Array(inFreq: _*) // for slicing... +// val asVectorDouble = convert(asVector, Double) + +// delta match { +// case n if (n < 0) => { + +// val d = breeze.numerics.abs(asVectorDouble - Math.max(Math.ceil(mean(asVectorDouble)), 1)) +// val temp: Matrix[Double] = Matrix(d.toArray.toScalaVector.zipWithIndex.map { case (x, y) => (x, y.toDouble) }: _*) +// val sorted = sortrows(temp, Vector(0)) +// sorted(::, 0) +// val idx = convert(sorted(::, 1), Int) +// val changeThese = idx(0 until anz) +// asVector(changeThese.toScalaVector) -= 1 +// val check = (asVector <:< 0).activeKeysIterator.toVector +// asVector(check) += 1 +// asVector.toScalaVector +// } +// case n if (n > 0) => { +// val d = breeze.numerics.abs(asVectorDouble - Math.floor(mean(asVectorDouble))) +// val temp: Matrix[Double] = Matrix(d.toArray.toScalaVector.zipWithIndex.map { case (x, y) => (x, y.toDouble) }: _*) +// val sorted = sortrows(temp, Vector(0)) +// sorted(::, 0) +// val idx = convert(sorted(::, 1), Int) +// val changeThese = idx(0 until anz) +// asVector(changeThese.toScalaVector) += 1 +// asVector.toScalaVector +// } +// } 
+// } +// while (newFreq.sum != sumEvents && step <= maxSteps) { +// newFreq = matchMean(newFreq, sumEvents, newCoeff) +// step = step + 1 +// } +// val frequencyC = convert(Array(newFreq: _*), Double) +// val meanFreqC = mean(frequencyC) +// (variance(frequencyC) - meanFreqC) / Math.pow(meanFreqC, 2) + +// if (step == maxSteps) { +// throw new Exception("Max steps reached, this probably didn't work") +// } +// val builder = Vector.newBuilder[Int] +// for ((numEvents, itr) <- newFreq.zipWithIndex) { +// // decumcount +// val etend = for (_ <- 1 to numEvents if numEvents > 0) yield (itr + 1) +// builder ++= etend +// } +// val decumcount = builder.result() + +// val zipTogether = decumcount.zip(events) +// val permute = zipTogether.map { case (itr, event) => event.copy(iteration = itr) } + +// scenario.copy(events = permute) + +// } diff --git a/vecxt_re/src-jvm/plots.scala b/vecxt_re/src-jvm/plots.scala new file mode 100644 index 00000000..b8bcef9c --- /dev/null +++ b/vecxt_re/src-jvm/plots.scala @@ -0,0 +1,488 @@ +package vecxt_re + +import vecxt_re.HillEstimator.HillPlotResult +import vecxt_re.PickandsEstimator.PickandsPlotResult + +import io.circe.syntax.* +import io.github.quafadas.plots.SetupVega.{*, given} + +object Plots: + // These must be private otherwise scaladoc get crazy. 
+ private lazy val timeline = VegaPlot.fromResource("timeline.vl.json") // riskInceptionDate, riskExpiryDate + private lazy val seasonality = VegaPlot.fromResource("seasonality.vg.json") // catagory, amount + private lazy val distributionDensity = VegaPlot.fromResource("distDensity.vg.json") // value, density + private lazy val negBinCdfWSample = VegaPlot.fromResource("negBinCumul_vsSample.vl.json") // value, density + private lazy val ecdfVsCdf = VegaPlot.fromResource("ecdfVsCdf.vl.json") // theoretical and empirical CDF + private lazy val rootogram = VegaPlot.fromResource("rootogram.vl.json") // hanging rootogram + private lazy val pearsonResiduals = VegaPlot.fromResource("pearsonResiduals.vl.json") // residual plot + private lazy val poissonTrend = VegaPlot.fromResource("poissonTrend.vl.json") // Poisson GLM trend + private lazy val logLogPlot = VegaPlot.fromResource("loglogCdf.vl.json") // log-log CDF plot + private lazy val hillPlotSpec = VegaPlot.fromResource("hillPlot.vl.json") // Hill plot for tail index + + extension (idx: CalendarYearIndex) + def plotIndex(reportingThreshold: Double)(using viz.LowPriorityPlotTarget) = + val linePlot2 = VegaPlot.fromResource("index.vl.json") + val cumulative = idx.onLevel(Array.fill(idx.years.length)(1.0), idx.years) + val factors = idx.years.zip(idx.indices).zip(cumulative).map { case ((year, index), cumulative) => + ( + year = year, + index = index, + missing = 1 / cumulative, + threshold = idx.suggestedNewThreshold(reportingThreshold) + ) + } + linePlot2.plot( + _.data.values := factors.asJson + ) + end extension + + extension (nb: NegativeBinomial) + + /** Plot ECDF vs theoretical CDF as step functions for visual goodness-of-fit assessment. + * + * Both curves are step functions. Deviations between the orange (empirical) and blue (theoretical) lines indicate + * potential model misfit. 
+ */ + inline def plotEcdfVsCdf(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val maxX = math.max(samples.max, (nb.mean + 4 * math.sqrt(nb.variance)).toInt) + + // Theoretical CDF + var cumProb = 0.0 + val theoreticalCdf = (0 to maxX).map { k => + cumProb += nb.probabilityOf(k) + (value = k, prob = cumProb) + } + + // Empirical CDF (step function at each unique value) + val n = samples.length.toDouble + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + var empiricalCum = 0.0 + val empiricalCdf = (0 to maxX).map { k => + empiricalCum += counts.getOrElse(k, 0) + (value = k, prob = empiricalCum / n) + } + + ecdfVsCdf.plot( + _.title(s"NegBin(a=${nb.a}, b=${nb.b}) ECDF vs Theoretical CDF"), + _.layer._0.data.values := theoreticalCdf.asJson, + _.layer._1.data.values := empiricalCdf.asJson + ) + end plotEcdfVsCdf + + /** Plot a hanging rootogram for count data diagnostics. + * + * A rootogram displays sqrt(expected) as the reference curve and hangs bars from it down to sqrt(observed). When + * the model fits well, bars hang close to the zero line. Bars extending below zero indicate under-prediction; bars + * stopping above zero indicate over-prediction. 
+ */ + inline def plotRootogram(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val n = samples.length.toDouble + val maxK = math.max(samples.max, (nb.mean + 3 * math.sqrt(nb.variance)).toInt) + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + + val data = (0 to maxK).map { k => + val observed = counts.getOrElse(k, 0) + val expected = nb.probabilityOf(k) * n + val sqrtObs = math.sqrt(observed) + val sqrtExp = math.sqrt(expected) + // Hanging: bar goes from sqrtExp down by sqrtObs, ending at sqrtExp - sqrtObs + (k = k, sqrtExpected = sqrtExp, sqrtObserved = sqrtObs, hanging = sqrtExp - sqrtObs) + } + + rootogram.plot( + _.title(s"NegBin(a=${nb.a}, b=${nb.b}) Hanging Rootogram"), + _.data.values := data.asJson + ) + end plotRootogram + + /** Plot Pearson residuals: (observed - expected) / sqrt(expected). + * + * Residuals beyond ±2 (shown in red) indicate significant deviation from the fitted model. For Negative Binomial, + * we use the variance = μ(1 + μ/a) for the denominator when available. 
+ */ + inline def plotPearsonResiduals(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val n = samples.length.toDouble + val maxK = math.max(samples.max, (nb.mean + 3 * math.sqrt(nb.variance)).toInt) + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + + val data = (0 to maxK).flatMap { k => + val observed = counts.getOrElse(k, 0) + val expected = nb.probabilityOf(k) * n + // Only include if expected > 0 to avoid division by zero + if expected > 0.001 then + // For NegBin, variance of count = expected * (1 + expected/(n*a)) approximately + // Simplify to Pearson: (O - E) / sqrt(E) + val residual = (observed - expected) / math.sqrt(expected) + Some((k = k, residual = residual)) + else None + end if + } + + pearsonResiduals.plot( + _.title(s"NegBin(a=${nb.a}, b=${nb.b}) Pearson Residuals"), + _.data.values := data.asJson + ) + end plotPearsonResiduals + end extension + + extension (p: Poisson) + /** Plot ECDF vs theoretical CDF as step functions for visual goodness-of-fit assessment. */ + inline def plotEcdfVsCdf(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val maxX = math.max(samples.max, (p.mean + 4 * math.sqrt(p.variance)).toInt) + + // Theoretical CDF + var cumProb = 0.0 + val theoreticalCdf = (0 to maxX).map { k => + cumProb += p.probabilityOf(k) + (value = k, prob = cumProb) + } + + // Empirical CDF + val n = samples.length.toDouble + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + var empiricalCum = 0.0 + val empiricalCdf = (0 to maxX).map { k => + empiricalCum += counts.getOrElse(k, 0) + (value = k, prob = empiricalCum / n) + } + + ecdfVsCdf.plot( + _.title(s"Poisson(λ=${p.lambda}) ECDF vs Theoretical CDF"), + _.layer._0.data.values := theoreticalCdf.asJson, + _.layer._1.data.values := empiricalCdf.asJson + ) + end plotEcdfVsCdf + + /** Plot a hanging rootogram for Poisson count data diagnostics. + * + * Bars hang from sqrt(expected) down to sqrt(expected) - sqrt(observed). 
Good fit means bars end near zero. + */ + inline def plotRootogram(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val n = samples.length.toDouble + val maxK = math.max(samples.max, (p.mean + 3 * math.sqrt(p.variance)).toInt) + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + + val data = (0 to maxK).map { k => + val observed = counts.getOrElse(k, 0) + val expected = p.probabilityOf(k) * n + val sqrtObs = math.sqrt(observed) + val sqrtExp = math.sqrt(expected) + (k = k, sqrtExpected = sqrtExp, sqrtObserved = sqrtObs, hanging = sqrtExp - sqrtObs) + } + + rootogram.plot( + _.title(s"Poisson(λ=${p.lambda}) Hanging Rootogram"), + _.data.values := data.asJson + ) + end plotRootogram + + /** Plot Pearson residuals for Poisson: (observed - expected) / sqrt(expected). + * + * For Poisson, variance = mean, so the denominator is simply sqrt(expected). Residuals beyond ±2 (red) suggest + * significant deviation. Systematic patterns may indicate overdispersion (consider Negative Binomial). + */ + inline def plotPearsonResiduals(samples: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + val n = samples.length.toDouble + val maxK = math.max(samples.max, (p.mean + 3 * math.sqrt(p.variance)).toInt) + val counts = samples.groupMapReduce(identity)(_ => 1)(_ + _) + + val data = (0 to maxK).flatMap { k => + val observed = counts.getOrElse(k, 0) + val expected = p.probabilityOf(k) * n + if expected > 0.001 then + val residual = (observed - expected) / math.sqrt(expected) + Some((k = k, residual = residual)) + else None + end if + } + + pearsonResiduals.plot( + _.title(s"Poisson(λ=${p.lambda}) Pearson Residuals"), + _.data.values := data.asJson + ) + end plotPearsonResiduals + + /** Plot a Poisson GLM trend: log(Count) ~ 1 + Year with 95% confidence intervals. 
+ * + * Fits a Poisson regression to count data over years and displays: - Observations (blue X markers) - Fitted trend + * line (solid red) - 95% confidence interval band (dashed red lines with shaded area) + * + * The coefficients (intercept, year slope) and their standard errors are estimated via iteratively reweighted + * least squares (IRLS). The confidence intervals use normal approximation on the log scale. + * + * @param years + * the year for each observation + * @param counts + * the count for each observation (same length as years) + */ + inline def plotTrend(years: IndexedSeq[Int], counts: IndexedSeq[Int])(using viz.LowPriorityPlotTarget) = + require(years.length == counts.length, "years and counts must have the same length") + + val n = years.length + val yearsD = years.map(_.toDouble) + val countsD = counts.map(_.toDouble) + + // Fit Poisson GLM via IRLS: log(μ) = β₀ + β₁·year + // Design matrix: X = [1 | year], each row is [1, yearᵢ] + val meanY = countsD.sum / n + var beta0 = math.log(math.max(meanY, 0.1)) + var beta1 = 0.0 + + // IRLS iterations + for _ <- 0 until 25 do + // Fitted values: μ = exp(Xβ) + val mu = yearsD.map(y => math.exp(beta0 + beta1 * y)) + + // Working response: z = η + (y - μ)/μ where η = Xβ + val z = (0 until n).map { i => + val eta = beta0 + beta1 * yearsD(i) + eta + (countsD(i) - mu(i)) / math.max(mu(i), 1e-10) + } + + // Weights: W = diag(μ) for Poisson canonical link + val w = mu.map(m => math.max(m, 1e-10)) + + // Solve weighted least squares: (XᵀWX)β = XᵀWz + // XᵀWX is 2×2 symmetric: [[Σwᵢ, Σwᵢyᵢ], [Σwᵢyᵢ, Σwᵢyᵢ²]] + // XᵀWz is 2×1: [Σwᵢzᵢ, Σwᵢyᵢzᵢ] + var xtwx00, xtwx01, xtwx11 = 0.0 + var xtwz0, xtwz1 = 0.0 + + var i = 0 + while i < n do + val wi = w(i) + val yi = yearsD(i) + val zi = z(i) + xtwx00 += wi + xtwx01 += wi * yi + xtwx11 += wi * yi * yi + xtwz0 += wi * zi + xtwz1 += wi * yi * zi + i += 1 + end while + + // Solve 2×2 system via Cramer's rule: [xtwx00, xtwx01; xtwx01, xtwx11] * β = [xtwz0; xtwz1] + val det = 
xtwx00 * xtwx11 - xtwx01 * xtwx01 + if math.abs(det) > 1e-15 then + beta0 = (xtwx11 * xtwz0 - xtwx01 * xtwz1) / det + beta1 = (xtwx00 * xtwz1 - xtwx01 * xtwz0) / det + end if + end for + + // Fisher information matrix: I = XᵀWX at final β + // I is 2×2 symmetric: [[i00, i01], [i01, i11]] + val muFinal = yearsD.map(y => math.exp(beta0 + beta1 * y)) + var i00, i01, i11 = 0.0 + var j = 0 + while j < n do + val mi = muFinal(j) + val yi = yearsD(j) + i00 += mi + i01 += mi * yi + i11 += mi * yi * yi + j += 1 + end while + + // Standard errors from Cov(β) = I⁻¹ + val detI = i00 * i11 - i01 * i01 + val seBeta0 = if detI > 1e-15 then math.sqrt(i11 / detI) else Double.NaN + val seBeta1 = if detI > 1e-15 then math.sqrt(i00 / detI) else Double.NaN + + // Covariance matrix: Cov(β) = I⁻¹ = (1/det) * [[i11, -i01], [-i01, i00]] + val covBeta = + if detI > 1e-15 then + Some( + ( + v00 = i11 / detI, + v01 = -i01 / detI, + v11 = i00 / detI + ) + ) + else None + + // Generate fitted curve with CI + val minYear = years.min + val maxYear = years.max + val yearRange = (minYear to maxYear).toVector + + val ciData = yearRange.map { y => + val eta = beta0 + beta1 * y.toDouble + // Var(η) = xᵀ Cov(β) x where x = [1, year]ᵀ + val varEta = covBeta + .map { c => + c.v00 + 2 * y * c.v01 + y.toDouble * y.toDouble * c.v11 + } + .getOrElse(0.0) + val seEta = math.sqrt(math.max(varEta, 0.0)) + val fit = math.exp(eta) + val lower = math.exp(eta - 1.96 * seEta) + val upper = math.exp(eta + 1.96 * seEta) + (year = y, fit = fit, lower = lower, upper = upper) + } + + val obsData = years.zip(counts).map { case (y, c) => (year = y, count = c) } + + poissonTrend.plot( + _.title(s"Poisson Trend: β₀=${f"$beta0%.3f"}±${f"$seBeta0%.3f"}, β₁=${f"$beta1%.5f"}±${f"$seBeta1%.5f"}"), + _.layer._0.data.values := ciData.asJson, + _.layer._1.data.values := ciData.asJson, + _.layer._2.data.values := ciData.asJson, + _.layer._3.data.values := ciData.asJson, + _.layer._4.data.values := obsData.asJson + ) + end plotTrend + 
end extension + + extension (scenario: Scenarr) + inline def plotSeasonality(highlight: Option[(year: Int, month: Int)] = None)(using + tgt: viz.LowPriorityPlotTarget + ) = + val calc = scenario.monthYear.zip(scenario.amounts).groupMapReduce(_._1)(_._2)(_ + _).toVector + val normaliseBy = calc.map(_._2).sum // total of all claims + val sorted = calc + .sortBy(row => (row._1.year, row._1.month)) + .map(row => + ( + category = + s"${row._1.month.getDisplayName(java.time.format.TextStyle.SHORT, java.util.Locale.getDefault())} ${row._1.year}", + amount = row._2 / normaliseBy, + color = highlight.exists(h => h.year == row._1.year && h.month == row._1.month.getValue) + ) + ) + + seasonality.plot( + _.title("Seasonality " + scenario.name), + _.data.values := sorted.asJson + ) + end extension + + extension (mixed: Mixed) + /** Plot log-log comparison of theoretical Mixed distribution vs empirical sample data. + * + * This plot shows the complementary CDF (1 - CDF) on log-log scale, which is useful for visualizing tail behavior + * of heavy-tailed distributions. The x-axis shows log(value) and y-axis shows log(1 - CDF). 
+ */ + def plotLogLogVsSample(samples: IndexedSeq[Double], threshold: Double)(using viz.LowPriorityPlotTarget) = + val sortedSamples = samples.sorted + val n = sortedSamples.length.toDouble + + // Empirical survival function using Hazen plotting position + val empiricalData = sortedSamples.zipWithIndex.collect { + case (x, i) if x > 0 => + val survivalProb = (n - i - 0.5) / n + if survivalProb > 0 then Some((x = x, y = survivalProb, source = "empirical")) + else None + end if + }.flatten + + // Theoretical survival function (1 - CDF) + // For Pareto: S(x) = (xₘ/x)^α, so log(S) = α·log(xₘ) - α·log(x) is linear + val minX = sortedSamples.filter(_ > 0).headOption.getOrElse(1.0) + val maxX = sortedSamples.last + val numPoints = 200 + val theoreticalData = (0 until numPoints).flatMap { i => + val x = minX * math.pow(maxX / minX, i.toDouble / (numPoints - 1)) + val survivalProb = 1.0 - mixed.cdf(x) + if survivalProb > 1e-10 && x > 0 then Some((x = x, y = survivalProb, source = "model")) + else None + end if + } + + val allData = theoreticalData ++ empiricalData + + logLogPlot.plot( + _.title("Mixed Distribution Log-Log Plot"), + _.data.values := allData.asJson, + _.layer._0.encoding.x.scale.domainMin := threshold + ) + end plotLogLogVsSample + end extension + + extension (hp: HillPlotResult) + /** Plot a Hill plot showing tail index estimates α̂(k) vs k. + * + * A Hill plot helps identify the optimal number of upper order statistics to use for Pareto tail estimation. Look + * for a stable plateau region where the estimate is relatively constant - this indicates the range of k where the + * Pareto assumption holds. Too small k gives high variance; too large k includes non-tail observations. 
+ */ + def plotHill(using viz.LowPriorityPlotTarget) = + val data = hp.kValues.zip(hp.estimates).map { case (k, est) => + (k = k, estimate = est) + } + + hillPlotSpec.plot( + _.title("Hill Plot for Pareto Tail Index Estimation"), + _.data.values := data.asJson + ) + end plotHill + end extension + + extension (pp: PickandsPlotResult) + /** Plot a Pickands plot showing tail index estimates α̂(k) = 1/γ̂(k) vs k. + * + * The Pickands estimator is more robust than Hill to model misspecification but has higher variance. Look for a + * stable plateau region. Unlike the Hill plot, the Pickands estimator can give negative γ values for light-tailed + * distributions; only positive γ (and thus positive α) indicates heavy tails. + */ + def plotPickands(using viz.LowPriorityPlotTarget) = + val data = pp.kValues + .zip(pp.alphaEstimates) + .filter { case (_, est) => !est.isNaN && est.isFinite && est > 0 } + .map { case (k, est) => + (k = k, estimate = est) + } + + hillPlotSpec.plot( + _.title("Pickands Plot for Pareto Tail Index Estimation"), + _.data.values := data.asJson + ) + end plotPickands + + /** Plot the raw extreme value index γ̂(k) from Pickands estimator. + * + * For heavy-tailed data, γ > 0. For light-tailed data (e.g., exponential), γ = 0. For bounded distributions, γ < + * 0. 
+ */ + def plotPickandsGamma(using viz.LowPriorityPlotTarget) = + val data = pp.kValues + .zip(pp.gammaEstimates) + .filter { case (_, est) => !est.isNaN && est.isFinite } + .map { case (k, est) => + (k = k, estimate = est) + } + + hillPlotSpec.plot( + _.title("Pickands Plot for Extreme Value Index γ"), + _.data.values := data.asJson + ) + end plotPickandsGamma + end extension + + // extension (negBin: NegativeBinomial) + // inline def plotPdf(using viz.LowPriorityPlotTarget) = + // val numPoints = 1000 + // val maxX = negBin.mean + 4 * math.sqrt(negBin.variance) + // val data = (0 until numPoints).map { i => + // val x = i.toDouble * maxX / numPoints + // (value = x, density = negBin.probabilityOf(x.round.toInt)) + // } + + // distributionDensity.plot( + // _.title(s"Negative Binomial Distribution Density (a=${negBin.a}, b=${negBin.b})"), + // _.data.values := data.asJson + // ) + + // inline def plotCdf(using viz.LowPriorityPlotTarget) = + // val numPoints = 1000 + // val maxX = negBin.mean + 4 * math.sqrt(negBin.variance) + // val data = (0 until numPoints).map { i => + // val x = i.toDouble * maxX / numPoints + // (value = x, density = negBin.cdf(x)) + // } + + // distributionDensity.plot( + // _.title(s"Negative Binomial Distribution CDF (a=${negBin.a}, b=${negBin.b})"), + // _.data.values := data.asJson + // ) + // end extension +end Plots diff --git a/vecxtensions/src-jvm/rpt.scala b/vecxt_re/src-jvm/rpt.scala similarity index 98% rename from vecxtensions/src-jvm/rpt.scala rename to vecxt_re/src-jvm/rpt.scala index 44fc0c5c..24847600 100644 --- a/vecxtensions/src-jvm/rpt.scala +++ b/vecxt_re/src-jvm/rpt.scala @@ -1,8 +1,7 @@ -package vecxt.reinsurance -import vecxt.reinsurance.Limits.* -import vecxt.reinsurance.Retentions.* +package vecxt_re import vecxt.all.* -import vecxt.all.given +import vecxt_re.Limits.* +import vecxt_re.Retentions.* import jdk.incubator.vector.DoubleVector import jdk.incubator.vector.VectorSpecies diff --git 
a/vecxtensions/src-native/rpt.scala b/vecxt_re/src-native/rpt.scala similarity index 97% rename from vecxtensions/src-native/rpt.scala rename to vecxt_re/src-native/rpt.scala index 9815837a..e4099b38 100644 --- a/vecxtensions/src-native/rpt.scala +++ b/vecxt_re/src-native/rpt.scala @@ -1,8 +1,7 @@ -package vecxt.reinsurance -import vecxt.reinsurance.Limits.Limit -import vecxt.reinsurance.Retentions.Retention +package vecxt_re import vecxt.all.* -import vecxt.all.given +import vecxt_re.Limits.Limit +import vecxt_re.Retentions.Retention /* diff --git a/vecxt_re/src/CalendarYearIndex.scala b/vecxt_re/src/CalendarYearIndex.scala new file mode 100644 index 00000000..6f0f12ef --- /dev/null +++ b/vecxt_re/src/CalendarYearIndex.scala @@ -0,0 +1,142 @@ +package vecxt_re + +/** A calendar year-based wrapper around IndexPerPeriod for on-leveling historical data. + * + * This class maps calendar years to index factors, allowing on-leveling of datasets where data points are labeled with + * their calendar year. + * + * @param currentYear + * The current/reference year (period 0) + * @param years + * Array of years in descending order (most recent first) + * @param indices + * Array of index factors corresponding to each year + */ +case class CalendarYearIndex(currentYear: Int, years: Array[Int], indices: Array[Double]): + require(years.length == indices.length, "years and indices must have the same length") + require(years.length > 0, "must provide at least one year") + + private val yearToIdx: Map[Int, Int] = years.zipWithIndex.toMap + private val underlying: IndexPerPeriod = IndexPerPeriod(indices) + + /** Number of years covered by this index */ + inline def numYears: Int = years.length + + /** The earliest year covered */ + inline def earliestYear: Int = years.last + + /** The latest year covered (should equal currentYear if properly constructed) */ + inline def latestYear: Int = years.head + + /** Get the index factor for a specific year. 
+ * + * @param year + * The calendar year + * @return + * The index factor for that year + * @throws NoSuchElementException + * if year is not in the index + */ + def indexAt(year: Int): Double = + val idx = yearToIdx.getOrElse(year, throw new NoSuchElementException(s"Year $year not in index")) + indices(idx) + end indexAt + + /** Calculate the cumulative on-leveling factor from a historical year to the current year. + * + * @param fromYear + * The historical year + * @return + * The cumulative factor to on-level from that year to current + */ + def cumulativeToCurrentFrom(fromYear: Int): Double = + val periodsBack = currentYear - fromYear + underlying.cumulativeToCurrentFrom(periodsBack) + end cumulativeToCurrentFrom + + /** Apply on-leveling to an array of values, given their corresponding years. + * + * @param values + * The historical values to on-level + * @param dataYears + * The calendar year for each value (same length as values) + * @return + * Array of on-leveled values + */ + def onLevel(values: Array[Double], dataYears: Array[Int]): Array[Double] = + require(values.length == dataYears.length, "values and dataYears must have the same length") + val result = new Array[Double](values.length) + var i = 0 + while i < values.length do + result(i) = values(i) * cumulativeToCurrentFrom(dataYears(i)) + i += 1 + end while + result + end onLevel + + def suggestedNewThreshold(reportThreshold: Double): Double = + val periodBack = currentYear - latestYear + val factor = underlying.cumulativeToCurrentFrom(periodBack) + reportThreshold * factor + end suggestedNewThreshold + +end CalendarYearIndex + +object CalendarYearIndex: + + /** Create a CalendarYearIndex from arrays of years and their corresponding indices. Years should be provided in + * descending order (most recent first). 
+ * + * @param years + * Array of calendar years in descending order + * @param indices + * Array of index factors for each year + * @return + * CalendarYearIndex with the current year set to the first (most recent) year + */ + def apply(years: Array[Int], indices: Array[Double]): CalendarYearIndex = + require(years.length > 0, "must provide at least one year") + CalendarYearIndex(years.head, years, indices) + end apply + + /** Create a CalendarYearIndex from a range of years with a constant rate change. + * + * @param fromYear + * The earliest year (inclusive) + * @param toYear + * The current/latest year (inclusive) + * @param factor + * The constant factor for each year (e.g., 1.05 for 5% per year) + * @return + * CalendarYearIndex spanning the specified years + */ + def constant(fromYear: Int, toYear: Int, factor: Double): CalendarYearIndex = + require(toYear >= fromYear, "toYear must be >= fromYear") + val numYears = toYear - fromYear + 1 + val years = Array.tabulate(numYears)(i => toYear - i) + val indices = Array.fill(numYears)(factor) + CalendarYearIndex(toYear, years, indices) + end constant + + /** Create a CalendarYearIndex from arrays of years and rate changes (as percentages). Years should be provided in + * descending order (most recent first). 
+ * + * @param years + * Array of calendar years in descending order + * @param rateChanges + * Array of percentage changes for each year (e.g., 5.0 for 5%) + * @return + * CalendarYearIndex with rate changes converted to factors + */ + def fromRateChanges(years: Array[Int], rateChanges: Array[Double]): CalendarYearIndex = + require(years.length == rateChanges.length, "years and rateChanges must have the same length") + val factors = new Array[Double](rateChanges.length) + var i = 0 + while i < rateChanges.length do + factors(i) = 1.0 + rateChanges(i) / 100.0 + i += 1 + end while + CalendarYearIndex(years, factors) + end fromRateChanges + +end CalendarYearIndex diff --git a/vecxtensions/src/DeductibleType.scala b/vecxt_re/src/DeductibleType.scala similarity index 76% rename from vecxtensions/src/DeductibleType.scala rename to vecxt_re/src/DeductibleType.scala index 4b4d2400..f282b043 100644 --- a/vecxtensions/src/DeductibleType.scala +++ b/vecxt_re/src/DeductibleType.scala @@ -1,4 +1,4 @@ -package vecxt.reinsurance +package vecxt_re enum DeductibleType: case Retention, Franchise, ReverseFranchise diff --git a/vecxt_re/src/HillEstimator.scala b/vecxt_re/src/HillEstimator.scala new file mode 100644 index 00000000..e484d39e --- /dev/null +++ b/vecxt_re/src/HillEstimator.scala @@ -0,0 +1,213 @@ +package vecxt_re + +/** Hill estimator for Pareto tail index estimation. + * + * The Hill estimator is used to estimate the shape parameter (α) of a Pareto distribution from the upper tail of the + * data. For a Pareto distribution with survival function S(x) = (x_min/x)^α, the tail index α determines how heavy the + * tail is: + * - α < 2: Infinite variance + * - α < 1: Infinite mean + * - Larger α means lighter tails + * + * The estimator uses the k largest order statistics: α̂ = 1 / (1/k * Σᵢ₌₁ᵏ ln(X₍ₙ₋ᵢ₊₁₎) - ln(X₍ₙ₋ₖ₎)) + */ +object HillEstimator: + + /** Computes the Hill estimator for the Pareto tail index using the k largest observations. 
+ * + * @param data + * The data array (will be sorted internally) + * @param k + * The number of upper order statistics to use (must be between 1 and n-1) + * @return + * The estimated tail index α + * @throws IllegalArgumentException + * if k is out of valid range or data is empty + */ + def apply(data: Array[Double], k: Int): Double = + require(data.length > 1, "Data must have at least 2 observations") + require(k >= 1 && k < data.length, s"k must be between 1 and ${data.length - 1}, got $k") + + val sorted = data.clone() + java.util.Arrays.sort(sorted) + + val n = sorted.length + val threshold = sorted(n - k - 1) // X_(n-k) + + require(threshold > 0, "Threshold (k-th largest value) must be positive for Pareto estimation") + + var sumLogRatios = 0.0 + var i = 0 + while i < k do + val xi = sorted(n - 1 - i) // X_(n-i+1) for i = 1..k + sumLogRatios += math.log(xi) - math.log(threshold) + i += 1 + end while + + k.toDouble / sumLogRatios + end apply + + /** Result of a Hill plot computation containing k values and corresponding tail index estimates. + * + * @param kValues + * Array of k values used + * @param estimates + * Corresponding tail index estimates α̂(k) + */ + case class HillPlotResult( + kValues: Array[Int], + estimates: Array[Double] + ): + /** Find a stable region in the Hill plot by looking for low variance segments. 
+ * + * @param windowSize + * Size of the sliding window for variance calculation + * @param threshold + * Maximum coefficient of variation to consider "stable" + * @return + * Optional tuple of (start k, end k, mean estimate) for the most stable region + */ + def findStableRegion( + windowSize: Int = 10, + threshold: Double = 0.1 + ): Option[(bestStart: Int, bestEnd: Int, meanEstimate: Double)] = + if kValues.length < windowSize then None + else + var bestVariance = Double.MaxValue + var bestStart = 0 + var bestMean = 0.0 + + var i = 0 + while i <= estimates.length - windowSize do + var sum = 0.0 + var sumSq = 0.0 + var j = 0 + while j < windowSize do + val v = estimates(i + j) + sum += v + sumSq += v * v + j += 1 + end while + val mean = sum / windowSize + val variance = sumSq / windowSize - mean * mean + val cv = if mean != 0 then math.sqrt(variance) / math.abs(mean) else Double.MaxValue + + if cv < bestVariance && cv < threshold then + bestVariance = cv + bestStart = i + bestMean = mean + end if + i += 1 + end while + + if bestVariance < threshold then + Some((bestStart = kValues(bestStart), bestEnd = kValues(bestStart + windowSize - 1), meanEstimate = bestMean)) + else None + end if + end if + end findStableRegion + end HillPlotResult + + /** Computes a Hill plot: tail index estimates for a range of k values. + * + * A Hill plot shows how the estimate varies with k. A good estimate should show a stable plateau region. Too small k + * gives high variance; too large k includes non-tail observations. 
+ * + * @param data + * The data array + * @param kMin + * Minimum k value (default: 2) + * @param kMax + * Maximum k value (default: n/2 or n-1, whichever is smaller) + * @param step + * Step size for k values (default: 1) + * @return + * HillPlotResult containing k values and corresponding estimates + */ + def hillPlot( + data: Array[Double], + kMin: Int = 2, + kMax: Int = -1, + step: Int = 1 + ): HillPlotResult = + require(data.length > 2, "Data must have at least 3 observations") + + val sorted = data.clone() + java.util.Arrays.sort(sorted) + + val n = sorted.length + val actualKMax = if kMax < 0 then math.min(n / 2, n - 1) else math.min(kMax, n - 1) + val actualKMin = math.max(kMin, 1) + + require(actualKMin < actualKMax, s"kMin ($actualKMin) must be less than kMax ($actualKMax)") + + // Pre-compute log values for efficiency + val logValues = new Array[Double](n) + var i = 0 + while i < n do + logValues(i) = math.log(sorted(i)) + i += 1 + end while + + // Calculate number of k values + val numK = (actualKMax - actualKMin) / step + 1 + val kValues = new Array[Int](numK) + val estimates = new Array[Double](numK) + + var idx = 0 + var k = actualKMin + while k <= actualKMax do + val threshold = sorted(n - k - 1) + val logThreshold = logValues(n - k - 1) + + var sumLogRatios = 0.0 + var j = 0 + while j < k do + sumLogRatios += logValues(n - 1 - j) - logThreshold + j += 1 + end while + + kValues(idx) = k + estimates(idx) = k.toDouble / sumLogRatios + idx += 1 + k += step + end while + + HillPlotResult(kValues, estimates) + end hillPlot + +end HillEstimator + +object HillEstimatorExtensions: + + extension (vec: Array[Double]) + + /** Computes the Hill estimator for the Pareto tail index. + * + * @param k + * The number of upper order statistics to use + * @return + * The estimated tail index α + */ + inline def hillEstimator(k: Int): Double = HillEstimator(vec, k) + + /** Computes a Hill plot for this data. 
+ * + * @param kMin + * Minimum k value (default: 2) + * @param kMax + * Maximum k value (default: n/2) + * @param step + * Step size for k values (default: 1) + * @return + * HillPlotResult with k values and estimates + */ + inline def hillPlot( + kMin: Int = 2, + kMax: Int = -1, + step: Int = 1 + ): HillEstimator.HillPlotResult = + HillEstimator.hillPlot(vec, kMin, kMax, step) + end extension + +end HillEstimatorExtensions diff --git a/vecxt_re/src/IndexPerPeriod.scala b/vecxt_re/src/IndexPerPeriod.scala new file mode 100644 index 00000000..2e1e2c9f --- /dev/null +++ b/vecxt_re/src/IndexPerPeriod.scala @@ -0,0 +1,136 @@ +package vecxt_re + +import vecxt.BoundsCheck.DoBoundsCheck.yes +import vecxt.all.* + +/** Aims to provide a (very) simple index mapping for period-based models. + * + * Each period is associated with a unique index, which provided from period zero 0 going backwards for some historic + * number of periods. The basic goal of this is to "on-level" some historical dataset, which has labels corresponding + * to the periods here. + * + * This object provides methods to: + * - Retrieve the index for a given period. + * - Retrieve the cumulative index which will "on level" some historical number, from it's "historical period" to the + * "current period" + * + * @param indices + * Array of indices where indices(0) is the current period (period 0) and indices(n) is n periods back. Each index + * typically represents a rate change factor for that period (e.g., 1.05 for 5% increase). + */ +case class IndexPerPeriod(indices: Array[Double]): + + /** Precomputed cumulative factors: cumulativeFactorsAll(i) = product of indices(0) to indices(i-1) */ + private lazy val cumulativeFactorsAll: Array[Double] = + // cumulative product via exp(cumsum(log(x))) + // Prepend 1.0 to get array where result(0) = 1.0, result(1) = indices(0), result(2) = indices(0)*indices(1), etc. 
+ val cumProd = indices.log + cumProd.`cumsum!` + cumProd.`exp!` + // Prepend 1.0 for period 0 (current period needs no adjustment) + Array.tabulate(indices.length + 1)(i => if i == 0 then 1.0 else cumProd(i - 1)) + end cumulativeFactorsAll + + /** Number of periods available in the index */ + inline def numPeriods: Int = indices.length + + /** Get the index value for a specific period. + * + * @param period + * The period number (0 = current, 1 = one period back, etc.) + * @return + * The index value for that period + * @throws IndexOutOfBoundsException + * if period is outside the available range + */ + inline def indexAt(period: Int): Double = indices(period) + + /** Calculate the cumulative on-leveling factor from a historical period to the current period. + * + * This multiplies all indices from period 0 up to (but not including) the specified historical period. The result is + * the factor needed to bring a value from the historical period to current levels. + * + * For example, if you have rate changes of 5% (1.05) each year for 3 years: + * - indices = Array(1.05, 1.05, 1.05) + * - cumulativeToCurrentFrom(0) = 1.0 (already current) + * - cumulativeToCurrentFrom(1) = 1.05 (one period back, need to apply current period's change) + * - cumulativeToCurrentFrom(2) = 1.05 * 1.05 = 1.1025 + * - cumulativeToCurrentFrom(3) = 1.05 * 1.05 * 1.05 = 1.157625 + * + * @param fromPeriod + * The historical period number (0 = current, positive = periods back) + * @return + * The cumulative factor to on-level from that period to current + */ + inline def cumulativeToCurrentFrom(fromPeriod: Int): Double = + if fromPeriod <= 0 then 1.0 + else if fromPeriod >= cumulativeFactorsAll.length then cumulativeFactorsAll.last + else cumulativeFactorsAll(fromPeriod) + end cumulativeToCurrentFrom + + /** Calculate cumulative on-leveling factors for all periods up to a given period. 
+ * + * @param upToPeriod + * The maximum period to calculate (exclusive) + * @return + * Array where result(i) is the cumulative factor from period i to current + */ + inline def cumulativeFactors(upToPeriod: Int): Array[Double] = + val n = math.min(upToPeriod, cumulativeFactorsAll.length) + if n == cumulativeFactorsAll.length then cumulativeFactorsAll.clone() + else Array.tabulate(n)(i => cumulativeFactorsAll(i)) + end if + end cumulativeFactors + + /** Apply on-leveling to an array of values, given their corresponding period labels. + * + * @param values + * The historical values to on-level + * @param periods + * The period label for each value (same length as values) + * @return + * Array of on-leveled values + */ + inline def onLevel(values: Array[Double], periods: Array[Int]): Array[Double] = + require(values.length == periods.length, "values and periods must have the same length") + // Map periods to cumulative factors, clamping to valid range + val factors: Array[Double] = Array.tabulate(periods.length) { i => + val p = periods(i) + if p <= 0 then 1.0 + else if p >= cumulativeFactorsAll.length then cumulativeFactorsAll.last + else cumulativeFactorsAll(p) + end if + } + values * (factors: Array[Double]) + end onLevel + +end IndexPerPeriod + +object IndexPerPeriod: + + /** Create an IndexPerPeriod from an array of rate changes (as percentages). + * + * @param rateChanges + * Array of rate changes where each value is the percentage change. e.g., 5.0 means a 5% increase, -3.0 means a 3% + * decrease. + * @return + * IndexPerPeriod with the rate changes converted to factors + */ + inline def fromRateChanges(rateChanges: Array[Double]): IndexPerPeriod = + IndexPerPeriod((rateChanges / 100.0) + 1.0) + end fromRateChanges + + /** Create an IndexPerPeriod with a constant rate change for all periods. 
+ * + * @param numPeriods + * Number of historical periods + * @param factor + * The constant factor for each period (e.g., 1.05 for 5% per period) + * @return + * IndexPerPeriod with constant factors + */ + inline def constant(numPeriods: Int, factor: Double): IndexPerPeriod = + IndexPerPeriod(Array.fill(numPeriods)(factor)) + end constant + +end IndexPerPeriod diff --git a/vecxtensions/src/Layer.scala b/vecxt_re/src/Layer.scala similarity index 90% rename from vecxtensions/src/Layer.scala rename to vecxt_re/src/Layer.scala index 092235cb..7c24cc78 100644 --- a/vecxtensions/src/Layer.scala +++ b/vecxt_re/src/Layer.scala @@ -1,4 +1,4 @@ -package vecxt.reinsurance +package vecxt_re object Layer: inline def apply(limit: Double, ret: Double): Layer = @@ -56,6 +56,15 @@ case class Layer( lazy val brokerageUnitString = brokerageUnit.map(_.toString) lazy val occLayer = Sublayer(occLimit, occRetention, LossCalc.Occ, occType) lazy val aggLayer = Sublayer(aggLimit, aggRetention, LossCalc.Agg, aggType) + lazy val autoName: String = + occLimit match + case Some(occLim) => s"$occLim xs ${occRetention.getOrElse(0.0)}" + case None => + aggLimit match + case Some(aggLim) => s"$aggLim xs ${aggRetention.getOrElse(0.0)} agg" + case None => "Unlimited Layer" + + lazy val firstLimit = occLimit.orElse(aggLimit).getOrElse(Double.PositiveInfinity) /** The smallest claim which exhausts the first limit of this layer */ lazy val cap = occLimit match diff --git a/vecxt_re/src/LossCalc.scala b/vecxt_re/src/LossCalc.scala new file mode 100644 index 00000000..7bab06ce --- /dev/null +++ b/vecxt_re/src/LossCalc.scala @@ -0,0 +1,17 @@ +package vecxt_re + +enum LossCalc: + case Agg, Occ +end LossCalc + +enum ReportDenominator: + case FirstLimit + case AggLimit + case Custom(denominator: Double) + def fromlayer(layer: Layer) = + this match + case FirstLimit => layer.firstLimit + case AggLimit => layer.aggLimit.getOrElse(Double.PositiveInfinity) + case Custom(denominator) => denominator + +end 
ReportDenominator diff --git a/vecxt_re/src/PickandsEstimator.scala b/vecxt_re/src/PickandsEstimator.scala new file mode 100644 index 00000000..5d62b1a9 --- /dev/null +++ b/vecxt_re/src/PickandsEstimator.scala @@ -0,0 +1,257 @@ +package vecxt_re + +/** Pickands estimator for extreme value index (tail index) estimation. + * + * It is unlikely to be useful, given the amount of data typically available in reinsurance. + * + * The Pickands estimator is a robust, non-parametric estimator for the extreme value index (EVI) of a distribution. + * Unlike the Hill estimator which assumes a Pareto-type tail, the Pickands estimator works for all three domains of + * attraction (Fréchet, Gumbel, Weibull). + * + * For heavy-tailed distributions (Fréchet domain), the EVI γ > 0 corresponds to a Pareto tail index α = 1/γ. + * + * The estimator uses order statistics at positions n-k, n-2k, n-4k: γ̂ = (1/ln2) * ln((X₍ₙ₋ₖ₎ - X₍ₙ₋₂ₖ₎) / (X₍ₙ₋₂ₖ₎ - + * X₍ₙ₋₄ₖ₎)) + * + * Properties: + * - More robust to model misspecification than Hill + * - Higher variance than Hill for pure Pareto data + * - Works for all extreme value distributions, not just Pareto + * - Consistent and asymptotically normal + */ +object PickandsEstimator: + + private val ln2 = math.log(2.0) + + /** Computes the Pickands estimator for the extreme value index. 
+ * + * @param data + * The data array (will be sorted internally) + * @param k + * The tuning parameter (must satisfy 4k < n) + * @return + * The estimated extreme value index γ (for Pareto, α = 1/γ) + * @throws IllegalArgumentException + * if k is out of valid range or data is too small + */ + def apply(data: Array[Double], k: Int): Double = + require(data.length >= 5, "Data must have at least 5 observations") + require(k >= 1, s"k must be at least 1, got $k") + require(4 * k < data.length, s"4*k must be less than n=${data.length}, got 4*$k=${4 * k}") + + val sorted = data.clone() + java.util.Arrays.sort(sorted) + + val n = sorted.length + + // Order statistics (using 1-based indexing convention, converted to 0-based) + // X_(n-k+1), X_(n-2k+1), X_(n-4k+1) in 1-based + // = sorted(n-k), sorted(n-2k), sorted(n-4k) in 0-based + val x_nk = sorted(n - k) + val x_n2k = sorted(n - 2 * k) + val x_n4k = sorted(n - 4 * k) + + val numerator = x_nk - x_n2k + val denominator = x_n2k - x_n4k + + require(denominator > 0, "Denominator (X_(n-2k) - X_(n-4k)) must be positive") + require(numerator > 0, "Numerator (X_(n-k) - X_(n-2k)) must be positive") + + val ratio = numerator / denominator + math.log(ratio) / ln2 + end apply + + /** Computes the Pareto tail index α from the Pickands estimate. + * + * For heavy-tailed distributions in the Fréchet domain, γ > 0 and α = 1/γ. + * + * @param data + * The data array + * @param k + * The tuning parameter + * @return + * The estimated Pareto tail index α = 1/γ + */ + def tailIndex(data: Array[Double], k: Int): Double = + val gamma = apply(data, k) + require(gamma > 0, s"Pickands estimate γ=$gamma is not positive; data may not be heavy-tailed") + 1.0 / gamma + end tailIndex + + /** Result of a Pickands plot computation. 
+ * + * @param kValues + * Array of k values used + * @param gammaEstimates + * Corresponding EVI estimates γ̂(k) + * @param alphaEstimates + * Corresponding tail index estimates α̂(k) = 1/γ̂(k) (NaN if γ ≤ 0) + */ + case class PickandsPlotResult( + kValues: Array[Int], + gammaEstimates: Array[Double], + alphaEstimates: Array[Double] + ): + /** Find a stable region by looking for low variance segments in gamma estimates. + * + * @param windowSize + * Size of the sliding window + * @param threshold + * Maximum coefficient of variation to consider "stable" + * @return + * Optional tuple of (start k, end k, mean gamma, mean alpha) + */ + def findStableRegion(windowSize: Int = 5, threshold: Double = 0.2): Option[(Int, Int, Double, Double)] = + if kValues.length < windowSize then None + else + var bestVariance = Double.MaxValue + var bestStart = 0 + var bestMeanGamma = 0.0 + + var i = 0 + while i <= gammaEstimates.length - windowSize do + var sum = 0.0 + var sumSq = 0.0 + var validCount = 0 + var j = 0 + while j < windowSize do + val v = gammaEstimates(i + j) + if !v.isNaN && v.isFinite then + sum += v + sumSq += v * v + validCount += 1 + end if + j += 1 + end while + + if validCount == windowSize then + val mean = sum / windowSize + val variance = sumSq / windowSize - mean * mean + val cv = if mean != 0 then math.sqrt(math.abs(variance)) / math.abs(mean) else Double.MaxValue + + if cv < bestVariance && cv < threshold then + bestVariance = cv + bestStart = i + bestMeanGamma = mean + end if + end if + i += 1 + end while + + if bestVariance < threshold && bestMeanGamma > 0 then + Some((kValues(bestStart), kValues(bestStart + windowSize - 1), bestMeanGamma, 1.0 / bestMeanGamma)) + else None + end if + end if + end findStableRegion + end PickandsPlotResult + + /** Computes a Pickands plot: EVI estimates for a range of k values. 
+ * + * @param data + * The data array + * @param kMin + * Minimum k value (default: 1) + * @param kMax + * Maximum k value (default: (n-1)/4) + * @param step + * Step size for k values (default: 1) + * @return + * PickandsPlotResult containing k values and estimates + */ + def pickandsPlot( + data: Array[Double], + kMin: Int = 1, + kMax: Int = -1, + step: Int = 1 + ): PickandsPlotResult = + require(data.length >= 5, "Data must have at least 5 observations") + + val sorted = data.clone() + java.util.Arrays.sort(sorted) + + val n = sorted.length + // Maximum valid k is floor((n-1)/4) since we need 4k < n + val maxValidK = (n - 1) / 4 + val actualKMax = if kMax < 0 then maxValidK else math.min(kMax, maxValidK) + val actualKMin = math.max(kMin, 1) + + require( + actualKMin <= actualKMax, + s"kMin ($actualKMin) must be <= kMax ($actualKMax), n=$n allows k up to $maxValidK" + ) + + // Calculate number of k values + val numK = (actualKMax - actualKMin) / step + 1 + val kValues = new Array[Int](numK) + val gammaEstimates = new Array[Double](numK) + val alphaEstimates = new Array[Double](numK) + + var idx = 0 + var k = actualKMin + while k <= actualKMax do + val x_nk = sorted(n - k) + val x_n2k = sorted(n - 2 * k) + val x_n4k = sorted(n - 4 * k) + + val numerator = x_nk - x_n2k + val denominator = x_n2k - x_n4k + + val gamma = + if denominator > 0 && numerator > 0 then math.log(numerator / denominator) / ln2 + else Double.NaN + + kValues(idx) = k + gammaEstimates(idx) = gamma + alphaEstimates(idx) = if gamma > 0 then 1.0 / gamma else Double.NaN + idx += 1 + k += step + end while + + PickandsPlotResult(kValues, gammaEstimates, alphaEstimates) + end pickandsPlot + +end PickandsEstimator + +object PickandsEstimatorExtensions: + + extension (vec: Array[Double]) + + /** Computes the Pickands estimator for the extreme value index γ. 
+ * + * @param k + * The tuning parameter (must satisfy 4k < n) + * @return + * The estimated extreme value index γ + */ + inline def pickandsEstimator(k: Int): Double = PickandsEstimator(vec, k) + + /** Computes the Pareto tail index α using the Pickands estimator. + * + * @param k + * The tuning parameter + * @return + * The estimated tail index α = 1/γ + */ + inline def pickandsTailIndex(k: Int): Double = PickandsEstimator.tailIndex(vec, k) + + /** Computes a Pickands plot for this data. + * + * @param kMin + * Minimum k value (default: 1) + * @param kMax + * Maximum k value (default: (n-1)/4) + * @param step + * Step size for k values (default: 1) + * @return + * PickandsPlotResult with k values and estimates + */ + inline def pickandsPlot( + kMin: Int = 1, + kMax: Int = -1, + step: Int = 1 + ): PickandsEstimator.PickandsPlotResult = + PickandsEstimator.pickandsPlot(vec, kMin, kMax, step) + end extension + +end PickandsEstimatorExtensions diff --git a/vecxt_re/src/ReReporting.scala b/vecxt_re/src/ReReporting.scala new file mode 100644 index 00000000..d5bfbf22 --- /dev/null +++ b/vecxt_re/src/ReReporting.scala @@ -0,0 +1,46 @@ +package vecxt_re + +import vecxt.all.* + +object ReReporting: + extension (calcd: (layer: Layer, cededToLayer: Array[Double])) + + inline def attachmentProbability(numIterations: Int, years: Array[Int]): Double = + (groupSum(years, calcd.cededToLayer, numIterations) > 0).trues / numIterations.toDouble + + inline def exhaustionProbability(numIterations: Int, years: Array[Int]): Double = + val exhaust = calcd.layer.aggLimit.getOrElse(Double.PositiveInfinity) - 0.01 + (groupSum(years, calcd.cededToLayer, numIterations) > exhaust).trues / numIterations.toDouble + end exhaustionProbability + + inline def expectedLoss(numIterations: Int): Double = calcd.cededToLayer.sumSIMD / numIterations + + inline def std(numIterations: Int, years: Array[Int]): Double = + groupSum(years, calcd.cededToLayer, numIterations).stdDev + + /** Efficient single-pass 
loss report computation. + * + * This method computes all loss metrics (EL, std, attachment probability, exhaustion probability) in a single pass + * through the data, using Welford's online algorithm for numerically stable variance computation. + * + * This is significantly more efficient than calling the individual metric methods separately, as it avoids + * multiple iterations through the grouped sums. + * + * @param numIterations + * Number of simulation iterations + * @param years + * Sorted array of 1-based iteration indices + * @param limit + * Report denominator for normalizing EL and std + * @return + * Named tuple with (name, limit, el, stdDev, attachProb, exhaustProb) + */ + inline def lossReport( + numIterations: Int, + years: Array[Int], + limit: ReportDenominator + ): (name: String, limit: Double, el: Double, stdDev: Double, attachProb: Double, exhaustProb: Double) = + PlatformReporting.lossReportFast(calcd, numIterations, years, limit) + + end extension +end ReReporting diff --git a/vecxtensions/src/Retention_Limit.scala b/vecxt_re/src/Retention_Limit.scala similarity index 90% rename from vecxtensions/src/Retention_Limit.scala rename to vecxt_re/src/Retention_Limit.scala index bbdac5db..c00d92a6 100644 --- a/vecxtensions/src/Retention_Limit.scala +++ b/vecxt_re/src/Retention_Limit.scala @@ -1,6 +1,4 @@ -package vecxt.reinsurance - -import vecxt.reinsurance.Retentions.Retention +package vecxt_re object Retentions: opaque type Retention = Double @@ -19,6 +17,7 @@ object Retentions: end Retentions object Limits: + import Retentions.Retention opaque type Limit = Double object Limit: diff --git a/vecxt_re/src/SplitScenario.scala b/vecxt_re/src/SplitScenario.scala new file mode 100644 index 00000000..04abcf8c --- /dev/null +++ b/vecxt_re/src/SplitScenario.scala @@ -0,0 +1,24 @@ +package vecxt_re + +import vecxt.BoundsCheck.BoundsCheck +import vecxt_re.SplitLosses.splitAmntFast + +object SplitScenario: + extension (tower: Tower) + inline def 
splitScenarioAmounts(scenario: Scenarr)(using + inline bc: BoundsCheck + ): ( + ceded: Array[Double], + retained: Array[Double], + splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])] + ) = + val tmp = + if bc then scenario.sorted + else scenario + + tower.splitAmntFast( + tmp.iterations, + tmp.amounts + ) + end extension +end SplitScenario diff --git a/vecxtensions/src/Tower.scala b/vecxt_re/src/Tower.scala similarity index 96% rename from vecxtensions/src/Tower.scala rename to vecxt_re/src/Tower.scala index 54b5b0af..ca184e37 100644 --- a/vecxtensions/src/Tower.scala +++ b/vecxt_re/src/Tower.scala @@ -1,10 +1,5 @@ -package vecxt.reinsurance +package vecxt_re -import java.util.UUID -import vecxtensions.{groupCumSum, groupDiff} -import vecxt.reinsurance.Limits.Limit -import vecxt.reinsurance.Retentions.Retention -import vecxt.reinsurance.rpt.* import vecxt.all.* object Tower: diff --git a/vecxtensions/src/groupCumSum.scala b/vecxt_re/src/groupCumSum.scala similarity index 94% rename from vecxtensions/src/groupCumSum.scala rename to vecxt_re/src/groupCumSum.scala index 7a1bac38..341c0209 100644 --- a/vecxtensions/src/groupCumSum.scala +++ b/vecxt_re/src/groupCumSum.scala @@ -1,6 +1,6 @@ -package vecxtensions +package vecxt_re -import vecxt.reinsurance.Layer +import vecxt_re.Layer def aggregateByItr( years: Array[Int], diff --git a/vecxtensions/src/groupDiff.scala b/vecxt_re/src/groupDiff.scala similarity index 98% rename from vecxtensions/src/groupDiff.scala rename to vecxt_re/src/groupDiff.scala index 3e474e01..b86b4bbe 100644 --- a/vecxtensions/src/groupDiff.scala +++ b/vecxt_re/src/groupDiff.scala @@ -1,4 +1,4 @@ -package vecxtensions +package vecxt_re /** - You have a sorted groups array. * - Each group has a small number of values. 
diff --git a/vecxt_re/src/groupSums.scala b/vecxt_re/src/groupSums.scala new file mode 100644 index 00000000..08956a36 --- /dev/null +++ b/vecxt_re/src/groupSums.scala @@ -0,0 +1,154 @@ +package vecxt_re + +/** - You have a sorted groups array. + * - Each group has a small number of values. + * - You're doing per-group cumulative sums. + * - Returns cumulative sums for each element within its group + */ +inline def groupCumSum(groups: Array[Int], values: Array[Double]): Array[Double] = + + val n = groups.length + if n == 0 then Array.empty[Double] + else + val result = new Array[Double](n) + + var i = 0 + while i < n do + val g = groups(i) + var cumSum = 0.0 + + // Process block of same group, computing cumulative sum + while i < n && groups(i) == g do + cumSum += values(i) + result(i) = cumSum + i += 1 + end while + end while + + result + end if +end groupCumSum + +/** Compute the sum of values for each group identified by an integer index. + * + * The function expects `groups` to be sorted in non-decreasing order and that `groups` and `values` have the same + * length. Group indices are 1-based and must be in the range 1..nitr. The returned array has length `nitr`; element at + * position `i` (0-based) contains the sum of values for group index `i+1`. Groups with no entries produce a zero in + * the corresponding slot. + * + * Preconditions: + * - groups.length == values.length + * - groups is sorted (runs of identical indices are contiguous) + * - every g in groups satisfies 1 <= g <= nitr + * + * Complexity: O(groups.length) time, O(nitr) extra space. + * + * This method is unsafe and performs no checks that these conditions are satisfied. It is the responsibility of the + * caller. 
+ *
+ * @param groups
+ * sorted array of 1-based group indices (length L)
+ * @param values
+ * array of values corresponding to each group index (length L)
+ * @param nitr
+ * number of groups (size of the returned array)
+ * @return
+ * an Array[Double] of length `nitr` where each element is the sum for that group
+ * @throws ArrayIndexOutOfBoundsException
+ * if a group index is outside 1..nitr
+ * @throws IllegalArgumentException
+ * if groups.length != values.length
+ *
+ * Example: groups = Array(1, 1, 3), values = Array(1.0, 2.0, 4.0), nitr = 4 result = Array(3.0, 0.0, 4.0, 0.0)
+ */
+inline def groupSum(groups: Array[Int], values: Array[Double], nitr: Int): Array[Double] =
+ val result = Array.fill(nitr)(0.0)
+ val l = groups.length
+ var i = 0
+ while i < l do
+ val g = groups(i)
+ var groupSum = 0.0
+ // Process block of same group, accumulating the group's total sum
+ while i < l && groups(i) == g do
+ groupSum += values(i)
+ i += 1
+ end while
+ result(g - 1) = groupSum
+ end while
+
+ result
+end groupSum
+
+/** - count by group index
+ * - Each group has a small number of values.
+ * - The groups are keyed by their index.
+ * - assumes groups are already sorted
+ */
+inline def groupCount(groups: Array[Int], nitr: Int): Array[Int] =
+ val result = Array.fill(nitr)(0)
+ val l = groups.length
+ var i = 0
+ while i < l do
+ val g = groups(i)
+ var groupSum = 0
+ // Process block of same group, counting its elements
+ while i < l && groups(i) == g do
+ groupSum += 1
+ i += 1
+ end while
+ result(g - 1) = groupSum
+ end while
+
+ result
+end groupCount
+
+/** Compute the maximum of values for each group identified by an integer index.
+ *
+ * The function expects `groups` to be sorted in non-decreasing order and that `groups` and `values` have the same
+ * length. Group indices are 1-based and must be in the range 1..nitr. The returned array has length `nitr`; element at
+ * position `i` (0-based) contains the max of values for group index `i+1`. 
Groups with no entries produce + * Double.NegativeInfinity in the corresponding slot. + * + * Preconditions: + * - groups.length == values.length + * - groups is sorted (runs of identical indices are contiguous) + * - every g in groups satisfies 1 <= g <= nitr + * + * Complexity: O(groups.length) time, O(nitr) extra space. + * + * This method is unsafe and performs no checks that these conditions are satisfied. It is the responsibility of the + * caller. + * + * @param groups + * sorted array of 1-based group indices (length L) + * @param values + * array of values corresponding to each group index (length L) + * @param nitr + * number of groups (size of the returned array) + * @return + * an Array[Double] of length `nitr` where each element is the max for that group + * @throws ArrayIndexOutOfBoundsException + * if a group index is outside 1..nitr + * @throws IllegalArgumentException + * if groups.length != values.length + * + * Example: groups = Array(1, 1, 3), values = Array(1.0, 2.0, 4.0), nitr = 4 result = Array(2.0, -Inf, 4.0, -Inf) + */ +inline def groupMax(groups: Array[Int], values: Array[Double], nitr: Int): Array[Double] = + val result = Array.fill(nitr)(Double.NegativeInfinity) + val l = groups.length + var i = 0 + while i < l do + val g = groups(i) + var groupMax = Double.NegativeInfinity + // Process block of same group, computing max + while i < l && groups(i) == g do + if values(i) > groupMax then groupMax = values(i) + end if + i += 1 + end while + result(g - 1) = groupMax + end while + + result +end groupMax diff --git a/vecxt_re/src/scenario.scala b/vecxt_re/src/scenario.scala new file mode 100644 index 00000000..915655a7 --- /dev/null +++ b/vecxt_re/src/scenario.scala @@ -0,0 +1,132 @@ +package vecxt_re + +import java.time.LocalDate +import java.time.temporal.ChronoUnit + +import vecxt.all.* + +case class Event(eventId: Long = scala.util.Random.nextLong(), iteration: Int = 0, day: Int = 0, loss: Double = 0): + def multiplyBy(scale: Double): 
Event = this.copy(loss = loss * scale) +end Event + +object Event: + inline def random(maxAmount: Double = 20, maxIter: Int = 10) = + Event(iteration = scala.util.Random.nextInt(maxIter), loss = scala.util.Random.nextDouble() * maxAmount) + inline def apply(iter: Int, amount: Double): Event = Event( + iteration = iter, + loss = amount + ) +end Event + +// case class IterationFrequency(itr: Int, freq: Int) + +// case class IterationAmount(itr: Int, amnt: Double) + +case class Scenario( + events: IndexedSeq[Event] = Vector(), + numberIterations: Int = 0, + threshold: Double = 0d, + day1: LocalDate = LocalDate.of(2019, 1, 1), + name: String = "", + id: Long = scala.util.Random.nextLong() +): + + lazy val eventsSorted: Array[Event] = Array.from(events.sortBy(event => (event.iteration, event.day))) + + lazy val freq: Array[Int] = groupCount(iterations, numberIterations) + + lazy val meanFreq: Double = freq.mean + + lazy val agg: Array[Double] = groupSum(iterations, amounts, numberIterations) + + lazy val claimDates: Array[LocalDate] = eventsSorted.map(d => ChronoUnit.DAYS.addTo(this.day1, d.day)) + + /** Interpretation: + * + * - Excess variance over Poisson, scaled by m^2: Var(X) = E[X] for Poisson, so (v - m) is the extra variance; + * dividing by m^2 scales it. + * - Method-of-moments estimate of 1/k for Negative Binomial: Var(X) = μ + μ^2 / k ⇒ (Var(X) - μ) / μ^2 = 1 / k. + * Thus, clusterCoeff estimates 1 / k. Smaller k (larger clusterCoeff) ⇒ more clustering/overdispersion. + * - Relation to Index of Dispersion (VMR = v / m): clusterCoeff = (v - m) / m^2 = (VMR - 1) / m. It is a + * mean-scaled excess dispersion; under NB, it targets 1 / k. + */ + lazy val clusterCoeff: Double = + val (m, v) = freq.meanAndVariance(VarianceMode.Sample) + (v - m) / Math.pow(m, 2) + end clusterCoeff + + /** Computes the variance-to-mean ratio (dispersion) based on the frequency data. 
This metric is calculated by
+ * dividing the variance by the mean, using values from `freq.meanAndVariance`.
+ *
+ * 1 = poisson distributed > 1 => overdispersed... but careful with sample size.
+ */
+ lazy val varianceMeanRatio =
+ val (m, v) = freq.meanAndVariance(VarianceMode.Sample)
+ v / m
+ end varianceMeanRatio
+
+ lazy val hasOccurence: Boolean = events.nonEmpty
+
+ lazy val numSeasons: Int = math.ceil(days.maxSIMD / 365).toInt // doesn't deal so well with leap years.
+
+ lazy val meanLoss: Double = amounts.sum / numberIterations
+
+ lazy val days: Array[Int] = eventsSorted.map(_.day)
+
+ lazy val iterations: Array[Int] = eventsSorted.map(_.iteration)
+
+ lazy val amounts: Array[Double] = eventsSorted.map(_.loss)
+
+ lazy val itrDayAmount: (itr: Array[Int], days: Array[Int], amounts: Array[Double]) =
+ (itr = iterations, days = days, amounts = amounts)
+
+ lazy val period: (firstLoss: LocalDate, lastLoss: LocalDate) =
+ (day1.plusDays((days.minSIMD - 1).toLong), day1.plusDays((days.maxSIMD - 1).toLong))
+
+end Scenario
+
+extension (scenario: Scenario)
+ inline def scaleAmntBy(scale: Double): Scenario = Scenario(
+ scenario.eventsSorted.map(_.multiplyBy(scale)),
+ scenario.numberIterations,
+ scenario.threshold * scale,
+ scenario.day1,
+ scenario.name
+ )
+
+ def shiftDay1To(date: LocalDate): Scenario =
+ scenario.period.firstLoss.plusYears(1).minusDays(1)
+// val ndays = ChronoUnit.DAYS.between( period._1, seasonEnd) + 1 Let's just assume this is 365 ... there is a theoretical problem with air assuming 365 days. Leap years anyone? 
+ val betweenStartDates = ChronoUnit.DAYS.between(scenario.day1, date).toInt + val newEvents = + scenario.eventsSorted.map(x => + Event(x.eventId, x.iteration, Math.floorMod(x.day - betweenStartDates - 1, 365) + 1, x.loss) + ) + Scenario(newEvents, scenario.numberIterations, scenario.threshold, date, scenario.name) + end shiftDay1To + + inline def removeClaimsAfter(date: LocalDate): Scenario = + val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) <= 0) + Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + end removeClaimsAfter + + inline def removeClaimsBefore(date: LocalDate): Scenario = + val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) >= 0) + Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + end removeClaimsBefore + + inline def applyThreshold(newThresh: Double): Scenario = + if !(newThresh > scenario.threshold) then + throw new Exception( + "Threshold may only be increased. 
Attempt to change it from " + scenario.threshold + " to " + newThresh + " is illegal" + ) + end if + Scenario( + scenario.eventsSorted.filter(_.loss > newThresh), + scenario.numberIterations, + newThresh, + scenario.day1, + scenario.name + ) + end applyThreshold +end extension diff --git a/vecxt_re/src/scenarr.scala b/vecxt_re/src/scenarr.scala new file mode 100644 index 00000000..53633654 --- /dev/null +++ b/vecxt_re/src/scenarr.scala @@ -0,0 +1,409 @@ +package vecxt_re + +import java.time.LocalDate +import java.time.Month +import java.time.temporal.ChronoUnit + +import scala.collection.mutable + +import vecxt.all.* + +import cats.kernel.Monoid + +case class Scenarr( + iterations: Array[Int], + days: Array[Int], + amounts: Array[Double], + ids: Array[Long], + numberIterations: Int, + threshold: Double = 0d, + day1: LocalDate = LocalDate.of(2019, 1, 1), + name: String = "", + id: Long = scala.util.Random.nextLong(), + isSorted: Boolean = false +): + assert( + iterations.length == days.length && days.length == amounts.length && amounts.length == ids.length, + s"Array lengths must match: iterations=${iterations.length}, days=${days.length}, amounts=${amounts.length}, ids=${ids.length}" + ) + + lazy val freq: Array[Int] = + assert(isSorted, "Scenario must be sorted to compute frequency") + groupCount(iterations, numberIterations) + end freq + + lazy val meanFreq: Double = + freq.mean + + lazy val agg: Array[Double] = + assert(isSorted, "Scenario must be sorted to compute aggregate amounts") + groupSum(iterations, amounts, numberIterations) + end agg + + lazy val aep = agg.sorted(using Ordering[Double].reverse).zipWithIndex.map { case (amt, idx) => + (returnPeriod = numberIterations.toDouble / (idx + 1).toDouble, amount = amt) + } + + lazy val oep = + groupMax(iterations, amounts, numberIterations).sorted(using Ordering[Double].reverse).zipWithIndex.map { + case (amt, idx) => + (returnPeriod = numberIterations.toDouble / (idx + 1).toDouble, amount = amt) + } + + lazy 
val claimDates: Array[LocalDate] = (days - 1).map(d => ChronoUnit.DAYS.addTo(this.day1, d)) + + lazy val monthYear: Array[(month: Month, year: Int)] = claimDates.map(d => (d.getMonth, d.getYear)) + + /** Interpretation: + * + * - Excess variance over Poisson, scaled by m^2: Var(X) = E[X] for Poisson, so (v - m) is the extra variance; + * dividing by m^2 scales it. + * - Method-of-moments estimate of 1/k for Negative Binomial: Var(X) = μ + μ^2 / k ⇒ (Var(X) - μ) / μ^2 = 1 / k. + * Thus, clusterCoeff estimates 1 / k. Smaller k (larger clusterCoeff) ⇒ more clustering/overdispersion. + * - Relation to Index of Dispersion (VMR = v / m): clusterCoeff = (v - m) / m^2 = (VMR - 1) / m. It is a + * mean-scaled excess dispersion; under NB, it targets 1 / k. + */ + lazy val clusterCoeff: Double = + val (m, v) = freq.meanAndVariance(VarianceMode.Sample) + (v - m) / Math.pow(m, 2) + end clusterCoeff + + /** Computes the variance-to-mean ratio (dispersion) based on the frequency data. This metric is calculated by + * dividing the variance by the mean, using values from `freq.meanAndVariance`. + * + * 1 = poisson distributed > 1 => overdispersed... but careful with sample size. + */ + lazy val varianceMeanRatio = + val (m, v) = freq.meanAndVariance(VarianceMode.Sample) + v / m + end varianceMeanRatio + + lazy val hasOccurence: Boolean = amounts.nonEmpty + + lazy val numSeasons: Int = math.ceil(days.maxSIMD.toDouble / 365).toInt // doesnt deal so well with leap years. + + lazy val meanLoss: Double = amounts.sum / numberIterations + + lazy val itrDayAmount: Array[(itr: Int, day: Int, amnt: Double, id: Long)] = + iterations.zip(days).zip(amounts).zip(ids).map { case (((i, d), a), id) => (itr = i, day = d, amnt = a, id = id) } + + lazy val period: (firstLoss: LocalDate, lastLoss: LocalDate) = + (day1.plusDays((days.minSIMD - 1).toLong), day1.plusDays((days.maxSIMD - 1).toLong)) + +end Scenarr + +object Scenarr: + /** The empty Scenarr - identity element for the monoid. 
Combining any scenario with empty returns the original + * scenario unchanged. + */ + val empty: Scenarr = new Scenarr( + Array.emptyIntArray, + Array.emptyIntArray, + Array.emptyDoubleArray, + Array.emptyLongArray, + numberIterations = 0, + threshold = 0d, + day1 = LocalDate.of(2019, 1, 1), + name = "empty", + id = 0L, + isSorted = true + ) + + /** Generate a small random Scenarr for experimentation. + * + * @param numClaims + * number of claim events to generate (default 15) + * @param seed + * optional random seed for reproducibility + * @return + * a small Scenarr with 10 iterations + */ + def sample(numClaims: Int = 15, seed: Option[Long] = None): Scenarr = + val rng = seed.fold(scala.util.Random())(s => scala.util.Random(s)) + val n = numClaims + + val iterations = Array.fill(n)(rng.nextInt(10) + 1) + val days = Array.fill(n)(rng.nextInt(365) + 1) + val amounts = Array.fill(n)(rng.nextDouble() * 1000.0 + 100.0) // 100-1100 + val ids = Array.fill(n)(rng.nextLong()) + + new Scenarr( + iterations = iterations, + days = days, + amounts = amounts, + ids = ids, + numberIterations = 10, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "sample", + id = rng.nextLong(), + isSorted = false + ) + end sample + + /** Combine two Scenarr instances following monoid laws. 
+ * + * The combination semantics are: + * - Events with matching IDs have their amounts aggregated (with validation that iteration/day match) + * - Thresholds are summed, and claims below the new threshold are filtered out + * - Day1 is the earlier of the two; the later scenario's days are adjusted to align calendar dates + * - Number of iterations must match (unless one is empty) + * - Result is always sorted by (iteration, day) + * + * @throws IllegalArgumentException + * if events with same ID have different iteration/day + * @throws IllegalArgumentException + * if numberIterations don't match (for non-empty scenarios) + */ + def combine(s1: Scenarr, s2: Scenarr): Scenarr = + // Handle empty cases - identity element + if s1.amounts.isEmpty then return s2 + end if + if s2.amounts.isEmpty then return s1 + end if + + // Check iteration count matches for non-empty scenarios + require( + s1.numberIterations == s2.numberIterations, + s"Cannot combine scenarios with different iteration counts: ${s1.numberIterations} vs ${s2.numberIterations}" + ) + + // Determine new day1 (earlier of the two) + val newDay1 = if s1.day1.isBefore(s2.day1) then s1.day1 else s2.day1 + + // Calculate day offsets to align both scenarios to newDay1 + val dayOffset1 = ChronoUnit.DAYS.between(newDay1, s1.day1).toInt + val dayOffset2 = ChronoUnit.DAYS.between(newDay1, s2.day1).toInt + + // Sum thresholds + val newThreshold = s1.threshold + s2.threshold + + // Build a map: id -> (iteration, adjustedDay, totalAmount) + // This aggregates amounts for events with the same ID + val idMap = mutable.HashMap.empty[Long, (Int, Int, Double)] + + // Process s1 events + var i = 0 + while i < s1.ids.length do + val id = s1.ids(i) + val iter = s1.iterations(i) + val day = s1.days(i) + dayOffset1 + val amount = s1.amounts(i) + + idMap.get(id) match + case None => + idMap(id) = (iter, day, amount) + case Some((existingIter, existingDay, existingAmount)) => + require( + existingIter == iter && existingDay == 
day, + s"Event with ID $id has inconsistent iteration/day: ($existingIter, $existingDay) vs ($iter, $day)" + ) + idMap(id) = (iter, day, existingAmount + amount) + end match + i += 1 + end while + + // Process s2 events + i = 0 + while i < s2.ids.length do + val id = s2.ids(i) + val iter = s2.iterations(i) + val day = s2.days(i) + dayOffset2 + val amount = s2.amounts(i) + + idMap.get(id) match + case None => + idMap(id) = (iter, day, amount) + case Some((existingIter, existingDay, existingAmount)) => + require( + existingIter == iter && existingDay == day, + s"Event with ID $id has inconsistent iteration/day: ($existingIter, $existingDay) vs ($iter, $day)" + ) + idMap(id) = (iter, day, existingAmount + amount) + end match + i += 1 + end while + + // Convert to arrays, sorted by (iteration, day), filtering by threshold + val filtered = idMap.iterator.filter(_._2._3 > newThreshold).toArray + val sorted = filtered.sortBy { case (_, (iter, day, _)) => (iter, day) } + + val finalIds = sorted.map(_._1) + val finalIterations = sorted.map(_._2._1) + val finalDays = sorted.map(_._2._2) + val finalAmounts = sorted.map(_._2._3) + + new Scenarr( + finalIterations, + finalDays, + finalAmounts, + finalIds, + s1.numberIterations, + newThreshold, + newDay1, + s"concat: [${s1.name} + ${s2.name}]", + scala.util.Random.nextLong(), + isSorted = true + ) + end combine + + /** Infix operator for combining scenarios */ + extension (s1: Scenarr) def |+|(s2: Scenarr): Scenarr = combine(s1, s2) + end extension + + /** Cats Monoid instance for Scenarr. + * + * This instance requires that all combined scenarios have the same `numberIterations`. The identity element is + * `Scenarr.empty` with `numberIterations = 0`. + * + * Important: This monoid is only valid for scenarios with matching `numberIterations`. Combining scenarios with + * different iteration counts will throw an IllegalArgumentException. 
+ * + * @param numIterations + * The fixed number of iterations for this monoid instance + */ + def monoidForIterations(numIterations: Int): Monoid[Scenarr] = new Monoid[Scenarr]: + def empty: Scenarr = new Scenarr( + Array.emptyIntArray, + Array.emptyIntArray, + Array.emptyDoubleArray, + Array.emptyLongArray, + numberIterations = numIterations, + threshold = 0d, + day1 = LocalDate.of(2019, 1, 1), + name = "empty", + id = 0L, + isSorted = true + ) + def combine(x: Scenarr, y: Scenarr): Scenarr = Scenarr.combine(x, y) + end monoidForIterations + + /** Default Monoid instance for Scenarr. Uses the general `combine` which treats empty scenarios as identity. + */ + given Monoid[Scenarr] with + def empty: Scenarr = Scenarr.empty + def combine(x: Scenarr, y: Scenarr): Scenarr = Scenarr.combine(x, y) + end given + + /** Create a Scenarr with automatically generated random IDs for each event. Use this factory when you don't need to + * specify event IDs explicitly. + */ + def withGeneratedIds( + iterations: Array[Int], + days: Array[Int], + amounts: Array[Double], + numberIterations: Int, + threshold: Double = 0d, + day1: LocalDate = LocalDate.of(2019, 1, 1), + name: String = "", + id: Long = scala.util.Random.nextLong(), + isSorted: Boolean = false + ): Scenarr = + val ids = Array.fill(iterations.length)(scala.util.Random.nextLong()) + new Scenarr(iterations, days, amounts, ids, numberIterations, threshold, day1, name, id, isSorted) + end withGeneratedIds + + extension (scenario: Scenarr) + inline def sorted: Scenarr = + val indicies = scenario.iterations.zipWithIndex + .zip(scenario.days) + .map { case ((iter, idx), day) => + (index = idx, iter = iter, day = day) + } + .sortBy(r => (r.iter, r.day)) + .map(_.index) + + Scenarr( + scenario.iterations.select(indicies), + scenario.days.select(indicies), + scenario.amounts.select(indicies), + scenario.ids.select(indicies), + scenario.numberIterations, + scenario.threshold, + scenario.day1, + scenario.name, + scenario.id, + 
isSorted = true + ) + end sorted + + inline def takeFirstNIterations(i: Int) = + assert(i > 0 && i <= scenario.numberIterations) + val idx = scenario.iterations <= i + import vecxt.BoundsCheck.DoBoundsCheck.yes + Scenarr( + scenario.iterations.mask(idx), + scenario.days.mask(idx), + scenario.amounts.mask(idx), + scenario.ids.mask(idx), + i, + scenario.threshold, + scenario.day1, + scenario.name, + scenario.id, + isSorted = scenario.isSorted + ) + end takeFirstNIterations + + inline def scaleAmntBy(scale: Double): Scenarr = + scenario.copy(amounts = scenario.amounts * scale, ids = scenario.ids, threshold = scenario.threshold * scale) + + inline def iteration(num: Int) = + assert(num > 0 && num <= scenario.numberIterations) + val idx = scenario.iterations =:= num + import vecxt.BoundsCheck.DoBoundsCheck.yes + Scenarr( + scenario.iterations.mask(idx), + scenario.days.mask(idx), + scenario.amounts.mask(idx), + scenario.ids.mask(idx), + scenario.numberIterations, + scenario.threshold, + scenario.day1, + scenario.name, + scenario.id, + isSorted = scenario.isSorted + ) + end iteration + + // def shiftDay1To(date: LocalDate): Scenarr = + // scenario.period.firstLoss.plusYears(1).minusDays(1) + // // val ndays = ChronoUnit.DAYS.between( period._1, seasonEnd) + 1 Let sjust ssume this is 365 ... there is a theoretical problem with air assuming 365 days. Leap years anyone? 
+ // val betweenStartDates = ChronoUnit.DAYS.between(scenario.day1, date).toInt + // val newEvents = + // scenario.eventsSorted.map(x => + // Event(x.eventId, x.iteration, Math.floorMod(x.day - betweenStartDates - 1, 365) + 1, x.loss) + // ) + // Scenario(newEvents, scenario.numberIterations, scenario.threshold, date, scenario.name) + // end shiftDay1To + + // inline def removeClaimsAfter(date: LocalDate): Scenarr = + // val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) <= 0) + // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + // end removeClaimsAfter + + // inline def removeClaimsBefore(date: LocalDate): Scenarr = + // val remaining = scenario.claimDates.zip(scenario.eventsSorted).filter(_._1.compareTo(date) >= 0) + // Scenario(remaining.map(_._2), scenario.numberIterations, scenario.threshold, scenario.day1, scenario.name) + // end removeClaimsBefore + + inline def applyThreshold(newThresh: Double): Scenarr = + if !(newThresh > scenario.threshold) then + throw new Exception( + "Threshold may only be increased. 
Attempt to change it from " + scenario.threshold + " to " + newThresh + " is illegal" + ) + end if + val idx = scenario.amounts > newThresh + Scenarr( + scenario.iterations.mask(idx)(using false), + scenario.days.mask(idx)(using false), + scenario.amounts.mask(idx)(using false), + scenario.ids.mask(idx)(using false), + scenario.numberIterations, + newThresh, + scenario.day1, + scenario.name + ) + end applyThreshold + end extension +end Scenarr diff --git a/vecxt_re/test/src-js/.keep b/vecxt_re/test/src-js/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxt_re/test/src-jvm/.keep b/vecxt_re/test/src-jvm/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxt_re/test/src-jvm/Empirical.test.scala b/vecxt_re/test/src-jvm/Empirical.test.scala new file mode 100644 index 00000000..93b849ad --- /dev/null +++ b/vecxt_re/test/src-jvm/Empirical.test.scala @@ -0,0 +1,101 @@ +package vecxt_re + +import munit.FunSuite + +class EmpiricalTest extends FunSuite: + + test("cdf/inverseCdf are consistent (unweighted)") { + val xs = Array(3.0, 1.0, 2.0, 2.0) + val emp = Empirical.equalWeights(xs) + + assertEqualsDouble(emp.cdf(0.5), 0.0, 1e-12) + assertEqualsDouble(emp.cdf(1.0), 0.25, 1e-12) + assertEqualsDouble(emp.cdf(1.5), 0.25, 1e-12) + assertEqualsDouble(emp.cdf(2.0), 0.75, 1e-12) + assertEqualsDouble(emp.cdf(10.0), 1.0, 1e-12) + + // Quantiles: smallest x with F(x) >= p + assertEqualsDouble(emp.inverseCdf(0.0), 1.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.25), 1.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.2500001), 2.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.75), 2.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(1.0), 3.0, 1e-12) + } + + test("mean/variance match weighted formulas") { + val xs = Array(1.0, 10.0) + val ws = Array(3.0, 1.0) // 75% at 1, 25% at 10 + val emp = Empirical.weighted(xs, ws) + + val mean = 0.75 * 1.0 + 0.25 * 10.0 + val variance = 0.75 * (1.0 - mean) * (1.0 - mean) + 0.25 * (10.0 - mean) * (10.0 - mean) + + 
assertEqualsDouble(emp.mean, mean, 1e-12) + assertEqualsDouble(emp.variance, variance, 1e-12) + + assertEqualsDouble(emp.probabilityOf(1.0), 0.75, 1e-12) + assertEqualsDouble(emp.probabilityOf(10.0), 0.25, 1e-12) + assertEqualsDouble(emp.probability(1.0, 10.0), 0.25, 1e-12) // P(1 < X <= 10) = 0.25 + } + + test("single element distribution") { + val emp = Empirical.equalWeights(Array(42.0)) + assertEqualsDouble(emp.mean, 42.0, 1e-12) + assertEqualsDouble(emp.variance, 0.0, 1e-12) + assertEqualsDouble(emp.cdf(41.0), 0.0, 1e-12) + assertEqualsDouble(emp.cdf(42.0), 1.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.0), 42.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(0.5), 42.0, 1e-12) + assertEqualsDouble(emp.inverseCdf(1.0), 42.0, 1e-12) + assertEqualsDouble(emp.probabilityOf(42.0), 1.0, 1e-12) + } + + test("all duplicate values are merged") { + val emp = Empirical.equalWeights(Array(5.0, 5.0, 5.0)) + assertEqualsDouble(emp.mean, 5.0, 1e-12) + assertEqualsDouble(emp.variance, 0.0, 1e-12) + assertEqualsDouble(emp.probabilityOf(5.0), 1.0, 1e-12) + } + + test("inverseCdf(1.0) returns maximum") { + val emp = Empirical.equalWeights(Array(1.0, 2.0, 100.0)) + assertEqualsDouble(emp.inverseCdf(1.0), 100.0, 1e-12) + } + + test("cdf at exact max value equals 1") { + val emp = Empirical.equalWeights(Array(1.0, 2.0, 3.0)) + assertEqualsDouble(emp.cdf(3.0), 1.0, 1e-12) + } + + test("draw returns values in support") { + val xs = Array(10.0, 20.0, 30.0) + val emp = Empirical.equalWeights(xs) + val samples = (1 to 100).map(_ => emp.draw) + assert(samples.forall(s => xs.contains(s))) + } + + test("construction fails on empty values") { + intercept[IllegalArgumentException] { + Empirical.equalWeights(Array.empty[Double]) + } + } + + test("construction fails on zero weight") { + intercept[IllegalArgumentException] { + Empirical.weighted(Array(1.0), Array(0.0)) + } + } + + test("construction fails on negative weight") { + intercept[IllegalArgumentException] { + 
Empirical.weighted(Array(1.0, 2.0), Array(1.0, -1.0)) + } + } + + test("construction fails on NaN in values") { + intercept[IllegalArgumentException] { + Empirical.equalWeights(Array(1.0, Double.NaN)) + } + } + +end EmpiricalTest diff --git a/vecxt_re/test/src-jvm/Mixed.test.scala b/vecxt_re/test/src-jvm/Mixed.test.scala new file mode 100644 index 00000000..793ad518 --- /dev/null +++ b/vecxt_re/test/src-jvm/Mixed.test.scala @@ -0,0 +1,144 @@ +package vecxt_re + +import munit.FunSuite + +class MixedTest extends FunSuite: + + test("CDF is continuous at mixing point") { + val values = Array(1.0, 2.0, 3.0, 4.0, 5.0) + val mixingPoint = 4.0 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + // CDF just below and at the mixing point should be close + val cdfBelow = mixed.cdf(mixingPoint - 0.0001) + val cdfAt = mixed.cdf(mixingPoint) + + // At mixingPoint, the Pareto CDF starts at 0, so cdf should equal bodyWeight + // which is the fraction of empirical values strictly below mixingPoint + // Values < 4.0 are: 1.0, 2.0, 3.0 (3 out of 5) = 0.6 + val expectedBodyWeight = 0.6 + assertEqualsDouble(cdfBelow, expectedBodyWeight, 0.01) + assertEqualsDouble(cdfAt, expectedBodyWeight, 1e-12) // Pareto CDF at scale is 0 + } + + test("CDF goes from 0 to 1") { + val values = Array(1.0, 2.0, 3.0, 5.0, 10.0) + val mixingPoint = 4.0 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + assertEqualsDouble(mixed.cdf(0.0), 0.0, 1e-12) + assertEqualsDouble(mixed.cdf(1000000.0), 1.0, 1e-6) + } + + test("inverseCdf and cdf are consistent") { + val values = Array(1.0, 2.0, 3.0, 5.0, 10.0) + val mixingPoint = 4.0 + val paretoShape = 2.5 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + // Test a range of quantiles + val quantiles = Array(0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99) + for p <- quantiles do + val x = mixed.inverseCdf(p) + val recoveredP = mixed.cdf(x) + // For discrete parts, we only expect 
recoveredP >= p + assert(recoveredP >= p - 1e-6, s"Failed at p=$p: inverseCdf($p)=$x, cdf($x)=$recoveredP") + end for + } + + test("draw returns values in valid range") { + val values = Array(1.0, 2.0, 3.0) + val mixingPoint = 2.5 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + val samples = (1 to 1000).map(_ => mixed.draw) + val minSample = samples.min + val maxSample = samples.max + + // Min should be from empirical (>= 1.0) + assert(minSample >= 1.0, s"Min sample $minSample should be >= 1.0") + // Should have some tail samples above mixing point + assert(maxSample > mixingPoint, s"Max sample $maxSample should be > $mixingPoint (Pareto tail)") + } + + test("body weight calculation is correct") { + val values = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0) + val mixingPoint = 5.5 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + // Values < 5.5 are: 1, 2, 3, 4, 5 (5 out of 10) = 0.5 + // CDF at mixing point should equal bodyWeight + assertEqualsDouble(mixed.cdf(mixingPoint), 0.5, 1e-12) + } + + test("mean calculation for shape > 1") { + val values = Array(1.0, 2.0, 3.0) + val mixingPoint = 2.5 + val paretoShape = 2.5 // Mean is defined for shape > 1 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + val mean = mixed.mean + // Mean should be between minimum empirical and some reasonable upper bound + assert(mean > 1.0, s"Mean $mean should be > 1.0") + assert(mean.isFinite, s"Mean should be finite") + } + + test("variance calculation for shape > 2") { + val values = Array(1.0, 2.0, 3.0, 4.0) + val mixingPoint = 3.0 + val paretoShape = 3.0 // Variance is defined for shape > 2 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + val variance = mixed.variance + assert(variance > 0.0, s"Variance $variance should be > 0") + assert(variance.isFinite, s"Variance should be finite") + } + + test("weighted empirical works correctly") { + val 
values = Array(1.0, 2.0, 3.0) + val weights = Array(1.0, 2.0, 1.0) // 25%, 50%, 25% + val mixingPoint = 2.5 + val paretoShape = 2.0 + val mixed = Mixed.fromWeightedValues(values, weights, mixingPoint, paretoShape) + + // Values < 2.5 are 1.0 and 2.0 with weights 1.0 and 2.0 + // bodyWeight = (1.0 + 2.0) / (1.0 + 2.0 + 1.0) = 0.75 + assertEqualsDouble(mixed.cdf(mixingPoint), 0.75, 1e-12) + } + + test("probability method works correctly") { + val values = Array(1.0, 2.0, 3.0, 4.0, 5.0) + val mixingPoint = 3.5 + val paretoShape = 2.0 + val mixed = Mixed.fromValues(values, mixingPoint, paretoShape) + + // P(2 < X <= 3) should be 1/5 = 0.2 (only value 3 is in this range) + assertEqualsDouble(mixed.probability(2.0, 3.0), 0.2, 1e-12) + + // P(0 < X <= 10) should be close to cdf(10) + assertEqualsDouble(mixed.probability(0.0, 10.0), mixed.cdf(10.0), 1e-12) + } + + test("construction fails with non-positive mixing point") { + intercept[IllegalArgumentException] { + Mixed.fromValues(Array(1.0, 2.0), 0.0, 2.0) + } + intercept[IllegalArgumentException] { + Mixed.fromValues(Array(1.0, 2.0), -1.0, 2.0) + } + } + + test("construction fails with non-positive pareto shape") { + intercept[IllegalArgumentException] { + Mixed.fromValues(Array(1.0, 2.0), 1.5, 0.0) + } + intercept[IllegalArgumentException] { + Mixed.fromValues(Array(1.0, 2.0), 1.5, -1.0) + } + } + +end MixedTest diff --git a/vecxt_re/test/src-jvm/NegBin.test.scala b/vecxt_re/test/src-jvm/NegBin.test.scala new file mode 100644 index 00000000..e05b6355 --- /dev/null +++ b/vecxt_re/test/src-jvm/NegBin.test.scala @@ -0,0 +1,226 @@ +package vecxt_re + +import munit.FunSuite +import vecxt.all.* +import org.apache.commons.statistics.distribution.PoissonDistribution + +class NegBinTest extends FunSuite: + + inline val localTests = false + + test("pmf approximately normalizes") { + val nb = NegativeBinomial(a = 2.5, b = 1.2) + + val mu = nb.mean + val sd = math.sqrt(nb.variance) + val K = (mu + 15 * sd).toInt + + val sum = (0 to 
K).map(nb.probabilityOf).sum + + assert(math.abs(sum - 1.0) < 1e-8) + } + + test("pmf mean and variance match theory") { + val nb = NegativeBinomial(3.0, 0.7) + + val K = 500 + val probs = (0 to K).map(k => nb.probabilityOf(k)) + + val mean = probs.zipWithIndex.map { case (p, k) => p * k }.sum + val varr = probs.zipWithIndex.map { case (p, k) => p * k * k }.sum - mean * mean + + assert(math.abs(mean - nb.mean) < 1e-6) + assert(math.abs(varr - nb.variance) < 1e-6) + } + + test("approaches Poisson as b -> 0") { + val mu = 4.0 + val b = 1e-6 + val a = mu / b + + val nb = NegativeBinomial(a, b) + val pois = PoissonDistribution.of(mu) + + assert(nb.probabilityOf(-1) == 0.0) + assert(nb.logProbabilityOf(-1).isNegInfinity) + + (0 to 20).foreach { k => + val diff = + math.abs(nb.probabilityOf(k) - pois.probability(k)) + assert(diff < 1e-6) + } + } + + test("works with small a < 1 (fractional shape)") { + val nb = NegativeBinomial(a = 0.5, b = 2.0) + + // Verify PMF normalizes + val K = 200 + val sum = (0 to K).map(nb.probabilityOf).sum + assert(math.abs(sum - 1.0) < 1e-6) + + // Verify mean and variance from PMF match theoretical values + val probs = (0 to K).map(k => nb.probabilityOf(k)) + val mean = probs.zipWithIndex.map { case (p, k) => p * k }.sum + val varr = probs.zipWithIndex.map { case (p, k) => p * k * k }.sum - mean * mean + + // a * b = 0.5 * 2.0 = 1.0 + assert(math.abs(mean - nb.mean) < 1e-5) + assert(math.abs(nb.mean - 1.0) < 1e-10) + + // a * b * (1 + b) = 0.5 * 2.0 * 3.0 = 3.0 + assert(math.abs(varr - nb.variance) < 1e-4) + assert(math.abs(nb.variance - 3.0) < 1e-10) + } + + // Ignored in CI as slow + test("SLOW: sampling mean and variance") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.NegBinTest.sampling mean and variance IN CI========") + val nb = NegativeBinomial(5.0, 0.8) + val n = 2_000_000 + + val xs = Array.fill(n)(nb.draw.toDouble) + + val mean = xs.sum / n + val varr = 
xs.map(x => (x - mean) * (x - mean)).sum / n + + assert(math.abs(mean - nb.mean) < 5e-3) + assert(math.abs(varr - nb.variance) < 5e-2) + } + + test("SLOW: sampling distribution matches pmf") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.NegBinTest.sampling distribution matches pmf IN CI========") + val nb = NegativeBinomial(2.0, 1.5) + val n = 500_000 + + val samples = Array.fill(n)(nb.draw) + val counts = samples.groupBy(identity).view.mapValues(_.size).toMap + + val K = 20 + (0 to K).foreach { k => + val expected = n * nb.probabilityOf(k) + val observed = counts.getOrElse(k, 0) + assert(math.abs(observed - expected) < 5 * math.sqrt(expected)) + } + } + + test("SLOW: MLE recovers parameters") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.MLE recovers parameters IN CI========") + + val trueNb = NegativeBinomial(4.0, 0.6) + val data = Array.fill(10_000)(trueNb.draw) + + val (fitted, converged) = NegativeBinomial.mle(data) + assert(converged) + + fitted match + case nb: NegativeBinomial => + assertEqualsDouble(nb.mean, trueNb.mean, 0.1) + assertEqualsDouble(nb.b, trueNb.b, 0.1) + case _: Poisson => + fail("Expected NegativeBinomial but got Poisson") + end match + } + + test("SLOW: vol weighted MLE follows standard case with uniform volumes ") { + + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.MLE recovers parameters IN CI========") + + val trueNb = NegativeBinomial(4.0, 0.6) + val data = Array.fill(10_000)(trueNb.draw) + + val (fitted, converged) = NegativeBinomial.mleVolumeWeighted(data, Array.fill(10_000)(1.0)) + assert(converged) + + fitted match + case nb: NegativeBinomial => + assertEqualsDouble(nb.mean, trueNb.mean, 0.1) + assertEqualsDouble(nb.b, trueNb.b, 0.1) + case _: Poisson => + fail("Expected NegativeBinomial but got 
Poisson") + end match + } + + /** This directly exercises the volume factors: counts drawn with v = 0.5 use scale βv = 0.4, and with v = 2.0 use βv = + * 1.6; the fitter must undo that scaling to recover β = 0.8. + */ + test("SLOW: volume-weighted MLE recovers base params with mixed volumes") { + assume(localTests, "Skip heavy sampling in CI") + + val rTrue = 3.2 + val betaTrue = 0.8 + val seed = 12345L + val nPerBucket = 25_000 + val vols = Array.fill(nPerBucket)(0.5) ++ Array.fill(nPerBucket)(2.0) + + val rng = org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create(seed) + val gammaLow = org.apache.commons.statistics.distribution.GammaDistribution + .of(rTrue, betaTrue * 0.5) + .createSampler(rng) + val gammaHigh = org.apache.commons.statistics.distribution.GammaDistribution + .of(rTrue, betaTrue * 2.0) + .createSampler(rng) + + val data = new Array[Int](vols.length) + var i = 0 + while i < vols.length do + val lambda = + if i < nPerBucket then gammaLow.sample() + else gammaHigh.sample() + data(i) = org.apache.commons.statistics.distribution.PoissonDistribution + .of(lambda) + .createSampler(rng) + .sample() + i += 1 + end while + + val (fitted, converged) = NegativeBinomial.mleVolumeWeighted(data, vols, maxIter = 200, tol = 1e-8) + assert(converged) + fitted match + case nb: NegativeBinomial => + assertEqualsDouble(nb.a, rTrue, 0.1) + assertEqualsDouble(nb.b, betaTrue, 0.1) + + // Ignoring volumes collapses a mixture of scaled NB's into a single NB, which should fit worse + // (at minimum: it should be less accurate on the modeled-period mean and dispersion). 
+ val modeledMean = rTrue * betaTrue + val (unweighted, _) = NegativeBinomial.mle(data) + unweighted match + case unNb: NegativeBinomial => + assert(math.abs(nb.mean - modeledMean) <= math.abs(unNb.mean - modeledMean)) + assert(math.abs(nb.b - betaTrue) <= math.abs(unNb.b - betaTrue)) + case _: Poisson => + // Unweighted returned Poisson, just check that the weighted NB is reasonable + assert(math.abs(nb.mean - modeledMean) < 0.5) + end match + case _: Poisson => + fail("Expected NegativeBinomial but got Poisson for overdispersed data") + end match + } + + test("MLE returns Poisson for underdispersed data") { + // Data with variance <= mean should return Poisson + val poissonData = Array.fill(10000)( + org.apache.commons.statistics.distribution.PoissonDistribution + .of(5.0) + .createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + .sample() + ) + + val (fitted, converged) = NegativeBinomial.mle(poissonData) + assert(converged) + + fitted match + case pois: Poisson => + assertEqualsDouble(pois.lambda, 5.0, 0.3) + case _: NegativeBinomial => + // Also acceptable if slightly overdispersed due to sampling variance + () + end match + } + +end NegBinTest diff --git a/vecxt_re/test/src-jvm/Poisson.test.scala b/vecxt_re/test/src-jvm/Poisson.test.scala new file mode 100644 index 00000000..2179d9e3 --- /dev/null +++ b/vecxt_re/test/src-jvm/Poisson.test.scala @@ -0,0 +1,206 @@ +package vecxt_re + +import munit.FunSuite +import vecxt.all.* +import org.apache.commons.statistics.distribution.PoissonDistribution as ApachePoisson + +class PoissonTest extends FunSuite: + + inline val localTests = false + + test("pmf approximately normalizes") { + val pois = Poisson(lambda = 5.0) + + val mu = pois.mean + val sd = math.sqrt(pois.variance) + val K = (mu + 15 * sd).toInt + + val sum = (0 to K).map(pois.probabilityOf).sum + + assert(math.abs(sum - 1.0) < 1e-8) + } + + test("pmf mean and variance match theory") { + val lambda = 7.5 + val pois = 
Poisson(lambda) + + val K = 500 + val probs = (0 to K).map(k => pois.probabilityOf(k)) + + val mean = probs.zipWithIndex.map { case (p, k) => p * k }.sum + val varr = probs.zipWithIndex.map { case (p, k) => p * k * k }.sum - mean * mean + + // For Poisson, mean = variance = lambda + assert(math.abs(mean - lambda) < 1e-6) + assert(math.abs(varr - lambda) < 1e-6) + assert(math.abs(pois.mean - lambda) < 1e-10) + assert(math.abs(pois.variance - lambda) < 1e-10) + } + + test("matches Apache Commons Poisson distribution") { + val lambda = 4.0 + val pois = Poisson(lambda) + val apachePois = ApachePoisson.of(lambda) + + assert(pois.probabilityOf(-1) == 0.0) + assert(pois.logProbabilityOf(-1).isNegInfinity) + + (0 to 20).foreach { k => + val diff = math.abs(pois.probabilityOf(k) - apachePois.probability(k)) + assert(diff < 1e-14, s"PMF mismatch at k=$k: ${pois.probabilityOf(k)} vs ${apachePois.probability(k)}") + } + } + + test("cdf matches Apache Commons") { + val lambda = 6.0 + val pois = Poisson(lambda) + val apachePois = ApachePoisson.of(lambda) + + (0 to 25).foreach { k => + val diff = math.abs(pois.cdf(k) - apachePois.cumulativeProbability(k)) + assert(diff < 1e-12, s"CDF mismatch at k=$k: ${pois.cdf(k)} vs ${apachePois.cumulativeProbability(k)}") + } + } + + test("probability(x, y) equals cdf(y) - cdf(x)") { + val pois = Poisson(5.0) + + for + x <- 0 to 10 + y <- (x + 1) to 15 + do + val expected = pois.cdf(y) - pois.cdf(x) + val actual = pois.probability(x, y) + assert(math.abs(actual - expected) < 1e-14) + end for + } + + test("small lambda works correctly") { + val pois = Poisson(0.1) + + val K = 50 + val sum = (0 to K).map(pois.probabilityOf).sum + assert(math.abs(sum - 1.0) < 1e-10) + + // P(X=0) = e^(-0.1) ≈ 0.9048 + assert(math.abs(pois.probabilityOf(0) - math.exp(-0.1)) < 1e-14) + } + + test("large lambda works correctly") { + val pois = Poisson(100.0) + + // For large lambda, distribution is approximately normal with mean=variance=lambda + val K = 250 + val 
probs = (0 to K).map(k => pois.probabilityOf(k)) + val sum = probs.sum + + assert(math.abs(sum - 1.0) < 1e-6) + + val mean = probs.zipWithIndex.map { case (p, k) => p * k }.sum + assert(math.abs(mean - 100.0) < 1e-4) + } + + // Ignored in CI as slow + test("SLOW: sampling mean and variance") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.PoissonTest.sampling mean and variance IN CI========") + val pois = Poisson(8.0) + val n = 2_000_000 + + val xs = Array.fill(n)(pois.draw.toDouble) + + val mean = xs.sum / n + val varr = xs.map(x => (x - mean) * (x - mean)).sum / n + + assert(math.abs(mean - pois.mean) < 5e-3) + assert(math.abs(varr - pois.variance) < 5e-2) + } + + test("SLOW: sampling distribution matches pmf") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF vecxt_re.PoissonTest.sampling distribution matches pmf IN CI========") + val pois = Poisson(4.0) + val n = 500_000 + + val samples = Array.fill(n)(pois.draw) + val counts = samples.groupBy(identity).view.mapValues(_.size).toMap + + (0 to 15).foreach { k => + val empirical = counts.getOrElse(k, 0).toDouble / n + val theoretical = pois.probabilityOf(k) + val diff = math.abs(empirical - theoretical) + assert(diff < 0.01, s"At k=$k: empirical=$empirical, theoretical=$theoretical, diff=$diff") + } + } + + test("SLOW: MLE recovers true parameter") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF MLE recovers true parameter distribution matches pmf IN CI========") + + val trueLambda = 6.0 + val apachePois = ApachePoisson.of(trueLambda) + val sampler = apachePois.createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + + val data = Array.fill(10_000)(sampler.sample()) + val (fitted, converged) = Poisson.mle(data) + + assert(converged) + assert(math.abs(fitted.lambda - 
trueLambda) < 0.1, s"Fitted lambda=${fitted.lambda}, true=$trueLambda") + } + + test("SLOW: MLE volume-weighted with uniform volumes equals regular MLE") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF MLE volume-weighted with uniform volumes equals regular MLE IN CI========") + + val trueLambda = 5.0 + val apachePois = ApachePoisson.of(trueLambda) + val sampler = apachePois.createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + + val data = Array.fill(10_000)(sampler.sample()) + val uniformVolumes = Array.fill(10_000)(1.0) + + val (fitted, converged) = Poisson.mle(data) + val (fittedVol, convergedVol) = Poisson.volweightedMle(data, uniformVolumes) + + assert(converged) + assert(convergedVol) + assert(math.abs(fitted.lambda - fittedVol.lambda) < 1e-10) + } + + test("SLOW: volume-weighted MLE correctly adjusts for volumes") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF volume-weighted MLE correctly adjusts for volumes IN CI========") + // If we have counts n_j from volumes v_j, the rate lambda should be sum(n_j) / sum(v_j) + val observations = Array(10, 20, 15, 25) + val volumes = Array(2.0, 4.0, 3.0, 5.0) + + val expectedLambda = observations.sum.toDouble / volumes.sum // = 70 / 14 = 5.0 + val (fitted, converged) = Poisson.volweightedMle(observations, volumes) + + assert(converged) + assert(math.abs(fitted.lambda - expectedLambda) < 1e-10) + } + + test("SLOW: goodness-of-fit test accepts Poisson data") { + assume(localTests, "Don't run local-only tests in CI ideally as they are slow") + println("=============TURN OFF goodness-of-fit test accepts Poisson data IN CI========") + val trueLambda = 5.0 + val apachePois = ApachePoisson.of(trueLambda) + val sampler = apachePois.createSampler(org.apache.commons.rng.simple.RandomSource.XO_RO_SHI_RO_128_PP.create()) + + val data = 
Array.fill(1000)(sampler.sample()) + val (statistic, df, pValue) = Poisson.goodnessOfFit(data) + + // With Poisson data, we should not reject at α=0.05 + assert(pValue > 0.01, s"p-value=$pValue is suspiciously low for Poisson data") + } + + test("fromMean creates distribution with correct lambda") { + val mu = 7.5 + val pois = Poisson.fromMean(mu) + assert(pois.lambda == mu) + assert(pois.mean == mu) + assert(pois.variance == mu) + } + +end PoissonTest diff --git a/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala b/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala new file mode 100644 index 00000000..61733897 --- /dev/null +++ b/vecxt_re/test/src-jvm/ScenarrMonoidLawsSpec.scala @@ -0,0 +1,100 @@ +package vecxt_re + +import cats.kernel.Eq +import cats.kernel.laws.discipline.MonoidTests +import munit.DisciplineSuite +import org.scalacheck.{Arbitrary, Gen} +import java.time.LocalDate + +/** Law-based tests for Scenarr Monoid using cats-kernel-laws and discipline. + * + * These tests verify that the Scenarr Monoid satisfies all required laws: + * - Left identity: empty |+| a = a + * - Right identity: a |+| empty = a + * - Associativity: (a |+| b) |+| c = a |+| (b |+| c) + */ +class ScenarrMonoidLawsSpec extends DisciplineSuite: + + // Fixed parameters for testing + private val TestIterations = 50 + private val TestDay1 = LocalDate.of(2019, 1, 1) + + // Generator for positive amounts (above any reasonable threshold) + private val amountGen: Gen[Double] = Gen.choose(100.0, 10000.0) + + // Generator for days (1-365) + private val dayGen: Gen[Int] = Gen.choose(1, 365) + + // Generator for iterations (1-TestIterations) + private val iterationGen: Gen[Int] = Gen.choose(1, TestIterations) + + // Generator for event count (0-20 events) + private val eventCountGen: Gen[Int] = Gen.choose(0, 25) + + // Small ID space to encourage clashes across Scenarrs being combined + private val idGen: Gen[Long] = Gen.choose(1L, 100L) + + /** Deterministically derive (iteration, day) from an 
ID. This ensures that when the same ID appears in different + * Scenarrs, it always has the same iteration and day - making the combine valid. Amounts can differ and will be + * aggregated. + */ + private def iterationForId(id: Long): Int = ((id % TestIterations) + 1).toInt + private def dayForId(id: Long): Int = ((id % 365) + 1).toInt + + /** Generate a valid Scenarr with fixed numberIterations and day1. + * + * Uses a small ID space (1-100) to encourage clashes across Scenarrs. Iteration and day are derived + * deterministically from ID, so clashing IDs always have consistent (iteration, day) pairs - the amounts get + * aggregated as expected by the monoid. + */ + private val scenarrrGen: Gen[Scenarr] = for + n <- eventCountGen + ids <- Gen.listOfN(n, idGen).map(_.distinct) // unique within this Scenarr + amounts <- Gen.listOfN(ids.length, amountGen) + threshold <- Gen.const(0.0) // Use 0 threshold to avoid filtering + yield + val iterations = ids.map(iterationForId).toArray + val days = ids.map(dayForId).toArray + new Scenarr( + iterations, + days, + amounts.toArray, + ids.toArray, + TestIterations, + threshold, + TestDay1, + s"test-${ids.length}", + scala.util.Random.nextLong(), + isSorted = false + ) + + given Arbitrary[Scenarr] = Arbitrary(scenarrrGen) + + /** Equality for Scenarr that compares the semantic content. 
+ * + * Two Scenarrs are equal if they have: + * - Same numberIterations + * - Same threshold + * - Same day1 + * - Same events (id -> (iteration, day, amount)) regardless of order + */ + given Eq[Scenarr] = Eq.instance { (a, b) => + if a.numberIterations != b.numberIterations then false + else if Math.abs(a.threshold - b.threshold) > 1e-10 then false + else if a.day1 != b.day1 then false + else if a.ids.length != b.ids.length then false + else + // Compare events by creating a map of id -> (iter, day, amount) + a.iterations.sameElements(b.iterations) && + a.days.sameElements(b.days) && + a.amounts.zip(b.amounts).forall((x, y) => Math.abs(x - y) < 1e-10) && + a.ids.sameElements(b.ids) + } + + // Use the fixed-iteration monoid for law testing + given cats.kernel.Monoid[Scenarr] = Scenarr.monoidForIterations(TestIterations) + + // Run all Monoid law tests + checkAll("Scenarr.MonoidLaws", MonoidTests[Scenarr].monoid) + +end ScenarrMonoidLawsSpec diff --git a/vecxt_re/test/src-jvm/TrendAnalysis.test.scala b/vecxt_re/test/src-jvm/TrendAnalysis.test.scala new file mode 100644 index 00000000..ac10ef7b --- /dev/null +++ b/vecxt_re/test/src-jvm/TrendAnalysis.test.scala @@ -0,0 +1,261 @@ +package vecxt_re + +import munit.FunSuite + +import vecxt.all.* + +class TrendAnalysisTest extends FunSuite: + + import TrendAnalysis.* + + // Test data: synthetic counts with known trend + val yearsNoTrend = Vector(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009) + val countsNoTrend = Vector(1, 1, 1, 1, 1, 1, 1, 1, 1, 1) // No trend + + val yearsWithTrend = Vector(2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009) + val countsWithTrend = Vector(1, 1, 2, 2, 3, 4, 5, 6, 8, 10) // Clear upward trend + + val realYears = Array( + 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, + 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025 + ) + val realCounts = Array( + 1, 0, 0, 0, 1, 0, 0, 0, 2, 1, 1, 2, 3, 0, 0, 1, 2, 1, 
0, 1, 2, 3, 1, 2, 1, 0, 1 + ) + // Example results from numpy / statsmodels for realYears + /** Fitted Poisson with lambda = 0.9629629629629629 + * + * Generalized Linear Model: log(Count) ~ 1 + Year + * ============================================================ + * Generalized Linear Model Regression Results + * ============================================================================== + * Dep. Variable: y No. Observations: 27 + * Model: GLM Df Residuals: 25 + * Model Family: Poisson Df Model: 1 + * Link Function: Log Scale: 1.0000 + * Method: IRLS Log-Likelihood: -32.760 + * Date: Thu, 29 Jan 2026 Deviance: 26.468 + * Time: 12:14:51 Pearson chi2: 22.3 + * No. Iterations: 5 Pseudo R-squ. (CS): 0.08983 + * Covariance Type: nonrobust + * ============================================================================== + * coef std err z P>|z| [0.025 0.975] + * ------------------------------------------------------------------------------ + * const -82.0576 52.254 -1.570 0.116 -184.473 20.358 + * x1 0.0407 0.026 1.571 0.116 -0.010 0.092 + * ============================================================================== + */ + + test("Poisson fitTrend returns valid result structure") { + val pois = Poisson(realCounts.mean) + val result = pois.fitTrend(yearsNoTrend, countsNoTrend) + + assertEquals(result.nObs, 10) + assertEquals(result.dfResidual, 8) + assert(!result.intercept.isNaN, "intercept should not be NaN") + assert(!result.slope.isNaN, "slope should not be NaN") + assert(!result.seIntercept.isNaN, "seIntercept should not be NaN") + assert(!result.seSlope.isNaN, "seSlope should not be NaN") + assert(result.pValueSlope >= 0 && result.pValueSlope <= 1, "p-value should be in [0,1]") + + } + + test("Poisson fitTrend detects no significant trend in flat data") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsNoTrend, countsNoTrend) + + // Slope should be close to zero + assert(math.abs(result.slope) < 0.1, s"slope should be near zero, got 
${result.slope}") + // p-value should be high (not significant) + assert(result.pValueSlope > 0.1, s"p-value should be > 0.1 for no trend, got ${result.pValueSlope}") + assert(!result.hasSignificantTrend(0.05), "should not detect significant trend in flat data") + } + + test("Poisson fitTrend detects significant trend in increasing data") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + // Slope should be positive + assert(result.slope > 0, s"slope should be positive, got ${result.slope}") + // p-value should be low (significant) + assert(result.pValueSlope < 0.05, s"p-value should be < 0.05 for clear trend, got ${result.pValueSlope}") + assert(result.hasSignificantTrend(0.05), "should detect significant trend in increasing data") + } + + test("Poisson fitTrend residual deviance less than null deviance for trending data") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + assert( + result.residualDeviance < result.nullDeviance, + s"residual deviance (${result.residualDeviance}) should be less than null deviance (${result.nullDeviance})" + ) + } + + test("NegativeBinomial fitTrend returns valid result structure") { + val nb = NegativeBinomial(a = 1.0, b = 1.0) + val result = nb.fitTrend(yearsNoTrend, countsNoTrend) + + assertEquals(result.nObs, 10) + assertEquals(result.dfResidual, 8) + assert(!result.intercept.isNaN, "intercept should not be NaN") + assert(!result.slope.isNaN, "slope should not be NaN") + assert(!result.seIntercept.isNaN, "seIntercept should not be NaN") + assert(!result.seSlope.isNaN, "seSlope should not be NaN") + assert(result.pValueSlope >= 0 && result.pValueSlope <= 1, "p-value should be in [0,1]") + } + + test("NegativeBinomial fitTrend detects no significant trend in flat data") { + val nb = NegativeBinomial(a = 1.0, b = 1.0) + val result = nb.fitTrend(yearsNoTrend, countsNoTrend) + + assert(math.abs(result.slope) < 0.1, s"slope should be near zero, got 
${result.slope}") + assert(!result.hasSignificantTrend(0.05), "should not detect significant trend in flat data") + } + + test("NegativeBinomial fitTrend detects significant trend in increasing data") { + val nb = NegativeBinomial(a = 2.0, b = 0.5) + val result = nb.fitTrend(yearsWithTrend, countsWithTrend) + + assert(result.slope > 0, s"slope should be positive, got ${result.slope}") + assert(result.hasSignificantTrend(0.05), "should detect significant trend in increasing data") + } + + test("Poisson fitTrend on realistic data produces sensible coefficients") { + val pois = Poisson(1.0) + val result = pois.fitTrend(realYears, realCounts) + + assertEquals(result.nObs, 27) + assertEquals(result.dfResidual, 25) + + // The image shows β₀ ≈ -91.887, β₁ ≈ 0.0456 for similar data + // Our parameterization may differ slightly, but signs should match + // Slope should be small and positive (slight upward trend) + assert(result.slope > -0.1 && result.slope < 0.2, s"slope should be small, got ${result.slope}") + + // Check that summary doesn't throw + val summary = result.summary + assert(summary.nonEmpty, "summary should not be empty") + assert(summary.contains("Coefficients"), "summary should contain 'Coefficients'") + assert(summary.contains("Year"), "summary should contain 'Year'") + } + + test("Poisson fitTrend matches Python statsmodels GLM results") { + // Python statsmodels GLM output for realYears/realCounts: + // Fitted Poisson with lambda = 0.9629629629629629 + // No. 
Observations: 27, Df Residuals: 25, Df Model: 1 + // Log-Likelihood: -32.760, Deviance: 26.468, Pearson chi2: 22.3 + // const: -82.0576 (std err 52.254), z=-1.570, p=0.116 + // x1: 0.0407 (std err 0.026), z= 1.571, p=0.116 + val pois = Poisson(realCounts.mean) + val result = pois.fitTrend(realYears, realCounts) + + // Observations and degrees of freedom + assertEquals(result.nObs, 27) + assertEquals(result.dfResidual, 25) + + // Coefficients (tolerance for numerical differences) + assertEqualsDouble(result.intercept, -82.0576, 0.5) + assertEqualsDouble(result.slope, 0.0407, 0.001) + + // Standard errors + assertEqualsDouble(result.seIntercept, 52.254, 0.5) + assertEqualsDouble(result.seSlope, 0.026, 0.001) + + // P-value for slope (Python: 0.116, some variation expected due to CDF approximation) + assertEqualsDouble(result.pValueSlope, 0.116, 0.03) + + // Log-likelihood + assertEqualsDouble(result.logLikelihood, -32.760, 0.1) + + // Residual deviance + assertEqualsDouble(result.residualDeviance, 26.468, 0.1) + } + + test("TrendFitResult summary formatting") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + val summary = result.summary + + // Check summary contains expected sections + assert(summary.contains("Generalized Linear Model"), "should have model header") + assert(summary.contains("(Intercept)"), "should show intercept") + assert(summary.contains("Year"), "should show year coefficient") + assert(summary.contains("Null Deviance"), "should show null deviance") + assert(summary.contains("Residual Deviance"), "should show residual deviance") + assert(summary.contains("AIC"), "should show AIC") + assert(summary.contains("F-statistic"), "should show F-statistic") + } + + test("fitTrend requires minimum 3 observations") { + val pois = Poisson(1.0) + + intercept[IllegalArgumentException] { + pois.fitTrend(Vector(2000, 2001), Vector(1, 2)) + } + } + + test("fitTrend requires equal length years and counts") { + val pois = 
Poisson(1.0) + + intercept[IllegalArgumentException] { + pois.fitTrend(Vector(2000, 2001, 2002), Vector(1, 2)) + } + } + + test("F-statistic p-value is valid") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + assert(result.fStatistic > 0, "F-statistic should be positive for trending data") + assert(result.fPValue >= 0 && result.fPValue <= 1, "F p-value should be in [0,1]") + } + + test("F-statistic p-value is small for significant trend") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + // For significant trend, F p-value should be small (< 0.05) + // F ~ 2.5 with df1=1, df2=8 should have p-value around 0.15 + // For our strongly trending data, F should be larger and p-value smaller + assert( + result.fPValue < 0.2, + s"F p-value should be small for significant trend, got ${result.fPValue} with F=${result.fStatistic}" + ) + } + + test("F-statistic p-value matches expected range for known F values") { + // Sanity check: for trending data, higher F should mean lower p-value + val pois = Poisson(1.0) + val trendResult = pois.fitTrend(yearsWithTrend, countsWithTrend) + val flatResult = pois.fitTrend(yearsNoTrend, countsNoTrend) + + // Trending data should have higher F-stat and lower p-value than flat data + assert( + trendResult.fStatistic > flatResult.fStatistic || flatResult.fStatistic <= 0, + s"Trending F (${trendResult.fStatistic}) should be >= flat F (${flatResult.fStatistic})" + ) + } + + test("AIC is finite") { + val pois = Poisson(1.0) + val result = pois.fitTrend(yearsWithTrend, countsWithTrend) + + assert(result.aic.isFinite, "AIC should be finite") + assert(result.logLikelihood.isFinite, "log-likelihood should be finite") + } + + test("Poisson and NegBin give similar results for low dispersion") { + // When NegBin has high 'a' (low overdispersion), should approximate Poisson + val pois = Poisson(1.0) + val nb = NegativeBinomial(a = 100.0, b = 0.01) // High a = low 
overdispersion + + val poisResult = pois.fitTrend(yearsWithTrend, countsWithTrend) + val nbResult = nb.fitTrend(yearsWithTrend, countsWithTrend) + + // Slopes should be in the same ballpark + val slopeDiff = math.abs(poisResult.slope - nbResult.slope) + assert(slopeDiff < 0.5, s"slopes should be similar: Poisson=${poisResult.slope}, NegBin=${nbResult.slope}") + } + +end TrendAnalysisTest diff --git a/vecxt_re/test/src-native/.keep b/vecxt_re/test/src-native/.keep new file mode 100644 index 00000000..e69de29b diff --git a/vecxtensions/test/src/aggByItr.test.scala b/vecxt_re/test/src/aggByItr.test.scala similarity index 96% rename from vecxtensions/test/src/aggByItr.test.scala rename to vecxt_re/test/src/aggByItr.test.scala index f9a3aec5..90cc70a6 100644 --- a/vecxtensions/test/src/aggByItr.test.scala +++ b/vecxt_re/test/src/aggByItr.test.scala @@ -1,7 +1,6 @@ -package vecxtensions +package vecxt_re import munit.FunSuite -import vecxt.reinsurance.Layer class AggregateByItrSpec extends FunSuite: diff --git a/vecxt_re/test/src/calendarYearIndex.test.scala b/vecxt_re/test/src/calendarYearIndex.test.scala new file mode 100644 index 00000000..9bb453e3 --- /dev/null +++ b/vecxt_re/test/src/calendarYearIndex.test.scala @@ -0,0 +1,139 @@ +package vecxt_re + +import munit.FunSuite + +class CalendarYearIndexSuite extends FunSuite: + + test("basic construction with years and indices") { + val years = Array(2024, 2023, 2022, 2021) + val indices = Array(1.05, 1.03, 1.02, 1.04) + val idx = CalendarYearIndex(years, indices) + + assertEquals(idx.currentYear, 2024) + assertEquals(idx.numYears, 4) + assertEquals(idx.latestYear, 2024) + assertEquals(idx.earliestYear, 2021) + } + + test("indexAt returns correct factor for each year") { + val years = Array(2024, 2023, 2022) + val indices = Array(1.05, 1.03, 1.02) + val idx = CalendarYearIndex(years, indices) + + assertEquals(idx.indexAt(2024), 1.05) + assertEquals(idx.indexAt(2023), 1.03) + assertEquals(idx.indexAt(2022), 1.02) + } + + 
test("indexAt throws for unknown year") { + val idx = CalendarYearIndex(Array(2024, 2023), Array(1.05, 1.03)) + + intercept[NoSuchElementException] { + idx.indexAt(2020) + } + } + + test("cumulativeToCurrentFrom current year returns 1.0") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(2024), 1.0) + } + + test("cumulativeToCurrentFrom one year back") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(2023), 1.05) + } + + test("cumulativeToCurrentFrom two years back") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(2022), 1.05 * 1.03, 1e-10) + } + + test("cumulativeToCurrentFrom three years back") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(2021), 1.05 * 1.03 * 1.02, 1e-10) + } + + test("onLevel applies correct cumulative factors by year") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.05, 1.03, 1.02)) + + val values = Array(100.0, 200.0, 300.0) + val dataYears = Array(2024, 2023, 2022) + + val result = idx.onLevel(values, dataYears) + + assertEquals(result(0), 100.0) // 2024: current year, factor = 1.0 + assertEqualsDouble(result(1), 200.0 * 1.05, 1e-10) // 2023: one year back + assertEqualsDouble(result(2), 300.0 * 1.05 * 1.03, 1e-10) // 2022: two years back + } + + test("onLevel with mixed year order") { + val idx = CalendarYearIndex(Array(2024, 2023, 2022), Array(1.10, 1.05, 1.03)) + + val values = Array(100.0, 100.0, 100.0) + val dataYears = Array(2022, 2024, 2023) + + val result = idx.onLevel(values, dataYears) + + assertEqualsDouble(result(0), 100.0 * 1.10 * 1.05, 1e-10) // 2022 + assertEquals(result(1), 100.0) // 2024 + assertEqualsDouble(result(2), 100.0 * 1.10, 1e-10) // 2023 + } + + test("onLevel throws on mismatched array 
lengths") { + val idx = CalendarYearIndex(Array(2024, 2023), Array(1.05, 1.03)) + + intercept[IllegalArgumentException] { + idx.onLevel(Array(100.0, 200.0), Array(2024)) + } + } + + test("constant creates uniform factors across year range") { + val idx = CalendarYearIndex.constant(2020, 2024, 1.05) + + assertEquals(idx.currentYear, 2024) + assertEquals(idx.numYears, 5) + assertEquals(idx.earliestYear, 2020) + assertEquals(idx.latestYear, 2024) + + assertEquals(idx.indexAt(2024), 1.05) + assertEquals(idx.indexAt(2023), 1.05) + assertEquals(idx.indexAt(2020), 1.05) + } + + test("constant cumulative grows exponentially by years back") { + val idx = CalendarYearIndex.constant(2020, 2024, 1.10) + + assertEqualsDouble(idx.cumulativeToCurrentFrom(2021), Math.pow(1.10, 3), 1e-10) + } + + test("fromRateChanges creates correct factors") { + val years = Array(2024, 2023, 2022) + val rateChanges = Array(5.0, 3.0, -2.0) + val idx = CalendarYearIndex.fromRateChanges(years, rateChanges) + + assertEqualsDouble(idx.indexAt(2024), 1.05, 1e-10) + assertEqualsDouble(idx.indexAt(2023), 1.03, 1e-10) + assertEqualsDouble(idx.indexAt(2022), 0.98, 1e-10) + } + + test("construction fails with empty arrays") { + intercept[IllegalArgumentException] { + CalendarYearIndex(Array.empty[Int], Array.empty[Double]) + } + } + + test("construction fails with mismatched array lengths") { + intercept[IllegalArgumentException] { + CalendarYearIndex(2024, Array(2024, 2023), Array(1.05)) + } + } + + test("explicit currentYear constructor") { + val idx = CalendarYearIndex(2025, Array(2024, 2023), Array(1.05, 1.03)) + + assertEquals(idx.currentYear, 2025) + // From 2023 to 2025 is 2 years back + assertEqualsDouble(idx.cumulativeToCurrentFrom(2023), 1.05 * 1.03, 1e-10) + } + +end CalendarYearIndexSuite diff --git a/vecxtensions/test/src/cross.test.scala b/vecxt_re/test/src/cross.test.scala similarity index 85% rename from vecxtensions/test/src/cross.test.scala rename to vecxt_re/test/src/cross.test.scala 
index 3c4f6d32..c797653e 100644 --- a/vecxtensions/test/src/cross.test.scala +++ b/vecxt_re/test/src/cross.test.scala @@ -1,10 +1,9 @@ -package vecxt.reinsurance - -import Limits.Limit -import Retentions.Retention -import rpt.* +package vecxt_re import scala.util.chaining.* +import Limits.* +import Retentions.* +import rpt.reinsuranceFunction class XSuite extends munit.FunSuite: diff --git a/vecxtensions/test/src/groupCumul.test.scala b/vecxt_re/test/src/groupCumul.test.scala similarity index 99% rename from vecxtensions/test/src/groupCumul.test.scala rename to vecxt_re/test/src/groupCumul.test.scala index 08d1c215..837cf585 100644 --- a/vecxtensions/test/src/groupCumul.test.scala +++ b/vecxt_re/test/src/groupCumul.test.scala @@ -1,4 +1,4 @@ -package vecxtensions +package vecxt_re import munit.FunSuite diff --git a/vecxtensions/test/src/groupDiff.test.scala b/vecxt_re/test/src/groupDiff.test.scala similarity index 99% rename from vecxtensions/test/src/groupDiff.test.scala rename to vecxt_re/test/src/groupDiff.test.scala index 72adfb6e..8e4cf499 100644 --- a/vecxtensions/test/src/groupDiff.test.scala +++ b/vecxt_re/test/src/groupDiff.test.scala @@ -1,4 +1,4 @@ -package vecxtensions +package vecxt_re import munit.FunSuite diff --git a/vecxt_re/test/src/groupSumCount.test.scala b/vecxt_re/test/src/groupSumCount.test.scala new file mode 100644 index 00000000..baf2c8f3 --- /dev/null +++ b/vecxt_re/test/src/groupSumCount.test.scala @@ -0,0 +1,91 @@ +package vecxt_re + +import munit.FunSuite + +class GroupSumCountSuite extends FunSuite: + + test("groupSum aggregates per 1-based group index with gaps") { + val groups = Array(1, 1, 2, 4, 4) + val values = Array(2.0, 3.0, 5, 10, 20) + + val result = groupSum(groups, values, nitr = 4) + + assertEquals(result.length, 4) + assertVecEquals(result, Array(5.0, 5, 0, 30)) + } + + test("groupCount counts occurrences per group index") { + val groups = Array(1, 1, 2, 4, 4) + + val result = groupCount(groups, nitr = 4) + + 
assertEquals(result.length, 4) + assertVecEquals(result, Array(2, 1, 0, 2)) + } + + test("handles empty input by returning zeroed buckets") { + val groups = Array.empty[Int] + val values = Array.empty[Double] + + val sumResult = groupSum(groups, values, nitr = 3) + val countResult = groupCount(groups, nitr = 3) + + assertEquals(sumResult.length, 3) + assertEquals(countResult.length, 3) + assertVecEquals(sumResult, Array(0.0, 0, 0)) + assertVecEquals(countResult, Array(0, 0, 0)) + } + + test("single group spanning all entries") { + val groups = Array(3, 3, 3) + val values = Array(1.5, 2.5, -4) + + val sumResult = groupSum(groups, values, nitr = 4) + val countResult = groupCount(groups, nitr = 4) + + val expectedSum = Array(0, 0, values.sum, 0) + val expectedCount = Array(0, 0, 3, 0) + + assertVecEquals(sumResult, expectedSum) + assertVecEquals(countResult, expectedCount) + } + + test("groupMax finds max per 1-based group index with gaps") { + val groups = Array(1, 1, 2, 4, 4) + val values = Array(2.0, 3.0, 5.0, 10.0, 20.0) + + val result = groupMax(groups, values, nitr = 4) + + assertEquals(result.length, 4) + assertVecEquals(result, Array(3.0, 5.0, Double.NegativeInfinity, 20.0)) + } + + test("groupMax handles empty input by returning -Inf buckets") { + val groups = Array.empty[Int] + val values = Array.empty[Double] + + val maxResult = groupMax(groups, values, nitr = 3) + + assertEquals(maxResult.length, 3) + assert(maxResult.forall(_ == Double.NegativeInfinity)) + } + + test("groupMax single group spanning all entries") { + val groups = Array(3, 3, 3) + val values = Array(1.5, 2.5, -4.0) + + val maxResult = groupMax(groups, values, nitr = 4) + + val expectedMax = Array(Double.NegativeInfinity, Double.NegativeInfinity, 2.5, Double.NegativeInfinity) + assertVecEquals(maxResult, expectedMax) + } + + test("groupMax with negative values") { + val groups = Array(1, 1, 2, 2) + val values = Array(-5.0, -2.0, -10.0, -3.0) + + val result = groupMax(groups, values, nitr = 
2) + + assertVecEquals(result, Array(-2.0, -3.0)) + } +end GroupSumCountSuite diff --git a/vecxt_re/test/src/hillEstimator.test.scala b/vecxt_re/test/src/hillEstimator.test.scala new file mode 100644 index 00000000..ff688b8e --- /dev/null +++ b/vecxt_re/test/src/hillEstimator.test.scala @@ -0,0 +1,153 @@ +package vecxt_re + +import HillEstimatorExtensions.* + +class HillEstimatorSuite extends munit.FunSuite: + + // Helper to generate Pareto samples using inverse transform + def generatePareto(n: Int, alpha: Double, xMin: Double = 1.0, seed: Long = 42L): Array[Double] = + val rng = new scala.util.Random(seed) + Array.fill(n) { + val u = rng.nextDouble() + xMin / math.pow(u, 1.0 / alpha) + } + end generatePareto + + test("Hill estimator basic sanity check") { + // Simple case: known sorted data + val data = Array(1.0, 2.0, 4.0, 8.0, 16.0) + // Using k=2 means we use the 2 largest: 16, 8 + // Threshold is at position n-k-1 = 5-2-1 = 2, which is 4.0 + // sum = ln(16/4) + ln(8/4) = ln(4) + ln(2) = 2*ln(2) + ln(2) = 3*ln(2) + // estimate = 2 / (3*ln(2)) + val expected = 2.0 / (3.0 * math.log(2.0)) + val estimate = data.hillEstimator(2) + assertEqualsDouble(estimate, expected, 1e-10) + } + + test("Hill estimator converges for Pareto(2.0) distribution") { + val alpha = 2.0 + val data = generatePareto(10000, alpha) + // With large sample, estimate should be close to true alpha + val estimate = data.hillEstimator(500) + // Allow 15% error for statistical estimation + assertEqualsDouble(estimate, alpha, alpha * 0.15) + } + + test("Hill estimator converges for Pareto(1.5) distribution") { + val alpha = 1.5 + val data = generatePareto(10000, alpha) + val estimate = data.hillEstimator(500) + assertEqualsDouble(estimate, alpha, alpha * 0.15) + } + + test("Hill estimator converges for Pareto(3.0) distribution") { + val alpha = 3.0 + val data = generatePareto(10000, alpha) + val estimate = data.hillEstimator(500) + assertEqualsDouble(estimate, alpha, alpha * 0.15) + } + + 
test("Hill estimator rejects invalid k values") { + val data = Array(1.0, 2.0, 3.0, 4.0, 5.0) + + intercept[IllegalArgumentException] { + data.hillEstimator(0) // k must be >= 1 + } + + intercept[IllegalArgumentException] { + data.hillEstimator(5) // k must be < n + } + + intercept[IllegalArgumentException] { + data.hillEstimator(10) // k must be < n + } + } + + test("Hill estimator rejects empty or single-element arrays") { + intercept[IllegalArgumentException] { + Array.empty[Double].hillEstimator(1) + } + + intercept[IllegalArgumentException] { + Array(1.0).hillEstimator(1) + } + } + + test("Hill plot produces valid output") { + val alpha = 2.0 + val data = generatePareto(1000, alpha) + val result = data.hillPlot(kMin = 10, kMax = 100, step = 5) + + assertEquals(result.kValues.length, result.estimates.length) + assert(result.kValues.head == 10) + assert(result.kValues.last == 100) + assert(result.kValues.length == 19) // (100-10)/5 + 1 = 19 + } + + test("Hill plot estimates are positive for valid Pareto data") { + val data = generatePareto(500, 2.0) + val result = data.hillPlot(kMin = 5, kMax = 50) + + result.estimates.foreach { est => + assert(est > 0, s"Expected positive estimate, got $est") + } + } + + test("Hill plot default kMax is sensible") { + val data = generatePareto(100, 2.0) + val result = data.hillPlot() + + // Default kMax should be min(n/2, n-1) = 50 + assert(result.kValues.last <= 50) + assert(result.kValues.head == 2) // default kMin + } + + test("Hill plot findStableRegion identifies plateau") { + // Generate clean Pareto data + val alpha = 2.0 + val data = generatePareto(5000, alpha, seed = 123L) + val result = data.hillPlot(kMin = 50, kMax = 1000, step = 10) + + result.findStableRegion(windowSize = 5, threshold = 0.15) match + case Some((kStart, kEnd, meanEstimate)) => + // The stable region should give estimate close to true alpha + assertEqualsDouble(meanEstimate, alpha, alpha * 0.2) + assert(kStart < kEnd) + case None => + // It's okay if 
no stable region found with strict threshold + // Just verify the method runs without error + () + end match + } + + test("Hill estimator is invariant to data order") { + val data = Array(5.0, 1.0, 10.0, 2.0, 20.0, 3.0) + val shuffled = data.clone() + scala.util.Random.shuffle(shuffled.toSeq).toArray + + val est1 = data.hillEstimator(2) + val est2 = shuffled.hillEstimator(2) + + // Both should give same result after internal sorting + assertEqualsDouble(est1, est2, 1e-10) + } + + test("Hill estimator with k=1 uses only largest value") { + val data = Array(1.0, 2.0, 4.0, 8.0) + // k=1: use only largest (8), threshold is second largest (4) + // estimate = 1 / ln(8/4) = 1/ln(2) + val expected = 1.0 / math.log(2.0) + val estimate = data.hillEstimator(1) + assertEqualsDouble(estimate, expected, 1e-10) + } + + test("Hill plot step parameter works correctly") { + val data = generatePareto(200, 2.0) + val result = data.hillPlot(kMin = 10, kMax = 50, step = 10) + + assertEquals(result.kValues.toSeq, Seq(10, 20, 30, 40, 50)) + assertEquals(result.estimates.length, 5) + } + +end HillEstimatorSuite diff --git a/vecxt_re/test/src/indexPerPeriod.test.scala b/vecxt_re/test/src/indexPerPeriod.test.scala new file mode 100644 index 00000000..4ee9a704 --- /dev/null +++ b/vecxt_re/test/src/indexPerPeriod.test.scala @@ -0,0 +1,131 @@ +package vecxt_re + +import munit.FunSuite + +class IndexPerPeriodSuite extends FunSuite: + + test("indexAt returns correct index for each period") { + val indices = Array(1.05, 1.03, 1.02) + val idx = IndexPerPeriod(indices) + + assertEquals(idx.indexAt(0), 1.05) + assertEquals(idx.indexAt(1), 1.03) + assertEquals(idx.indexAt(2), 1.02) + } + + test("numPeriods returns correct count") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEquals(idx.numPeriods, 3) + } + + test("cumulativeToCurrentFrom period 0 returns 1.0") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(0), 1.0) + } + + 
test("cumulativeToCurrentFrom negative period returns 1.0") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(-1), 1.0) + } + + test("cumulativeToCurrentFrom period 1 returns first index") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEquals(idx.cumulativeToCurrentFrom(1), 1.05) + } + + test("cumulativeToCurrentFrom period 2 returns product of first two indices") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(2), 1.05 * 1.03, 1e-10) + } + + test("cumulativeToCurrentFrom period 3 returns product of all indices") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(3), 1.05 * 1.03 * 1.02, 1e-10) + } + + test("cumulativeToCurrentFrom beyond available periods uses all indices") { + val idx = IndexPerPeriod(Array(1.05, 1.03)) + assertEqualsDouble(idx.cumulativeToCurrentFrom(5), 1.05 * 1.03, 1e-10) + } + + test("cumulativeFactors returns correct array") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + val factors = idx.cumulativeFactors(4) + + assertEquals(factors.length, 4) + assertEquals(factors(0), 1.0) + assertEqualsDouble(factors(1), 1.05, 1e-10) + assertEqualsDouble(factors(2), 1.05 * 1.03, 1e-10) + assertEqualsDouble(factors(3), 1.05 * 1.03 * 1.02, 1e-10) + } + + test("cumulativeFactors with upToPeriod less than numPeriods") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + val factors = idx.cumulativeFactors(2) + + assertEquals(factors.length, 2) + assertEquals(factors(0), 1.0) + assertEqualsDouble(factors(1), 1.05, 1e-10) + } + + test("onLevel applies correct cumulative factors") { + val idx = IndexPerPeriod(Array(1.05, 1.03, 1.02)) + + val values = Array(100.0, 200.0, 300.0) + val periods = Array(0, 1, 2) + + val result = idx.onLevel(values, periods) + + assertEquals(result.length, 3) + assertEquals(result(0), 100.0) // period 0: * 1.0 + assertEqualsDouble(result(1), 200.0 * 1.05, 
1e-10) // period 1: * 1.05 + assertEqualsDouble(result(2), 300.0 * 1.05 * 1.03, 1e-10) // period 2: * (1.05 * 1.03) + } + + test("onLevel with mixed period order") { + val idx = IndexPerPeriod(Array(1.10, 1.05)) + + val values = Array(100.0, 100.0, 100.0) + val periods = Array(2, 0, 1) + + val result = idx.onLevel(values, periods) + + assertEqualsDouble(result(0), 100.0 * 1.10 * 1.05, 1e-10) // period 2 + assertEquals(result(1), 100.0) // period 0 + assertEqualsDouble(result(2), 100.0 * 1.10, 1e-10) // period 1 + } + + test("onLevel throws on mismatched array lengths") { + val idx = IndexPerPeriod(Array(1.05)) + val values = Array(100.0, 200.0) + val periods = Array(0) + + intercept[IllegalArgumentException] { + idx.onLevel(values, periods) + } + } + + test("fromRateChanges creates correct factors") { + val idx = IndexPerPeriod.fromRateChanges(Array(5.0, 3.0, -2.0)) + + assertEqualsDouble(idx.indexAt(0), 1.05, 1e-10) + assertEqualsDouble(idx.indexAt(1), 1.03, 1e-10) + assertEqualsDouble(idx.indexAt(2), 0.98, 1e-10) + } + + test("constant creates uniform factors") { + val idx = IndexPerPeriod.constant(3, 1.05) + + assertEquals(idx.numPeriods, 3) + assertEquals(idx.indexAt(0), 1.05) + assertEquals(idx.indexAt(1), 1.05) + assertEquals(idx.indexAt(2), 1.05) + } + + test("constant cumulative grows exponentially") { + val idx = IndexPerPeriod.constant(5, 1.10) + + assertEqualsDouble(idx.cumulativeToCurrentFrom(3), Math.pow(1.10, 3), 1e-10) + } + +end IndexPerPeriodSuite diff --git a/vecxtensions/test/src/layer.test.scala b/vecxt_re/test/src/layer.test.scala similarity index 90% rename from vecxtensions/test/src/layer.test.scala rename to vecxt_re/test/src/layer.test.scala index dc27fb6a..a3ef79d6 100644 --- a/vecxtensions/test/src/layer.test.scala +++ b/vecxt_re/test/src/layer.test.scala @@ -1,10 +1,43 @@ -package vecxt.reinsurance - -import java.util.UUID -import vecxtensions.assertVecEquals +package vecxt_re class ScenarioRISuite extends munit.FunSuite: + 
test("firstLimit prefers occLimit when both present") { + val layer = Layer( + occLimit = Some(10.0), + occRetention = Some(1.0), + aggLimit = Some(20.0) + ) + assertEqualsDouble(layer.firstLimit, 10.0, 0.0) + } + + test("firstLimit is occLimit when only occLimit is present") { + val layer = Layer( + occLimit = Some(15.0), + occRetention = Some(2.0), + aggLimit = None + ) + assertEqualsDouble(layer.firstLimit, 15.0, 0.0) + } + + test("firstLimit falls back to aggLimit when occLimit is absent") { + val layer = Layer( + occLimit = None, + occRetention = None, + aggLimit = Some(25.0) + ) + assertEqualsDouble(layer.firstLimit, 25.0, 0.0) + } + + test("firstLimit is PositiveInfinity when no limits are present") { + val layer = Layer( + occLimit = None, + occRetention = None, + aggLimit = None + ) + assertEqualsDouble(layer.firstLimit, Double.PositiveInfinity, 0.0) + } + test("Layer default construction") { val layer = Layer() diff --git a/vecxt_re/test/src/losscalc.test.scala b/vecxt_re/test/src/losscalc.test.scala new file mode 100644 index 00000000..59d2245d --- /dev/null +++ b/vecxt_re/test/src/losscalc.test.scala @@ -0,0 +1,254 @@ +package vecxt_re + +import munit.FunSuite +import ReReporting.* + +class LossCalcSuite extends FunSuite: + + test("ReportDenominator.FirstLimit uses occLimit when present") { + val layer = Layer(occLimit = Some(10.0), aggLimit = Some(20.0)) + assertEqualsDouble(ReportDenominator.FirstLimit.fromlayer(layer), 10.0, 0.0) + } + + test("ReportDenominator.FirstLimit falls back to aggLimit when occLimit missing") { + val layer = Layer(occLimit = None, aggLimit = Some(30.0)) + assertEqualsDouble(ReportDenominator.FirstLimit.fromlayer(layer), 30.0, 0.0) + } + + test("ReportDenominator.FirstLimit returns PositiveInfinity when no limits") { + val layer = Layer() + assertEqualsDouble(ReportDenominator.FirstLimit.fromlayer(layer), Double.PositiveInfinity, 0.0) + } + + test("ReportDenominator.AggLimit returns aggLimit when present") { + val layer = 
Layer(aggLimit = Some(40.0)) + assertEqualsDouble(ReportDenominator.AggLimit.fromlayer(layer), 40.0, 0.0) + } + + test("ReportDenominator.AggLimit returns PositiveInfinity when aggLimit missing") { + val layer = Layer(aggLimit = None) + assertEqualsDouble(ReportDenominator.AggLimit.fromlayer(layer), Double.PositiveInfinity, 0.0) + } + + test("ReportDenominator.Custom returns provided denominator") { + val layer = Layer() + assertEqualsDouble(ReportDenominator.Custom(55.5).fromlayer(layer), 55.5, 0.0) + } +end LossCalcSuite + +class LossReportSuite extends FunSuite: + + test("lossReport computes EL correctly") { + // 5 iterations, total loss = 10 + 0 + 20 + 5 + 15 = 50 + // EL = 50 / 5 = 10 + val layer = Layer(occLimit = Some(100.0)) + val years = Array(1, 1, 3, 4, 5, 5) + val cededToLayer = Array(5.0, 5.0, 20.0, 5.0, 10.0, 5.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + // Total = 10 + 20 + 5 + 15 = 50, EL = 50/5 = 10, normalized = 10/100 = 0.1 + assertEqualsDouble(report.el, 0.1, 0.0001, "EL should be 10/100 = 0.1") + // Compare against single metric calculation + val singleMetricEL = calcd.expectedLoss(numIterations) / reportLimit + assertEqualsDouble(report.el, singleMetricEL, 0.0001, "lossReport EL should match expectedLoss") + } + + test("lossReport computes attachment probability correctly") { + // 5 iterations: iter 1 has loss, iter 2 has 0 loss, iter 3 has loss, iter 4 has loss, iter 5 has loss + // Attachment = 4/5 = 0.8 + val layer = Layer(occLimit = Some(100.0)) + val years = Array(1, 1, 3, 4, 5, 5) + val cededToLayer = Array(5.0, 5.0, 20.0, 5.0, 10.0, 5.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + // Iterations 1,3,4,5 have losses, 
iteration 2 has 0 + assertEqualsDouble(report.attachProb, 0.8, 0.0001, "Attachment probability should be 4/5 = 0.8") + // Compare against single metric calculation + val singleMetricAttach = calcd.attachmentProbability(numIterations, years) + assertEqualsDouble( + report.attachProb, + singleMetricAttach, + 0.0001, + "lossReport attachProb should match attachmentProbability" + ) + } + + test("lossReport computes exhaustion probability correctly") { + // Layer with aggLimit of 10, 5 iterations + // iter 1: 10 (exhausted), iter 2: 0, iter 3: 20 (exhausted), iter 4: 5, iter 5: 15 (exhausted) + // Exhaustion = 3/5 = 0.6 + val layer = Layer(occLimit = Some(100.0), aggLimit = Some(10.0)) + val years = Array(1, 1, 3, 4, 5, 5) + val cededToLayer = Array(5.0, 5.0, 20.0, 5.0, 10.0, 5.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + // Per-iteration sums: 1->10, 2->0, 3->20, 4->5, 5->15 + // exhaust threshold = 10 - 0.01 = 9.99 + // Iterations 1, 3, 5 exceed 9.99, so exhaustion = 3/5 = 0.6 + assertEqualsDouble(report.exhaustProb, 0.6, 0.0001, "Exhaustion probability should be 3/5 = 0.6") + // Compare against single metric calculation + val singleMetricExhaust = calcd.exhaustionProbability(numIterations, years) + assertEqualsDouble( + report.exhaustProb, + singleMetricExhaust, + 0.0001, + "lossReport exhaustProb should match exhaustionProbability" + ) + } + + test("lossReport computes stdDev correctly") { + // 5 iterations with per-iteration sums: 10, 0, 20, 5, 15 + // Mean = (10 + 0 + 20 + 5 + 15) / 5 = 50 / 5 = 10 + // Variance = ((10-10)^2 + (0-10)^2 + (20-10)^2 + (5-10)^2 + (15-10)^2) / 5 + // = (0 + 100 + 100 + 25 + 25) / 5 = 250 / 5 = 50 + // StdDev = sqrt(50) ≈ 7.071 + val layer = Layer(occLimit = Some(100.0)) + val years = Array(1, 1, 3, 4, 5, 5) + val cededToLayer = Array(5.0, 5.0, 20.0, 5.0, 10.0, 5.0) + val calcd = (layer = layer, cededToLayer 
= cededToLayer) + val numIterations = 5 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + val expectedStdDev = Math.sqrt(50.0) / 100.0 // normalized by limit + assertEqualsDouble(report.stdDev, expectedStdDev, 0.0001, s"StdDev should be sqrt(50)/100 = $expectedStdDev") + // Compare against single metric calculation + val singleMetricStd = calcd.std(numIterations, years) / reportLimit + assertEqualsDouble(report.stdDev, singleMetricStd, 0.0001, "lossReport stdDev should match std") + } + + test("lossReport with all zero losses") { + val layer = Layer(occLimit = Some(100.0), aggLimit = Some(10.0)) + val years = Array[Int]() + val cededToLayer = Array[Double]() + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 5 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + assertEqualsDouble(report.el, 0.0, 0.0001, "EL should be 0") + assertEqualsDouble(report.stdDev, 0.0, 0.0001, "StdDev should be 0") + assertEqualsDouble(report.attachProb, 0.0, 0.0001, "Attachment probability should be 0") + assertEqualsDouble(report.exhaustProb, 0.0, 0.0001, "Exhaustion probability should be 0") + // Compare against single metric calculations + assertEqualsDouble( + report.el, + calcd.expectedLoss(numIterations) / reportLimit, + 0.0001, + "lossReport EL should match expectedLoss" + ) + assertEqualsDouble( + report.attachProb, + calcd.attachmentProbability(numIterations, years), + 0.0001, + "lossReport attachProb should match attachmentProbability" + ) + assertEqualsDouble( + report.exhaustProb, + calcd.exhaustionProbability(numIterations, years), + 0.0001, + "lossReport exhaustProb should match exhaustionProbability" + ) + assertEqualsDouble( + report.stdDev, + calcd.std(numIterations, years) / reportLimit, + 0.0001, + "lossReport stdDev should match std" + ) 
+ } + + test("lossReport returns correct layer name") { + val layer = Layer(occLimit = Some(100.0), layerName = Some("Test Layer")) + val years = Array(1) + val cededToLayer = Array(10.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val report = calcd.lossReport(1, years, ReportDenominator.FirstLimit) + + assertEquals(report.name, "Test Layer") + } + + test("lossReport returns correct limit") { + val layer = Layer(occLimit = Some(100.0), aggLimit = Some(200.0)) + val years = Array(1) + val cededToLayer = Array(10.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val report = calcd.lossReport(1, years, ReportDenominator.FirstLimit) + + assertEqualsDouble(report.limit, 100.0, 0.0001) + } + + test("lossReport with single iteration") { + val layer = Layer(occLimit = Some(50.0)) + val years = Array(1, 1, 1) + val cededToLayer = Array(10.0, 15.0, 25.0) // Total = 50 + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 1 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + assertEqualsDouble(report.el, 1.0, 0.0001, "EL should be 50/50 = 1.0") + assertEqualsDouble(report.attachProb, 1.0, 0.0001, "Attachment should be 1.0") + assertEqualsDouble(report.stdDev, 0.0, 0.0001, "StdDev should be 0 with single iteration") + // Compare against single metric calculations + assertEqualsDouble( + report.el, + calcd.expectedLoss(numIterations) / reportLimit, + 0.0001, + "lossReport EL should match expectedLoss" + ) + assertEqualsDouble( + report.attachProb, + calcd.attachmentProbability(numIterations, years), + 0.0001, + "lossReport attachProb should match attachmentProbability" + ) + assertEqualsDouble( + report.stdDev, + calcd.std(numIterations, years) / reportLimit, + 0.0001, + "lossReport stdDev should match std" + ) + } + + test("lossReport matches all single metrics for complex scenario") { + // A more complex test case 
with many iterations and varied losses + val layer = Layer(occLimit = Some(50.0), aggLimit = Some(30.0), layerName = Some("Complex Layer")) + val years = Array(1, 1, 2, 3, 3, 3, 5, 7, 7, 10) + val cededToLayer = Array(10.0, 5.0, 25.0, 8.0, 12.0, 5.0, 40.0, 3.0, 7.0, 15.0) + val calcd = (layer = layer, cededToLayer = cededToLayer) + val numIterations = 10 + val reportLimit = ReportDenominator.FirstLimit.fromlayer(layer) + val report = calcd.lossReport(numIterations, years, ReportDenominator.FirstLimit) + + // Compare all metrics against single metric calculations + val singleMetricEL = calcd.expectedLoss(numIterations) / reportLimit + val singleMetricAttach = calcd.attachmentProbability(numIterations, years) + val singleMetricExhaust = calcd.exhaustionProbability(numIterations, years) + val singleMetricStd = calcd.std(numIterations, years) / reportLimit + + assertEqualsDouble(report.el, singleMetricEL, 0.0001, "lossReport EL should match expectedLoss") + assertEqualsDouble( + report.attachProb, + singleMetricAttach, + 0.0001, + "lossReport attachProb should match attachmentProbability" + ) + assertEqualsDouble( + report.exhaustProb, + singleMetricExhaust, + 0.0001, + "lossReport exhaustProb should match exhaustionProbability" + ) + assertEqualsDouble(report.stdDev, singleMetricStd, 0.0001, "lossReport stdDev should match std") + assertEquals(report.name, "Complex Layer") + assertEqualsDouble(report.limit, 50.0, 0.0001) + } + +end LossReportSuite diff --git a/vecxtensions/test/src/maketower.test.scala b/vecxt_re/test/src/maketower.test.scala similarity index 96% rename from vecxtensions/test/src/maketower.test.scala rename to vecxt_re/test/src/maketower.test.scala index 19e923c7..1abbccd7 100644 --- a/vecxtensions/test/src/maketower.test.scala +++ b/vecxt_re/test/src/maketower.test.scala @@ -1,9 +1,6 @@ -package vecxt.reinsurance +package vecxt_re -import java.util.UUID -import vecxt.all.* - -class TowerSuite extends munit.FunSuite: +class MakeTowerSuite extends 
munit.FunSuite: val sampleLayer = Layer( layerName = Some("Primary Layer"), @@ -103,4 +100,4 @@ class TowerSuite extends munit.FunSuite: assertEquals(scaledLayer.occLimit, originalLayer.occLimit) assertEquals(scaledLayer.occRetention, originalLayer.occRetention) -end TowerSuite +end MakeTowerSuite diff --git a/vecxt_re/test/src/pickandsEstimator.test.scala b/vecxt_re/test/src/pickandsEstimator.test.scala new file mode 100644 index 00000000..95c53870 --- /dev/null +++ b/vecxt_re/test/src/pickandsEstimator.test.scala @@ -0,0 +1,147 @@ +package vecxt_re + +import PickandsEstimatorExtensions.* + +class PickandsEstimatorSuite extends munit.FunSuite: + + // Helper to generate Pareto samples using inverse transform + def generatePareto(n: Int, alpha: Double, xMin: Double = 1.0, seed: Long = 42L): Array[Double] = + val rng = new scala.util.Random(seed) + Array.fill(n) { + val u = rng.nextDouble() + xMin / math.pow(u, 1.0 / alpha) + } + end generatePareto + + test("Pickands estimator basic formula check") { + // Construct a simple case where we know the order statistics + // Data: 1, 2, 3, 4, 5, 6, 7, 8, 9 (n=9) + // k=2: X_(n-k)=X_(7)=8, X_(n-2k)=X_(5)=6, X_(n-4k)=X_(1)=2 + // γ = ln((8-6)/(6-2)) / ln(2) = ln(2/4) / ln(2) = ln(0.5) / ln(2) = -1 + val data = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0) + val gamma = data.pickandsEstimator(2) + assertEqualsDouble(gamma, -1.0, 1e-10) + } + + test("Pickands estimator for Pareto(2.0) distribution converges") { + val alpha = 2.0 + val gamma = 1.0 / alpha // = 0.5 + val data = generatePareto(10000, alpha) + + // Pickands has higher variance - use multiple k values and average + val result = data.pickandsPlot(kMin = 100, kMax = 500, step = 10) + val validEstimates = result.gammaEstimates.filter(g => g > 0 && !g.isNaN) + val meanEstimate = validEstimates.sum / validEstimates.length + + // Pickands has higher variance than Hill, so allow 50% error + assertEqualsDouble(meanEstimate, gamma, gamma * 0.5) + } + + test("Pickands 
tail index for Pareto(2.0)") { + val alpha = 2.0 + val data = generatePareto(10000, alpha) + + // Average over a range of k values for more stable estimate + val result = data.pickandsPlot(kMin = 100, kMax = 500, step = 10) + val validEstimates = result.alphaEstimates.filter(a => a > 0 && !a.isNaN && a.isFinite) + val meanEstimate = validEstimates.sum / validEstimates.length + + // Should be close to 2, allow 50% error for Pickands + assertEqualsDouble(meanEstimate, alpha, alpha * 0.5) + } + + test("Pickands estimator for Pareto(1.5) distribution") { + val alpha = 1.5 + val gamma = 1.0 / alpha + val data = generatePareto(5000, alpha, seed = 123L) + val k = 40 + val estimate = data.pickandsEstimator(k) + + assertEqualsDouble(estimate, gamma, gamma * 0.35) + } + + test("Pickands estimator rejects invalid k values") { + val data = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0) // n=9 + + intercept[IllegalArgumentException] { + data.pickandsEstimator(0) // k must be >= 1 + } + + intercept[IllegalArgumentException] { + data.pickandsEstimator(3) // 4*3=12 > 9 + } + } + + test("Pickands estimator rejects small arrays") { + intercept[IllegalArgumentException] { + Array(1.0, 2.0, 3.0, 4.0).pickandsEstimator(1) + } + } + + test("Pickands plot produces valid output") { + val alpha = 2.0 + val data = generatePareto(1000, alpha) + val result = data.pickandsPlot(kMin = 5, kMax = 50, step = 5) + + assertEquals(result.kValues.length, result.gammaEstimates.length) + assertEquals(result.kValues.length, result.alphaEstimates.length) + assert(result.kValues.head == 5) + assert(result.kValues.last == 50) + } + + test("Pickands plot default kMax respects 4k < n constraint") { + val data = generatePareto(100, 2.0) + val result = data.pickandsPlot() + + // Max valid k = (100-1)/4 = 24 + assert(result.kValues.last <= 24) + } + + test("Pickands plot positive gamma implies positive alpha") { + val data = generatePareto(500, 2.0) + val result = data.pickandsPlot(kMin = 2, kMax = 20) + + 
result.gammaEstimates.zip(result.alphaEstimates).foreach { case (gamma, alpha) => + if gamma > 0 && !gamma.isNaN then + assert(alpha > 0, s"Expected positive alpha for gamma=$gamma") + assertEqualsDouble(alpha, 1.0 / gamma, 1e-10) + end if + } + } + + test("Pickands estimator is invariant to data order") { + val data = Array(9.0, 1.0, 5.0, 3.0, 7.0, 2.0, 8.0, 4.0, 6.0) + val shuffled = scala.util.Random.shuffle(data.toSeq).toArray + + val est1 = data.pickandsEstimator(2) + val est2 = shuffled.pickandsEstimator(2) + + assertEqualsDouble(est1, est2, 1e-10) + } + + test("Pickands findStableRegion identifies plateau") { + val alpha = 2.0 + val data = generatePareto(2000, alpha, seed = 456L) + val result = data.pickandsPlot(kMin = 10, kMax = 100, step = 2) + + result.findStableRegion(windowSize = 5, threshold = 0.3) match + case Some((kStart, kEnd, meanGamma, meanAlpha)) => + // Mean alpha should be close to true alpha + assertEqualsDouble(meanAlpha, alpha, alpha * 0.35) + assert(kStart < kEnd) + assertEqualsDouble(meanGamma, 1.0 / meanAlpha, 1e-10) + case None => + // Okay if no stable region found with strict threshold + () + end match + } + + test("Pickands plot step parameter works correctly") { + val data = generatePareto(500, 2.0) + val result = data.pickandsPlot(kMin = 5, kMax = 25, step = 5) + + assertEquals(result.kValues.toSeq, Seq(5, 10, 15, 20, 25)) + assertEquals(result.gammaEstimates.length, 5) + } + +end PickandsEstimatorSuite diff --git a/vecxtensions/test/src/rpt.test.scala b/vecxt_re/test/src/rpt.test.scala similarity index 96% rename from vecxtensions/test/src/rpt.test.scala rename to vecxt_re/test/src/rpt.test.scala index 5a670475..13ce1cef 100644 --- a/vecxtensions/test/src/rpt.test.scala +++ b/vecxt_re/test/src/rpt.test.scala @@ -1,6 +1,6 @@ -package vecxt.reinsurance +package vecxt_re -import vecxt.reinsurance.rpt.* +import rpt.* import Retentions.Retention import Limits.Limit @@ -199,7 +199,7 @@ class ReinsuranceShareSuite extends munit.FunSuite: 
v1.reinsuranceFunction(Some(Limit(5.0)), Some(Retention(10.0))) v2.reinsuranceFunction(Some(Limit(5.0)), Some(Retention(10.0)), 1.0) - for i <- 0 until v1.length do assertEqualsDouble(v1(i), v2(i), 0.0001, s"Element $i") + for i <- 0.until(v1.length) do assertEqualsDouble(v1(i), v2(i), 0.0001, s"Element $i") end for } @@ -207,7 +207,7 @@ class ReinsuranceShareSuite extends munit.FunSuite: val v = Array[Double](8, 11, 16, 10.0) v.reinsuranceFunction(Some(Limit(5.0)), Some(Retention(10.0)), 0.0) - for i <- 0 until v.length do assertEqualsDouble(v(i), 0.0, 0.0001, s"Element $i") + for i <- 0.until(v.length) do assertEqualsDouble(v(i), 0.0, 0.0001, s"Element $i") end for } diff --git a/vecxt_re/test/src/scenario.test.scala b/vecxt_re/test/src/scenario.test.scala new file mode 100644 index 00000000..85b54ab3 --- /dev/null +++ b/vecxt_re/test/src/scenario.test.scala @@ -0,0 +1,136 @@ +package vecxt_re + +import java.time.LocalDate + +class ScenarioSuite extends munit.FunSuite: + + test("Events") { + + val event = Event.random + + } + + test("Random Scenario") { + val numItr = 10 + val s = Scenario( + Vector.fill(10)(Event.random(maxIter = numItr)), + numItr + ) + + assertEquals(s.iterations.length, 10) + assertEquals(s.amounts.length, 10) + + assert(s.hasOccurence) + } + + test("Some scenario stats") { + val e1 = Event(1, 15.0) + val e2 = Event(4, 25.0) + val e3 = Event(4, 1.0) + val e4 = Event(4, 1.0) + val e5 = Event(4, 1.0) + val numItr = 5 + + val s = Scenario( + Vector(e2, e3, e4, e5, e1), + numItr + ) + + assertVecEquals(s.freq, Array(1, 0, 0, 4, 0)) + assertVecEquals(s.agg, Array(15.0, 0, 0, 28.0, 0)) + assertEqualsDouble(s.meanFreq, (1 + 4) / 5.0, 0.00000001) + assertEqualsDouble(s.clusterCoeff, 2.0, 0.000001) + assertEqualsDouble(s.varianceMeanRatio, 3, 0.00001) + + } + + test("scaleAmntBy doubles amounts and threshold, preserves other fields"): + val base = Scenarr.withGeneratedIds( + iterations = Array(1, 1, 2), + days = Array(1, 2, 3), + amounts = 
Array(100.0, 200.0, 300.0), + numberIterations = 2, + threshold = 50.0 + ) + + val scaled = base.scaleAmntBy(2.0) + + assertEquals(scaled.amounts.toSeq, Seq(200.0, 400.0, 600.0)) + assertEquals(scaled.threshold, 100.0) + // other fields unchanged + assertEquals(scaled.iterations.toSeq, base.iterations.toSeq) + assertEquals(scaled.days.toSeq, base.days.toSeq) + assertEquals(scaled.numberIterations, base.numberIterations) + assertEquals(scaled.name, base.name) + assertEquals(scaled.isSorted, base.isSorted) + + test("scaleAmntBy with zero scale results in zero amounts and zero threshold"): + val base = Scenarr.withGeneratedIds(Array(1), Array(1), Array(123.0), numberIterations = 1, threshold = 7.5) + val scaled0 = base.scaleAmntBy(0.0) + assertEquals(scaled0.amounts.toSeq, Seq(0.0)) + assertEquals(scaled0.threshold, 0.0) + + test("scaleAmntBy supports negative scaling and does not mutate original"): + val originalAmounts = Array(10.0, 20.0, 30.0) + val base = Scenarr.withGeneratedIds( + Array(1, 1, 1), + Array(1, 2, 3), + originalAmounts.clone(), + numberIterations = 1, + threshold = 5.0 + ) + val scaled = base.scaleAmntBy(-1.5) + assertEquals(scaled.amounts.toSeq, Seq(-15.0, -30.0, -45.0)) + assertEquals(scaled.threshold, -7.5) + // original remains unchanged + assertEquals(base.amounts.toSeq, originalAmounts.toSeq) + assertEquals(base.threshold, 5.0) + + test("applyThreshold increases threshold and filters claims"): + val base = Scenarr.withGeneratedIds( + iterations = Array(1, 2, 3), + days = Array(10, 20, 30), + amounts = Array(10.0, 20.0, 30.0), + numberIterations = 3, + threshold = 5.0 + ) + + val applied = base.applyThreshold(15.0) + + assertEquals(applied.amounts.toSeq, Seq(20.0, 30.0)) + assertEquals(applied.iterations.toSeq, Seq(2, 3)) + assertEquals(applied.days.toSeq, Seq(20, 30)) + assertEquals(applied.threshold, 15.0) + // original remains unchanged + assertEquals(base.amounts.toSeq, Seq(10.0, 20.0, 30.0)) + assertEquals(base.threshold, 5.0) + + 
test("applyThreshold throws if newThresh is not greater than current threshold"): + val base2 = Scenarr.withGeneratedIds(Array(1), Array(1), Array(100.0), numberIterations = 1, threshold = 50.0) + val ex = intercept[Exception](base2.applyThreshold(50.0)) + assert(ex.getMessage.contains("Threshold may only be increased")) + + test("applyThreshold may result in no claims"): + val base3 = + Scenarr.withGeneratedIds(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 5.0) + val appliedEmpty = base3.applyThreshold(100.0) + assertEquals(appliedEmpty.amounts.toSeq, Seq()) + assertEquals(appliedEmpty.iterations.toSeq, Seq()) + assertEquals(appliedEmpty.days.toSeq, Seq()) + assertEquals(appliedEmpty.threshold, 100.0) + + test("claimDates maps day 1 to day1 property"): + val base = Scenarr.withGeneratedIds( + iterations = Array(1, 2), + days = Array(1, 100), + amounts = Array(10.0, 20.0), + numberIterations = 2, + threshold = 1.0, + day1 = LocalDate.of(2019, 1, 1), + name = "claim-date-test" + ) + + // base claimDates: first should be day1 + assertEquals(base.claimDates.head, base.day1) + +end ScenarioSuite diff --git a/vecxt_re/test/src/scenarr.monoid.test.scala b/vecxt_re/test/src/scenarr.monoid.test.scala new file mode 100644 index 00000000..bcb2d6e0 --- /dev/null +++ b/vecxt_re/test/src/scenarr.monoid.test.scala @@ -0,0 +1,383 @@ +package vecxt_re + +import munit.FunSuite +import java.time.LocalDate +import cats.kernel.Monoid + +class ScenarrMonoidSuite extends FunSuite: + + test("cats Monoid instance is available via given") { + val monoid = summon[Monoid[Scenarr]] + assertEquals(monoid.empty.amounts.length, 0) + assertEquals(monoid.empty.isSorted, true) + } + + test("cats Monoid.combine works like |+|") { + val s1 = Scenarr.withGeneratedIds(Array(1), Array(10), Array(100.0), numberIterations = 1) + val s2 = Scenarr.withGeneratedIds(Array(1), Array(20), Array(200.0), numberIterations = 1) + + val monoid = summon[Monoid[Scenarr]] + val result = 
monoid.combine(s1, s2) + + assertEquals(result.amounts.length, 2) + assertEquals(result.isSorted, true) + } + + test("cats Monoid.combineAll works for multiple scenarios") { + val scenarios = List( + new Scenarr(Array(1), Array(10), Array(100.0), Array(1L), numberIterations = 2), + new Scenarr(Array(1), Array(20), Array(200.0), Array(2L), numberIterations = 2), + new Scenarr(Array(2), Array(15), Array(150.0), Array(3L), numberIterations = 2) + ) + + val monoid = Scenarr.monoidForIterations(2) + val result = monoid.combineAll(scenarios) + + assertEquals(result.amounts.length, 3) + assertEquals(result.amounts.sum, 450.0) + } + + test("empty is left identity: empty |+| s = s") { + val s = Scenarr.withGeneratedIds( + Array(1, 2), + Array(10, 20), + Array(100.0, 200.0), + numberIterations = 2, + threshold = 5.0, + day1 = LocalDate.of(2020, 6, 15), + name = "test" + ) + + val result = Scenarr.empty |+| s + assertEquals(result.iterations.toSeq, s.iterations.toSeq) + assertEquals(result.days.toSeq, s.days.toSeq) + assertEquals(result.amounts.toSeq, s.amounts.toSeq) + assertEquals(result.numberIterations, s.numberIterations) + assertEquals(result.threshold, s.threshold) + assertEquals(result.day1, s.day1) + } + + test("empty is right identity: s |+| empty = s") { + val s = Scenarr.withGeneratedIds( + Array(1, 2), + Array(10, 20), + Array(100.0, 200.0), + numberIterations = 2, + threshold = 5.0, + day1 = LocalDate.of(2020, 6, 15), + name = "test" + ) + + val result = s |+| Scenarr.empty + assertEquals(result.iterations.toSeq, s.iterations.toSeq) + assertEquals(result.days.toSeq, s.days.toSeq) + assertEquals(result.amounts.toSeq, s.amounts.toSeq) + assertEquals(result.numberIterations, s.numberIterations) + assertEquals(result.threshold, s.threshold) + assertEquals(result.day1, s.day1) + } + + test("empty |+| empty = empty") { + val result = Scenarr.empty |+| Scenarr.empty + assertEquals(result.amounts.length, 0) + assertEquals(result.isSorted, true) + } + + test("combining 
disjoint events concatenates them") { + val s1 = Scenarr.withGeneratedIds( + Array(1, 1), + Array(10, 20), + Array(100.0, 200.0), + numberIterations = 2, + threshold = 0.0, + name = "s1" + ) + val s2 = Scenarr.withGeneratedIds( + Array(2, 2), + Array(15, 25), + Array(150.0, 250.0), + numberIterations = 2, + threshold = 0.0, + name = "s2" + ) + + val result = s1 |+| s2 + assertEquals(result.amounts.length, 4) + assertEquals(result.isSorted, true) + // Should be sorted by (iteration, day) + assertEquals(result.iterations.toSeq, Seq(1, 1, 2, 2)) + } + + test("events with same ID aggregate their amounts") { + val sharedId = 12345L + val s1 = new Scenarr( + Array(1), + Array(10), + Array(100.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s1" + ) + val s2 = new Scenarr( + Array(1), + Array(10), + Array(50.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s2" + ) + + val result = s1 |+| s2 + assertEquals(result.amounts.length, 1) + assertEquals(result.amounts(0), 150.0) // 100 + 50 + assertEquals(result.ids(0), sharedId) + } + + test("same ID with different iteration throws exception") { + val sharedId = 12345L + val s1 = new Scenarr( + Array(1), + Array(10), + Array(100.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s1" + ) + val s2 = new Scenarr( + Array(2), // different iteration! 
+ Array(10), + Array(50.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s2" + ) + + intercept[IllegalArgumentException] { + s1 |+| s2 + } + } + + test("same ID with different day throws exception") { + val sharedId = 12345L + val s1 = new Scenarr( + Array(1), + Array(10), + Array(100.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s1" + ) + val s2 = new Scenarr( + Array(1), + Array(20), // different day! + Array(50.0), + Array(sharedId), + numberIterations = 2, + threshold = 0.0, + day1 = LocalDate.of(2019, 1, 1), + name = "s2" + ) + + intercept[IllegalArgumentException] { + s1 |+| s2 + } + } + + test("different numberIterations throws exception") { + val s1 = Scenarr.withGeneratedIds(Array(1), Array(10), Array(100.0), numberIterations = 2) + val s2 = Scenarr.withGeneratedIds(Array(1), Array(10), Array(100.0), numberIterations = 3) + + intercept[IllegalArgumentException] { + s1 |+| s2 + } + } + + test("thresholds are summed and claims filtered") { + val s1 = Scenarr.withGeneratedIds( + Array(1, 1), + Array(10, 20), + Array(30.0, 100.0), + numberIterations = 2, + threshold = 10.0 + ) + val s2 = Scenarr.withGeneratedIds( + Array(2), + Array(15), + Array(50.0), + numberIterations = 2, + threshold = 15.0 + ) + + val result = s1 |+| s2 + assertEquals(result.threshold, 25.0) // 10 + 15 + // Only claims > 25 survive: 30.0, 100.0, 50.0 all > 25 + assertEquals(result.amounts.length, 3) + } + + test("threshold filtering removes small claims") { + val s1 = Scenarr.withGeneratedIds( + Array(1), + Array(10), + Array(20.0), // will be filtered: 20 <= 25 + numberIterations = 2, + threshold = 10.0 + ) + val s2 = Scenarr.withGeneratedIds( + Array(2), + Array(15), + Array(50.0), // survives: 50 > 25 + numberIterations = 2, + threshold = 15.0 + ) + + val result = s1 |+| s2 + assertEquals(result.threshold, 25.0) + assertEquals(result.amounts.length, 1) + 
assertEquals(result.amounts(0), 50.0) + } + + test("day1 is the earlier of the two and days are adjusted") { + val earlierDay1 = LocalDate.of(2019, 1, 1) + val laterDay1 = LocalDate.of(2019, 1, 11) // 10 days later + + val s1 = Scenarr.withGeneratedIds( + Array(1), + Array(5), // day 5 relative to 2019-01-01 = Jan 5 + Array(100.0), + numberIterations = 1, + day1 = earlierDay1 + ) + val s2 = Scenarr.withGeneratedIds( + Array(1), + Array(1), // day 1 relative to 2019-01-11 = Jan 11, which is day 11 relative to Jan 1 + Array(200.0), + numberIterations = 1, + day1 = laterDay1 + ) + + val result = s1 |+| s2 + assertEquals(result.day1, earlierDay1) + // s1's day 5 stays as 5 + // s2's day 1 becomes 1 + 10 = 11 + assert(result.days.contains(5)) + assert(result.days.contains(11)) + } + + test("day1 adjustment works when s2 has earlier day1") { + val earlierDay1 = LocalDate.of(2019, 1, 1) + val laterDay1 = LocalDate.of(2019, 1, 11) + + val s1 = Scenarr.withGeneratedIds( + Array(1), + Array(1), // day 1 relative to Jan 11 = Jan 11 + Array(100.0), + numberIterations = 1, + day1 = laterDay1 + ) + val s2 = Scenarr.withGeneratedIds( + Array(1), + Array(5), // day 5 relative to Jan 1 = Jan 5 + Array(200.0), + numberIterations = 1, + day1 = earlierDay1 + ) + + val result = s1 |+| s2 + assertEquals(result.day1, earlierDay1) + // s1's day 1 becomes 1 + 10 = 11 + // s2's day 5 stays as 5 + assert(result.days.contains(5)) + assert(result.days.contains(11)) + } + + test("result is always sorted by iteration then day") { + val s1 = Scenarr.withGeneratedIds( + Array(2, 1), + Array(30, 10), + Array(200.0, 100.0), + numberIterations = 2 + ) + val s2 = Scenarr.withGeneratedIds( + Array(1, 2), + Array(20, 5), + Array(150.0, 50.0), + numberIterations = 2 + ) + + val result = s1 |+| s2 + assertEquals(result.isSorted, true) + // Expected order: (1,10), (1,20), (2,5), (2,30) + assertEquals(result.iterations.toSeq, Seq(1, 1, 2, 2)) + assertEquals(result.days.toSeq, Seq(10, 20, 5, 30)) + } + + 
test("name is formatted as concat: [s1 + s2]") { + val s1 = Scenarr.withGeneratedIds(Array(1), Array(10), Array(100.0), numberIterations = 1, name = "alpha") + val s2 = Scenarr.withGeneratedIds(Array(1), Array(20), Array(200.0), numberIterations = 1, name = "beta") + + val result = s1 |+| s2 + assertEquals(result.name, "concat: [alpha + beta]") + } + + test("associativity: (a |+| b) |+| c = a |+| (b |+| c)") { + // Use explicit IDs to avoid ID collisions that could cause issues + val a = new Scenarr( + Array(1), + Array(10), + Array(100.0), + Array(1L), + numberIterations = 2, + threshold = 0.0 + ) + val b = new Scenarr( + Array(1), + Array(20), + Array(200.0), + Array(2L), + numberIterations = 2, + threshold = 0.0 + ) + val c = new Scenarr( + Array(2), + Array(15), + Array(150.0), + Array(3L), + numberIterations = 2, + threshold = 0.0 + ) + + val leftAssoc = (a |+| b) |+| c + val rightAssoc = a |+| (b |+| c) + + // Core data should match + assertEquals(leftAssoc.iterations.toSeq, rightAssoc.iterations.toSeq) + assertEquals(leftAssoc.days.toSeq, rightAssoc.days.toSeq) + assertEquals(leftAssoc.amounts.toSeq, rightAssoc.amounts.toSeq) + assertEquals(leftAssoc.ids.sorted.toSeq, rightAssoc.ids.sorted.toSeq) + assertEquals(leftAssoc.numberIterations, rightAssoc.numberIterations) + assertEquals(leftAssoc.threshold, rightAssoc.threshold) + assertEquals(leftAssoc.day1, rightAssoc.day1) + } + + test("ID aggregation across multiple combines") { + val sharedId = 999L + val a = new Scenarr(Array(1), Array(10), Array(100.0), Array(sharedId), numberIterations = 1) + val b = new Scenarr(Array(1), Array(10), Array(50.0), Array(sharedId), numberIterations = 1) + val c = new Scenarr(Array(1), Array(10), Array(25.0), Array(sharedId), numberIterations = 1) + + val result = a |+| b |+| c + assertEquals(result.amounts.length, 1) + assertEquals(result.amounts(0), 175.0) // 100 + 50 + 25 + } + +end ScenarrMonoidSuite diff --git a/vecxt_re/test/src/scenarr.test.scala 
b/vecxt_re/test/src/scenarr.test.scala new file mode 100644 index 00000000..c6c08d1b --- /dev/null +++ b/vecxt_re/test/src/scenarr.test.scala @@ -0,0 +1,143 @@ +package vecxt_re + +import vecxt.all.* +import munit.FunSuite +import java.time.{LocalDate, Month} + +class ScenarrSuite extends FunSuite: + + test("constructor should enforce array length equality") { + intercept[AssertionError] { + Scenarr.withGeneratedIds(Array(1), Array(1, 2), Array(1.0), 2) + } + } + + test("freq, meanFreq, agg computed correctly for sorted scenario") { + val iterations = Array(1, 1, 1, 2, 3) + val days = Array(1, 2, 3, 4, 5) + val amounts = Array(10.0, 20.0, 30.0, 40.0, 50.0) + val sc = Scenarr.withGeneratedIds(iterations, days, amounts, numberIterations = 3, isSorted = true) + + // Expected counts per iteration 1..3 => [3,1,1] + val expectedFreq = Array(3, 1, 1) + assertEquals(sc.freq.toList, expectedFreq.toList) + + val expectedMean = expectedFreq.sum.toDouble / expectedFreq.length + assert(math.abs(sc.meanFreq - expectedMean) < 1e-12) + + // Agg: sum amounts per iteration: iter1 -> 10+20+30 = 60, iter2 -> 40, iter3 -> 50 + val expectedAgg = Array(60.0, 40.0, 50.0) + assertEquals(sc.agg.toList, expectedAgg.toList) + + // meanLoss = amounts.sum / numberIterations = 150 / 3 = 50 + assert(math.abs(sc.meanLoss - 50.0) < 1e-12) + } + + test("clusterCoeff and varianceMeanRatio compute from sample variance") { + val iterations = Array(1, 2, 1, 3, 1) + val days = Array(1, 2, 3, 4, 5) + val amounts = Array(10.0, 20.0, 30.0, 40.0, 50.0) + val sc = Scenarr.withGeneratedIds(iterations, days, amounts, numberIterations = 3) + + val sortedScen = sc.sorted + + // freq = [3,1,1] + val freqArr = sortedScen.freq + val (m, v) = freqArr.meanAndVariance(VarianceMode.Sample) + val expectedCluster = (v - m) / (m * m) + val expectedVMR = v / m + + assertEqualsDouble(sortedScen.clusterCoeff, expectedCluster, 1e-6) + assertEqualsDouble(sortedScen.varianceMeanRatio, expectedVMR, 1e-6) + } + + test("claimDates 
and monthYear mapping") { + val days = Array(1, 2) + val sc = Scenarr.withGeneratedIds(Array(1, 1), days, Array(10.0, 20.0), numberIterations = 1) + val claimDates = sc.claimDates + assertEquals(claimDates(0), LocalDate.of(2019, 1, 1)) + assertEquals(claimDates(1), LocalDate.of(2019, 1, 2)) + + val my = sc.monthYear + assertEquals(my(0).month, Month.JANUARY) + assertEquals(my(0).year, 2019) + } + + test("numSeasons accounts for days spanning multiple years") { + val sc = Scenarr.withGeneratedIds(Array(1, 1), Array(1, 400), Array(1.0, 2.0), numberIterations = 1) + println(sc.numSeasons) + assertEquals(sc.numSeasons, 2) + } + + test("itrDayAmount and period produce expected tuples") { + val days = Array(10, 100, 365, 366) + val sc = Scenarr.withGeneratedIds(Array(1, 1, 1, 1), days, Array(5.0, 6.0, 7.0, 8.0), numberIterations = 1) + val itda = sc.itrDayAmount + assertVecEquals(itda.map(_.itr), Array(1, 1, 1, 1)) + assertVecEquals(itda.map(_.day), days) + assertVecEquals(itda.map(_.amnt), Array(5.0, 6.0, 7.0, 8.0)) + + val (firstLoss, lastLoss) = sc.period + assertEquals(firstLoss, LocalDate.of(2019, 1, 10)) + assertEquals(lastLoss, LocalDate.of(2020, 1, 1)) // day 366 -> Jan 1 2020 from 2019-01-01 + } + + test("hasOccurence false for empty amounts") { + val sc = + Scenarr.withGeneratedIds(Array.emptyIntArray, Array.emptyIntArray, Array.emptyDoubleArray, numberIterations = 0) + assertEquals(sc.hasOccurence, false) + } + + test("sorted extension reorders by iteration then day and sets isSorted") { + val iter = Array(2, 1, 2) + val days = Array(10, 5, 8) + val amts = Array(20.0, 10.0, 15.0) + val sc = Scenarr.withGeneratedIds(iter, days, amts, numberIterations = 2, isSorted = false) + + val ssorted = sc.sorted + assertEquals(ssorted.isSorted, true) + assertEquals(ssorted.iterations.toList, Array(1, 2, 2).toList) + assertEquals(ssorted.days.toList, Array(5, 8, 10).toList) + assertEquals(ssorted.amounts.toList, Array(10.0, 15.0, 20.0).toList) + } + + test("scaleAmntBy 
multiplies amounts and threshold") { + val sc = + Scenarr.withGeneratedIds(Array(1, 1), Array(1, 2), Array(10.0, 20.0), numberIterations = 1, threshold = 100.0) + val scaled = sc.scaleAmntBy(2.0) + assertEquals(scaled.threshold, 200.0) + assertEquals(scaled.amounts.toList, Array(20.0, 40.0).toList) + } + + test("iteration selects events for given iteration number") { + val iters = Array(2, 1, 2, 1) + val days = Array(1, 2, 3, 4) + val amts = Array(10.0, 11.0, 12.0, 13.0) + val sc = Scenarr.withGeneratedIds(iters, days, amts, numberIterations = 2) + val only2 = sc.iteration(2) + assert(only2.iterations.forall(_ == 2)) + assertEquals(only2.amounts.toList, Array(10.0, 12.0).toList) + } + + test("applyThreshold filters amounts and only allows increasing threshold") { + val sc = Scenarr.withGeneratedIds( + Array(1, 1, 1), + Array(1, 2, 3), + Array(10.0, 50.0, 200.0), + numberIterations = 1, + threshold = 0.0 + ) + val filtered = sc.applyThreshold(49.0) + // keep > 49 => 50 and 200 + assertEquals(filtered.amounts.toList, Array(50.0, 200.0).toList) + assertEquals(filtered.threshold, 49.0) + + intercept[Exception] { + sc.applyThreshold(0.0) // not strictly greater + } + intercept[Exception] { + sc.applyThreshold(-1.0) // decreasing + } + } + +end ScenarrSuite diff --git a/vecxtensions/test/src/tower.test.scala b/vecxt_re/test/src/tower.test.scala similarity index 86% rename from vecxtensions/test/src/tower.test.scala rename to vecxt_re/test/src/tower.test.scala index 9ce46c9b..fb89cf2a 100644 --- a/vecxtensions/test/src/tower.test.scala +++ b/vecxt_re/test/src/tower.test.scala @@ -1,9 +1,9 @@ -package vecxtensions +package vecxt_re -import vecxt.reinsurance.* +import vecxt_re.* import vecxt.all.* import vecxt.all.given -import vecxt.reinsurance.SplitLosses.* +import SplitLosses.* class TowerSuite extends munit.FunSuite: @@ -11,11 +11,11 @@ class TowerSuite extends munit.FunSuite: losses: Array[Double], ceded: Array[Double], retained: Array[Double], - splits: 
IndexedSeq[(Layer, Array[Double])] = IndexedSeq.empty + splits: IndexedSeq[(layer: Layer, cededToLayer: Array[Double])] = IndexedSeq.empty ) = import vecxt.BoundsCheck.DoBoundsCheck.yes assertVecEquals(ceded + retained, losses) - assertVecEquals(splits.map(_._2).reduce(_ + _), ceded) + assertVecEquals(splits.map(_.cededToLayer).reduce(_ + _), ceded) end noleakage test("from retention") { @@ -44,7 +44,7 @@ class TowerSuite extends munit.FunSuite: assertEqualsDouble(ceded.head, 2.0, 0.001) assertEqualsDouble(retained.head, 10.0, 0.001) - assertEqualsDouble(splits.head._2.head, 2.0, 0.001) + assertEqualsDouble(splits.head.cededToLayer.head, 2.0, 0.001) noleakage(amounts, ceded, retained, splits) } @@ -60,7 +60,7 @@ class TowerSuite extends munit.FunSuite: assertEqualsDouble(ceded.head, 5.0, 0.001) assertEqualsDouble(retained.head, 12.0, 0.001) - assertEqualsDouble(splits.head._2.head, 5.0, 0.001) + assertEqualsDouble(splits.head.cededToLayer.head, 5.0, 0.001) noleakage(amounts, ceded, retained, splits) } @@ -76,7 +76,7 @@ class TowerSuite extends munit.FunSuite: assertEqualsDouble(ceded.head, 2.5, 0.001) assertEqualsDouble(retained.head, 14.5, 0.001) - assertEqualsDouble(splits.head._2.head, 2.5, 0.001) + assertEqualsDouble(splits.head.cededToLayer.head, 2.5, 0.001) noleakage(amounts, ceded, retained, splits) } @@ -93,7 +93,7 @@ class TowerSuite extends munit.FunSuite: val cededExpected = Array(2.0, 1.0) // (14 -10) * 0.5, (12 - 10) * 0.5 assertVecEquals(ceded, cededExpected) assertVecEquals(retained, amounts - cededExpected) - assertVecEquals(splits.head._2, cededExpected) + assertVecEquals(splits.head.cededToLayer, cededExpected) noleakage(amounts, ceded, retained, splits) } @@ -110,7 +110,7 @@ class TowerSuite extends munit.FunSuite: val cededExpected = Array(0.0, 2.0, 1.0, 0.0, 2.5) assertVecEquals(ceded, cededExpected) assertVecEquals(retained, amounts - cededExpected) - assertVecEquals(splits.head._2, cededExpected) + assertVecEquals(splits.head.cededToLayer, 
cededExpected) noleakage(amounts, ceded, retained, splits) } @@ -127,7 +127,7 @@ class TowerSuite extends munit.FunSuite: val cededExpected = Array(0.0, 7.0, 6.0, 0.0, 7.5) assertVecEquals(ceded, cededExpected) assertVecEquals(retained, amounts - cededExpected) - assertVecEquals(splits.head._2, cededExpected) + assertVecEquals(splits.head.cededToLayer, cededExpected) noleakage(amounts, ceded, retained, splits) } @@ -144,7 +144,7 @@ class TowerSuite extends munit.FunSuite: val cededExpected = Array(0.5, 1.5, 1.5, 0.0, 0.5) assertVecEquals(ceded, cededExpected) assertVecEquals(retained, amounts - cededExpected) - assertVecEquals(splits.head._2, cededExpected) + assertVecEquals(splits.head.cededToLayer, cededExpected) noleakage(amounts, ceded, retained, splits) } @@ -174,9 +174,9 @@ class TowerSuite extends munit.FunSuite: val (ceded, retained, splits) = tower.splitAmntFast(iterations, amounts) - val l1 = splits.head._2 - val l2 = splits(1)._2 - val l3 = splits.last._2 + val l1 = splits.head.cededToLayer + val l2 = splits(1).cededToLayer + val l3 = splits.last.cededToLayer assertVecEquals(ceded, l1 + l2 + l3) noleakage(amounts, ceded, retained, splits) @@ -193,9 +193,9 @@ class TowerSuite extends munit.FunSuite: val (ceded, retained, splits) = tower.splitAmntFast(iterations, amounts) - val l1 = splits.head._2 - val l2 = splits(1)._2 - val l3 = splits.last._2 + val l1 = splits.head.cededToLayer + val l2 = splits(1).cededToLayer + val l3 = splits.last.cededToLayer assertVecEquals(ceded, l1 + l2 + l3) noleakage(amounts, ceded, retained, splits) @@ -212,9 +212,9 @@ class TowerSuite extends munit.FunSuite: val (ceded, retained, splits) = tower.splitAmntFast(iterations, amounts) - val l1 = splits.head._2 - val l2 = splits(1)._2 - val l3 = splits.last._2 + val l1 = splits.head.cededToLayer + val l2 = splits(1).cededToLayer + val l3 = splits.last.cededToLayer assertVecEquals(ceded, l1 + l2 + l3) noleakage(amounts, ceded, retained, splits) diff --git 
a/vecxt_re/test/src/vecEquals.scala b/vecxt_re/test/src/vecEquals.scala new file mode 100644 index 00000000..924a940c --- /dev/null +++ b/vecxt_re/test/src/vecEquals.scala @@ -0,0 +1,31 @@ +package vecxt_re + +import munit.Assertions.assertEqualsDouble +import munit.Assertions.assertEquals + +def assertVecEquals(v1: Array[Double], v2: Array[Double])(implicit loc: munit.Location): Unit = + assert(v1.length == v2.length) + var i: Int = 0; + while i < v1.length do + assertEqualsDouble(v1(i), v2(i), 1 / 1e6, clue = s"at index $i") + i += 1 + end while +end assertVecEquals + +def assertVecEquals(v1: Array[Int], v2: Array[Int])(implicit loc: munit.Location): Unit = + assert(v1.length == v2.length) + var i: Int = 0; + while i < v1.length do + assertEquals(v1(i), v2(i), clue = s"at index $i") + i += 1 + end while +end assertVecEquals + +def assertVecEquals(v1: Array[Long], v2: Array[Long])(implicit loc: munit.Location): Unit = + assert(v1.length == v2.length) + var i: Int = 0; + while i < v1.length do + assertEquals(v1(i), v2(i), clue = s"at index $i") + i += 1 + end while +end assertVecEquals diff --git a/vecxt_re/test/statchek.ipynb b/vecxt_re/test/statchek.ipynb new file mode 100644 index 00000000..4e477713 --- /dev/null +++ b/vecxt_re/test/statchek.ipynb @@ -0,0 +1,134 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "id": "24260c73", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1999: 1\n", + "2000: 0\n", + "2001: 0\n", + "2002: 0\n", + "2003: 1\n", + "2004: 0\n", + "2005: 0\n", + "2006: 0\n", + "2007: 2\n", + "2008: 1\n", + "2009: 1\n", + "2010: 2\n", + "2011: 3\n", + "2012: 0\n", + "2013: 0\n", + "2014: 1\n", + "2015: 2\n", + "2016: 1\n", + "2017: 0\n", + "2018: 1\n", + "2019: 2\n", + "2020: 3\n", + "2021: 1\n", + "2022: 2\n", + "2023: 1\n", + "2024: 0\n", + "2025: 1\n" + ] + } + ], + "source": [ + "real_years = [\n", + " 
1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025\n", + "]\n", + "real_counts = [\n", + " 1,0,0,0,1,0,0,0,2,1,1,2,3,0,0,1,2,1,0,1,2,3,1,2,1,0,1\n", + "]\n", + "\n", + "for year, count in zip(real_years, real_counts):\n", + " print(f\"{year}: {count}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cef5959c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitted Poisson with lambda = 0.9629629629629629\n", + "\n", + "Generalized Linear Model: log(Count) ~ 1 + Year\n", + "============================================================\n", + " Generalized Linear Model Regression Results \n", + "==============================================================================\n", + "Dep. Variable: y No. Observations: 27\n", + "Model: GLM Df Residuals: 25\n", + "Model Family: Poisson Df Model: 1\n", + "Link Function: Log Scale: 1.0000\n", + "Method: IRLS Log-Likelihood: -32.760\n", + "Date: Thu, 29 Jan 2026 Deviance: 26.468\n", + "Time: 12:14:51 Pearson chi2: 22.3\n", + "No. Iterations: 5 Pseudo R-squ. 
(CS): 0.08983\n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const -82.0576 52.254 -1.570 0.116 -184.473 20.358\n", + "x1 0.0407 0.026 1.571 0.116 -0.010 0.092\n", + "==============================================================================\n" + ] + } + ], + "source": [ + "\n", + "import numpy as np\n", + "import statsmodels.api as sm\n", + "\n", + "# Create arrays from the data\n", + "years = np.array(real_years)\n", + "counts = np.array(real_counts)\n", + "\n", + "# Fitted Poisson lambda (mean of counts)\n", + "poisson_lambda = np.mean(counts)\n", + "print(f\"Fitted Poisson with lambda = {poisson_lambda}\")\n", + "\n", + "# Fit Poisson GLM: log(Count) ~ 1 + Year\n", + "X = sm.add_constant(years) # Add intercept\n", + "poisson_model = sm.GLM(counts, X, family=sm.families.Poisson())\n", + "result = poisson_model.fit()\n", + "\n", + "print(\"\\nGeneralized Linear Model: log(Count) ~ 1 + Year\")\n", + "print(\"=\" * 60)\n", + "print(result.summary())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/vecxtensions/src-js/mathtags.scala b/vecxtensions/src-js/mathtags.scala deleted file mode 100644 index b85f43ba..00000000 --- a/vecxtensions/src-js/mathtags.scala +++ /dev/null @@ -1,72 +0,0 @@ -package vecxtensions - -import scalatags.Text.all.* // Imports commonly used ScalaTags elements like `Tag`, `attrs`, etc. 
-import scalatags.Text.tags -import vecxt.all.* - -import com.raquo.laminar.api.L.{*, given} -import com.raquo.laminar.tags.* -import com.raquo.laminar.codecs.StringAsIsCodec -import vecxt.BoundsCheck.DoBoundsCheck.no - -object MathTagsLaminar: - - extension (m: Matrix[Double]) - def printMl = - mfenced( - mtable( - for i <- 0 until m.rows - yield mtr( - for j <- 0 until m.cols - yield mtd( - mn(m((j, i))) - ) - ) - ) - ) - end extension - - val xmlns1 = htmlAttr[String]("xmlns", StringAsIsCodec) - val math = htmlTag("math") - // Basic content elements - val mi = CustomHtmlTag("mi") - val mn = CustomHtmlTag("mn") - val mo = CustomHtmlTag("mo") - - val mtext = CustomHtmlTag("mtext") - val mfrac = CustomHtmlTag("mfrac") - val msup = CustomHtmlTag("msup") - val msub = CustomHtmlTag("msub") - val msupsub = CustomHtmlTag("msubsup") - val msqrt = CustomHtmlTag("msqrt") - val mroot = CustomHtmlTag("mroot") - val mfenced = CustomHtmlTag("mfenced") - val menclose = CustomHtmlTag("menclose") - val mtable = CustomHtmlTag("mtable") - val mtr = CustomHtmlTag("mtr") - val mtd = CustomHtmlTag("mtd") - val maligngroup = CustomHtmlTag("maligngroup") - val malignmark = CustomHtmlTag("malignmark") - val mspace = CustomHtmlTag("mspace") - val mrow = CustomHtmlTag("mrow") - val mphantom = CustomHtmlTag("mphantom") - val merror = CustomHtmlTag("merror") - val munderover = CustomHtmlTag("munderover") - val mover = CustomHtmlTag("mover") - val munder = CustomHtmlTag("munder") - val msubsup = CustomHtmlTag("msubsup") - val munder_accent = CustomHtmlTag("munder") - val mover_accent = CustomHtmlTag("mover") - val mmultiscripts = CustomHtmlTag("mmultiscripts") - val mstyle = CustomHtmlTag("mstyle") - val mtag = CustomHtmlTag("mtag") - val mlongdiv = CustomHtmlTag("mlongdiv") - val mprescripts = CustomHtmlTag("mprescripts") - val none = CustomHtmlTag("none") - val semantics = CustomHtmlTag("semantics") - val annotation = CustomHtmlTag("annotation") - val annotation_xml = 
CustomHtmlTag("annotation-xml") - val msum = CustomHtmlTag("msum") - val mprod = CustomHtmlTag("mprod") - val mint = CustomHtmlTag("mint") -end MathTagsLaminar diff --git a/vecxtensions/src-js/matmul.scala b/vecxtensions/src-js/matmul.scala index 27632569..dde848e0 100644 --- a/vecxtensions/src-js/matmul.scala +++ b/vecxtensions/src-js/matmul.scala @@ -1,11 +1,13 @@ package vecxtensions -import spire.implicits.* -import spire.algebra.Ring import scala.reflect.ClassTag + import vecxt.* -import vecxt.all.* import vecxt.BoundsCheck.BoundsCheck +import vecxt.all.* + +import spire.algebra.Ring +import spire.implicits.* object SpireExt: diff --git a/vecxtensions/src/LossCalc.scala b/vecxtensions/src/LossCalc.scala deleted file mode 100644 index 8b7d3dbe..00000000 --- a/vecxtensions/src/LossCalc.scala +++ /dev/null @@ -1,5 +0,0 @@ -package vecxt.reinsurance - -enum LossCalc: - case Agg, Occ -end LossCalc diff --git a/vecxtensions/src/groupSums.scala b/vecxtensions/src/groupSums.scala deleted file mode 100644 index 27aab85d..00000000 --- a/vecxtensions/src/groupSums.scala +++ /dev/null @@ -1,55 +0,0 @@ -package vecxtensions - -import vecxt.reinsurance.Layer - -/** - You have a sorted groups array. - * - Each group has a small number of values. - * - You're doing per-group cumulative sums. - * - Returns cumulative sums for each element within its group - */ -inline def groupCumSum(groups: Array[Int], values: Array[Double]): Array[Double] = - - val n = groups.length - if n == 0 then Array.empty[Double] - else - val result = new Array[Double](n) - - var i = 0 - while i < n do - val g = groups(i) - var cumSum = 0.0 - - // Process block of same group, computing cumulative sum - while i < n && groups(i) == g do - cumSum += values(i) - result(i) = cumSum - i += 1 - end while - end while - - result - end if -end groupCumSum - -/** - sum by group index - * - Each group has a small number of values. - * - Each the groups are keyed by their index. 
- * - assumes groups are already sorted - */ -inline def groupSum(groups: Array[Int], values: Array[Double], nitr: Int): Array[Double] = - val result = Array.fill(nitr)(0.0) - val l = groups.length - var i = 0 - while i < l do - val g = groups(i) - var groupSum = 0.0 - // Process block of same group, computing cumulative sum - while i < l && groups(i) == g do - groupSum += values(i) - i += 1 - end while - result(g - 1) = groupSum - end while - - result -end groupSum diff --git a/vecxtensions/test/src/vecEquals.scala b/vecxtensions/test/src/vecEquals.scala deleted file mode 100644 index 7fb3d3c9..00000000 --- a/vecxtensions/test/src/vecEquals.scala +++ /dev/null @@ -1,12 +0,0 @@ -package vecxtensions - -import munit.Assertions.assertEqualsDouble - -def assertVecEquals(v1: Array[Double], v2: Array[Double])(implicit loc: munit.Location): Unit = - assert(v1.length == v2.length) - var i: Int = 0; - while i < v1.length do - assertEqualsDouble(v1(i), v2(i), 1 / 1e6, clue = s"at index $i") - i += 1 - end while -end assertVecEquals