From a98dff92d6a9c874d4808a6d53d87b59cfba2d6a Mon Sep 17 00:00:00 2001 From: marmoure Date: Wed, 18 Feb 2026 10:48:08 +0100 Subject: [PATCH 1/2] [refactor] clean up the testing schema --- ...cquisition-with-minimal-transcription.csvs | 68 ------------------- ...cquisition-with-minimal-transcription.json | 7 -- src/main/resources/schemas/concat.csvs | 5 -- src/main/resources/schemas/concat.json | 8 --- .../schemas/thunder-stone-sample-csvs.csvs | 13 ---- .../schemas/thunder-stone-sample-csvs.json | 7 -- 6 files changed, 108 deletions(-) delete mode 100644 src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs delete mode 100644 src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.json delete mode 100644 src/main/resources/schemas/concat.csvs delete mode 100644 src/main/resources/schemas/concat.json delete mode 100644 src/main/resources/schemas/thunder-stone-sample-csvs.csvs delete mode 100644 src/main/resources/schemas/thunder-stone-sample-csvs.json diff --git a/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs b/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs deleted file mode 100644 index c82a12f..0000000 --- a/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs +++ /dev/null @@ -1,68 +0,0 @@ -version 1.0 -@totalColumns 42 -/*------------------------------------------------------------------------------- -|Schema: ADM_363-technical-acquisition-with-minimal-transcription.csvs | -|Authors: Nicki Welch | -| David Underdown | -|Purpose: To capture metadata about the digitisation of the ADM 363 series | -| Primarily technical metadata, but with a minimal amount of | -| transcription to verify that the records may be publicly released | -| after receipt by The National Archives | -|Revision: 1.0 first release | -| 1.1 update as some official numbers only single digit | -| 1.2 allow M as 
official number prefix too | -| 1.3 further additions to prefixes, L, S, SS, SSX | -| 1.4 allow for asterisk and ? in official number | -| 1.5 further prefixes MX, KX, JX, and longer volume number | -| 1.6 add explicit check that checksum is not that for a 0 byte file | -| 1.7 Fix errors eg use correct not(), rather than isNot() | -| 1.8 Allow brackets etc in comments, range checking for birth year | -| ???? for birth year | -| 1.9 Add piece check in ordinal: unique($piece,$item,$ordinal) | -| Remove and in($resource_uri) from item: | -| resource_uri, change starts(...) to | -| regex("...") | -| 2.0 Allow LX as a prefix too | -|-------------------------------------------------------------------------------*/ -batch_code: length(10) regex("^ADM362B([0-9]{3})$") -department: (is("ADM") if($file_path/notEmpty,in($file_path) and in($resource_uri))) -series: is("362") and if($file_path/notEmpty,in($file_path) and in($resource_uri)) -piece: range(1,69720) if($file_path/notEmpty,in($file_path) and in($resource_uri)) -item: ((positiveInteger unique($piece,$item,$ordinal)) or empty) if($file_path/notEmpty,in($file_path)) -ordinal: if($item/empty,empty,unique($piece,$item,$ordinal)) -file_uuid: if($ordinal/empty,empty,uuid4 unique) -file_path: uri if($ordinal/empty,empty,unique fileExists regex("^file:\/\/\/ADM_362\/[0-9]{1,5}\/[1-9][0-9]{0,4}\/[1-9][0-9]{0,4}_[0-9]{1,4}\.jp2$")) -file_checksum: if($ordinal/empty,empty,not("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") and checksum(file($file_path),"SHA-256")) -resource_uri: if($ordinal/notEmpty,uri and regex("^http://datagov.nationalarchives.gov.uk/66/ADM/362/[1-9][0-9]*/[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$")) -scan_operator: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z]{1,12}$")) -scan_id: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z_]{1,12}$")) -scan_location: if($ordinal/empty,empty,regex("[-\w\s,]+")) -scan_native_format: 
if($ordinal/empty,empty,regex("[0-9\w\s,.:]+")) -scan_timestamp: if($ordinal/empty,empty,xDateTime) -image_resolution: if($ordinal/empty,empty,is("300")) -image_width: if($ordinal/empty,empty,positiveInteger) -image_height: if($ordinal/empty,empty,positiveInteger) -image_tonal_resolution: if($ordinal/empty,empty,is("24-bit colour")) -image_format: if($ordinal/empty,empty,is("x-fmt/392")) -image_colour_space: if($ordinal/empty,empty,is("sRGB")) -process_location: if($ordinal/empty,empty,regex("[-\w\s,]+")) -jp2_creation_timestamp: if($ordinal/empty,empty,xDateTime) -uuid_timestamp: if($ordinal/empty,empty,xDateTime) -embed_timestamp: if($ordinal/empty,empty,xDateTime) -image_split: if($ordinal/empty,empty,is("yes") or is("no")) -image_split_other_uuid: if($ordinal/empty,empty,if($image_split/is("yes"),uuid4,is(""))) -image_split_operator: if($ordinal/empty,empty,if($image_split/is("yes"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is(""))) -image_split_timestamp: if($ordinal/empty,empty,if($image_split/is("yes"),xDateTime,is(""))) -image_crop: if($ordinal/empty,empty,is("auto") or is("manual") or is("none")) -image_crop_operator: if($ordinal/empty,empty,if($image_split/is("manual"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is(""))) -image_crop_timestamp: if($ordinal/empty,empty,if($image_crop/is("none"),empty,xDateTime)) -image_deskew: if($ordinal/empty,empty,is("yes") or is("no")) -image_deskew_operator: if($ordinal/empty,empty,if($image_deskew/is("yes"),regex("^[0-9a-zA-Z]{1,12}$"),is(""))) -image_deskew_timestamp: if($ordinal/empty,empty,if($image_deskew/is("yes"),xDateTime,is(""))) -QA-code: regex("^[0-9/,]{1,2}$") @optional -comments: regex("[-\w\s,\.\(\)\/'":\?]+") @optional -transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,19}"),is("")) -transcribed_birth_date_day: if(($ordinal/empty and $item/notEmpty),regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"),is("")) -transcribed_birth_date_month: if(($ordinal/empty and 
$item/notEmpty),is("*") or is("?") or is("January") or is("February") or is("March") or is("April") or is("May") or is("June") or is("July") or is("August") or is("September") or is("October") or is("November") or is("December"), is("")) -transcribed_birth_date_year: if(($ordinal/empty and $item/notEmpty),if(positiveInteger,range(1850,1914),regex("^1[7-9][0-9\?]{2}|\*|\?{4}$")),is("")) -transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^(([CDP]\/)?([FJKLMS]|LX|MX|JX|KX|SS|SSX)[/?0-9]{1,6}|[/?1-9][/?0-9]{5}|\*)$"),is("")) \ No newline at end of file diff --git a/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.json b/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.json deleted file mode 100644 index b426a36..0000000 --- a/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "id": "ADM_362-technical-acquisition-with-minimal-transcription", - "name": "ADM_362-technical-acquisition-with-minimal-transcription", - "version": "1.0.0", - "date": "2015-11-01", - "description": "Minor updates and bug fixes" -} \ No newline at end of file diff --git a/src/main/resources/schemas/concat.csvs b/src/main/resources/schemas/concat.csvs deleted file mode 100644 index 8ca60ee..0000000 --- a/src/main/resources/schemas/concat.csvs +++ /dev/null @@ -1,5 +0,0 @@ -version 1.1 -@totalColumns 3 -c1: -c2: -c3: is(concat($c1,$c2)) diff --git a/src/main/resources/schemas/concat.json b/src/main/resources/schemas/concat.json deleted file mode 100644 index d013f62..0000000 --- a/src/main/resources/schemas/concat.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "id": "concat", - "name": "concat", - "version": "1.0.0", - "date": "2015-11-01", - "url": "https://localhost/concat.csvs", - "description": "sample file for testing" -} \ No newline at end of file diff --git a/src/main/resources/schemas/thunder-stone-sample-csvs.csvs 
b/src/main/resources/schemas/thunder-stone-sample-csvs.csvs deleted file mode 100644 index 9ecf89f..0000000 --- a/src/main/resources/schemas/thunder-stone-sample-csvs.csvs +++ /dev/null @@ -1,13 +0,0 @@ -database /tmp/testdb -table customer -# indicate csv format with a delimiter of | -csv | -# Name Type Tag -field CustID varchar(10) 1 -field Company varchar(80) 2 -field Address varchar(80) 3 -field City varchar(20) 4 -field State varchar(10) 5 -field Zip varchar(10) 6 -field Country varchar(10) 7 -field Phone varchar(20) 8 \ No newline at end of file diff --git a/src/main/resources/schemas/thunder-stone-sample-csvs.json b/src/main/resources/schemas/thunder-stone-sample-csvs.json deleted file mode 100644 index 051991d..0000000 --- a/src/main/resources/schemas/thunder-stone-sample-csvs.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "id": "thunder-stone-sample-csvs", - "name": "thunder-stone-sample-csvs", - "version": "1.0.0", - "date": "2015-11-01", - "description": "sample file for testing" -} \ No newline at end of file From 1771908336d285ec902f72a0086c7b2715a23029 Mon Sep 17 00:00:00 2001 From: marmoure Date: Wed, 18 Feb 2026 10:53:04 +0100 Subject: [PATCH 2/2] [feature] Introduce the bitag bbl schema --- .../schemas/bitag-bbl-schema-20240827.csvs | 56 +++++++++++++++++++ .../schemas/bitag-bbl-schema-20240827.json | 7 +++ 2 files changed, 63 insertions(+) create mode 100644 src/main/resources/schemas/bitag-bbl-schema-20240827.csvs create mode 100644 src/main/resources/schemas/bitag-bbl-schema-20240827.json diff --git a/src/main/resources/schemas/bitag-bbl-schema-20240827.csvs b/src/main/resources/schemas/bitag-bbl-schema-20240827.csvs new file mode 100644 index 0000000..a293ec8 --- /dev/null +++ b/src/main/resources/schemas/bitag-bbl-schema-20240827.csvs @@ -0,0 +1,56 @@ +version 1.2 +@totalColumns 54 +name: BroadbandLabels +date_published: date +regulator_name: string +regulator_version_number: numeric, range(1, 99) +connection_type: string +fcc_registration_number: 
numeric, range(0000000000, 9999999999) +unique_plan_identifier: string, length(*,26) +network_technology_type: range(0, 999) +provider_name: string, length(*,100) +service_plan_name: length(*,100) +bandwidth_download_units: string, length(4) +bandwidth_download_marketed_low: numeric, length(1,3) +bandwidth_download_marketed_high: numeric, length(1,3) +bandwidth_download_typical_low: numeric, length(1,3) +bandwidth_download_typical_high: numeric, length(1,3) +bandwidth_upload_units: string, length(4) +bandwidth_upload_marketed_low: numeric, length(1,3) +bandwidth_upload_marketed_high: numeric, length(1,3) +bandwidth_upload_typical_low: numeric, length(1,3) +bandwidth_upload_typical_high: numeric, length(1,3) +latency_idle_low: numeric, length(*,10) +latency_idle_high: numeric, length(*,10) +currency: string, length(3) +price_type: string, enumerated string value +price_details: string +price_recurring: numeric +fee_introductory: numeric +fee_introductory_description: string +contract_required: string, length(2,3), enumerated string value +contract_details: string +contract_terms_uri: identifier, uri +fee_one_time_amount: numeric +fee_one_time_details: string +fee_recurring: numeric +fee_recurring_description: string +fee_pass_through_recurring: string +fee_pass_through_recurring_description: string +fee_early_termination: string +fee_early_termination_description: string +government_taxes: string, enumerated string value +policy_data_usage: string, length(2,3), enumerated string value +fee_data_usage: string +fee_data_usage_description: string +uri_data_usage: identifier, uri +restrictions_apply: string, length(2,3), enumerated string value +uri_policy_network_management: identifier, uri +uri_policy_privacy: identifier, uri +uri_customer_support: identifier, uri +customer_support_phone: numeric, length(*,15) +country_code: string, length(3) +asn: numeric, range(0, 4294967295) +uri_policy_additional_terms: identifier, uri +language: string, length(3)
+digital_signature: string \ No newline at end of file diff --git a/src/main/resources/schemas/bitag-bbl-schema-20240827.json b/src/main/resources/schemas/bitag-bbl-schema-20240827.json new file mode 100644 index 0000000..3063ac9 --- /dev/null +++ b/src/main/resources/schemas/bitag-bbl-schema-20240827.json @@ -0,0 +1,7 @@ +{ + "id": "bitag-bbl-schema-20240827", + "name": "bitag-bbl-schema-20240827", + "version": "1.2.0", + "date": "2024-08-27", + "description": "Broadband Labels schema" +} \ No newline at end of file