refactor: remove remind_sync wrapper library and inline utilities
The `remind_sync` library was acting as a thin re-export layer. This commit removes it entirely and moves the only non-trivial utility (`show_error` for `Timedesc.Date.Ymd.error`) directly into `bin/utils.ml`. Dead `[@@deriving show]` annotations on `rem`, `week_first_day`, and `error` types are also removed.
This commit is contained in:
1
bin/dune
1
bin/dune
@@ -5,7 +5,6 @@
|
||||
(preprocess
|
||||
(pps ppx_deriving.show))
|
||||
(libraries
|
||||
remind_sync
|
||||
cmdliner
|
||||
icalendar
|
||||
timedesc-tzdb.full
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
open Remind_sync
|
||||
open Icalendar
|
||||
open Utils
|
||||
|
||||
@@ -148,11 +147,9 @@ open Utils
|
||||
priorita: Quando serve
|
||||
*)
|
||||
|
||||
type error = Invalid_date of string | Skip [@@deriving show]
|
||||
|
||||
let invalid_date s e =
|
||||
Error (Invalid_date (spf "Invalid date: %s, error: %s" s (Remind_sync.Timedesc.Date.Ymd.show_error e)))
|
||||
type error = Invalid_date of string | Skip
|
||||
|
||||
let invalid_date s e = Error (Invalid_date (spf "Invalid date: %s, error: %s" s (show_error e)))
|
||||
let skip = Error Skip
|
||||
|
||||
type collector = Remind.rem -> event -> (Remind.rem, error) result
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
open Remind_sync
|
||||
module Map = MoreLabels.Map.Make (String)
|
||||
|
||||
(*
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
open Remind_sync
|
||||
open Utils
|
||||
|
||||
type week_first_day = [ `Sunday | `Monday ] [@@deriving show]
|
||||
type week_first_day = [ `Sunday | `Monday ]
|
||||
|
||||
type simple_weekly = {
|
||||
count_or_until : Icalendar.count_or_until option;
|
||||
@@ -9,7 +8,6 @@ type simple_weekly = {
|
||||
byday : Icalendar.weekday list;
|
||||
week_start : week_first_day option; (** First day of the week for weekly recurrence *)
|
||||
}
|
||||
[@@deriving show]
|
||||
(** A simple weekly REM command *)
|
||||
|
||||
type rem = {
|
||||
@@ -26,7 +24,6 @@ type rem = {
|
||||
*)
|
||||
exdate : Icalendar.date_or_datetime list; (** List of excluded dates for recurring events *)
|
||||
}
|
||||
[@@deriving show]
|
||||
(** A complete REM command *)
|
||||
|
||||
let empty =
|
||||
|
||||
@@ -1,8 +1,14 @@
|
||||
open Remind_sync
|
||||
open Icalendar
|
||||
|
||||
type months = Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec
|
||||
|
||||
let show_error (e : Timedesc.Date.Ymd.error) : string =
|
||||
match e with
|
||||
| `Does_not_exist -> "Date does not exist"
|
||||
| `Invalid_year y -> Printf.sprintf "Invalid year: %d" y
|
||||
| `Invalid_month m -> Printf.sprintf "Invalid month: %d" m
|
||||
| `Invalid_day d -> Printf.sprintf "Invalid day: %d" d
|
||||
|
||||
let string_of_weekday = function
|
||||
| `Monday -> "Mon"
|
||||
| `Tuesday -> "Tue"
|
||||
|
||||
8
lib/dune
8
lib/dune
@@ -1,8 +0,0 @@
|
||||
(library
|
||||
(name remind_sync)
|
||||
(modules remind_sync timedesc_augmented result_augmented utf8 icalendar_augmented ptime_augmented)
|
||||
(preprocess
|
||||
(pps ppx_deriving.show))
|
||||
(libraries base logs timedesc uuseg uutf icalendar ptime))
|
||||
|
||||
|
||||
@@ -1,316 +0,0 @@
|
||||
module Params = struct
|
||||
include Icalendar.Params
|
||||
|
||||
let pp ppf _m = Format.pp_print_string ppf "<params>"
|
||||
end
|
||||
|
||||
type params = Params.t [@@deriving show]
|
||||
|
||||
module Ptime = struct
|
||||
include Ptime_augmented
|
||||
end
|
||||
|
||||
(* TODO: tag these with `Utc | `Local *)
|
||||
type timestamp_utc = Ptime.t [@@deriving show]
|
||||
type timestamp_local = Ptime.t [@@deriving show]
|
||||
type utc_or_timestamp_local = [ `Utc of timestamp_utc | `Local of timestamp_local ] [@@deriving show]
|
||||
type timestamp = [ utc_or_timestamp_local | `With_tzid of timestamp_local * (bool * string) ] [@@deriving show]
|
||||
type date_or_datetime = [ `Datetime of timestamp | `Date of Ptime.date ] [@@deriving show]
|
||||
type weekday = [ `Friday | `Monday | `Saturday | `Sunday | `Thursday | `Tuesday | `Wednesday ] [@@deriving show]
|
||||
|
||||
type recur =
|
||||
[ `Byminute of int list
|
||||
| `Byday of (int * weekday) list
|
||||
| `Byhour of int list
|
||||
| `Bymonth of int list
|
||||
| `Bymonthday of int list
|
||||
| `Bysecond of int list
|
||||
| `Bysetposday of int list
|
||||
| `Byweek of int list
|
||||
| `Byyearday of int list
|
||||
| `Weekday of weekday ]
|
||||
[@@deriving show]
|
||||
|
||||
type freq = [ `Daily | `Hourly | `Minutely | `Monthly | `Secondly | `Weekly | `Yearly ] [@@deriving show]
|
||||
type count_or_until = [ `Count of int | `Until of utc_or_timestamp_local (* TODO date or datetime *) ] [@@deriving show]
|
||||
type interval = int [@@deriving show]
|
||||
type recurrence = freq * count_or_until option * interval option * recur list [@@deriving show]
|
||||
|
||||
type valuetype =
|
||||
[ `Binary
|
||||
| `Boolean
|
||||
| `Caladdress
|
||||
| `Date
|
||||
| `Datetime
|
||||
| `Duration
|
||||
| `Float
|
||||
| `Integer
|
||||
| `Period
|
||||
| `Recur
|
||||
| `Text
|
||||
| `Time
|
||||
| `Uri
|
||||
| `Utcoffset
|
||||
| `Xname of string * string
|
||||
| `Ianatoken of string ]
|
||||
|
||||
type cutype = [ `Group | `Individual | `Resource | `Room | `Unknown | `Ianatoken of string | `Xname of string * string ]
|
||||
[@@deriving show]
|
||||
|
||||
type partstat =
|
||||
[ `Accepted
|
||||
| `Completed
|
||||
| `Declined
|
||||
| `Delegated
|
||||
| `In_process
|
||||
| `Needs_action
|
||||
| `Tentative
|
||||
| `Ianatoken of string
|
||||
| `Xname of string * string ]
|
||||
[@@deriving show]
|
||||
|
||||
type role =
|
||||
[ `Chair | `Nonparticipant | `Optparticipant | `Reqparticipant | `Ianatoken of string | `Xname of string * string ]
|
||||
[@@deriving show]
|
||||
|
||||
type relationship = [ `Parent | `Child | `Sibling | `Ianatoken of string | `Xname of string * string ] [@@deriving show]
|
||||
|
||||
type fbtype = [ `Free | `Busy | `Busy_Unavailable | `Busy_Tentative | `Ianatoken of string | `Xname of string * string ]
|
||||
[@@deriving show]
|
||||
|
||||
type param_value = [ `Quoted of string | `String of string ] [@@deriving show]
|
||||
|
||||
type _ icalparameter =
|
||||
| Altrep : Uri.t icalparameter
|
||||
| Cn : param_value icalparameter
|
||||
| Cutype : cutype icalparameter
|
||||
| Delegated_from : Uri.t list icalparameter
|
||||
| Delegated_to : Uri.t list icalparameter
|
||||
| Dir : Uri.t icalparameter
|
||||
| Encoding : [ `Base64 ] icalparameter
|
||||
| Media_type : (string * string) icalparameter
|
||||
| Fbtype : fbtype icalparameter
|
||||
| Language : string icalparameter
|
||||
| Member : Uri.t list icalparameter
|
||||
| Partstat : partstat icalparameter
|
||||
| Range : [ `Thisandfuture ] icalparameter
|
||||
| Related : [ `Start | `End ] icalparameter
|
||||
| Reltype : relationship icalparameter
|
||||
| Role : role icalparameter
|
||||
| Rsvp : bool icalparameter
|
||||
| Sentby : Uri.t icalparameter
|
||||
| Tzid : (bool * string) icalparameter
|
||||
| Valuetype : valuetype icalparameter
|
||||
| Iana_param : string -> param_value list icalparameter
|
||||
| Xparam : (string * string) -> param_value list icalparameter
|
||||
[@@deriving show]
|
||||
|
||||
type other_prop = [ `Iana_prop of string * params * string | `Xprop of (string * string) * params * string ]
|
||||
[@@deriving show]
|
||||
|
||||
type cal_prop =
|
||||
[ `Prodid of params * string
|
||||
| `Version of params * string
|
||||
| `Calscale of params * string
|
||||
| `Method of params * string
|
||||
| other_prop ]
|
||||
[@@deriving show]
|
||||
|
||||
type class_ = [ `Public | `Private | `Confidential | `Ianatoken of string | `Xname of string * string ]
|
||||
[@@deriving show]
|
||||
|
||||
type status =
|
||||
[ `Draft
|
||||
| `Final
|
||||
| `Cancelled
|
||||
| `Needs_action
|
||||
| `Completed
|
||||
| `In_process
|
||||
| (* `Cancelled *)
|
||||
`Tentative
|
||||
| `Confirmed (* | `Cancelled *) ]
|
||||
[@@deriving show]
|
||||
|
||||
type period = timestamp * Ptime.Span.t * bool [@@deriving show]
|
||||
type period_utc = timestamp_utc * Ptime.Span.t * bool [@@deriving show]
|
||||
type dates_or_datetimes = [ `Datetimes of timestamp list | `Dates of Ptime.date list ] [@@deriving show]
|
||||
type dates_or_datetimes_or_periods = [ dates_or_datetimes | `Periods of period list ] [@@deriving show]
|
||||
|
||||
type general_prop =
|
||||
[ `Dtstamp of params * timestamp_utc
|
||||
| `Uid of params * string
|
||||
| `Dtstart of params * date_or_datetime
|
||||
| `Class of params * class_
|
||||
| `Created of params * timestamp_utc
|
||||
| `Description of params * string
|
||||
| `Geo of params * (float * float)
|
||||
| `Lastmod of params * timestamp_utc
|
||||
| `Location of params * string
|
||||
| `Organizer of params * Uri.t
|
||||
| `Priority of params * int
|
||||
| `Seq of params * int
|
||||
| `Status of params * status
|
||||
| `Summary of params * string
|
||||
| `Url of params * Uri.t
|
||||
| `Recur_id of params * date_or_datetime
|
||||
| (* TODO: Furthermore, this property MUST be specified
|
||||
as a date with local time if and only if the "DTSTART" property
|
||||
contained within the recurring component is specified as a date
|
||||
with local time. *)
|
||||
`Rrule of params * recurrence
|
||||
| `Duration of params * Ptime.Span.t
|
||||
| `Attach of params * [ `Uri of Uri.t | `Binary of string ]
|
||||
| `Attendee of params * Uri.t
|
||||
| `Categories of params * string list
|
||||
| `Comment of params * string
|
||||
| `Contact of params * string
|
||||
| `Exdate of params * dates_or_datetimes
|
||||
| `Rstatus of params * ((int * int * int option) * string * string option)
|
||||
| `Related of params * string
|
||||
| `Resource of params * string list
|
||||
| `Rdate of params * dates_or_datetimes_or_periods ]
|
||||
[@@deriving show]
|
||||
|
||||
type event_prop =
|
||||
[ general_prop
|
||||
| `Transparency of params * [ `Transparent | `Opaque ]
|
||||
| `Dtend of params * date_or_datetime
|
||||
| (* TODO: valuetype same as DTSTART *)
|
||||
other_prop ]
|
||||
[@@deriving show]
|
||||
|
||||
type 'a alarm_struct = {
|
||||
trigger : params * [ `Duration of Ptime.Span.t | `Datetime of timestamp_utc ];
|
||||
duration_repeat : ((params * Ptime.Span.t) * (params * int)) option;
|
||||
summary : (params * string) option;
|
||||
other : other_prop list;
|
||||
special : 'a;
|
||||
}
|
||||
[@@deriving show]
|
||||
|
||||
type audio_struct = { attach : (params * [ `Uri of Uri.t | `Binary of string ]) option } [@@deriving show]
|
||||
type display_struct = { description : (params * string) option } [@@deriving show]
|
||||
|
||||
type email_struct = {
|
||||
description : params * string;
|
||||
attendees : (params * Uri.t) list;
|
||||
attach : (params * [ `Uri of Uri.t | `Binary of string ]) option;
|
||||
}
|
||||
[@@deriving show]
|
||||
|
||||
type alarm =
|
||||
[ `Audio of audio_struct alarm_struct
|
||||
| `Display of display_struct alarm_struct
|
||||
| `Email of email_struct alarm_struct
|
||||
| `None of unit alarm_struct ]
|
||||
[@@deriving show]
|
||||
|
||||
type tz_prop =
|
||||
[ `Dtstart_local of params * timestamp_local
|
||||
| `Tzoffset_to of params * Ptime.Span.t
|
||||
| `Tzoffset_from of params * Ptime.Span.t
|
||||
| `Rrule of params * recurrence
|
||||
| `Comment of params * string
|
||||
| `Rdate of params * dates_or_datetimes_or_periods
|
||||
| `Tzname of params * string
|
||||
| other_prop ]
|
||||
[@@deriving show]
|
||||
|
||||
type timezone_prop =
|
||||
[ `Timezone_id of params * (bool * string)
|
||||
| `Lastmod of params * timestamp_utc
|
||||
| `Tzurl of params * Uri.t
|
||||
| `Standard of tz_prop list
|
||||
| `Daylight of tz_prop list
|
||||
| other_prop ]
|
||||
[@@deriving show]
|
||||
|
||||
type todo_prop =
|
||||
[ general_prop
|
||||
| `Completed of params * timestamp_utc
|
||||
| `Percent of params * int
|
||||
| `Due of params * date_or_datetime
|
||||
| other_prop ]
|
||||
[@@deriving show]
|
||||
|
||||
type journal_prop = [ general_prop | other_prop ] [@@deriving show]
|
||||
|
||||
type freebusy_prop =
|
||||
[ `Dtstamp of params * timestamp_utc
|
||||
| `Uid of params * string
|
||||
| `Contact of params * string
|
||||
| `Dtstart_utc of params * timestamp_utc
|
||||
| `Dtend_utc of params * timestamp_utc
|
||||
| `Organizer of params * Uri.t
|
||||
| `Url of params * Uri.t
|
||||
| `Attendee of params * Uri.t
|
||||
| `Comment of params * string
|
||||
| `Freebusy of params * period_utc list
|
||||
| `Rstatus of params * ((int * int * int option) * string * string option)
|
||||
| other_prop ]
|
||||
[@@deriving show]
|
||||
|
||||
type event = {
|
||||
dtstamp : params * timestamp_utc;
|
||||
uid : params * string;
|
||||
dtstart : params * date_or_datetime; (* NOTE: optional if METHOD present according to RFC 5545 *)
|
||||
dtend_or_duration : [ `Duration of params * Ptime.Span.t | `Dtend of params * date_or_datetime ] option;
|
||||
rrule : (params * recurrence) option; (* NOTE: RFC says SHOULD NOT occur more than once *)
|
||||
props : event_prop list;
|
||||
alarms : alarm list;
|
||||
}
|
||||
[@@deriving show]
|
||||
|
||||
type timezone = timezone_prop list [@@deriving show]
|
||||
|
||||
type component =
|
||||
[ `Event of event
|
||||
| `Todo of todo_prop list * alarm list
|
||||
| `Journal of journal_prop list
|
||||
| `Freebusy of freebusy_prop list
|
||||
| `Timezone of timezone ]
|
||||
[@@deriving show]
|
||||
|
||||
let conv_alarm_struct (f : 'a -> 'b) (s : 'a Icalendar.alarm_struct) : 'b alarm_struct =
|
||||
{
|
||||
trigger = s.trigger;
|
||||
duration_repeat = s.duration_repeat;
|
||||
summary = s.summary;
|
||||
other = s.other;
|
||||
special = f s.special;
|
||||
}
|
||||
|
||||
let conv_audio_struct (s : Icalendar.audio_struct) : audio_struct = { attach = s.attach }
|
||||
let conv_display_struct (s : Icalendar.display_struct) : display_struct = { description = s.description }
|
||||
|
||||
let conv_email_struct (s : Icalendar.email_struct) : email_struct =
|
||||
{ description = s.description; attendees = s.attendees; attach = s.attach }
|
||||
|
||||
let conv_alarm (a : Icalendar.alarm) : alarm =
|
||||
match a with
|
||||
| `Audio s -> `Audio (conv_alarm_struct conv_audio_struct s)
|
||||
| `Display s -> `Display (conv_alarm_struct conv_display_struct s)
|
||||
| `Email s -> `Email (conv_alarm_struct conv_email_struct s)
|
||||
| `None s -> `None (conv_alarm_struct Fun.id s)
|
||||
|
||||
let conv_event (e : Icalendar.event) : event =
|
||||
{
|
||||
dtstamp = e.dtstamp;
|
||||
uid = e.uid;
|
||||
dtstart = e.dtstart;
|
||||
dtend_or_duration = e.dtend_or_duration;
|
||||
rrule = e.rrule;
|
||||
props = e.props;
|
||||
alarms = List.map conv_alarm e.alarms;
|
||||
}
|
||||
|
||||
let conv_component (c : Icalendar.component) : component =
|
||||
match c with
|
||||
| `Event e -> `Event (conv_event e)
|
||||
| `Todo (props, alms) -> `Todo (props, List.map conv_alarm alms)
|
||||
| `Journal props -> `Journal props
|
||||
| `Freebusy props -> `Freebusy props
|
||||
| `Timezone tz -> `Timezone tz
|
||||
|
||||
let parse s =
|
||||
Result.map (fun (cal_props, components) -> (cal_props, List.map conv_component components)) (Icalendar.parse s)
|
||||
@@ -1,3 +0,0 @@
|
||||
include Ptime
|
||||
|
||||
type date = int * int * int [@@deriving show]
|
||||
@@ -1,5 +0,0 @@
|
||||
module Icalendar = Icalendar_augmented
|
||||
module Ptime = Ptime_augmented
|
||||
module Result = Result_augmented
|
||||
module Timedesc = Timedesc_augmented
|
||||
module Utf8 = Utf8
|
||||
@@ -1,42 +0,0 @@
|
||||
module Internal_result = struct
|
||||
type ('a, 'b) t = ('a, 'b) result = Ok of 'a | Error of 'b
|
||||
|
||||
let return x = Ok x
|
||||
let error e = Error e
|
||||
let error_string s = Error (`Error_message s)
|
||||
let bind = Stdlib.Result.bind
|
||||
let ok = Result.ok
|
||||
|
||||
module List = struct
|
||||
let map (xs : 'a list) ~(f : 'a -> ('b, 'c) t) : ('b list, 'c) t =
|
||||
let rec loop ?(acc = []) xs =
|
||||
match xs with
|
||||
| [] -> return (List.rev acc)
|
||||
| hd :: tl -> (
|
||||
match f hd with
|
||||
| Ok x -> loop ~acc:(x :: acc) tl
|
||||
| Error e -> Error e)
|
||||
in
|
||||
loop xs
|
||||
|
||||
let iteri ?(start = 0) (xs : 'a list) ~(f : int -> 'a -> (unit, 'b) t) : (unit, 'b) t =
|
||||
let rec loop ?(idx = start) xs =
|
||||
match xs with
|
||||
| [] -> return ()
|
||||
| hd :: tl -> begin
|
||||
let res = f idx hd in
|
||||
match res with
|
||||
| Ok () -> loop ~idx:(idx + 1) tl
|
||||
| Error e -> Error e
|
||||
end
|
||||
in
|
||||
loop xs
|
||||
end
|
||||
|
||||
module Let_syntax = struct
|
||||
let ( let* ) = Stdlib.Result.bind
|
||||
let ( let+ ) x f = Stdlib.Result.map f x
|
||||
end
|
||||
end
|
||||
|
||||
include Internal_result
|
||||
@@ -1,34 +0,0 @@
|
||||
include Timedesc
|
||||
|
||||
type t = Timedesc.t
|
||||
|
||||
module Time = struct
|
||||
include Timedesc.Time
|
||||
|
||||
let pp = Timedesc.Time.pp_rfc3339 ()
|
||||
end
|
||||
|
||||
module Span = struct
|
||||
include Timedesc.Span
|
||||
end
|
||||
|
||||
module Date = struct
|
||||
include Timedesc.Date
|
||||
|
||||
type t = Timedesc.Date.t
|
||||
|
||||
let pp = Timedesc.Date.pp_rfc3339
|
||||
|
||||
module Ymd = struct
|
||||
include Timedesc.Date.Ymd
|
||||
|
||||
type error = [ `Does_not_exist | `Invalid_year of int | `Invalid_month of int | `Invalid_day of int ]
|
||||
[@@deriving show]
|
||||
end
|
||||
end
|
||||
|
||||
module Timestamp = struct
|
||||
type t = Timedesc.Timestamp.t
|
||||
|
||||
let pp = Timedesc.Timestamp.pp
|
||||
end
|
||||
202
lib/utf8.ml
202
lib/utf8.ml
@@ -1,202 +0,0 @@
|
||||
let length = Uuseg_string.fold_utf_8 `Grapheme_cluster (fun x _ -> x + 1) 0
|
||||
|
||||
let capitalize s =
|
||||
let dec = Uutf.decoder ~encoding:`UTF_8 (`String s) in
|
||||
|
||||
let rec split_loop ?(acc = []) () =
|
||||
match Uutf.decode dec with
|
||||
| `Await -> assert false
|
||||
| `End -> List.rev acc
|
||||
| `Malformed _ignored -> split_loop ~acc ()
|
||||
| `Uchar c -> split_loop ~acc:(c :: acc) ()
|
||||
in
|
||||
|
||||
let buf = Buffer.create 1024 in
|
||||
let enc = Uutf.encoder `UTF_8 (`Buffer buf) in
|
||||
let rec capital_loop ?(last_was_upper = false) xs =
|
||||
match xs with
|
||||
| c :: tl ->
|
||||
let last_was_upper =
|
||||
if Uucp.Alpha.is_alphabetic c
|
||||
then begin
|
||||
let f = if last_was_upper = false then Uucp.Case.Map.to_upper else Uucp.Case.Map.to_lower in
|
||||
match f c with
|
||||
| `Self ->
|
||||
let () = Uutf.encode enc (`Uchar c) |> ignore in
|
||||
true
|
||||
| `Uchars u_lst ->
|
||||
List.iter (fun u -> Uutf.encode enc (`Uchar u) |> ignore) u_lst;
|
||||
true
|
||||
end
|
||||
else
|
||||
let () = Uutf.encode enc (`Uchar c) |> ignore in
|
||||
false
|
||||
in
|
||||
capital_loop ~last_was_upper tl
|
||||
| [] ->
|
||||
let () = Uutf.encode enc `End |> ignore in
|
||||
Buffer.contents buf
|
||||
in
|
||||
split_loop () |> capital_loop
|
||||
|
||||
let lowercase s =
|
||||
let dec = Uutf.decoder ~encoding:`UTF_8 (`String s) in
|
||||
|
||||
let rec split_loop ?(acc = []) () =
|
||||
match Uutf.decode dec with
|
||||
| `Await -> assert false
|
||||
| `End -> List.rev acc
|
||||
| `Malformed _ignored -> split_loop ~acc ()
|
||||
| `Uchar c -> split_loop ~acc:(c :: acc) ()
|
||||
in
|
||||
|
||||
let buf = Buffer.create 1024 in
|
||||
let enc = Uutf.encoder `UTF_8 (`Buffer buf) in
|
||||
let rec to_lower xs =
|
||||
match xs with
|
||||
| c :: tl ->
|
||||
if Uucp.Alpha.is_alphabetic c
|
||||
then begin
|
||||
match Uucp.Case.Map.to_lower c with
|
||||
| `Self ->
|
||||
let () = Uutf.encode enc (`Uchar c) |> ignore in
|
||||
to_lower tl
|
||||
| `Uchars u_lst ->
|
||||
List.iter (fun u -> Uutf.encode enc (`Uchar u) |> ignore) u_lst;
|
||||
to_lower tl
|
||||
end
|
||||
else
|
||||
let () = Uutf.encode enc (`Uchar c) |> ignore in
|
||||
to_lower tl
|
||||
| [] ->
|
||||
let () = Uutf.encode enc `End |> ignore in
|
||||
Buffer.contents buf
|
||||
in
|
||||
split_loop () |> to_lower
|
||||
|
||||
let remove_non_alphabetic s =
|
||||
let dec = Uutf.decoder ~encoding:`UTF_8 (`String s) in
|
||||
|
||||
let rec split_loop ?(acc = []) () =
|
||||
match Uutf.decode dec with
|
||||
| `Await -> assert false
|
||||
| `End -> List.rev acc
|
||||
| `Malformed _ignored -> split_loop ~acc ()
|
||||
| `Uchar c -> split_loop ~acc:(c :: acc) ()
|
||||
in
|
||||
|
||||
let buf = Buffer.create 1024 in
|
||||
let enc = Uutf.encoder `UTF_8 (`Buffer buf) in
|
||||
let rec filter_loop xs =
|
||||
match xs with
|
||||
| c :: tl ->
|
||||
if Uucp.Alpha.is_alphabetic c
|
||||
then begin
|
||||
let () = Uutf.encode enc (`Uchar c) |> ignore in
|
||||
filter_loop tl
|
||||
end
|
||||
else filter_loop tl
|
||||
| [] ->
|
||||
let () = Uutf.encode enc `End |> ignore in
|
||||
Buffer.contents buf
|
||||
in
|
||||
split_loop () |> filter_loop
|
||||
|
||||
let split_in_chunks_of n s =
|
||||
let last, chunks =
|
||||
Uuseg_string.fold_utf_8
|
||||
`Grapheme_cluster
|
||||
(fun (last, chunks) grapheme ->
|
||||
let l = List.length last in
|
||||
if l < n
|
||||
then (grapheme :: last, chunks)
|
||||
else if l = n
|
||||
then ([grapheme], (List.rev last |> StringLabels.concat ~sep:"") :: chunks)
|
||||
else assert false)
|
||||
([], [])
|
||||
s
|
||||
in
|
||||
(List.rev last |> StringLabels.concat ~sep:"") :: chunks |> List.rev
|
||||
|
||||
let utf8_clamp_at n s =
|
||||
let first =
|
||||
Uuseg_string.fold_utf_8
|
||||
`Grapheme_cluster
|
||||
(fun acc grapheme -> if List.length acc < n then grapheme :: acc else acc)
|
||||
[]
|
||||
s
|
||||
in
|
||||
let first = String.concat "" (List.rev first) in
|
||||
let l = String.length first in
|
||||
let rest = String.sub s l (String.length s - l) in
|
||||
(first, rest)
|
||||
|
||||
let clamp_at_space_up_to n s =
|
||||
let module S = StringLabels in
|
||||
let module L = ListLabels in
|
||||
let words = S.split_on_char ~sep:' ' s |> L.map ~f:S.trim |> L.filter ~f:(( <> ) "") in
|
||||
|
||||
let words =
|
||||
match words with
|
||||
| first :: rest ->
|
||||
let l_fst = length first in
|
||||
if l_fst <= n
|
||||
then first :: rest
|
||||
else
|
||||
(* Prima parola troppo lunga, forza lo split anche se non è sullo spazio *)
|
||||
let fst, snd = utf8_clamp_at n first in
|
||||
fst :: snd :: rest
|
||||
| [] -> []
|
||||
in
|
||||
|
||||
let rec loop acc words =
|
||||
match words with
|
||||
| hd :: tl ->
|
||||
let l = length hd in
|
||||
if l <= n
|
||||
then loop (hd :: acc) tl
|
||||
else
|
||||
let words' = split_in_chunks_of n hd in
|
||||
loop (L.rev words' @ acc) tl
|
||||
| [] -> L.rev acc
|
||||
in
|
||||
let words = loop [] words in
|
||||
|
||||
let rec loop ?(ok = []) ?(total_chars = 0) ?(total_words = 0) words =
|
||||
match words with
|
||||
| hd :: tl ->
|
||||
let l = length hd in
|
||||
if total_chars + total_words + l > n
|
||||
then (L.rev ok |> S.concat ~sep:" ", S.concat ~sep:" " words)
|
||||
else loop ~ok:(hd :: ok) ~total_chars:(total_chars + l) ~total_words:(total_words + 1) tl
|
||||
| [] -> (L.rev ok |> S.concat ~sep:" ", "")
|
||||
in
|
||||
loop words
|
||||
|
||||
let split_at_space_up_to n s =
|
||||
let rec loop ?(acc = []) s =
|
||||
let s', rest = clamp_at_space_up_to n s in
|
||||
let acc = s' :: acc in
|
||||
if rest = "" then List.rev acc else loop ~acc rest
|
||||
in
|
||||
loop s
|
||||
|
||||
let recode_string ?(encoding = `UTF_8) src =
|
||||
let dst = Buffer.create 4 in
|
||||
let rec loop d e =
|
||||
match Uutf.decode d with
|
||||
| `Uchar _ as u ->
|
||||
let (_ : [`Ok | `Partial]) = Uutf.encode e u in
|
||||
loop d e
|
||||
| `End ->
|
||||
let (_ : [`Ok | `Partial]) = Uutf.encode e `End in
|
||||
()
|
||||
| `Malformed _ ->
|
||||
let (_ : [`Ok | `Partial]) = Uutf.encode e (`Uchar Uutf.u_rep) in
|
||||
loop d e
|
||||
| `Await -> assert false
|
||||
in
|
||||
let d = Uutf.decoder ~nln:(`NLF (Uchar.of_int 10)) ~encoding (`String src) in
|
||||
let e = Uutf.encoder `UTF_8 (`Buffer dst) in
|
||||
let () = loop d e in
|
||||
Buffer.contents dst
|
||||
Reference in New Issue
Block a user