...
Tawesoft Logo

Source file src/tawesoft.co.uk/go/lxstrconv/decimalformat.go

Documentation: src/tawesoft.co.uk/go/lxstrconv/decimalformat.go

     1  package lxstrconv
     2  
     3  import (
     4      "math"
     5      "strconv"
     6      "unicode"
     7      "unicode/utf8"
     8      
     9      "golang.org/x/text/language"
    10      "golang.org/x/text/message"
    11      "golang.org/x/text/number"
    12  )
    13  
    14  // acceptRune returns the length of r in bytes if r is the first rune in s,
    15  // otherwise returns zero.
    16  func acceptRune(r rune, s string) int {
    17      if f, ok := firstRune(s); ok && (f == r) {
    18          return utf8.RuneLen(r)
    19      } else {
    20          return 0
    21      }
    22  }
    23  
    24  // firstRune returns the first rune in a string and true, or (_, false).
    25  func firstRune(s string) (rune, bool) {
    26      for _, c := range s {
    27          return c, true
    28      }
    29      return runeNone, false
    30  }
    31  
    32  // guessDecimalGroupSeparator guesses, for a printer in a given locale,
    33  // the group separator rune in a decimal number system e.g. comma for British.
    34  func guessDecimalGroupSeparator(p *message.Printer) rune {
    35      // heuristic: any rune that appears at least twice is probably a comma
    36      s := p.Sprint(number.Decimal(1234567890))
    37      return repeatingRune(s)
    38  }
    39  
    40  // guessDecimalPointSeparator guesses, for a printer in a given locale,
    41  // the decimal point rune in a decimal number system, e.g. period for British.
    42  func guessDecimalPoint(p *message.Printer) rune {
    43      // heuristic: any rune that is common to both these strings is probably a
    44      // decimal point. Concat the strings and find any repeated rune.
    45      s1 := p.Sprint(number.Decimal(1.23))
    46      s2 := p.Sprint(number.Decimal(4.56))
    47      s := s1 + s2
    48      return repeatingRune(s)
    49  }
    50  
    51  // guessDecimalDigits guesses, for a printer in a given locale, the digits
    52  // representing the values 0 to 9.
    53  func guessDecimalDigits(p *message.Printer, out *[10]rune) {
    54      for i := 0; i < 10; i++ {
    55          s := []rune(p.Sprint(number.Decimal(i)))
    56          if len(s) == 1 {
    57              out[i] = s[0]
    58          } else {
    59              out[i] = runeNone
    60          }
    61      }
    62  }
    63  
    64  // decimalFormat defines how a decimal (base-10) number should be parsed for a
    65  // given locale. Note that the behaviour is undefined for locales that have
    66  // non-base-10 number systems.
    67  //
    68  // This structure is currently internal until we have more confidence it is
    69  // correct for all languages with decimal number systems.
    70  type decimalFormat struct {
    71      // GroupSeparator is a digits separator such as commas for thousands. In
    72      // addition to any separator defined here, a parser will ignore whitespace.
    73      GroupSeparator rune
    74      
    75      // Point is separator between the integer and fractional part of
    76      // a decimal number.
    77      Point rune
    78      
    79      // Digits are an ascending list of digit runes
    80      Digits [10]rune
    81  }
    82  
    83  func (f decimalFormat) ParseInt(s string) (int64, error) {
    84      if len(s) == 0 { return 0, strconv.ErrSyntax }
    85      
    86      value, length, err := f.AcceptInt(s)
    87      
    88      if err != nil { return 0, err }
    89      if len(s) != length { return 0, strconv.ErrSyntax }
    90      
    91      return value, nil
    92  }
    93  
    94  func (f decimalFormat) ParseFloat(s string) (float64, error) {
    95      if len(s) == 0 { return 0, strconv.ErrSyntax }
    96      
    97      value, length, err := f.AcceptFloat(s)
    98      
    99      if err != nil { return 0, err }
   100      if len(s) != length { return 0, strconv.ErrSyntax }
   101      
   102      return value, nil
   103  }
   104  
   105  // NewDecimalFormat constructs, for a given locale, a NumberFormat that
   106  // defines how a decimal (base-10) number should be parsed. Note that the
   107  // behaviour is undefined for locales that have non-base-10 number systems.
   108  func NewDecimalFormat(tag language.Tag) NumberFormat {
   109      
   110      // Unfortunately, I couldn't find any exported symbols in /x/text that
   111      // gives this information directly (as would be ideal). Therefore this
   112      // function works by printing numbers in the current locale and using
   113      // heuristics to guess the correct separators.
   114      
   115      p := message.NewPrinter(tag)
   116      
   117      format := decimalFormat{
   118          GroupSeparator: guessDecimalGroupSeparator(p),
   119          Point:          guessDecimalPoint(p),
   120      }
   121      
   122      guessDecimalDigits(p, &format.Digits)
   123      
   124      return format
   125  }
   126  
   127  // returns (0-9, true) for a decimal digit in any language, or (_, false)
   128  func decimalRuneToInt(d rune, digits *[10]rune) (int, bool) {
   129      for i := 0; i < 10; i++ {
   130          if d == digits[i] { return i, true }
   131      }
   132      return 0, false
   133  }
   134  
   135  // AcceptInteger parses as much of an integer number as possible. It returns a
   136  // 2 tuple: the value of the parsed integer, and the length of the characters
   137  // successfully parsed. For example, for some locales, the string "1,000X"
   138  // returns (1000, 5) and the string "foo" returns (0, 0).
   139  //
   140  // Err is always nil, strconv.ErrRange or strconv.ErrSyntax
   141  func (f decimalFormat) AcceptInt(s string) (value int64, length int, err error) {
   142  
   143      if len(s) == 0 { return 0, 0, nil }
   144      
   145      if s[0] == '-' {
   146          // TODO better negative check e.g. "(1)" for "-1"
   147          v, l, _ := f.AcceptUint(s[1:])
   148          // TODO bounds check
   149          if l > 0 {
   150              return int64(v) * -1, l + 1, nil
   151          } else {
   152              return 0, 0, nil
   153          }
   154      }
   155      
   156      // TODO bounds check
   157      v, l, err := f.AcceptUint(s)
   158      return int64(v), l, nil
   159  }
   160  
   161  // AcceptUint: see AcceptInt
   162  func (f decimalFormat) AcceptUint(s string) (value uint64, length int, err error) {
   163      var accu uint64
   164      
   165      for i, c := range s {
   166          if c == f.GroupSeparator {
   167              // pass
   168          } else if unicode.IsSpace(c) {
   169              // pass
   170          } else if d, ok := decimalRuneToInt(c, &f.Digits); ok {
   171              accu *= 10
   172              accu += uint64(d)
   173              // TODO bounds check
   174          } else {
   175              // TODO this count is runes but should be bytes!
   176              return accu, i, nil
   177          }
   178      }
   179      
   180      return accu, len(s), nil
   181  }
   182  
   183  // AcceptFloat parses as much of a floating point number as possible. It returns
   184  // a 2 tuple: the value of the parsed float, and the length of the characters
   185  // successfully parsed. For example, for some locales, the string "1.23X"
   186  // returns (1.23, 4) and the string "foo" returns (0.0, 0).
   187  //
   188  // Err is always nil, strconv.ErrRange or strconv.ErrSyntax
   189  func (f decimalFormat) AcceptFloat(s string) (value float64, length int, err error) {
   190      var left, right int64
   191      var leftLen, rightLen, pointLen int
   192      var fLeft, fRight float64
   193      
   194      // accept leading decimal point
   195      if first, ok := firstRune(s); ok && first != f.Point {
   196          left, leftLen, err = f.AcceptInt(s)
   197          // TODO check err (Currently always nil)
   198          if leftLen == 0 { return 0, 0, nil }
   199          fLeft = float64(left)
   200      }
   201      
   202      pointLen = acceptRune(f.Point, s[leftLen:])
   203      
   204      if pointLen > 0 && (s[leftLen +pointLen] != '-') {
   205          right, rightLen, err = f.AcceptInt(s[leftLen +pointLen:])
   206          // TODO check err (currently always nil)
   207      }
   208      
   209      if right > 0.0 {
   210          fRight = float64(right)
   211          places := float64(1.0 + math.Floor(math.Log10(fRight)))
   212          fRight *= math.Pow(0.1, places)
   213          fRight = math.Copysign(fRight, fLeft)
   214      }
   215      
   216      value = fLeft + fRight
   217      length = leftLen + pointLen + rightLen
   218      
   219      return value, length, nil
   220  }
   221  

View as plain text