...
Tawesoft Logo

Source file src/tawesoft.co.uk/go/humanizex/parse.go

Documentation: src/tawesoft.co.uk/go/humanizex/parse.go

     1  package humanizex
     2  
     3  import (
     4      "fmt"
     5      "strings"
     6      "unicode"
     7      "unicode/utf8"
     8  )
     9  
    10  // Catagory Zs in http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
    11  var whitespaceRunes = string([]rune{
    12      '\u0020',
    13      '\u00A0',
    14      '\u1680',
    15      '\u2000',
    16      '\u2001',
    17      '\u2002',
    18      '\u2003',
    19      '\u2004',
    20      '\u2005',
    21      '\u2006',
    22      '\u2007',
    23      '\u2008',
    24      '\u2009',
    25      '\u200A',
    26      '\u202F',
    27      '\u205F',
    28      '\u3000',
    29  })
    30  
    31  func skipSpace(str string, bytesRead int) int {
    32      for _, r := range str[bytesRead:] {
    33          if !utf8.ValidRune(r) { break }
    34          if !unicode.IsSpace(r) { break }
    35          bytesRead += utf8.RuneLen(r)
    36      }
    37  
    38      return bytesRead
    39  }
    40  
    41  func (h *humanizer) Parse(str string, unit Unit, factors Factors) (float64, error) {
    42      v, bytesRead, err := h.Accept(str, unit, factors)
    43      if err != nil { return 0, err }
    44  
    45      if len(str) != bytesRead {
    46          return 0, fmt.Errorf("error parsing %q: unexpected trailing content after byte %d", str, bytesRead)
    47      }
    48  
    49      return v, err
    50  }
    51  
    52  func (h *humanizer) Accept(str string, unit Unit, factors Factors) (float64, int, error) {
    53      var v float64
    54      var bytesRead int
    55      var lastFactor Factor
    56  
    57      components := factors.Components
    58      if components < 1 { components = 1 }
    59  
    60      for i := 0; i < components; i++ {
    61          c, f, r, err := h.acceptOne(str[bytesRead:], factors)
    62          if (err != nil) && (i > 0) { break }
    63          if err != nil { return 0, 0, err }
    64  
    65          v += c
    66          bytesRead += r
    67          lastFactor = f
    68          if bytesRead == len(str) { break }
    69      }
    70  
    71      if len(str) == bytesRead { return v, bytesRead, nil }
    72      if lastFactor.Mode & FactorModeReplace == FactorModeReplace { return v, bytesRead, nil }
    73  
    74      // optionally accept the final unit
    75      bytesRead = skipSpace(str, bytesRead)
    76      if len(str) == bytesRead { return v, bytesRead, nil }
    77      remaining := str[bytesRead:]
    78  
    79      if strings.HasPrefix(remaining, unit.Utf8) {
    80          bytesRead += len(unit.Utf8)
    81      } else if strings.HasPrefix(remaining, unit.Ascii) {
    82          bytesRead += len(unit.Ascii)
    83      }
    84  
    85      // optionally accept trailing space
    86      if len(str) != bytesRead {
    87          bytesRead = skipSpace(str, bytesRead)
    88      }
    89  
    90      return v, bytesRead, nil
    91  }
    92  
    93  func (h *humanizer) acceptOne(str string, factors Factors) (float64, Factor, int, error) {
    94      v, bytesRead, err := h.NF.AcceptFloat(str)
    95  
    96      if err != nil {
    97          // only ever strconv.ErrRange
    98          return 0, Factor{}, 0, fmt.Errorf("error parsing number component of %q: %v", str, err)
    99      } else if bytesRead == 0 {
   100          return 0, Factor{}, 0, nil
   101      }
   102  
   103      bytesRead = skipSpace(str, bytesRead)
   104      if len(str) == bytesRead { return v, Factor{}, bytesRead, nil }
   105  
   106      remaining := str[bytesRead:]
   107      multiplier := 1.0
   108      unitLen := 0 // logical length in runes
   109      unitLenBytes := 0 // actual length in bytes
   110      factor := Factor{}
   111      for _, f := range factors.Factors {
   112      // don't break early, use the longest prefix just in case
   113      // we have a hypothetical unit prefix with a common "unit
   114      // prefix prefix" e.g. "x" and "xx"
   115          if strings.HasPrefix(remaining, f.Unit.Utf8) {
   116              if unitLen < len(f.Unit.Ascii) {
   117                  multiplier = f.Magnitude
   118                  unitLen = len(f.Unit.Ascii)
   119                  unitLenBytes = len(f.Unit.Utf8)
   120                  factor = f
   121              }
   122          } else if strings.HasPrefix(remaining, f.Unit.Ascii) {
   123              if unitLen < len(f.Unit.Ascii) {
   124                  multiplier = f.Magnitude
   125                  unitLen = len(f.Unit.Ascii)
   126                  unitLenBytes = len(f.Unit.Ascii)
   127                  factor = f
   128              }
   129          }
   130      }
   131  
   132      return v * multiplier, factor, bytesRead + unitLenBytes, nil
   133  }
   134  

View as plain text