neocities.h

a C library for interacting with Neocities' API
git clone https://github.com/tanguyandreani/neocities.h
Log | Files | Refs | README | LICENSE

dtparser.c (13757B)


      1 /*
      2  * dtparser
      3  *
      4  *
      5  * dtparser is free software; you can redistribute it and/or modify
      6  * it under the terms of the MIT license. See LICENSE for details.
      7  *
      8  * Copyright (c) 2017 Partha Susarla <mail@spartha.org>
      9  */
     10 
     11 #include <stdio.h>
     12 #include <stdlib.h>
     13 #include <string.h>
     14 #include <ctype.h>
     15 
     16 #include "dtparser.h"
     17 
     18 #define EOB (-99999)            /* End Of Buffer */
     19 static const char special[256] = {
     20         [' ']  = 1,
     21         ['\t'] = 1,
     22         ['\r'] = 1,
     23         ['\n'] = 1,
     24 };
     25 
     26 static const char separators[256] = {
     27         [' ']  = 1,
     28         [',']  = 1,
     29         ['-']  = 1,
     30         ['+']  = 1,
     31         [':']  = 1,
     32         ['.']  = 1,
     33 };
     34 
     35 static const char * const monthnames[12] = {
     36         "Jan",
     37         "Feb",
     38         "Mar",
     39         "Apr",
     40         "May",
     41         "Jun",
     42         "Jul",
     43         "Aug",
     44         "Sep",
     45         "Oct",
     46         "Nov",
     47         "Dec"
     48 };
     49 
     50 static const char * const weekdays[7] = {
     51         "Sun",
     52         "Mon",
     53         "Tue",
     54         "Wed",
     55         "Thu",
     56         "Fri",
     57         "Sat"
     58 };
     59 
     60 enum {
     61         Alpha = 1,
     62         UAlpha = 2,
     63         LAlpha = 4,
     64         Digit = 8,
     65         TZSign = 16,
     66 };
     67 
     68 static const long charset[257] = {
     69         ['0' + 1 ... '9' + 1] = Digit,
     70         ['A' + 1 ... 'Z' + 1] = Alpha | UAlpha,
     71         ['a' + 1 ... 'z' + 1] = Alpha | LAlpha,
     72         ['+' + 1] = TZSign,
     73         ['-' + 1] = TZSign
     74 };
     75 
     76 struct tbuf {
     77         const char *str;
     78         size_t len;
     79         size_t offset;
     80 };
     81 
     82 
     83 /*
     84  * Returns the GMT offset of the struct tm 'tm', obtained from 'time'.
     85  * (from Cyrus-imapd)
     86  */
     87 int gmtoff_of(struct tm *tm, time_t time)
     88 {
     89         struct tm local, gmt;
     90         struct tm *gtm;
     91         long offset;
     92 
     93         local = *tm;
     94         gtm = gmtime(&time);
     95         gmt = *gtm;
     96 
     97         /* Assume we are never more than 24 hours away. */
     98         offset = local.tm_yday - gmt.tm_yday;
     99         if (offset > 1) {
    100                 offset = -24;
    101         } else if (offset < -1) {
    102                 offset = 24;
    103         } else {
    104                 offset *= 24;
    105         }
    106 
    107         /* Scale in the hours and minutes; ignore seconds. */
    108         offset += local.tm_hour - gmt.tm_hour;
    109         offset *= 60;
    110         offset += local.tm_min - gmt.tm_min;
    111 
    112         /* Restore the data in the struct 'tm' points to */
    113         *tm = local;
    114         return offset * 60;
    115 }
    116 
    117 static inline int get_next_char(struct tbuf *buf)
    118 {
    119         int c;
    120 
    121         if (buf->offset < buf->len) {
    122                 buf->offset++;
    123                 c = buf->str[buf->offset];
    124                 return c;
    125         }
    126 
    127         return EOB;
    128 }
    129 
    130 static inline int get_current_char(struct tbuf *buf)
    131 {
    132         size_t offset = buf->offset;
    133 
    134         if (offset < buf->len)
    135                 return buf->str[offset];
    136         else
    137                 return EOB;
    138 }
    139 
    140 static inline int get_previous_char(struct tbuf *buf)
    141 {
    142         int offset = buf->offset;
    143 
    144         offset--;
    145         if (offset >= 0)
    146                 return buf->str[offset];
    147         else
    148                 return EOB;
    149 }
    150 
    151 /*
    152   TODO: Support comments as per RFC.
    153  */
    154 static int skip_ws(struct tbuf *buf, int skipcomment __attribute__((unused)))
    155 {
    156         int c = buf->str[buf->offset];
    157 
    158         while (c != EOB) {
    159                 if (special[c]) {
    160                         c = get_next_char(buf);
    161                         continue;
    162                 }
    163 
    164                 break;
    165         }
    166 
    167         return 1;
    168 }
    169 
    170 #define MAX_BUF_LEN 32
    171 static int get_next_token(struct tbuf *buf, char **str, int *len)
    172 {
    173         int c, ret = 1;
    174         long ch;
    175         static char cache[MAX_BUF_LEN];
    176 
    177         memset(cache, 1, MAX_BUF_LEN);
    178 
    179         c = get_current_char(buf);
    180         if (c == EOB) {
    181                 ret = 0;
    182                 goto failed;
    183         }
    184 
    185         *len = 0;
    186         for (;;) {
    187                 if (special[c] || separators[c])
    188                         break;
    189 
    190                 ch = charset[c + 1];
    191                 if (!(ch & (Alpha | Digit)))
    192                         break;
    193 
    194                 if (*len >= MAX_BUF_LEN)
    195                         break;
    196 
    197                 cache[*len] = c;
    198                 *len += 1;
    199 
    200                 c = get_next_char(buf);
    201                 if (c == EOB) {
    202                         ret = 0;
    203                         break;
    204                 }
    205         }
    206 
    207 failed:
    208         *str = cache;
    209 
    210         return ret;
    211 }
    212 
    213 static inline int to_int(char *str, int len)
    214 {
    215         int i, num = 0;
    216 
    217         for (i = 0; i < len; i++) {
    218                 if (charset[str[i] + 1] & Digit)
    219                         num = num * 10 + (str[i] - '0');
    220                 else {
    221                         num = -9999;
    222                         break;
    223                 }
    224         }
    225 
    226         return num;
    227 }
    228 
    229 static inline int to_upper_str_in_place(char **str, int len)
    230 {
    231         int i;
    232 
    233         for (i = 0; i < len; i++) {
    234                 int c = str[0][i];
    235                 if (charset[c + 1] & LAlpha)
    236                         str[0][i] = str[0][i] - 32;
    237         }
    238 
    239         return 1;
    240 }
    241 
    242 static inline int to_upper(char ch)
    243 {
    244         if (charset[ch + 1] & LAlpha)
    245                 ch =  ch - 32;
    246 
    247         return ch;
    248 }
    249 
    250 static inline int to_lower(char ch)
    251 {
    252         if (charset[ch + 1] & UAlpha)
    253                 ch = ch + 32;
    254 
    255         return ch;
    256 }
    257 
    258 static int compute_tzoffset(char *str, int len, int sign)
    259 {
    260         int offset = 0;
    261 
    262         if (len == 1) {         /* Military timezone */
    263                 int ch;
    264                 ch = to_upper(str[0]);
    265                 if (ch < 'J')
    266                         return (str[0] - 'A' + 1) * 60;
    267                 if (ch == 'J')
    268                         return 0;
    269                 if (ch <= 'M')
    270                         return (str[0] - 'A') * 60;;
    271                 if (ch < 'Z')
    272                         return ('M' - str[0]) * 60;
    273 
    274                 return 0;
    275         }
    276 
    277         if (len == 2 &&
    278             to_upper(str[0]) == 'U' &&
    279             to_upper(str[1]) == 'T') {         /* Universal Time zone (UT) */
    280                 return 0;
    281         }
    282 
    283         if (len == 3) {
    284                 char *p;
    285 
    286                 if (to_upper(str[2]) != 'T')
    287                         return 0;
    288 
    289                 p = strchr("AECMPYHB", to_upper(str[0]));
    290                 if (!p)
    291                         return 0;
    292                 offset = (strlen(p) - 12) *  60;
    293 
    294                 if (to_upper(str[1]) == 'D')
    295                         return offset + 60;
    296                 if (to_upper(str[1]) == 'S')
    297                         return offset;
    298         }
    299 
    300         if (len == 4) {         /* The number timezone offset */
    301                 int i;
    302 
    303                 for (i = 0; i < len; i++) {
    304                         if (!(charset[str[i] + 1] & Digit))
    305                                 return 0;
    306                 }
    307 
    308                 offset = ((str[0] - '0') * 10 + (str[1] - '0')) * 60 +
    309                         (str[2] - '0') * 10 +
    310                         (str[3] - '0');
    311 
    312                 return (sign == '+') ? offset : -offset;
    313         }
    314 
    315         return 0;
    316 }
    317 
    318 /*
    319   date-time = [ ([FWS] day-name) "," ]
    320               ([FWS] 1*2DIGIT FWS)
    321               month
    322               (FWS 4*DIGIT FWS)
    323               2DIGIT ":" 2DIGIT [ ":" 2DIGIT ]
    324               (FWS ( "+" / "-" ) 4DIGIT)
    325               [CFWS]
    326 
    327    day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
    328    month = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / "Jul" / "Aug" /
    329            "Sep" / "Oct" / "Nov" / "Dec"
    330  */
    331 
    332 static int tokenise_and_create_tm(struct tbuf *buf, struct tm *tm,
    333                                   int *tz_offset, bool usetime)
    334 {
    335         long ch;
    336         int c, i, len;
    337         char *str_token = NULL;
    338 
    339         /* Skip leading WS, if any */
    340         if (skip_ws(buf, 0) != 1)
    341                 return -1;
    342 
    343         c = get_current_char(buf);
    344         if (c == EOB)
    345                 goto failed;
    346 
    347         ch = charset[c + 1];
    348         if (ch & Alpha) {       /* Most likely a weekday at the start. */
    349                 if (!get_next_token(buf, &str_token, &len))
    350                         goto failed;
    351 
    352                 /* We might have a weekday token here, which we should skip*/
    353                 if (len != 3)
    354                         goto failed;
    355 
    356                 /* The weekday is foll wed by a ',', consume that. */
    357                 if (get_current_char(buf) == ',')
    358                         get_next_char(buf);
    359                 else
    360                         goto failed;
    361 
    362                 if (skip_ws(buf, 0) != 1)
    363                         return -1;
    364         }
    365 
    366         /** DATE **/
    367         /* date (1 or 2 digits) */
    368         if (!get_next_token(buf, &str_token, &len))
    369                 goto failed;
    370 
    371         if (len < 1 || len > 2 || !(charset[str_token[0] + 1] & Digit))
    372                 goto failed;
    373 
    374         tm->tm_mday = to_int(str_token, len);
    375         if (tm->tm_mday == -9999)
    376                 goto failed;
    377 
    378         /* month name */
    379         get_next_char(buf);     /* Consume a character, either a '-' or ' ' */
    380 
    381         if (!get_next_token(buf, &str_token, &len) ||
    382             len != 3 ||
    383             !(charset[str_token[0] + 1] & Alpha))
    384                 goto failed;
    385 
    386         str_token[0] = to_upper(str_token[0]);
    387         str_token[1] = to_lower(str_token[1]);
    388         str_token[2] = to_lower(str_token[2]);
    389         for (i = 0; i < 12; i++) {
    390                 if (memcmp(monthnames[i], str_token, 3) == 0) {
    391                         tm->tm_mon = i;
    392                         break;
    393                 }
    394         }
    395         if (i == 12)
    396                 goto failed;
    397 
    398         /* year 2, 4 or >4 digits */
    399         get_next_char(buf);     /* Consume a character, either a '-' or ' ' */
    400 
    401         if (!get_next_token(buf, &str_token, &len))
    402                 goto failed;
    403 
    404         tm->tm_year = to_int(str_token, len);
    405         if (tm->tm_year == -9999)
    406                 goto failed;
    407 
    408         if (len == 2) {
    409                 /* A 2 digit year */
    410                 if (tm->tm_year < 70)
    411                         tm->tm_year += 100;
    412         } else {
    413                 if (tm->tm_year < 1900)
    414                         goto failed;
    415                 tm->tm_year -= 1900;
    416         }
    417 
    418         if (!usetime) {
    419                 *tz_offset = 0;
    420                 goto done;
    421         }
    422 
    423         /** TIME **/
    424         if (skip_ws(buf, 0) != 1)
    425                 return -1;
    426 
    427         /* hour */
    428         if (!get_next_token(buf, &str_token, &len))
    429                 goto failed;
    430 
    431         if (len < 1 || len > 2 || !(charset[str_token[0] + 1] & Digit))
    432                 goto failed;
    433 
    434         tm->tm_hour = to_int(str_token, len);
    435         if (tm->tm_hour == -9999)
    436                 goto failed;
    437 
    438         /* minutes */
    439         if (get_current_char(buf) == ':' ||
    440             get_current_char(buf) == '.')
    441                 get_next_char(buf); /* Consume ':'/'.' */
    442         else
    443                 goto failed;    /* Something is broken */
    444 
    445         if (!get_next_token(buf, &str_token, &len))
    446                 goto failed;
    447 
    448         if (len < 1 || len > 2 || !(charset[str_token[0] + 1] & Digit))
    449                 goto failed;
    450 
    451         tm->tm_min = to_int(str_token, len);
    452         if (tm->tm_min == -9999)
    453                 goto failed;
    454 
    455 
    456         /* seconds[optional] */
    457         if (get_current_char(buf) == ':' ||
    458             get_current_char(buf) == '.') {
    459                 get_next_char(buf); /* Consume ':' */
    460 
    461                 if (!get_next_token(buf, &str_token, &len))
    462                         goto failed;
    463 
    464                 if (len < 1 || len > 2 || !(charset[str_token[0] + 1] & Digit))
    465                         goto failed;
    466 
    467                 tm->tm_sec = to_int(str_token, len);
    468                 if (tm->tm_sec == -9999)
    469                         goto failed;
    470 
    471         }
    472 
    473         /* timezone */
    474         if (skip_ws(buf, 0) != 1)
    475                 return -1;
    476 
    477         c = get_current_char(buf); /* the '+' or '-' in the timezone */
    478         get_next_char(buf);        /* consume '+' or '-' */
    479 
    480         if (!get_next_token(buf, &str_token, &len)) {
    481                 *tz_offset = 0;
    482         } else {
    483                 *tz_offset = compute_tzoffset(str_token, len, c);
    484         }
    485 
    486 done:
    487         /* dst */
    488         tm->tm_isdst = -1;
    489         return buf->offset;
    490 
    491 failed:
    492         return -1;
    493 }
    494 
    495 /*
    496   rfc5322_date_parse():
    497    Given a date time string in RFC 5322 format, this function
    498    parses and converts it into time_t format.
    499 
    500  On Success: Returns the number of characters from the date string parsed
    501  On Failure: Returns -1
    502  */
    503 int rfc5322_date_parse(const char *str, size_t len, time_t *t, bool usetime)
    504 {
    505         struct tbuf buf;
    506         struct tm tm;
    507         time_t tmp_time;
    508         int tzone_offset;
    509 
    510         if (!str)
    511                 goto baddate;
    512 
    513         memset(&tm, 0, sizeof(struct tm));
    514         *t = 0;
    515 
    516         buf.str = str;
    517         buf.len = len;
    518         buf.offset = 0;
    519 
    520         if (tokenise_and_create_tm(&buf, &tm, &tzone_offset, usetime) == -1)
    521                 goto baddate;
    522 
    523         if (usetime)
    524                 tmp_time = timegm(&tm);
    525         else
    526                 tmp_time = mktime(&tm);
    527 
    528         if (tmp_time == -1)
    529                 goto baddate;
    530 
    531         *t = tmp_time - tzone_offset * 60;
    532 
    533         return buf.offset;
    534 
    535 baddate:
    536         return -1;
    537 }
    538 
    539 
    540 /*
    541   rfc5322_date_create():
    542    Given a `time_t` date, this function creates a RFC5322 compliant date
    543    string.
    544  */
    545 int rfc5322_date_create(time_t date, char *buf, size_t len)
    546 {
    547         struct tm *tm = localtime(&date);
    548         long gmtoff = gmtoff_of(tm, date);
    549         int gmtnegative = 0;
    550 
    551         if (gmtoff < 0) {
    552                 gmtoff = -gmtoff;
    553                 gmtnegative = 1;
    554         }
    555 
    556         gmtoff /= 60;
    557 
    558 
    559     return snprintf(buf, len,
    560              "%s, %02d %s %04d %02d:%02d:%02d %c%02lu%02lu",
    561              weekdays[tm->tm_wday],
    562              tm->tm_mday, monthnames[tm->tm_mon], tm->tm_year + 1900,
    563              tm->tm_hour, tm->tm_min, tm->tm_sec,
    564              gmtnegative ? '-' : '+', gmtoff/60, gmtoff%60);
    565 }