Home > Appendix > Name Decode Script

Name Decode Script


An S-PLUS decode script can be useful if you want to produce a text report about the columns. Here is a script that decodes column names that originally contained ASCII (or ISO 8859-1 Latin-1) characters and gives ‘.’ for all other coded characters:

# (S-PLUS and R)

# NameDecode function definition

# Summary:

# NameDecode translates codes to characters.

# For coded ASCII and ISO 8859-1 Latin 1 characters this function decodes in the same way as

# the NameDecode expression method does in TIBCO Spotfire. All other codes are translated to ".".

#

# Codes look like ..xyz  where x, y and z are characters matching [0-9a-zA-Z].

#

# Parameter: str ; A character string

# Result:  A character string where all codes are replaced with the coded character

#

NameDecode <- function(str) {
  # Replaces every "..xyz" code found in `str` with the character it encodes.
  #
  # x, y and z are base-62 digits (0-9, a-z, A-Z, in that order), so a code
  # carries the value x * 62^2 + y * 62 + z. Depending on that value:
  #   0 or >= 65536 : not treated as a code, the five characters are kept as is
  #   1 .. 255      : replaced with the ASCII / ISO 8859-1 character of that value
  #   256 .. 65535  : replaced with "." (TIBCO Spotfire itself decodes these)
  #
  # Parameter: str ; a character string, or a character vector whose elements
  #                  are decoded independently (NA elements stay NA)
  # Result:    a character vector of the same length as `str` with all codes
  #            replaced
  #
  # NOTE(review): intToUtf8(), vapply() and paste0() are R functions; confirm
  # availability before running this version under S-PLUS.

  # A digit's numeric value is its position in this vector minus one.
  digits <- unlist(strsplit("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", ""))
  # The first digit is limited to [0-9a-h] so that most values above 65535
  # are never matched at all; the rest are rejected in decodeOneCode().
  codePattern <- "[.][.][0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]"
  codePattern <- "[.][.][0-9a-h][0-9a-zA-Z][0-9a-zA-Z]"
  codeLength <- 5L

  # Translates one five-character code ("..xyz") to its replacement text.
  decodeOneCode <- function(code) {
    digitValues <- match(substring(code, 3:5, 3:5), digits) - 1
    value <- sum(digitValues * c(62 * 62, 62, 1))
    if (value == 0 || value >= 65536) {
      code
    } else if (value < 256) {
      # Latin-1 code points coincide with Unicode code points, so this gives
      # the right character in every locale. A table built by splitting raw
      # "\200".."\377" bytes is invalid input for strsplit() in a UTF-8
      # session and decodes these values to NA.
      intToUtf8(as.integer(value))
    } else {
      # Other coded UTF-16 characters are replaced with a "." here.
      "."
    }
  }

  # Decodes all codes of a single string, scanning from left to right.
  decodeOneString <- function(s) {
    if (is.na(s)) {
      return(NA_character_)
    }
    decoded <- ""
    rest <- s
    repeat {
      # as.integer() drops the match.length attributes regexpr() attaches.
      pos <- as.integer(regexpr(codePattern, rest))
      if (pos < 1L) {
        break
      }
      code <- substring(rest, pos, pos + codeLength - 1L)
      decoded <- paste0(decoded, substring(rest, 1L, pos - 1L), decodeOneCode(code))
      # Continue after the code just handled so it is never re-examined.
      rest <- substring(rest, pos + codeLength, nchar(rest))
    }
    paste0(decoded, rest)
  }

  vapply(as.character(str), decodeOneString, character(1), USE.NAMES = FALSE)
}

 

 

Below is an example of how this script could be used:

#

# Example

#

# vapply() fixes the result to exactly one string per column name, so `decoded`
# stays a character vector even when `input` has no columns.
names <- data.frame(encoded = names(input), decoded = vapply(names(input), FUN = NameDecode, FUN.VALUE = character(1)))

See also:

Name Encoding for Column Names Sent to Spotfire Statistics Services