Name Decode Script

An S-PLUS decode script can be useful if you want to produce a text report about the columns. Here is a script that works for columns starting out with ASCII characters and gives ‘?’ for non-ASCII characters:

# (S-PLUS and R)

# NameDecode function definition

# Summary:

# NameDecode translates codes to characters.

# For coded ASCII and ISO 8859-1 Latin 1 characters this function decodes in the same way as

# the NameDecode expression method does in TIBCO Spotfire. All other codes are translated to ".".

# Codes look like ..xyz where x, y and z are characters matching [0-9a-zA-Z].

# Parameter: str ; A character string

# Result: A character string where all codes are replaced with the coded character

NameDecode <- function(str)
{
  # Decode every "..xyz" code found in `str` and return the decoded string.
  #
  # Parameter: str ; a single character string
  # Result:    `str` with each code replaced as follows
  #              code value 1-255     -> the ISO 8859-1 character with that value
  #              code value 256-65535 -> "."
  #              code value 0, or 65536 and above -> the code text, unchanged
  #
  # The helpers receive everything they use as arguments, and paste() is
  # called with sep = "" rather than a newer shortcut.
  # NOTE(review): presumably this keeps the script usable in S-PLUS as well
  # as R (the file header says "S-PLUS and R") -- confirm before modernising.

  # Map code values in 1..255 to one-character strings.
  byteToCharacter <- function(bytes)
  {
    if (exists("intToUtf8"))
    {
      # ISO 8859-1 values are identical to Unicode code points, so this gives
      # valid strings in every locale. Splitting the raw octal table below
      # breaks for values >= 128 when the session uses a multibyte locale
      # such as UTF-8, because those lone bytes are not valid strings there.
      return(intToUtf8(bytes, multiple = TRUE))
    }

    # Fallback for engines that do not provide intToUtf8(): a lookup table of
    # the bytes 1..255 written as octal escapes. The table starts at 1, so
    # element i is the character with value i.
    c001.007 <- "\001\002\003\004\005\006\007"
    c010.077 <- "\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\040\041\042\043\044\045\046\047\050\051\052\053\054\055\056\057\060\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077"
    c100.107 <- "\100\101\102\103\104\105\106\107"
    c110.177 <- "\110\111\112\113\114\115\116\117\120\121\122\123\124\125\126\127\130\131\132\133\134\135\136\137\140\141\142\143\144\145\146\147\150\151\152\153\154\155\156\157\160\161\162\163\164\165\166\167\170\171\172\173\174\175\176\177"
    c200.207 <- "\200\201\202\203\204\205\206\207"
    c210.277 <- "\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277"
    c300.307 <- "\300\301\302\303\304\305\306\307"
    c310.377 <- "\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
    v.c001.377 <- unlist(strsplit(c(c001.007, c010.077, c100.107, c110.177, c200.207, c210.277, c300.307, c310.377), ""))
    v.c001.377[bytes]
  }

  # Decode one five-character code "..xyz", where x, y and z are base-62
  # digits (0-9, then a-z, then A-Z). Returns the replacement text.
  NameDecode.OneCode <- function(str, byteToCharacter)
  {
    digits <- unlist(strsplit("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", ""))
    sv <- unlist(strsplit(str, ""))

    # Characters 3..5 are the digits; match() is 1-based, hence the "- 1".
    ib <- match(sv[3:5], digits) - 1
    byte <- sum(ib * c(62 * 62, 62, 1))

    if (byte == 0)
    {
      # The lookup has no entry for 0, so the code text is kept as written.
      replace <- str
    }
    else if (byte < 256)
    {
      replace <- byteToCharacter(byte)
    }
    else if (byte < 65536)
    {
      # Other coded UTF-16 characters are replaced with a "." here.
      # Inside TIBCO Spotfire they are decoded.
      # NOTE(review): confirm "." is the intended placeholder character.
      replace <- "."
    }
    else
    {
      # Value outside the 16-bit range: not a valid code, leave it untouched.
      replace <- str
    }

    replace
  }

  codeLength <- 5

  # The first digit is limited to [0-9a-h]; anything larger could only encode
  # values beyond the 16-bit range.
  codePattern <- "[.][.][0-9a-h][0-9a-zA-Z][0-9a-zA-Z]"

  rightStr <- str    # part of the input that has not been scanned yet
  decodedStr <- ""   # decoded output built so far

  matchIndex <- regexpr(codePattern, rightStr)
  while (matchIndex > 0)
  {
    # Append the literal text before the code, then the decoded code itself.
    decodedStr <- paste(
      decodedStr,
      substring(rightStr, 1, matchIndex - 1),
      NameDecode.OneCode(substring(rightStr, matchIndex, matchIndex + codeLength - 1), byteToCharacter),
      sep = "")

    # Continue scanning right after the code that was just consumed.
    rightStr <- substring(rightStr, matchIndex + codeLength, nchar(rightStr))
    matchIndex <- regexpr(codePattern, rightStr)
  }

  # Whatever is left contains no further codes.
  decodedStr <- paste(decodedStr, rightStr, sep = "")
  decodedStr
}

Below is an example of how this script could be used:

# Example: tabulate every column name of `input` next to its decoded form.
# `input` is not defined in this script and must already exist when this runs
# (NOTE(review): presumably the data table handed to the script -- confirm).
names <- data.frame(
  encoded = names(input),
  decoded = sapply(names(input), FUN = NameDecode)
)