123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302 |
- --- file-5.05/src/Makefile.am.vinejtext 2010-07-22 00:56:10.000000000 +0900
- +++ file-5.05/src/Makefile.am 2011-02-11 16:53:06.000000000 +0900
- @@ -4,11 +4,11 @@
-
- bin_PROGRAMS = file
-
- -AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
- +AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"' -DDETECT_JAPANESE
- AM_CFLAGS = @WARNINGS@
-
- libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
- - encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
- + encoding.c compress.c is_tar.c readelf.c print.c jcode.c fsmagic.c \
- funcs.c file.h names.h patchlevel.h readelf.h tar.h apptype.c \
- file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
- libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
- --- file-5.05/src/encoding.c.vinejtext 2010-07-22 01:47:17.000000000 +0900
- +++ file-5.05/src/encoding.c 2011-02-11 17:26:00.000000000 +0900
- @@ -42,7 +42,7 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.5
- #include <string.h>
- #include <memory.h>
- #include <stdlib.h>
- -
- +#include "jcode.h"
-
- private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
- private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
- @@ -68,7 +68,7 @@ protected int
- file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type)
- {
- size_t mlen;
- - int rv = 1, ucs_type;
- + int rv = 1, ucs_type, jcode;
- unsigned char *nbuf = NULL;
-
- mlen = (nbytes + 1) * sizeof(nbuf[0]);
- @@ -83,10 +83,27 @@ file_encoding(struct magic_set *ms, cons
- }
-
- *type = "text";
- - if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
- + jcode = detect_kcode(buf, nbytes, *ubuf, ulen);
- + if (jcode == ASCII) {
- DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
- *code = "ASCII";
- *code_mime = "us-ascii";
- + } else if (jcode == JIS) {
- + DPRINTF(("jis %" SIZE_T_FORMAT "u\n", *ulen));
- + *code = "7-bit JIS [ESC$B, ESC(B]";
- + *code_mime = "jis";
- + } else if (jcode == SJIS){
- + DPRINTF(("sjis %" SIZE_T_FORMAT "u\n", *ulen));
- + *code = "SJIS";
- + *code_mime = "sjis";
- + } else if (jcode == EUC){
- + DPRINTF(("euc %" SIZE_T_FORMAT "u\n", *ulen));
- + *code = "EUC";
- + *code_mime = "euc-jp";
- + } else if (jcode == EUCORSJIS){
- + DPRINTF(("euc or sjis %" SIZE_T_FORMAT "u\n", *ulen));
- + *code = "EUC or SJIS";
- + *code_mime = "unknown";
- } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
- DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
- *code = "UTF-8 Unicode (with BOM)";
- --- /dev/null 2011-02-06 21:11:58.373999997 +0900
- +++ file-5.05/src/jcode.c 2011-02-11 17:14:29.000000000 +0900
- @@ -0,0 +1,205 @@
- +/*
- +jcode.c: Kanji-code detection routines by Jun Nishii <jun@vinelinux.org>
- + modified by Ryoichi INAGAKI <inagaki@vinelinux.org>
- + */
- +#include <stdio.h>
- +#include <unistd.h>
- +#include <signal.h>
- +#include <sys/types.h>
- +#include <sys/wait.h>
- +
- +typedef unsigned long unichar;
- +#include "jcode.h"  /* after unichar: the prototypes in it use that type */
- +
- +#define F 0 /* character never appears in text */
- +#define T 1 /* character appears in plain ASCII text */
- +#define I 2 /* character appears in ISO-8859 text (no check in this file accepts it) */
- +#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC); never accepted here */
- +#define J 4 /* character appears in JIS or plain ASCII */
- +#define S 5 /* character appears in SJIS */
- +#define E 6 /* character appears in EUC */
- +#define O 7 /* character appears in EUC or SJIS */
- +
- +#define ESC 27 /* ASCII escape; check_asc_jis looks for '$' 'B' or '$' '@' after it */
- +
- +static const char jp_chars1[256] = {  /* class of a single byte or of the first byte of a pair */
- +    F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
- +    F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
- +    T, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x2X */
- +    J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x3X */
- +    J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x4X */
- +    J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x5X */
- +    J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x6X */
- +    J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, F, /* 0x7X */
- +    /* high half: S (check_sjis), E (check_euc) and O (both) begin a pair; X and I are rejected */
- +    X, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x8X */
- +    S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x9X */
- +    I, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xaX */
- +    E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xbX */
- +    E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xcX */
- +    E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xdX */
- +    O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xeX */
- +    E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, I /* 0xfX */
- +};
- +
- +static const char jp_chars2[256] = {  /* class of the second byte of a pair */
- +    /* BEL BS HT LF FF CR */
- +    F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
- +    /* ESC */
- +    F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
- +    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
- +    T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
- +    S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x4X */
- +    S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x5X */
- +    S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x6X */
- +    S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, F, /* 0x7X */
- +    /* high half: S is accepted only by check_sjis, E only by check_euc, O by both */
- +    S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x8X */
- +    S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x9X */
- +    S, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xaX */
- +    O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xbX */
- +    O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xcX */
- +    O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xdX */
- +    O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xeX */
- +    O, O, O, O, O, O, O, O, O, O, O, O, O, E, E, I /* 0xfX */
- +};
- +
- +
- +/*
- + * check_asc_jis: accept buf[0..nbytes) only if jp_chars1 classes every byte
- + * as T or J.  Each accepted byte is copied to ubuf and counted in *ulen.
- + * Returns 0 at the first byte of any other class; otherwise JIS if an
- + * "ESC $ B" or "ESC $ @" sequence was seen, else ASCII.
- + */
- +int
- +check_asc_jis(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- +    size_t *ulen)
- +{
- +    size_t i;
- +    int jflag = 0;
- +
- +    *ulen = 0;
- +    for (i = 0; i < nbytes; i++) {
- +        int t = jp_chars1[buf[i]];
- +
- +        if (t != T && t != J)
- +            return 0;
- +
- +        /* The sequence is three bytes long; test it only if all are present. */
- +        if (buf[i] == ESC && i + 2 < nbytes && buf[i + 1] == '$' &&
- +            (buf[i + 2] == 'B' || buf[i + 2] == '@'))
- +            jflag = 1;
- +
- +        ubuf[(*ulen)++] = buf[i];
- +    }
- +    return jflag ? JIS : ASCII;
- +}
- +
- +/*
- + * check_sjis: test whether buf[0..nbytes) fits the SJIS byte classes.
- + * First bytes are classed with jp_chars1 and second bytes with jp_chars2;
- + * every accepted byte is copied to ubuf and counted in *ulen.
- + * Returns SJIS if some pair contained an S-class byte, EUCORSJIS if only
- + * O/O pairs were seen, and 0 if a byte is rejected or no pair was found.
- + */
- +int
- +check_sjis(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- +    size_t *ulen)
- +{
- +    size_t i;
- +    int jflag = ASCII;  /* ASCII here means "no two-byte pair seen yet" */
- +
- +    *ulen = 0;
- +    for (i = 0; i < nbytes; i++) {
- +        int t = jp_chars1[buf[i]];
- +
- +        if (t != T && t != J && t != S && t != O)
- +            return 0;
- +        /* A first byte in the last position has no second byte to test. */
- +        if (t == S && i + 1 < nbytes) {
- +            ubuf[(*ulen)++] = buf[i];
- +            t = jp_chars2[buf[++i]];
- +            if (t != S && t != O)
- +                return 0;
- +            jflag = SJIS;
- +        } else if (t == O && i + 1 < nbytes) {
- +            ubuf[(*ulen)++] = buf[i];
- +            t = jp_chars2[buf[++i]];
- +            if (t == S)
- +                jflag = SJIS;
- +            else if (t != O)
- +                return 0;
- +            else if (jflag == ASCII)
- +                jflag = EUCORSJIS;
- +        }
- +
- +        ubuf[(*ulen)++] = buf[i];
- +    }
- +    /* Always return a value: with no pair seen, report "no match" (0). */
- +    return (jflag == SJIS || jflag == EUCORSJIS) ? jflag : 0;
- +}
- +
- +/*
- + * check_euc: test whether buf[0..nbytes) fits the EUC byte classes.
- + * First bytes are classed with jp_chars1 and second bytes with jp_chars2;
- + * every accepted byte is copied to ubuf and counted in *ulen.
- + * Returns EUC if some pair contained an E-class byte, EUCORSJIS if only
- + * O/O pairs were seen, and 0 if a byte is rejected or no pair was found.
- + */
- +int
- +check_euc(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- +    size_t *ulen)
- +{
- +    size_t i;
- +    int jflag = ASCII;  /* ASCII here means "no two-byte pair seen yet" */
- +
- +    *ulen = 0;
- +    for (i = 0; i < nbytes; i++) {
- +        int t = jp_chars1[buf[i]];
- +
- +        if (t != T && t != J && t != E && t != O)
- +            return 0;
- +
- +        /* A first byte in the last position has no second byte to test. */
- +        if (t == E && i + 1 < nbytes) {
- +            ubuf[(*ulen)++] = buf[i];
- +            t = jp_chars2[buf[++i]];
- +            if (t != E && t != O)
- +                return 0;
- +            jflag = EUC;
- +        } else if (t == O && i + 1 < nbytes) {
- +            ubuf[(*ulen)++] = buf[i];
- +            t = jp_chars2[buf[++i]];
- +            if (t == E)
- +                jflag = EUC;
- +            else if (t != O)
- +                return 0;
- +            else if (jflag == ASCII)
- +                jflag = EUCORSJIS;
- +        }
- +
- +        ubuf[(*ulen)++] = buf[i];
- +    }
- +    /* Always return a value: with no pair seen, report "no match" (0). */
- +    return (jflag == EUC || jflag == EUCORSJIS) ? jflag : 0;
- +}
- +
- +/*
- + * detect_kcode: run check_asc_jis, check_sjis and check_euc in that order
- + * and return the first verdict among ASCII, JIS, SJIS, EUC and EUCORSJIS,
- + * or 0 if none applies.  ubuf and *ulen hold what the last check run wrote.
- + */
- +int
- +detect_kcode(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- +    size_t *ulen)
- +{
- +    int ret;
- +    ret = check_asc_jis(buf, nbytes, ubuf, ulen);
- +    if (ret == ASCII || ret == JIS)
- +        return ret;
- +    ret = check_sjis(buf, nbytes, ubuf, ulen);
- +    if (ret == SJIS || ret == EUCORSJIS)
- +        return ret;
- +    ret = check_euc(buf, nbytes, ubuf, ulen);
- +    return (ret == EUC || ret == EUCORSJIS) ? ret : 0;
- +}
- --- /dev/null 2011-02-06 21:11:58.373999997 +0900
- +++ file-5.05/src/jcode.h 2011-02-11 17:12:11.000000000 +0900
- @@ -0,0 +1,15 @@
- +/*
- + jcode.h - for jcode.c by Jun Nishii <jun@vinelinux.org>
- + modified by Ryoichi INAGAKI <inagaki@vinelinux.org>
- + Include only after size_t and unichar have been declared.
- + */
- +#define ASCII 1 /* verdict codes returned by the functions below; 0 is not one of them */
- +#define JIS 2
- +#define EUC 3
- +#define SJIS 4
- +#define EUCORSJIS 5
- +
- +extern int detect_kcode (const unsigned char *, size_t, unichar *, size_t *);
- +extern int check_asc_jis (const unsigned char *, size_t, unichar *, size_t *);
- +extern int check_sjis (const unsigned char *, size_t, unichar *, size_t *);
- +extern int check_euc (const unsigned char *, size_t, unichar *, size_t *);
- --- file-5.05/src/names.h.vinejtext 2010-10-09 06:58:44.000000000 +0900
- +++ file-5.05/src/names.h 2011-02-11 17:28:18.000000000 +0900
- @@ -135,8 +135,6 @@
- {"/*", L_C, 2 }, /* must precede "The", "the", etc. */
- {"#include", L_C, 2 },
- {"char", L_C, 2 },
- - {"The", L_ENG, 2 },
- - {"the", L_ENG, 2 },
- {"double", L_C, 1 },
- {"extern", L_C, 2 },
- {"float", L_C, 1 },
|