상세 컨텐츠

본문 제목

HTML TAG 제거 함수

프로그래밍/JAVA

by 라제폰 2009. 1. 22. 10:45

본문

/**
 * Utility for stripping HTML markup from a string.
 *
 * <p>This is a lightweight, best-effort tag stripper based on plain string
 * searching, not an HTML parser. It must not be relied on as a security
 * sanitizer for untrusted markup.
 */
public class TagTest
{
    /**
     * Names of the tags removed by {@link #removeTags(String)}, in processing
     * order. The comment pseudo-tag comes first so that markup hidden inside
     * comments is dropped before the individual tags are handled.
     */
    private static final String[] TAGS = {"!--", "!DOCTYPE", "A", "ABBR", "ACRONYM",
                       "ADDRESS", "APPLET", "AREA", "B", "BASE",
                       "BASEFONT", "BDO", "BGSOUND", "BIG", "BLINK",
                       "BLOCKQUOTE", "BODY", "BR", "BUTTON", "CAPTION",
                       "CENTER", "CITE", "CODE", "COL", "COLGROUP",
                       "COMMENT", "DD", "DEL", "DFN", "DIR",
                       "DIV", "DL", "DT", "EM", "EMBED", "FIELDSET",
                       "FONT", "FORM", "FRAME", "FRAMESET",
                       "H", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD",
                       "HR", "HTML", "I", "IFRAME", "ILAYER", "IMG",
                       "INPUT", "INS", "ISINDEX", "KBD", "KEYGEN",
                       "LABEL", "LAYER", "LEGEND", "LH", "LI", "LINK",
                       "LISTING", "MAP", "MARQUEE", "MENU", "META",
                       "MULTICOL", "NEXTID", "NOBR", "NOEMBED", "NOFRAME",
                       "NOLAYER", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
                       "OPTION", "P", "PARAM", "PLAINTEXT", "PRE", "Q",
                       "RB", "RP", "RT", "RUBY", "S", "SAMP", "SCRIPT",
                       "SELECT", "SERVER", "SMALL", "SPACER", "SPAN",
                       "STRIKE", "STRONG", "STYLE", "SUB", "SUP", "TABLE",
                       "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD",
                       "TITLE", "TR", "TT", "U", "UL", "VAR", "WBR", "XMP"};

    /** Pseudo-tag name used for HTML comments ({@code <!-- ... -->}). */
    private static final String COMMENT_TAG = "!--";

    /**
     * Removes every known HTML tag (and HTML comments) from the given text.
     *
     * @param src the original string; may be {@code null}
     * @return the text with the known tags removed, or {@code null} if
     *         {@code src} is {@code null}
     */
    public static String removeTags(String src)
    {
        if (src == null) {
            return null;
        }

        for (int i = 0; i < TAGS.length; i++) {
            src = removeTag(src, TAGS[i]);
        }

        return src;
    }

    /**
     * Removes all occurrences of a single tag from the given text.
     *
     * <p>The tag name is matched case-insensitively and the case of the
     * remaining text is preserved. For {@code SCRIPT}, {@code STYLE} and
     * {@code OBJECT} the enclosed content is dropped along with the tags; for
     * every other tag the enclosed content is kept. An opening tag without a
     * matching closing tag is removed up to its {@code >}; if there is no
     * {@code >} at all, the rest of the text is dropped. Closing tags left
     * without an opener are removed as well. For the comment pseudo-tag
     * {@code !--} everything through the terminating {@code -->} is removed.
     *
     * @param src the original string; may be {@code null}
     * @param tag the tag name without angle brackets, e.g. {@code "DIV"}
     * @return the text with the tag removed, or {@code null} if {@code src}
     *         is {@code null}
     * @throws NullPointerException if {@code tag} is {@code null}
     */
    public static String removeTag(String src, String tag)
    {
        if (src == null) {
            return null;
        }
        if (tag == null) {
            throw new NullPointerException("tag");
        }

        final String open = "<" + tag;
        final String close = "</" + tag + ">";
        final boolean isComment = COMMENT_TAG.equals(tag);
        final boolean dropsContent = tag.equalsIgnoreCase("SCRIPT")
                                  || tag.equalsIgnoreCase("STYLE")
                                  || tag.equalsIgnoreCase("OBJECT");

        // All searching is done directly on the text with case-insensitive
        // region matching. An upper-cased shadow copy is avoided on purpose:
        // String.toUpperCase() is locale-sensitive and may change the string
        // length, which would make indices point at the wrong characters.
        String text = src;
        int op = 0;

        while ((op = indexOfIgnoreCase(text, open, op)) != -1)
        {
            int afterName = op + open.length();

            if (isComment) {
                // Start two characters in so that the abrupt forms "<!-->"
                // and "<!--->" are recognised as complete (empty) comments.
                int end = text.indexOf("-->", op + 2);
                if (end != -1) {
                    text = text.substring(0, op) + text.substring(end + 3);
                } else {
                    // Unterminated comment: cut at the next '>' if there is
                    // one, otherwise drop the remainder of the text.
                    int gt = text.indexOf('>', op);
                    text = (gt == -1)
                         ? text.substring(0, op)
                         : text.substring(0, op) + text.substring(gt + 1);
                }
                continue;
            }

            // "<B" followed by a letter or digit is a different, longer tag
            // name (e.g. "<BR", "<H1"); leave it for its own pass. The bounds
            // check covers input that ends right after the tag name.
            if (afterName < text.length() && isAsciiLetterOrDigit(text.charAt(afterName))) {
                op = afterName + 1;
                continue;
            }

            int cp = indexOfIgnoreCase(text, close, op);
            int tp = text.indexOf('>', op);

            if (cp == -1) {
                text = (tp == -1)
                     ? text.substring(0, op)
                     : text.substring(0, op) + text.substring(tp + 1);
            } else {
                // If the opening tag has no '>' of its own, the first '>' is
                // the one that ends the closing tag (tp > cp); there is no
                // inner content to keep in that case.
                String inner = (dropsContent || tp >= cp) ? "" : text.substring(tp + 1, cp);
                text = text.substring(0, op) + inner + text.substring(cp + close.length());
            }
        }

        if (!isComment) {
            // Remove closing tags that were left without a matching opener.
            int at = 0;
            while ((at = indexOfIgnoreCase(text, close, at)) != -1) {
                text = text.substring(0, at) + text.substring(at + close.length());
            }
        }

        return text;
    }

    /**
     * Returns the index of the first case-insensitive occurrence of
     * {@code needle} in {@code text} at or after {@code from}, or -1.
     */
    private static int indexOfIgnoreCase(String text, String needle, int from)
    {
        int last = text.length() - needle.length();
        for (int i = Math.max(from, 0); i <= last; i++) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Tells whether {@code c}, after per-character upper-casing, is an ASCII
     * digit or letter.
     */
    private static boolean isAsciiLetterOrDigit(char c)
    {
        char u = Character.toUpperCase(c);
        return (u >= '0' && u <= '9') || (u >= 'A' && u <= 'Z');
    }

    /**
     * Command-line entry point: prints the first argument and its
     * tag-stripped form.
     *
     * @param args command-line arguments; {@code args[0]} is the HTML text
     */
    public static void main(String[] args){
        if (args.length < 1) {
            System.err.println("Usage: java TagTest <html-string>");
            return;
        }

        System.out.println("[" + args[0] + "] ==> [" + removeTags(args[0]) + "]");
    }
}

관련글 더보기