123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- package com.shawn.util;
- import lombok.extern.slf4j.Slf4j;
- import org.elasticsearch.search.suggest.completion.RegexOptions;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
/**
 * Regex-based helpers for reducing an HTML fragment to plain text and for
 * locating the first image it references.
 *
 * <p>The methods work on the raw markup with regular expressions rather than
 * an HTML parser, so they are intended for simple, reasonably well-formed
 * fragments (for example, building a summary or thumbnail for an article).
 * All patterns are compiled once when the class is loaded.
 */
public class DelHtmlUtil {

    /** A whole {@code <script>} element, including its content. */
    private static final Pattern SCRIPT_BLOCK =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?</script>", Pattern.CASE_INSENSITIVE);

    /** A whole {@code <style>} element, including its content. */
    private static final Pattern STYLE_BLOCK =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?</style>", Pattern.CASE_INSENSITIVE);

    /** A whole {@code <pre>} element, including its content. */
    private static final Pattern PRE_BLOCK =
            Pattern.compile("<pre[^>]*?>[\\s\\S]*?</pre>", Pattern.CASE_INSENSITIVE);

    /** Any single opening, closing or self-closing tag. */
    private static final Pattern ANY_TAG =
            Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    /**
     * Removal order matters: elements whose content must disappear together
     * with the tag are removed first, then the generic tag pattern strips the
     * remaining markup while keeping the text between tags.
     */
    private static final Pattern[] STRIP_ORDER = {SCRIPT_BLOCK, STYLE_BLOCK, PRE_BLOCK, ANY_TAG};

    /**
     * The {@code src} attribute of an {@code <img>} tag.
     *
     * <p>{@code [^>]*?} keeps the match inside a single tag (and, unlike
     * {@code .}, also spans line breaks). The attribute name must be preceded
     * by whitespace so that {@code data-src} and similar names are not
     * mistaken for {@code src}. Exactly one of the three capture groups is
     * set: double-quoted, single-quoted or unquoted value.
     */
    private static final Pattern IMG_SRC = Pattern.compile(
            "<img\\b[^>]*?\\ssrc\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
            Pattern.CASE_INSENSITIVE);

    /**
     * Strips HTML markup from a fragment.
     *
     * <p>{@code <script>}, {@code <style>} and {@code <pre>} elements are
     * removed together with their content; every other tag is removed but the
     * text around it is kept. Leading and trailing whitespace of the result is
     * trimmed; whitespace inside the text is left as is.
     *
     * @param htmlStr the HTML fragment; {@code null} is treated as empty
     * @return the fragment without tags, never {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            return "";
        }
        String text = htmlStr;
        for (Pattern pattern : STRIP_ORDER) {
            text = pattern.matcher(text).replaceAll("");
        }
        return text.trim();
    }

    /**
     * Returns the {@code src} of the first {@code <img>} tag that has one.
     *
     * <p>Tag and attribute names are matched case-insensitively. The value may
     * be double-quoted, single-quoted or unquoted; surrounding quotes are not
     * part of the result. {@code <img>} tags without a {@code src} attribute
     * are skipped.
     *
     * @param htmlStr the HTML fragment; {@code null} is treated as empty
     * @return the attribute value, or an empty string when no image source is found
     */
    public static String getFirstImgSrc(String htmlStr) {
        if (htmlStr == null) {
            return "";
        }
        Matcher tag = IMG_SRC.matcher(htmlStr);
        if (!tag.find()) {
            return "";
        }
        // Only the group belonging to the quoting style that matched is non-null.
        for (int group = 1; group <= tag.groupCount(); group++) {
            String value = tag.group(group);
            if (value != null) {
                return value;
            }
        }
        return "";
    }

    /**
     * Strips the markup via {@link #delHTMLTag(String)} and then removes every
     * space character from the remaining text.
     *
     * @param htmlStr the HTML fragment; {@code null} is treated as empty
     * @return the text without tags and without spaces, never {@code null}
     */
    public static String getTextFromHtml(String htmlStr) {
        // NOTE(review): this removes ordinary spaces, including those between
        // words. The literal may originally have been the "&nbsp;" entity that
        // was decoded when this file was extracted - confirm against upstream.
        return delHTMLTag(htmlStr).replace(" ", "");
    }
}
|