如果想让粘贴时也能自动抓取图片,就需要改造百度编辑器(UEditor)。我个人觉得这样不太好:一旦修改了源码,编辑器以后就不太好升级了。因为有这个瑕疵,所以放弃了这个想法,打算换一个解决方案:在用户提交的时候,由后台主动去处理文本中的图片内容。
1、用户写好内容,提交到后台之后,就是一堆html标签,而且是字符串形式的。
2、写好我们的抓取类,如下:
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp1.comm
{
/// <summary>
/// Re-hosts the images referenced by an HTML fragment: every <c>img</c> whose <c>src</c> is not
/// already on the Qiniu domain is downloaded, posted to the Qiniu upload site, and its URL is
/// replaced in the returned HTML.
/// </summary>
public class CrawlerHelper
{
    /// <summary>Substring that marks an image URL as already re-hosted.</summary>
    private const string ProcessedHostMarker = ".qiniu.com";

    /// <summary>Public domain configured for files stored on Qiniu.</summary>
    private const string FileDomain = "http://www.qiniu.com/";

    /// <summary>Endpoint of the self-hosted Qiniu upload site.</summary>
    private const string UploadUrl = "http://www.qiniu.com/upload";

    /// <summary>Crop width sent to the upload site; "0" sends no size parameter.</summary>
    private const string CropWidth = "0";

    /// <summary>Crop height sent to the upload site.</summary>
    private const string CropHeight = "0";

    /// <summary>
    /// Downloads and re-hosts every external image referenced by an <c>img</c> tag in
    /// <paramref name="html"/> and returns the HTML with the image URLs rewritten.
    /// </summary>
    /// <param name="html">The HTML fragment submitted by the user.</param>
    /// <returns>
    /// The HTML with each successfully re-hosted image URL replaced. Images that could not be
    /// processed keep their original URL. A null or blank input is returned unchanged.
    /// </returns>
    public static async Task<string> CrawlerAsync(string html)
    {
        if (string.IsNullOrWhiteSpace(html))
        {
            return html;
        }

        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(html);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var nodes = doc.DocumentNode.SelectNodes("//img");
        if (nodes == null)
        {
            return html;
        }

        // De-duplicate up front on the calling thread so no state is shared between workers.
        var seen = new HashSet<string>(StringComparer.Ordinal);
        var tasks = new List<Task<CrawImgView>>();
        foreach (var node in nodes)
        {
            var srcAttribute = node.Attributes["src"];
            string source = srcAttribute == null ? null : srcAttribute.Value;

            // An empty source must be skipped: string.Replace throws on an empty search value.
            if (string.IsNullOrWhiteSpace(source))
            {
                continue;
            }

            // Skip images that were already re-hosted, and URLs already scheduled.
            if (source.IndexOf(ProcessedHostMarker, StringComparison.OrdinalIgnoreCase) >= 0 || !seen.Add(source))
            {
                continue;
            }

            // Each worker returns its own result instead of appending to a shared list.
            tasks.Add(Task.Run(() => new CrawImgView { Original = source, ImgPath = GetImg(source) }));
        }

        if (tasks.Count == 0)
        {
            return html;
        }

        // ConfigureAwait(false): no synchronization context is needed here, and it keeps callers
        // that block on the returned task from deadlocking.
        CrawImgView[] results = await Task.WhenAll(tasks).ConfigureAwait(false);

        // Replace longer URLs first so a URL that is a prefix of another one cannot corrupt it.
        Array.Sort(results, (x, y) => y.Original.Length.CompareTo(x.Original.Length));
        foreach (CrawImgView item in results)
        {
            if (!string.IsNullOrEmpty(item.ImgPath) && !string.Equals(item.Original, item.ImgPath, StringComparison.Ordinal))
            {
                html = html.Replace(item.Original, item.ImgPath);
            }
        }

        return html;
    }

    /// <summary>
    /// Downloads one image and uploads it to Qiniu.
    /// </summary>
    /// <param name="original">The image URL exactly as it appears in the HTML.</param>
    /// <returns>
    /// The new Qiniu URL on success; otherwise <paramref name="original"/> unchanged, so a failed
    /// image leaves the HTML untouched. This method is best-effort and does not throw.
    /// </returns>
    private static string GetImg(string original)
    {
        if (string.IsNullOrWhiteSpace(original))
        {
            return original;
        }

        // The query string is dropped for the download only; the caller still gets back the
        // untouched URL on failure.
        int queryIndex = original.IndexOf('?');
        string downloadUrl = queryIndex > -1 ? original.Substring(0, queryIndex) : original;

        try
        {
            // Create() yields a non-HTTP request type for schemes such as file:// or ftp://.
            var request = System.Net.WebRequest.Create(downloadUrl) as System.Net.HttpWebRequest;
            if (request == null)
            {
                return original;
            }

            byte[] bytes;
            using (var response = (System.Net.HttpWebResponse)request.GetResponse())
            {
                if (response.StatusCode != System.Net.HttpStatusCode.OK)
                {
                    return original;
                }

                string contentType = response.ContentType;
                if (contentType == null || contentType.IndexOf("image", StringComparison.OrdinalIgnoreCase) == -1)
                {
                    return original;
                }

                using (var stream = response.GetResponseStream())
                using (var buffer = new MemoryStream())
                {
                    stream.CopyTo(buffer);
                    bytes = buffer.ToArray();
                }
            }

            // Derive the extension from the URL path only, so the host name
            // ("http://example.com") is never mistaken for a file extension.
            string fileDoc = Path.GetExtension(request.RequestUri.AbsolutePath);
            if (string.IsNullOrWhiteSpace(fileDoc))
            {
                fileDoc = ".jpg";
            }

            FileUploadResult result = QiniuUpload(fileDoc, bytes, CropWidth, CropHeight);
            if (result != null && result.IsSucceed)
            {
                return FileDomain + result.FileUrl;
            }
        }
        catch (Exception ex)
        {
            // Best-effort: one broken image must not fail the whole document. Leave a trace
            // instead of swallowing the error silently.
            System.Diagnostics.Trace.TraceWarning("CrawlerHelper: failed to re-host image '{0}': {1}", original, ex.Message);
        }

        return original;
    }

    /// <summary>
    /// Posts the image bytes to the Qiniu upload site and deserializes its response.
    /// </summary>
    /// <param name="fileDoc">The file extension, including the leading dot.</param>
    /// <param name="byteFile">The raw image bytes.</param>
    /// <param name="uEditorImgSize">Crop width; a size parameter is sent only when this is longer than one character.</param>
    /// <param name="uEditorheightImgSize">Crop height, used together with <paramref name="uEditorImgSize"/>.</param>
    /// <returns>The upload result deserialized from the UTF-8 response body.</returns>
    private static FileUploadResult QiniuUpload(string fileDoc, byte[] byteFile, string uEditorImgSize, string uEditorheightImgSize)
    {
        // NOTE(review): the "Length > 1" test also ignores single-digit sizes; presumably it is
        // only meant to exclude "0" - confirm against the upload site.
        string sizeQuery = uEditorImgSize != null && uEditorImgSize.Length > 1
            ? "&size=" + uEditorImgSize + "x" + uEditorheightImgSize
            : "";

        using (var client = new System.Net.WebClient())
        {
            byte[] upload = client.UploadData($"{UploadUrl}?type={Uri.EscapeDataString(fileDoc)}{sizeQuery}", "POST", byteFile);
            return Encoding.UTF8.GetString(upload).DeserializeObject<FileUploadResult>();
        }
    }
}
/// <summary>
/// Pairs the URL of an image found in the HTML with the URL that replaces it.
/// </summary>
public class CrawImgView
{
    /// <summary>Gets or sets the image URL as it appears in the source HTML.</summary>
    public string Original { get; set; }

    /// <summary>Gets or sets the URL that is written in place of <see cref="Original"/>.</summary>
    public string ImgPath { get; set; }
}
}
3、封装好之后,可以开始调用了:
// `content` is the HTML string submitted by the user and must already be declared; the
// original line declared it and passed it as the argument in the same statement, which
// does not compile. Call this from an async method: awaiting instead of blocking on
// .Result avoids deadlocks under a synchronization context.
content = await CrawlerHelper.CrawlerAsync(content); // processing finished
4、用处理完成的content内容写入数据库即可。
5、注意项:
a、如果有很多或者很大的图片,可能这个过程会有点慢,尽管是异步下载图片。
b、如果内容中有背景图,不会处理的,没时间写,有需要可以改造我的方法,目前就获取了img标签的src属性。
川公网安备 51010702003150号
留下您的脚步
最近评论