feat:1、增加文档和文章的危险标签和属性过滤

2、移除文档的客户端缓存
This commit is contained in:
lifei6671
2018-09-13 18:19:26 +08:00
parent 91d010664b
commit 732b1db281
8 changed files with 282 additions and 256 deletions

View File

@@ -10,11 +10,12 @@ import (
"github.com/astaxie/beego/orm"
"github.com/lifei6671/mindoc/cache"
"github.com/lifei6671/mindoc/conf"
"github.com/PuerkitoBio/goquery"
"strings"
"bytes"
"os"
"path/filepath"
"github.com/PuerkitoBio/goquery"
"bytes"
"strings"
"github.com/lifei6671/mindoc/utils"
)
// Document struct.
@@ -31,36 +32,36 @@ type Document struct {
// Release 发布后的Html格式内容.
Release string `orm:"column(release);type(text);null" json:"release"`
// Content 未发布的 Html 格式内容.
Content string `orm:"column(content);type(text);null" json:"content"`
CreateTime time.Time `orm:"column(create_time);type(datetime);auto_now_add" json:"create_time"`
MemberId int `orm:"column(member_id);type(int)" json:"member_id"`
ModifyTime time.Time `orm:"column(modify_time);type(datetime);auto_now" json:"modify_time"`
ModifyAt int `orm:"column(modify_at);type(int)" json:"-"`
Version int64 `orm:"column(version);type(bigint);" json:"version"`
Content string `orm:"column(content);type(text);null" json:"content"`
CreateTime time.Time `orm:"column(create_time);type(datetime);auto_now_add" json:"create_time"`
MemberId int `orm:"column(member_id);type(int)" json:"member_id"`
ModifyTime time.Time `orm:"column(modify_time);type(datetime);auto_now" json:"modify_time"`
ModifyAt int `orm:"column(modify_at);type(int)" json:"-"`
Version int64 `orm:"column(version);type(bigint);" json:"version"`
//是否展开子目录0 否/1 是
IsOpen int `orm:"column(is_open);type(int);default(0)" json:"is_open"`
IsOpen int `orm:"column(is_open);type(int);default(0)" json:"is_open"`
AttachList []*Attachment `orm:"-" json:"attach"`
}
// 多字段唯一键
func (m *Document) TableUnique() [][]string {
func (item *Document) TableUnique() [][]string {
return [][]string{
[]string{"book_id", "identify"},
}
}
// TableName 获取对应数据库表名.
func (m *Document) TableName() string {
func (item *Document) TableName() string {
return "documents"
}
// TableEngine 获取数据使用的引擎.
func (m *Document) TableEngine() string {
func (item *Document) TableEngine() string {
return "INNODB"
}
func (m *Document) TableNameWithPrefix() string {
return conf.GetDatabasePrefix() + m.TableName()
func (item *Document) TableNameWithPrefix() string {
return conf.GetDatabasePrefix() + item.TableName()
}
func NewDocument() *Document {
@@ -70,45 +71,45 @@ func NewDocument() *Document {
}
//根据文档ID查询指定文档.
func (m *Document) Find(id int) (*Document, error) {
func (item *Document) Find(id int) (*Document, error) {
if id <= 0 {
return m, ErrInvalidParameter
return item, ErrInvalidParameter
}
o := orm.NewOrm()
err := o.QueryTable(m.TableNameWithPrefix()).Filter("document_id", id).One(m)
err := o.QueryTable(item.TableNameWithPrefix()).Filter("document_id", id).One(item)
if err == orm.ErrNoRows {
return m, ErrDataNotExist
return item, ErrDataNotExist
}
return m, nil
return item, nil
}
//插入和更新文档.
func (m *Document) InsertOrUpdate(cols ...string) error {
func (item *Document) InsertOrUpdate(cols ...string) error {
o := orm.NewOrm()
var err error
if m.DocumentId > 0 {
_, err = o.Update(m, cols...)
if item.DocumentId > 0 {
_, err = o.Update(item, cols...)
} else {
if m.Identify == "" {
if item.Identify == "" {
book := NewBook()
identify := "docs"
if err := o.QueryTable(book.TableNameWithPrefix()).Filter("book_id", m.BookId).One(book, "identify"); err == nil {
if err := o.QueryTable(book.TableNameWithPrefix()).Filter("book_id", item.BookId).One(book, "identify"); err == nil {
identify = book.Identify
}
m.Identify = fmt.Sprintf("%s-%s", identify, strconv.FormatInt(time.Now().UnixNano(), 32))
item.Identify = fmt.Sprintf("%s-%s", identify, strconv.FormatInt(time.Now().UnixNano(), 32))
}
if m.OrderSort == 0 {
sort, _ := o.QueryTable(m.TableNameWithPrefix()).Filter("book_id", m.BookId).Filter("parent_id", m.ParentId).Count()
m.OrderSort = int(sort) + 1
if item.OrderSort == 0 {
sort, _ := o.QueryTable(item.TableNameWithPrefix()).Filter("book_id", item.BookId).Filter("parent_id", item.ParentId).Count()
item.OrderSort = int(sort) + 1
}
_, err = o.Insert(m)
NewBook().ResetDocumentNumber(m.BookId)
_, err = o.Insert(item)
NewBook().ResetDocumentNumber(item.BookId)
}
if err != nil {
return err
@@ -118,36 +119,36 @@ func (m *Document) InsertOrUpdate(cols ...string) error {
}
//根据文档识别编号和项目id获取一篇文档
func (m *Document) FindByIdentityFirst(identify string, bookId int) (*Document, error) {
func (item *Document) FindByIdentityFirst(identify string, bookId int) (*Document, error) {
o := orm.NewOrm()
err := o.QueryTable(m.TableNameWithPrefix()).Filter("book_id", bookId).Filter("identify", identify).One(m)
err := o.QueryTable(item.TableNameWithPrefix()).Filter("book_id", bookId).Filter("identify", identify).One(item)
return m, err
return item, err
}
//递归删除一个文档.
func (m *Document) RecursiveDocument(docId int) error {
func (item *Document) RecursiveDocument(docId int) error {
o := orm.NewOrm()
if doc, err := m.Find(docId); err == nil {
if doc, err := item.Find(docId); err == nil {
o.Delete(doc)
NewDocumentHistory().Clear(doc.DocumentId)
}
var maps []orm.Params
_, err := o.Raw("SELECT document_id FROM " + m.TableNameWithPrefix() + " WHERE parent_id=" + strconv.Itoa(docId)).Values(&maps)
_, err := o.Raw("SELECT document_id FROM " + item.TableNameWithPrefix() + " WHERE parent_id=" + strconv.Itoa(docId)).Values(&maps)
if err != nil {
beego.Error("RecursiveDocument => ", err)
return err
}
for _, item := range maps {
if docId, ok := item["document_id"].(string); ok {
for _, param := range maps {
if docId, ok := param["document_id"].(string); ok {
id, _ := strconv.Atoi(docId)
o.QueryTable(m.TableNameWithPrefix()).Filter("document_id", id).Delete()
m.RecursiveDocument(id)
o.QueryTable(item.TableNameWithPrefix()).Filter("document_id", id).Delete()
item.RecursiveDocument(id)
}
}
@@ -155,7 +156,7 @@ func (m *Document) RecursiveDocument(docId int) error {
}
//将文档写入缓存
func (m *Document) PutToCache() {
func (item *Document) PutToCache() {
go func(m Document) {
if m.Identify == "" {
@@ -169,166 +170,170 @@ func (m *Document) PutToCache() {
}
}
}(*m)
}(*item)
}
//清除缓存
func (m *Document) RemoveCache() {
func (item *Document) RemoveCache() {
go func(m Document) {
cache.Put("Document.Id."+strconv.Itoa(m.DocumentId), m, time.Second*3600)
if m.Identify != "" {
cache.Put(fmt.Sprintf("Document.BookId.%d.Identify.%s", m.BookId, m.Identify), m, time.Second*3600)
}
}(*m)
}(*item)
}
//从缓存获取
func (m *Document) FromCacheById(id int) (*Document, error) {
func (item *Document) FromCacheById(id int) (*Document, error) {
if err := cache.Get("Document.Id."+strconv.Itoa(id), &m); err == nil && m.DocumentId > 0 {
beego.Info("从缓存中获取文档信息成功 ->", m.DocumentId)
return m, nil
if err := cache.Get("Document.Id."+strconv.Itoa(id), &item); err == nil && item.DocumentId > 0 {
beego.Info("从缓存中获取文档信息成功 ->", item.DocumentId)
return item, nil
}
if m.DocumentId > 0 {
m.PutToCache()
if item.DocumentId > 0 {
item.PutToCache()
}
m, err := m.Find(id)
item, err := item.Find(id)
if err == nil {
m.PutToCache()
item.PutToCache()
}
return m, err
return item, err
}
//根据文档标识从缓存中查询文档
func (m *Document) FromCacheByIdentify(identify string, bookId int) (*Document, error) {
func (item *Document) FromCacheByIdentify(identify string, bookId int) (*Document, error) {
key := fmt.Sprintf("Document.BookId.%d.Identify.%s", bookId, identify)
if err := cache.Get(key, m); err == nil && m.DocumentId > 0 {
if err := cache.Get(key, item); err == nil && item.DocumentId > 0 {
beego.Info("从缓存中获取文档信息成功 ->", key)
return m, nil
return item, nil
}
defer func() {
if m.DocumentId > 0 {
m.PutToCache()
if item.DocumentId > 0 {
item.PutToCache()
}
}()
return m.FindByIdentityFirst(identify, bookId)
return item.FindByIdentityFirst(identify, bookId)
}
//根据项目ID查询文档列表.
func (m *Document) FindListByBookId(bookId int) (docs []*Document, err error) {
func (item *Document) FindListByBookId(bookId int) (docs []*Document, err error) {
o := orm.NewOrm()
_, err = o.QueryTable(m.TableNameWithPrefix()).Filter("book_id", bookId).OrderBy("order_sort").All(&docs)
_, err = o.QueryTable(item.TableNameWithPrefix()).Filter("book_id", bookId).OrderBy("order_sort").All(&docs)
return
}
//判断文章是否存在
func (m *Document) IsExist(documentId int) bool {
func (item *Document) IsExist(documentId int) bool {
o := orm.NewOrm()
return o.QueryTable(m.TableNameWithPrefix()).Filter("document_id", documentId).Exist()
return o.QueryTable(item.TableNameWithPrefix()).Filter("document_id", documentId).Exist()
}
//发布单篇文档
func (item *Document) ReleaseContent() error {
o := orm.NewOrm()
item.Release = strings.TrimSpace(item.Content)
bookId := item.BookId
err := item.Processor().InsertOrUpdate("release")
var docQuery *goquery.Document
var err error
if err != nil {
beego.Error(fmt.Sprintf("发布失败 -> %+v", item), err)
return err
}
//当文档发布后,需要清除已缓存的转换文档和文档缓存
item.RemoveCache()
if item.Content != "" {
item.Release = item.Content
bufio := bytes.NewReader([]byte(item.Content))
//解析文档中非本站的链接,并设置为新窗口打开
if docQuery, err = goquery.NewDocumentFromReader(bufio); err == nil {
docQuery.Find("a").Each(func(i int, contentSelection *goquery.Selection) {
if src, ok := contentSelection.Attr("href"); ok {
if strings.HasPrefix(src, "http://") || strings.HasPrefix(src, "https://") {
if conf.BaseUrl != "" && !strings.HasPrefix(src, conf.BaseUrl) {
contentSelection.SetAttr("target", "_blank")
if err := os.RemoveAll(filepath.Join(conf.WorkingDirectory, "uploads", "books", strconv.Itoa(item.BookId))); err != nil {
beego.Error("删除已缓存的文档目录失败 -> ", filepath.Join(conf.WorkingDirectory, "uploads", "books", strconv.Itoa(item.BookId)))
return err
}
return nil
}
//处理文档的外链,附件,底部编辑信息等.
func (item *Document) Processor() *Document {
if item.Release != "" {
item.Release = utils.SafetyProcessor(item.Release)
//安全过滤,移除危险标签和属性
if docQuery, err := goquery.NewDocumentFromReader(bytes.NewBufferString(item.Release)); err == nil {
//处理附件
if selector := docQuery.Find("div.attach-list").First(); selector.Size() <= 0 {
//处理附件
attachList, err := NewAttachment().FindListByDocumentId(item.DocumentId)
if err == nil && len(attachList) > 0 {
content := bytes.NewBufferString("<div class=\"attach-list\"><strong>附件</strong><ul>")
for _, attach := range attachList {
if strings.HasPrefix(attach.HttpPath, "/") {
attach.HttpPath = strings.TrimSuffix(conf.BaseUrl, "/") + attach.HttpPath
}
li := fmt.Sprintf("<li><a href=\"%s\" target=\"_blank\" title=\"%s\">%s</a></li>", attach.HttpPath, attach.FileName, attach.FileName)
content.WriteString(li)
}
content.WriteString("</ul></div>")
if docQuery == nil {
docQuery, err = goquery.NewDocumentFromReader(content);
} else {
if selector := docQuery.Find("div.wiki-bottom").First(); selector.Size() > 0 {
selector.BeforeHtml(content.String())
} else if selector := docQuery.Find("div.markdown-article").First(); selector.Size() > 0 {
selector.AppendHtml(content.String())
} else if selector := docQuery.Find("article.markdown-article-inner").First(); selector.Size() > 0 {
selector.AppendHtml(content.String())
} else {
docQuery.Children().WrapHtml("<article class=\"markdown-article-inner\"></article>").AppendHtml(content.String())
}
}
}
})
}
}
//处理附件
attachList, err := NewAttachment().FindListByDocumentId(item.DocumentId)
if err == nil && len(attachList) > 0 {
content := bytes.NewBufferString("<div class=\"attach-list\"><strong>附件</strong><ul>")
for _, attach := range attachList {
if strings.HasPrefix(attach.HttpPath, "/") {
attach.HttpPath = strings.TrimSuffix(conf.BaseUrl, "/") + attach.HttpPath
}
li := fmt.Sprintf("<li><a href=\"%s\" target=\"_blank\" title=\"%s\">%s</a></li>", attach.HttpPath, attach.FileName, attach.FileName)
content.WriteString(li)
}
content.WriteString("</ul></div>")
if docQuery == nil {
docQuery ,err = goquery.NewDocumentFromReader(content);
}else {
if selector := docQuery.Find("div.markdown-article").First(); selector.Size() > 0{
selector.AppendHtml(content.String())
}else{
docQuery.Find("article.markdown-article-inner").First().AppendHtml(content.String())
//处理了文档底部信息
if selector := docQuery.Find("div.wiki-bottom").First(); selector.Size() <= 0 && item.MemberId > 0 {
//处理文档结尾信息
docCreator, err := NewMember().Find(item.MemberId, "real_name", "account")
release := "<div class=\"wiki-bottom\">文档更新时间: " + item.ModifyTime.Local().Format("2006-01-02 15:04") + " &nbsp;&nbsp;作者:"
if err == nil && docCreator != nil {
if docCreator.RealName != "" {
release += docCreator.RealName
} else {
release += docCreator.Account
}
}
release += "</div>"
if selector := docQuery.Find("div.markdown-article").First(); selector.Size() > 0 {
selector.AppendHtml(release)
} else if selector := docQuery.Find("article.markdown-article-inner").First(); selector.Size() > 0 {
selector.First().AppendHtml(release)
} else {
docQuery.Children().WrapHtml("<article class=\"markdown-article-inner\"></article>").AppendHtml(release);
}
}
//设置图片为CDN地址
if cdnimg := beego.AppConfig.String("cdnimg"); cdnimg != "" {
docQuery.Find("img").Each(func(i int, contentSelection *goquery.Selection) {
if src, ok := contentSelection.Attr("src"); ok && strings.HasPrefix(src, "/uploads/") {
contentSelection.SetAttr("src", utils.JoinURI(cdnimg, src))
}
})
}
if html, err := docQuery.Html(); err == nil {
item.Release = strings.TrimSuffix(strings.TrimPrefix(strings.TrimSpace(html), "<html><head></head><body>"), "</body></html>")
}
}
}
//处理格式化后的文档内容
if docQuery != nil {
//处理文档结尾信息
docCreator, err := NewMember().Find(item.MemberId,"real_name","account")
release := "<div class=\"wiki-bottom\">文档更新时间: " + item.ModifyTime.Local().Format("2006-01-02 15:04") + " &nbsp;&nbsp;作者:"
if err == nil && docCreator != nil {
if docCreator.RealName != "" {
release += docCreator.RealName
} else {
release += docCreator.Account
}
}
release += "</div>"
if selector := docQuery.Find("div.markdown-article").First() ; selector.Size() > 0{
beego.Info(selector)
selector.AppendHtml(release)
}else{
docQuery.Find("article.markdown-article-inner").First().AppendHtml(release)
}
if html, err := docQuery.Html(); err == nil {
item.Release = strings.TrimSuffix(strings.TrimPrefix(strings.TrimSpace(html), "<html><head></head><body>"), "</body></html>")
}
}
_, err = o.Update(item, "release")
if err != nil {
beego.Error(fmt.Sprintf("发布失败 => %+v", item), err)
return err
} else {
//当文档发布后,需要清除已缓存的转换文档和文档缓存
if doc, err := NewDocument().Find(item.DocumentId); err == nil {
doc.PutToCache()
} else {
doc.RemoveCache()
}
if err := os.RemoveAll(filepath.Join(conf.WorkingDirectory, "uploads", "books", strconv.Itoa(bookId))); err != nil {
beego.Error("删除已缓存的文档目录失败 => ",filepath.Join(conf.WorkingDirectory, "uploads", "books", strconv.Itoa(bookId)))
return err
}
}
return nil
}
return item
}