cmd/swarm, swarm: cross-platform Content-Type detection (#17782)

- Add a MIME types generator (the standard "mime" package relies on system settings, see mime.osInitMime)
- Changed swarm/api.Upload:
    - simplify I/O throttling with a semaphore primitive and use the file name where possible
    - f.Close() must be called via defer - otherwise a panic or an early return added in the future will leak file descriptors
    - one previously suppressed error is now handled
This commit is contained in:
Alexey Sharov 2018-10-01 18:39:39 +07:00 committed by Anton Evangelatov
parent b69942befe
commit dc5d643bb5
13 changed files with 3379 additions and 90 deletions

View File

@ -57,6 +57,9 @@ devtools:
@type "solc" 2> /dev/null || echo 'Please install solc'
@type "protoc" 2> /dev/null || echo 'Please install protoc'
swarm-devtools:
env GOBIN= go install ./cmd/swarm/mimegen
# Cross Compilation Targets (xgo)
geth-cross: geth-linux geth-darwin geth-windows geth-android geth-ios

View File

@ -0,0 +1,124 @@
// Copyright 2018 The go-ethereum Authors
// This file is part of go-ethereum.
//
// go-ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// go-ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
package main
// The standard "mime" package relies on system settings, see mime.osInitMime.
// Swarm runs on many OSes/platforms/Docker images and must behave the same on all of them.
// This command generates code that registers common mime types based on a mime.types file.
//
// The mime.types file is provided by mailcap, which follows https://www.iana.org/assignments/media-types/media-types.xhtml
//
// Get the latest version of the mime.types file with:
// docker run --rm -v $(pwd):/tmp alpine:edge /bin/sh -c "apk add -U mailcap; mv /etc/mime.types /tmp"
import (
"bufio"
"bytes"
"flag"
"html/template"
"io/ioutil"
"strings"
"log"
)
// Command-line flags of the generator. main refuses to run unless all three
// are set to a non-empty value.
var (
// typesFlag is the path of the mime.types file to read.
typesFlag = flag.String("types", "", "Input mime.types file")
// packageFlag is the package name written into the generated file.
packageFlag = flag.String("package", "", "Golang package in output file")
// outFlag is the path the generated Go source is written to.
outFlag = flag.String("out", "", "Output file name for the generated mime types")
)
// mime holds one parsed line of the mime.types file: a media type and the
// file extensions listed for it.
type mime struct {
// Name is the first whitespace-separated field of the line.
Name string
// Exts are the remaining fields of the line; the template prefixes each with ".".
Exts []string
}
// templateParams is the data passed to the tpl template when it is executed.
type templateParams struct {
// PackageName is emitted in the package clause of the generated file.
PackageName string
// Mimes are the media types, in input order, whose extensions get registered.
Mimes []mime
}
// main reads the mime.types file named by --types, collects every media type
// that lists at least one extension, renders tpl for the package named by
// --package and writes the result to the file named by --out.
//
// Any missing flag or I/O/template failure terminates the process via log.Fatal.
func main() {
	// Parse and ensure all needed inputs are specified
	flag.Parse()
	if *typesFlag == "" {
		log.Fatalf("--types is required")
	}
	if *packageFlag == "" {
		log.Fatalf("--package is required")
	}
	if *outFlag == "" {
		log.Fatalf("--out is required")
	}

	params := templateParams{
		PackageName: *packageFlag,
	}

	types, err := ioutil.ReadFile(*typesFlag)
	if err != nil {
		log.Fatal(err)
	}

	scanner := bufio.NewScanner(bytes.NewReader(types))
	for scanner.Scan() {
		txt := scanner.Text()
		// Skip comments and empty lines.
		if strings.HasPrefix(txt, "#") || len(txt) == 0 {
			continue
		}
		parts := strings.Fields(txt)
		// A usable entry needs a type name plus at least one extension.
		// Whitespace-only lines yield zero fields and must be skipped too,
		// otherwise parts[0] below would panic.
		if len(parts) < 2 {
			continue
		}
		params.Mimes = append(params.Mimes, mime{parts[0], parts[1:]})
	}
	if err = scanner.Err(); err != nil {
		log.Fatal(err)
	}

	// NOTE(review): the file imports html/template; its escaping may rewrite
	// characters such as '+' in media type names - confirm against the
	// generated output.
	var result bytes.Buffer
	if err := template.Must(template.New("_").Parse(tpl)).Execute(&result, params); err != nil {
		log.Fatal(err)
	}

	if err := ioutil.WriteFile(*outFlag, result.Bytes(), 0600); err != nil {
		log.Fatal(err)
	}
}
// tpl is the template of the generated Go source file. The rendered file
// declares the requested package and an init function that registers every
// extension/type pair through mime.AddExtensionType, panicking if a
// registration fails.
var tpl = `// Code generated by github.com/ethereum/go-ethereum/cmd/swarm/mimegen. DO NOT EDIT.
package {{ .PackageName }}
import "mime"
func init() {
var mimeTypes = map[string]string{
{{- range .Mimes -}}
{{ $name := .Name -}}
{{- range .Exts }}
".{{ . }}": "{{ $name | html }}",
{{- end }}
{{- end }}
}
for ext, name := range mimeTypes {
if err := mime.AddExtensionType(ext, name); err != nil {
panic(err)
}
}
}
`

1828
cmd/swarm/mimegen/mime.types Normal file

File diff suppressed because it is too large Load Diff

View File

@ -22,16 +22,15 @@ import (
"fmt"
"io"
"io/ioutil"
"mime"
"net/http"
"os"
"os/user"
"path"
"path/filepath"
"strings"
"github.com/ethereum/go-ethereum/cmd/utils"
swarm "github.com/ethereum/go-ethereum/swarm/api/client"
"github.com/ethereum/go-ethereum/cmd/utils"
"gopkg.in/urfave/cli.v1"
)
@ -118,10 +117,9 @@ func upload(ctx *cli.Context) {
return "", fmt.Errorf("error opening file: %s", err)
}
defer f.Close()
if mimeType == "" {
mimeType = detectMimeType(file)
}
if mimeType != "" {
f.ContentType = mimeType
}
return client.Upload(f, "", toEncrypt)
}
}
@ -161,19 +159,3 @@ func homeDir() string {
}
return ""
}
// detectMimeType guesses the MIME type of the file at the given path.
//
// If the path has an extension, the result of mime.TypeByExtension is returned
// as is (which may be empty for unknown extensions). Otherwise up to the first
// 512 bytes of the file are sniffed with http.DetectContentType.
// An empty string is returned when the file cannot be opened or is empty.
func detectMimeType(file string) string {
	if ext := filepath.Ext(file); ext != "" {
		return mime.TypeByExtension(ext)
	}
	f, err := os.Open(file)
	if err != nil {
		return ""
	}
	defer f.Close()

	buf := make([]byte, 512)
	// The read error is deliberately ignored: detection is best-effort and
	// n == 0 already covers both the empty-file and the failure case.
	n, _ := f.Read(buf)
	if n == 0 {
		return ""
	}
	// Sniff only the bytes actually read; the zero padding of a partially
	// filled buffer would otherwise be classified as binary data.
	return http.DetectContentType(buf[:n])
}

View File

@ -32,7 +32,7 @@ import (
"github.com/ethereum/go-ethereum/log"
swarm "github.com/ethereum/go-ethereum/swarm/api/client"
colorable "github.com/mattn/go-colorable"
"github.com/mattn/go-colorable"
)
var loglevel = flag.Int("loglevel", 3, "verbosity of logs")

View File

@ -16,6 +16,9 @@
package api
//go:generate mimegen --types=./../../cmd/swarm/mimegen/mime.types --package=api --out=gen_mime.go
//go:generate gofmt -s -w gen_mime.go
import (
"archive/tar"
"context"
@ -29,8 +32,6 @@ import (
"path"
"strings"
"github.com/ethereum/go-ethereum/swarm/storage/mru/lookup"
"bytes"
"mime"
"path/filepath"
@ -45,7 +46,8 @@ import (
"github.com/ethereum/go-ethereum/swarm/spancontext"
"github.com/ethereum/go-ethereum/swarm/storage"
"github.com/ethereum/go-ethereum/swarm/storage/mru"
opentracing "github.com/opentracing/opentracing-go"
"github.com/ethereum/go-ethereum/swarm/storage/mru/lookup"
"github.com/opentracing/opentracing-go"
)
var (
@ -757,9 +759,14 @@ func (a *API) UploadTar(ctx context.Context, bodyReader io.ReadCloser, manifestP
// add the entry under the path from the request
manifestPath := path.Join(manifestPath, hdr.Name)
contentType := hdr.Xattrs["user.swarm.content-type"]
if contentType == "" {
contentType = mime.TypeByExtension(filepath.Ext(hdr.Name))
}
//DetectContentType("")
entry := &ManifestEntry{
Path: manifestPath,
ContentType: hdr.Xattrs["user.swarm.content-type"],
ContentType: contentType,
Mode: hdr.Mode,
Size: hdr.Size,
ModTime: hdr.ModTime,
@ -770,10 +777,15 @@ func (a *API) UploadTar(ctx context.Context, bodyReader io.ReadCloser, manifestP
return nil, fmt.Errorf("error adding manifest entry from tar stream: %s", err)
}
if hdr.Name == defaultPath {
contentType := hdr.Xattrs["user.swarm.content-type"]
if contentType == "" {
contentType = mime.TypeByExtension(filepath.Ext(hdr.Name))
}
entry := &ManifestEntry{
Hash: contentKey.Hex(),
Path: "", // default entry
ContentType: hdr.Xattrs["user.swarm.content-type"],
ContentType: contentType,
Mode: hdr.Mode,
Size: hdr.Size,
ModTime: hdr.ModTime,
@ -1033,3 +1045,32 @@ func (a *API) ResolveResourceView(ctx context.Context, uri *URI, values mru.Valu
}
return view, nil
}
// MimeOctetStream is the default value of the HTTP Content-Type header.
const MimeOctetStream = "application/octet-stream"

// DetectContentType returns the content type for fileName, falling back to
// content sniffing when the file extension is unknown.
//
// The extension is looked up with mime.TypeByExtension first. If that yields
// nothing, up to 512 bytes from the beginning of f are passed to
// http.DetectContentType. The read offset of f is restored before returning,
// so callers can continue reading where they left off.
//
// If f cannot seek, MimeOctetStream is returned together with the error.
func DetectContentType(fileName string, f io.ReadSeeker) (string, error) {
	if ctype := mime.TypeByExtension(filepath.Ext(fileName)); ctype != "" {
		return ctype, nil
	}

	// Remember where the caller was, then probe from the beginning of the
	// file: the reader may already have been consumed (e.g. after an upload),
	// in which case sniffing at the current offset would see no data.
	currentPosition, err := f.Seek(0, io.SeekCurrent)
	if err != nil {
		return MimeOctetStream, fmt.Errorf("seeker can't seek, %s", err)
	}
	if _, err = f.Seek(0, io.SeekStart); err != nil {
		return MimeOctetStream, fmt.Errorf("seeker can't seek, %s", err)
	}

	// Read a chunk to decide between utf-8 text and binary. ReadFull keeps
	// reading across short reads; its error is ignored on purpose because a
	// file shorter than the buffer (or empty) is still sniffed from what was read.
	var buf [512]byte
	n, _ := io.ReadFull(f, buf[:])
	ctype := http.DetectContentType(buf[:n])

	// Rewind to the caller's original offset.
	if _, err = f.Seek(currentPosition, io.SeekStart); err != nil {
		return MimeOctetStream, fmt.Errorf("seeker can't seek, %s", err)
	}

	return ctype, nil
}

View File

@ -17,6 +17,7 @@
package api
import (
"bytes"
"context"
"errors"
"flag"
@ -433,3 +434,69 @@ func TestDecryptOrigin(t *testing.T) {
}
}
}
// TestDetectContentType runs DetectContentType over a table of file names and
// contents and verifies the reported content type: known extensions win over
// the content, unknown or missing extensions fall back to sniffing.
func TestDetectContentType(t *testing.T) {
	type testCase struct {
		file                string
		content             string
		expectedContentType string
	}

	cases := []testCase{
		{"file-with-correct-css.css", "body {background-color: orange}", "text/css; charset=utf-8"},
		{"empty-file.css", "", "text/css; charset=utf-8"},
		{"empty-file.pdf", "", "application/pdf"},
		{"empty-file.md", "", "text/markdown; charset=utf-8"},
		{"empty-file-with-unknown-content.strangeext", "", "text/plain; charset=utf-8"},
		{"file-with-unknown-extension-and-content.strangeext", "Lorem Ipsum", "text/plain; charset=utf-8"},
		{"file-no-extension", "Lorem Ipsum", "text/plain; charset=utf-8"},
		{"file-no-extension-no-content", "", "text/plain; charset=utf-8"},
		{"css-file-with-html-inside.css", "<!doctype html><html><head></head><body></body></html>", "text/css; charset=utf-8"},
	}

	for idx := range cases {
		c := cases[idx]
		t.Run(c.file, func(t *testing.T) {
			got, err := DetectContentType(c.file, bytes.NewReader([]byte(c.content)))
			if err != nil {
				t.Fatal(err)
			}
			if got == c.expectedContentType {
				return
			}
			t.Fatalf("File: %s, Expected mime type %s, got %s", c.file, c.expectedContentType, got)
		})
	}
}

View File

@ -24,7 +24,6 @@ import (
"fmt"
"io"
"io/ioutil"
"mime"
"mime/multipart"
"net/http"
"net/textproto"
@ -124,10 +123,16 @@ func Open(path string) (*File, error) {
f.Close()
return nil, err
}
contentType, err := api.DetectContentType(f.Name(), f)
if err != nil {
return nil, err
}
return &File{
ReadCloser: f,
ManifestEntry: api.ManifestEntry{
ContentType: mime.TypeByExtension(filepath.Ext(path)),
ContentType: contentType,
Mode: int64(stat.Mode()),
Size: stat.Size(),
ModTime: stat.ModTime(),

View File

@ -21,7 +21,6 @@ import (
"context"
"fmt"
"io"
"net/http"
"os"
"path"
"path/filepath"
@ -97,22 +96,28 @@ func (fs *FileSystem) Upload(lpath, index string, toEncrypt bool) (string, error
list = append(list, entry)
}
cnt := len(list)
errors := make([]error, cnt)
done := make(chan bool, maxParallelFiles)
dcnt := 0
awg := &sync.WaitGroup{}
errors := make([]error, len(list))
sem := make(chan bool, maxParallelFiles)
defer close(sem)
for i, entry := range list {
if i >= dcnt+maxParallelFiles {
<-done
dcnt++
}
awg.Add(1)
go func(i int, entry *manifestTrieEntry, done chan bool) {
sem <- true
go func(i int, entry *manifestTrieEntry) {
defer func() { <-sem }()
f, err := os.Open(entry.Path)
if err == nil {
stat, _ := f.Stat()
if err != nil {
errors[i] = err
return
}
defer f.Close()
stat, err := f.Stat()
if err != nil {
errors[i] = err
return
}
var hash storage.Address
var wait func(context.Context) error
ctx := context.TODO()
@ -120,28 +125,21 @@ func (fs *FileSystem) Upload(lpath, index string, toEncrypt bool) (string, error
if hash != nil {
list[i].Hash = hash.Hex()
}
err = wait(ctx)
awg.Done()
if err == nil {
first512 := make([]byte, 512)
fread, _ := f.ReadAt(first512, 0)
if fread > 0 {
mimeType := http.DetectContentType(first512[:fread])
if filepath.Ext(entry.Path) == ".css" {
mimeType = "text/css"
}
list[i].ContentType = mimeType
}
}
f.Close()
}
if err := wait(ctx); err != nil {
errors[i] = err
done <- true
}(i, entry, done)
return
}
for dcnt < cnt {
<-done
dcnt++
list[i].ContentType, err = DetectContentType(f.Name(), f)
if err != nil {
errors[i] = err
return
}
}(i, entry)
}
for i := 0; i < cap(sem); i++ {
sem <- true
}
trie := &manifestTrie{
@ -168,7 +166,6 @@ func (fs *FileSystem) Upload(lpath, index string, toEncrypt bool) (string, error
if err2 == nil {
hs = trie.ref.Hex()
}
awg.Wait()
return hs, err2
}

View File

@ -60,7 +60,7 @@ func TestApiDirUpload0(t *testing.T) {
content = readPath(t, "testdata", "test0", "index.css")
resp = testGet(t, api, bzzhash, "index.css")
exp = expResponse(content, "text/css", 0)
exp = expResponse(content, "text/css; charset=utf-8", 0)
checkResponse(t, resp, exp)
addr := storage.Address(common.Hex2Bytes(bzzhash))
@ -140,7 +140,7 @@ func TestApiDirUploadModify(t *testing.T) {
content = readPath(t, "testdata", "test0", "index.css")
resp = testGet(t, api, bzzhash, "index.css")
exp = expResponse(content, "text/css", 0)
exp = expResponse(content, "text/css; charset=utf-8", 0)
checkResponse(t, resp, exp)
_, _, _, _, err = api.Get(context.TODO(), nil, addr, "")

1201
swarm/api/gen_mime.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -201,6 +201,13 @@ func (s *Server) HandleBzzGet(w http.ResponseWriter, r *http.Request) {
defer reader.Close()
w.Header().Set("Content-Type", "application/x-tar")
fileName := uri.Addr
if found := path.Base(uri.Path); found != "" && found != "." && found != "/" {
fileName = found
}
w.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=\"%s.tar\"", fileName))
w.WriteHeader(http.StatusOK)
io.Copy(w, reader)
return
@ -616,7 +623,7 @@ func (s *Server) HandleGetResource(w http.ResponseWriter, r *http.Request) {
// All ok, serve the retrieved update
log.Debug("Found update", "view", view.Hex(), "ruid", ruid)
w.Header().Set("Content-Type", "application/octet-stream")
w.Header().Set("Content-Type", api.MimeOctetStream)
http.ServeContent(w, r, "", time.Now(), bytes.NewReader(data))
}
@ -690,11 +697,9 @@ func (s *Server) HandleGet(w http.ResponseWriter, r *http.Request) {
case uri.Raw():
// allow the request to overwrite the content type using a query
// parameter
contentType := "application/octet-stream"
if typ := r.URL.Query().Get("content_type"); typ != "" {
contentType = typ
w.Header().Set("Content-Type", typ)
}
w.Header().Set("Content-Type", contentType)
http.ServeContent(w, r, "", time.Now(), reader)
case uri.Hash():
w.Header().Set("Content-Type", "text/plain")
@ -850,8 +855,17 @@ func (s *Server) HandleGetFile(w http.ResponseWriter, r *http.Request) {
return
}
if contentType != "" {
w.Header().Set("Content-Type", contentType)
http.ServeContent(w, r, "", time.Now(), newBufferedReadSeeker(reader, getFileBufferSize))
}
fileName := uri.Addr
if found := path.Base(uri.Path); found != "" && found != "." && found != "/" {
fileName = found
}
w.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=\"%s\"", fileName))
http.ServeContent(w, r, fileName, time.Now(), newBufferedReadSeeker(reader, getFileBufferSize))
}
// The size of buffer used for bufio.Reader on LazyChunkReader passed to

View File

@ -32,6 +32,7 @@ import (
"net/http"
"net/url"
"os"
"path"
"strconv"
"strings"
"testing"
@ -764,6 +765,16 @@ func testBzzTar(encrypted bool, t *testing.T) {
}
defer resp2.Body.Close()
if h := resp2.Header.Get("Content-Type"); h != "application/x-tar" {
t.Fatalf("Content-Type header expected: application/x-tar, got: %s", h)
}
expectedFileName := string(swarmHash) + ".tar"
expectedContentDisposition := fmt.Sprintf("inline; filename=\"%s\"", expectedFileName)
if h := resp2.Header.Get("Content-Disposition"); h != expectedContentDisposition {
t.Fatalf("Content-Disposition header expected: %s, got: %s", expectedContentDisposition, h)
}
file, err := ioutil.TempFile("", "swarm-downloaded-tarball")
if err != nil {
t.Fatal(err)
@ -1099,7 +1110,7 @@ func TestModify(t *testing.T) {
res, body := httpDo(testCase.method, testCase.uri, reqBody, testCase.headers, testCase.verbose, t)
if res.StatusCode != testCase.expectedStatusCode {
t.Fatalf("expected status code %d but got %d", testCase.expectedStatusCode, res.StatusCode)
t.Fatalf("expected status code %d but got %d, %s", testCase.expectedStatusCode, res.StatusCode, body)
}
if testCase.assertResponseBody != "" && !strings.Contains(body, testCase.assertResponseBody) {
t.Log(body)
@ -1213,16 +1224,22 @@ func TestBzzGetFileWithResolver(t *testing.T) {
addr string
path string
expectedStatusCode int
expectedContentType string
expectedFileName string
}{
{
addr: string(swarmHash),
path: fileNames[0],
expectedStatusCode: http.StatusOK,
expectedContentType: "text/plain",
expectedFileName: path.Base(fileNames[0]),
},
{
addr: "somebogusensname",
path: fileNames[0],
expectedStatusCode: http.StatusOK,
expectedContentType: "text/plain",
expectedFileName: path.Base(fileNames[0]),
},
} {
req, err := http.NewRequest("GET", fmt.Sprintf(srv.URL+"/bzz:/%s/%s", v.addr, v.path), nil)
@ -1237,6 +1254,16 @@ func TestBzzGetFileWithResolver(t *testing.T) {
if serverResponse.StatusCode != v.expectedStatusCode {
t.Fatalf("expected %d, got %d", v.expectedStatusCode, serverResponse.StatusCode)
}
if h := serverResponse.Header.Get("Content-Type"); h != v.expectedContentType {
t.Fatalf("Content-Type header expected: %s, got %s", v.expectedContentType, h)
}
expectedContentDisposition := fmt.Sprintf("inline; filename=\"%s\"", v.expectedFileName)
if h := serverResponse.Header.Get("Content-Disposition"); h != expectedContentDisposition {
t.Fatalf("Content-Disposition header expected: %s, got: %s", expectedContentDisposition, h)
}
}
}