package corehttp

import (
	"context"
	"errors"
	"fmt"
	"io"
	"mime"
	"net/http"
	"net/url"
	"os"
	gopath "path"
	"regexp"
	"runtime/debug"
	"strconv"
	"strings"
	"time"

	humanize "github.com/dustin/go-humanize"
	"github.com/gabriel-vasile/mimetype"
	"gitlab.dms3.io/dms3/go-cid"
	files "gitlab.dms3.io/dms3/go-dms3-files"
	assets "gitlab.dms3.io/dms3/go-dms3/assets"
	dag "gitlab.dms3.io/dms3/go-merkledag"
	mfs "gitlab.dms3.io/dms3/go-mfs"
	path "gitlab.dms3.io/dms3/go-path"
	"gitlab.dms3.io/dms3/go-path/resolver"
	coreiface "gitlab.dms3.io/dms3/interface-go-dms3-core"
	ipath "gitlab.dms3.io/dms3/interface-go-dms3-core/path"
	routing "gitlab.dms3.io/p2p/go-p2p-core/routing"
)

const (
	dms3PathPrefix   = "/dms3/"
	dms3nsPathPrefix = "/dms3ns/"
)

// onlyAscii matches every character that is NOT ASCII; it is used to build
// the plain `filename=` fallback of the Content-Disposition header.
var onlyAscii = regexp.MustCompile("[[:^ascii:]]")

// namespaceRootPattern matches a request path that points at the root of a
// content namespace (e.g. /dms3/<cid> or /dms3ns/<name>) with nothing after
// it. The legacy /ipfs and /ipns spellings are kept so the previous check
// still applies.
var namespaceRootPattern = regexp.MustCompile(`^/(?:ip[fn]s|dms3(?:ns)?)/[^/]+$`)

// gatewayHandler is a HTTP handler that serves DMS3 objects (accessible by default at /dms3/)
// (it serves requests like GET /dms3/QmVRzPKPzNtSrEzBFm2UZfxmPAgnaLke4DMcerbsGGSaFe/link)
type gatewayHandler struct {
	config GatewayConfig
	api    coreiface.CoreAPI
}

// statusResponseWriter enables us to override the HTTP status code passed to
// WriteHeader inside of http.ServeContent. The decision is based on the
// presence of HTTP headers such as Location.
type statusResponseWriter struct {
	http.ResponseWriter
}

// WriteHeader forwards code to the wrapped ResponseWriter, upgrading a 200 to
// a 301 when a Location header has already been set on the response.
func (sw *statusResponseWriter) WriteHeader(code int) {
	// Check if we need to adjust Status Code to account for scheduled redirect
	// This enables us to return payload along with HTTP 301
	// for subdomain redirect in web browsers while also returning body for cli
	// tools which do not follow redirects by default (curl, wget).
	redirect := sw.ResponseWriter.Header().Get("Location")
	if redirect != "" && code == http.StatusOK {
		code = http.StatusMovedPermanently
	}
	sw.ResponseWriter.WriteHeader(code)
}

// newGatewayHandler builds a gatewayHandler from the given configuration and
// core API.
func newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) *gatewayHandler {
	return &gatewayHandler{
		config: c,
		api:    api,
	}
}

// parseDms3Path splits p into its root CID and the remaining path below that
// root. It returns an error if p cannot be parsed, is not in the "dms3"
// namespace, or its second segment is not a decodable CID.
func parseDms3Path(p string) (cid.Cid, string, error) {
	rootPath, err := path.ParsePath(p)
	if err != nil {
		return cid.Cid{}, "", err
	}

	// Check the path. Guard the length first so that indexing below can
	// never panic, whatever the parser accepted.
	rsegs := rootPath.Segments()
	if len(rsegs) < 2 || rsegs[0] != "dms3" {
		return cid.Cid{}, "", fmt.Errorf("WritableGateway: only dms3 paths supported")
	}

	rootCid, err := cid.Decode(rsegs[1])
	if err != nil {
		return cid.Cid{}, "", err
	}

	return rootCid, path.Join(rsegs[2:]), nil
}

// ServeHTTP dispatches the request by method. POST, PUT and DELETE are only
// routed when the gateway is configured as writable; GET, HEAD and OPTIONS
// are always routed. Anything else is answered with an error.
func (i *gatewayHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// the hour is a hard fallback, we don't expect it to happen, but just in case
	ctx, cancel := context.WithTimeout(r.Context(), time.Hour)
	defer cancel()
	r = r.WithContext(ctx)

	defer func() {
		// Named rec (not r) so the request is not shadowed.
		if rec := recover(); rec != nil {
			log.Error("A panic occurred in the gateway handler!")
			log.Error(rec)
			debug.PrintStack()
		}
	}()

	if i.config.Writable {
		switch r.Method {
		case http.MethodPost:
			i.postHandler(w, r)
			return
		case http.MethodPut:
			i.putHandler(w, r)
			return
		case http.MethodDelete:
			i.deleteHandler(w, r)
			return
		}
	}

	switch r.Method {
	case http.MethodGet, http.MethodHead:
		i.getOrHeadHandler(w, r)
		return
	case http.MethodOptions:
		i.optionsHandler(w, r)
		return
	}

	errmsg := "Method " + r.Method + " not allowed: "
	var status int
	if !i.config.Writable {
		status = http.StatusMethodNotAllowed
		errmsg = errmsg + "read only access"
		w.Header().Add("Allow", http.MethodGet)
		w.Header().Add("Allow", http.MethodHead)
		w.Header().Add("Allow", http.MethodOptions)
	} else {
		status = http.StatusBadRequest
		errmsg = errmsg + "bad request for " + r.URL.Path
	}
	http.Error(w, errmsg, status)
}

// optionsHandler answers OPTIONS requests by emitting only the configured
// user headers.
func (i *gatewayHandler) optionsHandler(w http.ResponseWriter, r *http.Request) {
	/*
		OPTIONS is a noop request that is used by the browsers to check
		if server accepts cross-site XMLHttpRequest (indicated by the presence of CORS headers)
		https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS#Preflighted_requests
	*/
	i.addUserHeaders(w) // return all custom headers (including CORS ones, if set)
}

// getOrHeadHandler serves GET and HEAD requests. It resolves the requested
// path, answers conditional requests via the ETag, and then serves either the
// file, the directory's index.html, or a generated HTML directory listing.
func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) {
	begin := time.Now()
	urlPath := r.URL.Path
	escapedURLPath := r.URL.EscapedPath()

	// If the gateway is behind a reverse proxy and mounted at a sub-path,
	// the prefix header can be set to signal this sub-path.
	// It will be prepended to links in directory listings and the index.html redirect.
	prefix := ""
	if prfx := r.Header.Get("X-Dms3-Gateway-Prefix"); len(prfx) > 0 {
		for _, p := range i.config.PathPrefixes {
			if prfx == p || strings.HasPrefix(prfx, p+"/") {
				prefix = prfx
				break
			}
		}
	}

	// HostnameOption might have constructed an DMS3NS/DMS3 path using the Host header.
	// In this case, we need the original path for constructing redirects
	// and links that match the requested URL.
	// For example, http://example.net would become /dms3ns/example.net, and
	// the redirects and links would end up as http://example.net/dms3ns/example.net
	requestURI, err := url.ParseRequestURI(r.RequestURI)
	if err != nil {
		webError(w, "failed to parse request path", err, http.StatusInternalServerError)
		return
	}
	originalURLPath := prefix + requestURI.Path

	// ?uri query param support for requests produced by web browsers
	// via navigator.registerProtocolHandler Web API
	// https://developer.mozilla.org/en-US/docs/Web/API/Navigator/registerProtocolHandler
	// TLDR: redirect /dms3/?uri=dms3%3A%2F%2Fcid%3Fquery%3Dval to /dms3/cid?query=val
	if uriParam := r.URL.Query().Get("uri"); uriParam != "" {
		u, err := url.Parse(uriParam)
		if err != nil {
			webError(w, "failed to parse uri query parameter", err, http.StatusBadRequest)
			return
		}
		if u.Scheme != "dms3" && u.Scheme != "dms3ns" {
			// err is nil at this point, so build a real error to report.
			schemeErr := fmt.Errorf("unsupported scheme %q", u.Scheme)
			webError(w, "uri query parameter scheme must be dms3 or dms3ns", schemeErr, http.StatusBadRequest)
			return
		}
		redirectPath := u.Path
		if u.RawQuery != "" { // preserve query if present
			redirectPath = redirectPath + "?" + u.RawQuery
		}
		http.Redirect(w, r, gopath.Join("/", prefix, u.Scheme, u.Host, redirectPath), http.StatusMovedPermanently)
		return
	}

	// Service Worker registration request
	if r.Header.Get("Service-Worker") == "script" {
		// Disallow Service Worker registration on namespace roots
		// https://gitlab.dms3.io/dms3/go-dms3/issues/4025
		if namespaceRootPattern.MatchString(r.URL.Path) {
			err := fmt.Errorf("registration is not allowed for this scope")
			webError(w, "navigator.serviceWorker", err, http.StatusBadRequest)
			return
		}
	}

	parsedPath := ipath.New(urlPath)
	if err := parsedPath.IsValid(); err != nil {
		webError(w, "invalid dms3 path", err, http.StatusBadRequest)
		return
	}

	// Resolve path to the final DAG node for the ETag
	resolvedPath, err := i.api.ResolvePath(r.Context(), parsedPath)
	switch {
	case err == nil:
	case errors.Is(err, coreiface.ErrOffline):
		webError(w, "dms3 resolve -r "+escapedURLPath, err, http.StatusServiceUnavailable)
		return
	default:
		if i.servePretty404IfPresent(w, r, parsedPath) {
			return
		}

		webError(w, "dms3 resolve -r "+escapedURLPath, err, http.StatusNotFound)
		return
	}

	dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath)
	if err != nil {
		webError(w, "dms3 cat "+escapedURLPath, err, http.StatusNotFound)
		return
	}

	unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds())

	defer dr.Close()

	var responseEtag string

	// we need to figure out whether this is a directory before doing most of the heavy lifting below
	_, isDir := dr.(files.Directory)

	if isDir && assets.BindataVersionHash != "" {
		responseEtag = `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"`
	} else {
		responseEtag = `"` + resolvedPath.Cid().String() + `"`
	}

	// Check etag sent back to us
	if inm := r.Header.Get("If-None-Match"); inm == responseEtag || inm == `W/`+responseEtag {
		w.WriteHeader(http.StatusNotModified)
		return
	}

	i.addUserHeaders(w) // ok, _now_ write user's headers.
	w.Header().Set("X-DMS3-Path", urlPath)
	w.Header().Set("Etag", responseEtag)

	// set these headers _after_ the error, for we may just not have it
	// and don't want the client to cache a 500 response...
	// and only if it's /dms3!
	// TODO: break this out when we split /dms3 /dms3ns routes.
	modtime := time.Now()

	if f, ok := dr.(files.File); ok {
		if strings.HasPrefix(urlPath, dms3PathPrefix) {
			w.Header().Set("Cache-Control", "public, max-age=29030400, immutable")

			// set modtime to a really long time ago, since files are immutable and should stay cached
			modtime = time.Unix(1, 0)
		}

		urlFilename := r.URL.Query().Get("filename")
		var name string
		if urlFilename != "" {
			disposition := "inline"
			if r.URL.Query().Get("download") == "true" {
				disposition = "attachment"
			}
			utf8Name := url.PathEscape(urlFilename)
			asciiName := url.PathEscape(onlyAscii.ReplaceAllLiteralString(urlFilename, "_"))
			w.Header().Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, asciiName, utf8Name))
			name = urlFilename
		} else {
			name = getFilename(urlPath)
		}
		i.serveFile(w, r, name, modtime, f)
		return
	}

	dir, ok := dr.(files.Directory)
	if !ok {
		internalWebError(w, fmt.Errorf("unsupported file type"))
		return
	}

	idx, err := i.api.Unixfs().Get(r.Context(), ipath.Join(resolvedPath, "index.html"))
	switch err.(type) {
	case nil:
		// Release the index node once the response has been written.
		defer idx.Close()

		dirwithoutslash := !strings.HasSuffix(urlPath, "/")
		goget := r.URL.Query().Get("go-get") == "1"
		if dirwithoutslash && !goget {
			// See comment above where originalURLPath is declared.
			suffix := "/"
			if r.URL.RawQuery != "" {
				// preserve query parameters
				suffix = suffix + "?" + r.URL.RawQuery
			}
			http.Redirect(w, r, originalURLPath+suffix, http.StatusFound)
			return
		}

		f, ok := idx.(files.File)
		if !ok {
			internalWebError(w, files.ErrNotReader)
			return
		}

		// write to request
		i.serveFile(w, r, "index.html", modtime, f)
		return
	case resolver.ErrNoLink:
		// no index.html; noop
	default:
		internalWebError(w, err)
		return
	}

	// See statusResponseWriter.WriteHeader
	// and https://gitlab.dms3.io/dms3/go-dms3/issues/7164
	// Note: this needs to occur before listingTemplate.Execute otherwise we get
	// superfluous response.WriteHeader call from prometheus/client_golang
	if w.Header().Get("Location") != "" {
		w.WriteHeader(http.StatusMovedPermanently)
		return
	}

	// A HTML directory index will be presented, be sure to set the correct
	// type instead of relying on autodetection (which may fail).
	w.Header().Set("Content-Type", "text/html")
	if r.Method == http.MethodHead {
		return
	}

	// storage for directory listing
	var dirListing []directoryItem
	dirit := dir.Entries()
	for dirit.Next() {
		size := "?"
		if s, err := dirit.Node().Size(); err == nil {
			// Size may not be defined/supported. Continue anyways.
			size = humanize.Bytes(uint64(s))
		}

		hash := ""
		if resolved, err := i.api.ResolvePath(r.Context(), ipath.Join(resolvedPath, dirit.Name())); err == nil {
			// Path may not be resolved. Continue anyways.
			hash = resolved.Cid().String()
		}

		// See comment above where originalURLPath is declared.
		di := directoryItem{
			Size:      size,
			Name:      dirit.Name(),
			Path:      gopath.Join(originalURLPath, dirit.Name()),
			Hash:      hash,
			ShortHash: shortHash(hash),
		}
		dirListing = append(dirListing, di)
	}
	if dirit.Err() != nil {
		internalWebError(w, dirit.Err())
		return
	}

	// construct the correct back link
	// https://gitlab.dms3.io/dms3/go-dms3/issues/1365
	backLink := originalURLPath

	// don't go further up than /dms3/$hash/
	pathSplit := path.SplitList(urlPath)
	switch {
	// keep backlink
	case len(pathSplit) == 3: // url: /dms3/$hash

	// keep backlink
	case len(pathSplit) == 4 && pathSplit[3] == "": // url: /dms3/$hash/

	// add the correct link depending on whether the path ends with a slash
	default:
		if strings.HasSuffix(backLink, "/") {
			backLink += "./.."
		} else {
			backLink += "/.."
		}
	}

	size := "?"
	if s, err := dir.Size(); err == nil {
		// Size may not be defined/supported. Continue anyways.
		size = humanize.Bytes(uint64(s))
	}

	hash := resolvedPath.Cid().String()

	// Gateway root URL to be used when linking to other rootIDs.
	// This will be blank unless subdomain or DNSLink resolution is being used
	// for this request.
	var gwURL string

	// Get gateway hostname and build gateway URL.
	if h, ok := r.Context().Value("gw-hostname").(string); ok {
		gwURL = "//" + h
	}

	dnslink := hasDNSLinkOrigin(gwURL, urlPath)

	// See comment above where originalURLPath is declared.
	tplData := listingTemplateData{
		GatewayURL:  gwURL,
		DNSLink:     dnslink,
		Listing:     dirListing,
		Size:        size,
		Path:        urlPath,
		Breadcrumbs: breadcrumbs(urlPath, dnslink),
		BackLink:    backLink,
		Hash:        hash,
	}

	err = listingTemplate.Execute(w, tplData)
	if err != nil {
		internalWebError(w, err)
		return
	}
}

// serveFile writes file to w through http.ServeContent. The Content-Type is
// taken from the extension of name and, when that yields nothing, sniffed
// from the content itself. Files whose size is unknown are rejected.
func (i *gatewayHandler) serveFile(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, file files.File) {
	size, err := file.Size()
	if err != nil {
		http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway)
		return
	}

	content := &lazySeeker{
		size:   size,
		reader: file,
	}

	var ctype string
	if _, isSymlink := file.(*files.Symlink); isSymlink {
		// We should be smarter about resolving symlinks but this is the
		// "most correct" we can be without doing that.
		ctype = "inode/symlink"
	} else {
		ctype = mime.TypeByExtension(gopath.Ext(name))
		if ctype == "" {
			// uses https://github.com/gabriel-vasile/mimetype library to determine the content type.
			// Fixes https://gitlab.dms3.io/dms3/go-dms3/issues/7252
			mimeType, err := mimetype.DetectReader(content)
			if err != nil {
				http.Error(w, fmt.Sprintf("cannot detect content-type: %s", err.Error()), http.StatusInternalServerError)
				return
			}

			ctype = mimeType.String()
			// Rewind so that ServeContent starts from the first byte again.
			_, err = content.Seek(0, io.SeekStart)
			if err != nil {
				http.Error(w, "seeker can't seek", http.StatusInternalServerError)
				return
			}
		}
		// Strip the encoding from the HTML Content-Type header and let the
		// browser figure it out.
		//
		// Fixes https://gitlab.dms3.io/dms3/go-dms3/issues/2203
		if strings.HasPrefix(ctype, "text/html;") {
			ctype = "text/html"
		}
	}
	w.Header().Set("Content-Type", ctype)

	w = &statusResponseWriter{w}
	http.ServeContent(w, req, name, modtime, content)
}

// servePretty404IfPresent looks for a custom 404 page in the requested path's
// parent directories and, if one is found, sends it with status 404. It
// returns true once a response has been started, so the caller must not write
// another one; it returns false if no usable 404 page was found.
func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool {
	resolved404Path, ctype, err := i.searchUpTreeFor404(r, parsedPath)
	if err != nil {
		return false
	}

	dr, err := i.api.Unixfs().Get(r.Context(), resolved404Path)
	if err != nil {
		return false
	}
	defer dr.Close()

	f, ok := dr.(files.File)
	if !ok {
		return false
	}

	size, err := f.Size()
	if err != nil {
		return false
	}

	log.Debugf("using pretty 404 file for %s", parsedPath.String())
	w.Header().Set("Content-Type", ctype)
	w.Header().Set("Content-Length", strconv.FormatInt(size, 10))
	w.WriteHeader(http.StatusNotFound)
	if _, err = io.CopyN(w, f, size); err != nil {
		// The status line and headers are already on the wire, so the
		// caller cannot send a different error response anymore.
		log.Debugf("failed to write pretty 404 file for %s: %s", parsedPath.String(), err)
	}
	return true
}

// postHandler adds the request body as a new file and answers with status
// 201, the new CID in the DMS3-Hash header and a Location pointing at it.
func (i *gatewayHandler) postHandler(w http.ResponseWriter, r *http.Request) {
	p, err := i.api.Unixfs().Add(r.Context(), files.NewReaderFile(r.Body))
	if err != nil {
		internalWebError(w, err)
		return
	}

	i.addUserHeaders(w) // ok, _now_ write user's headers.
	w.Header().Set("DMS3-Hash", p.Cid().String())
	http.Redirect(w, r, p.String(), http.StatusCreated)
}

// putHandler adds the request body as a file and links it into the directory
// tree rooted at the CID named in the URL, creating intermediate directories
// and replacing an existing entry of the same name. It answers with the CID
// of the resulting new root.
func (i *gatewayHandler) putHandler(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	ds := i.api.Dag()

	// Parse the path
	rootCid, newPath, err := parseDms3Path(r.URL.Path)
	if err != nil {
		webError(w, "WritableGateway: failed to parse the path", err, http.StatusBadRequest)
		return
	}
	if newPath == "" || newPath == "/" {
		http.Error(w, "WritableGateway: empty path", http.StatusBadRequest)
		return
	}
	newDirectory, newFileName := gopath.Split(newPath)

	// Resolve the old root.
	rnode, err := ds.Get(ctx, rootCid)
	if err != nil {
		webError(w, "WritableGateway: Could not create DAG from request", err, http.StatusInternalServerError)
		return
	}

	pbnd, ok := rnode.(*dag.ProtoNode)
	if !ok {
		webError(w, "Cannot read non protobuf nodes through gateway", dag.ErrNotProtobuf, http.StatusBadRequest)
		return
	}

	// Create the new file.
	newFilePath, err := i.api.Unixfs().Add(ctx, files.NewReaderFile(r.Body))
	if err != nil {
		webError(w, "WritableGateway: could not create DAG from request", err, http.StatusInternalServerError)
		return
	}

	newFile, err := ds.Get(ctx, newFilePath.Cid())
	if err != nil {
		webError(w, "WritableGateway: failed to resolve new file", err, http.StatusInternalServerError)
		return
	}

	// Patch the new file into the old root.
	root, err := mfs.NewRoot(ctx, ds, pbnd, nil)
	if err != nil {
		webError(w, "WritableGateway: failed to create MFS root", err, http.StatusBadRequest)
		return
	}

	if newDirectory != "" {
		err := mfs.Mkdir(root, newDirectory, mfs.MkdirOpts{Mkparents: true, Flush: false})
		if err != nil {
			webError(w, "WritableGateway: failed to create MFS directory", err, http.StatusInternalServerError)
			return
		}
	}
	dirNode, err := mfs.Lookup(root, newDirectory)
	if err != nil {
		webError(w, "WritableGateway: failed to lookup directory", err, http.StatusInternalServerError)
		return
	}
	dir, ok := dirNode.(*mfs.Directory)
	if !ok {
		http.Error(w, "WritableGateway: target directory is not a directory", http.StatusBadRequest)
		return
	}

	// A missing entry is fine: there is simply nothing to replace.
	if err := dir.Unlink(newFileName); err != nil && !errors.Is(err, os.ErrNotExist) {
		webError(w, "WritableGateway: failed to replace existing file", err, http.StatusBadRequest)
		return
	}
	err = dir.AddChild(newFileName, newFile)
	if err != nil {
		webError(w, "WritableGateway: failed to link file into directory", err, http.StatusInternalServerError)
		return
	}
	nnode, err := root.GetDirectory().GetNode()
	if err != nil {
		webError(w, "WritableGateway: failed to finalize", err, http.StatusInternalServerError)
		return
	}
	newcid := nnode.Cid()

	i.addUserHeaders(w) // ok, _now_ write user's headers.
	w.Header().Set("DMS3-Hash", newcid.String())
	http.Redirect(w, r, gopath.Join(dms3PathPrefix, newcid.String(), newPath), http.StatusCreated)
}

// deleteHandler unlinks the entry named by the URL from the directory tree
// rooted at the CID in the URL and answers with the CID of the resulting new
// root. Deleting an entry that does not exist is not an error.
func (i *gatewayHandler) deleteHandler(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	// parse the path
	rootCid, newPath, err := parseDms3Path(r.URL.Path)
	if err != nil {
		webError(w, "WritableGateway: failed to parse the path", err, http.StatusBadRequest)
		return
	}
	if newPath == "" || newPath == "/" {
		http.Error(w, "WritableGateway: empty path", http.StatusBadRequest)
		return
	}
	directory, filename := gopath.Split(newPath)

	// lookup the root
	rootNodeLD, err := i.api.Dag().Get(ctx, rootCid)
	if err != nil {
		webError(w, "WritableGateway: failed to resolve root CID", err, http.StatusInternalServerError)
		return
	}
	rootNode, ok := rootNodeLD.(*dag.ProtoNode)
	if !ok {
		http.Error(w, "WritableGateway: root is not a protobuf node", http.StatusInternalServerError)
		return
	}

	// construct the mfs root
	root, err := mfs.NewRoot(ctx, i.api.Dag(), rootNode, nil)
	if err != nil {
		webError(w, "WritableGateway: failed to construct the MFS root", err, http.StatusBadRequest)
		return
	}

	// lookup the parent directory
	parentNode, err := mfs.Lookup(root, directory)
	if err != nil {
		webError(w, "WritableGateway: failed to look up parent", err, http.StatusInternalServerError)
		return
	}

	parent, ok := parentNode.(*mfs.Directory)
	if !ok {
		http.Error(w, "WritableGateway: parent is not a directory", http.StatusInternalServerError)
		return
	}

	// delete the file; keep the error so the real cause is reported.
	if err := parent.Unlink(filename); err != nil && !errors.Is(err, os.ErrNotExist) {
		webError(w, "WritableGateway: failed to remove file", err, http.StatusInternalServerError)
		return
	}

	nnode, err := root.GetDirectory().GetNode()
	if err != nil {
		webError(w, "WritableGateway: failed to finalize", err, http.StatusInternalServerError)
		return // nnode must not be used after a failure
	}
	ncid := nnode.Cid()

	i.addUserHeaders(w) // ok, _now_ write user's headers.
	w.Header().Set("DMS3-Hash", ncid.String())
	// note: StatusCreated is technically correct here as we created a new resource.
	http.Redirect(w, r, gopath.Join(dms3PathPrefix+ncid.String(), directory), http.StatusCreated)
}

// addUserHeaders copies every configured header onto the response, replacing
// any values already present under the same key.
func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) {
	for k, v := range i.config.Headers {
		w.Header()[k] = v
	}
}

// webError writes an error response. Well-known errors pick their own status
// (404 for a missing link or routing miss, 408 for an exceeded deadline);
// everything else uses defaultCode. Wrapped errors are recognised as well.
func webError(w http.ResponseWriter, message string, err error, defaultCode int) {
	var noLink resolver.ErrNoLink
	switch {
	case errors.As(err, &noLink), errors.Is(err, routing.ErrNotFound):
		webErrorWithCode(w, message, err, http.StatusNotFound)
	case errors.Is(err, context.DeadlineExceeded):
		webErrorWithCode(w, message, err, http.StatusRequestTimeout)
	default:
		webErrorWithCode(w, message, err, defaultCode)
	}
}

// webErrorWithCode writes "message: err" with the given status code and logs
// a warning for server-side (5xx) failures.
func webErrorWithCode(w http.ResponseWriter, message string, err error, code int) {
	http.Error(w, fmt.Sprintf("%s: %s", message, err), code)
	if code >= 500 {
		log.Warnf("server error: %s: %s", message, err)
	}
}

// return a 500 error and log
func internalWebError(w http.ResponseWriter, err error) {
	webErrorWithCode(w, "internalWebError", err, http.StatusInternalServerError)
}

// getFilename returns the last element of s, or "" when s is only a namespace
// root such as /dms3/<cid> or /dms3ns/<name>.
func getFilename(s string) string {
	if (strings.HasPrefix(s, dms3PathPrefix) || strings.HasPrefix(s, dms3nsPathPrefix)) && strings.Count(gopath.Clean(s), "/") <= 2 {
		// Don't want to treat dms3.io in /dms3ns/dms3.io as a filename.
		return ""
	}
	return gopath.Base(s)
}

// searchUpTreeFor404 walks from the requested path up towards its root and
// returns the first resolvable 404 page together with its content type. It
// returns an error when the client accepts no supported type or no page is
// found.
func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, parsedPath ipath.Path) (ipath.Resolved, string, error) {
	filename404, ctype, err := preferred404Filename(r.Header.Values("Accept"))
	if err != nil {
		return nil, "", err
	}

	pathComponents := strings.Split(parsedPath.String(), "/")

	for idx := len(pathComponents); idx >= 3; idx-- {
		// The three-index slice caps capacity so append always copies
		// instead of overwriting later elements of pathComponents.
		pretty404 := gopath.Join(append(pathComponents[:idx:idx], filename404)...)
		parsed404Path := ipath.New("/" + pretty404)
		if parsed404Path.IsValid() != nil {
			break
		}
		resolvedPath, err := i.api.ResolvePath(r.Context(), parsed404Path)
		if err != nil {
			continue
		}
		return resolvedPath, ctype, nil
	}

	return nil, "", fmt.Errorf("no pretty 404 in any parent folder")
}

// preferred404Filename picks the 404 file name and content type to serve for
// the given Accept header values. It returns an error when none of the
// accepted media types can be served as HTML.
func preferred404Filename(acceptHeaders []string) (string, string, error) {
	// If we ever want to offer a 404 file for a different content type
	// then this function will need to parse q weightings, but for now
	// the presence of anything matching HTML is enough.
	for _, acceptHeader := range acceptHeaders {
		accepted := strings.Split(acceptHeader, ",")
		for _, spec := range accepted {
			// Drop parameters such as ";q=0.9" and the whitespace that
			// usually follows the comma separator.
			contentType := strings.TrimSpace(strings.SplitN(spec, ";", 2)[0])
			switch contentType {
			case "*/*", "text/*", "text/html":
				return "dms3-404.html", "text/html", nil
			}
		}
	}

	return "", "", fmt.Errorf("there is no 404 file for the requested content types")
}