Statistics
| Branch: | Tag: | Revision:

arvados / services / keep-web / handler.go @ 654ee915

History | View | Annotate | Download (16.8 KB)

1
// Copyright (C) The Arvados Authors. All rights reserved.
2
//
3
// SPDX-License-Identifier: AGPL-3.0
4

    
5
package main
6

    
7
import (
8
	"encoding/json"
9
	"fmt"
10
	"html"
11
	"html/template"
12
	"io"
13
	"net/http"
14
	"net/url"
15
	"os"
16
	"sort"
17
	"strconv"
18
	"strings"
19
	"sync"
20

    
21
	"git.curoverse.com/arvados.git/sdk/go/arvados"
22
	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
23
	"git.curoverse.com/arvados.git/sdk/go/auth"
24
	"git.curoverse.com/arvados.git/sdk/go/health"
25
	"git.curoverse.com/arvados.git/sdk/go/httpserver"
26
	"git.curoverse.com/arvados.git/sdk/go/keepclient"
27
	"golang.org/x/net/webdav"
28
)
29

    
30
type handler struct {
31
	Config        *Config
32
	clientPool    *arvadosclient.ClientPool
33
	setupOnce     sync.Once
34
	healthHandler http.Handler
35
	webdavLS      webdav.LockSystem
36
}
37

    
38
// parseCollectionIDFromDNSName returns a UUID or PDH if s begins with
39
// a UUID or URL-encoded PDH; otherwise "".
40
func parseCollectionIDFromDNSName(s string) string {
41
	// Strip domain.
42
	if i := strings.IndexRune(s, '.'); i >= 0 {
43
		s = s[:i]
44
	}
45
	// Names like {uuid}--collections.example.com serve the same
46
	// purpose as {uuid}.collections.example.com but can reduce
47
	// cost/effort of using [additional] wildcard certificates.
48
	if i := strings.Index(s, "--"); i >= 0 {
49
		s = s[:i]
50
	}
51
	if arvadosclient.UUIDMatch(s) {
52
		return s
53
	}
54
	if pdh := strings.Replace(s, "-", "+", 1); arvadosclient.PDHMatch(pdh) {
55
		return pdh
56
	}
57
	return ""
58
}
59

    
60
var urlPDHDecoder = strings.NewReplacer(" ", "+", "-", "+")
61

    
62
// parseCollectionIDFromURL returns a UUID or PDH if s is a UUID or a
63
// PDH (even if it is a PDH with "+" replaced by " " or "-");
64
// otherwise "".
65
func parseCollectionIDFromURL(s string) string {
66
	if arvadosclient.UUIDMatch(s) {
67
		return s
68
	}
69
	if pdh := urlPDHDecoder.Replace(s); arvadosclient.PDHMatch(pdh) {
70
		return pdh
71
	}
72
	return ""
73
}
74

    
75
func (h *handler) setup() {
76
	h.clientPool = arvadosclient.MakeClientPool()
77

    
78
	keepclient.RefreshServiceDiscoveryOnSIGHUP()
79

    
80
	h.healthHandler = &health.Handler{
81
		Token:  h.Config.ManagementToken,
82
		Prefix: "/_health/",
83
	}
84

    
85
	h.webdavLS = webdav.NewMemLS()
86
}
87

    
88
func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
89
	status := struct {
90
		cacheStats
91
	}{
92
		cacheStats: h.Config.Cache.Stats(),
93
	}
94
	json.NewEncoder(w).Encode(status)
95
}
96

    
97
// newMethodSet returns a lookup table in which each of the given
// HTTP method names maps to true.
func newMethodSet(names ...string) map[string]bool {
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}

var (
	// webdavMethod lists the request methods that ServeHTTP
	// hands over to the WebDAV handler.
	webdavMethod = newMethodSet("OPTIONS", "PROPFIND", "LOCK", "UNLOCK")

	// fsMethod lists the request methods that ServeHTTP answers
	// itself from the collection filesystem.
	fsMethod = newMethodSet("GET", "HEAD", "POST")
)
110

    
111
// ServeHTTP implements http.Handler.
112
func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
113
	h.setupOnce.Do(h.setup)
114

    
115
	var statusCode = 0
116
	var statusText string
117

    
118
	remoteAddr := r.RemoteAddr
119
	if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
120
		remoteAddr = xff + "," + remoteAddr
121
	}
122

    
123
	w := httpserver.WrapResponseWriter(wOrig)
124
	defer func() {
125
		if statusCode == 0 {
126
			statusCode = w.WroteStatus()
127
		} else if w.WroteStatus() == 0 {
128
			w.WriteHeader(statusCode)
129
		} else if w.WroteStatus() != statusCode {
130
			httpserver.Log(r.RemoteAddr, "WARNING",
131
				fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
132
		}
133
		if statusText == "" {
134
			statusText = http.StatusText(statusCode)
135
		}
136
		httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
137
	}()
138

    
139
	if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
140
		h.healthHandler.ServeHTTP(w, r)
141
		return
142
	}
143

    
144
	if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
145
		if !fsMethod[method] && !webdavMethod[method] {
146
			statusCode = http.StatusMethodNotAllowed
147
			return
148
		}
149
		w.Header().Set("Access-Control-Allow-Headers", "Range")
150
		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PROPFIND, LOCK, UNLOCK")
151
		w.Header().Set("Access-Control-Allow-Origin", "*")
152
		w.Header().Set("Access-Control-Max-Age", "86400")
153
		statusCode = http.StatusOK
154
		return
155
	}
156

    
157
	if !fsMethod[r.Method] && !webdavMethod[r.Method] {
158
		statusCode, statusText = http.StatusMethodNotAllowed, r.Method
159
		return
160
	}
161

    
162
	if r.Header.Get("Origin") != "" {
163
		// Allow simple cross-origin requests without user
164
		// credentials ("user credentials" as defined by CORS,
165
		// i.e., cookies, HTTP authentication, and client-side
166
		// SSL certificates. See
167
		// http://www.w3.org/TR/cors/#user-credentials).
168
		w.Header().Set("Access-Control-Allow-Origin", "*")
169
		w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
170
	}
171

    
172
	arv := h.clientPool.Get()
173
	if arv == nil {
174
		statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+h.clientPool.Err().Error()
175
		return
176
	}
177
	defer h.clientPool.Put(arv)
178

    
179
	pathParts := strings.Split(r.URL.Path[1:], "/")
180

    
181
	var stripParts int
182
	var targetID string
183
	var tokens []string
184
	var reqTokens []string
185
	var pathToken bool
186
	var attachment bool
187
	credentialsOK := h.Config.TrustAllContent
188

    
189
	if r.Host != "" && r.Host == h.Config.AttachmentOnlyHost {
190
		credentialsOK = true
191
		attachment = true
192
	} else if r.FormValue("disposition") == "attachment" {
193
		attachment = true
194
	}
195

    
196
	if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
197
		// http://ID.collections.example/PATH...
198
		credentialsOK = true
199
	} else if r.URL.Path == "/status.json" {
200
		h.serveStatus(w, r)
201
		return
202
	} else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
203
		// /c=ID[/PATH...]
204
		targetID = parseCollectionIDFromURL(pathParts[0][2:])
205
		stripParts = 1
206
	} else if len(pathParts) >= 2 && pathParts[0] == "collections" {
207
		if len(pathParts) >= 4 && pathParts[1] == "download" {
208
			// /collections/download/ID/TOKEN/PATH...
209
			targetID = parseCollectionIDFromURL(pathParts[2])
210
			tokens = []string{pathParts[3]}
211
			stripParts = 4
212
			pathToken = true
213
		} else {
214
			// /collections/ID/PATH...
215
			targetID = parseCollectionIDFromURL(pathParts[1])
216
			tokens = h.Config.AnonymousTokens
217
			stripParts = 2
218
		}
219
	}
220

    
221
	if targetID == "" {
222
		statusCode = http.StatusNotFound
223
		return
224
	}
225

    
226
	formToken := r.FormValue("api_token")
227
	if formToken != "" && r.Header.Get("Origin") != "" && attachment && r.URL.Query().Get("api_token") == "" {
228
		// The client provided an explicit token in the POST
229
		// body. The Origin header indicates this *might* be
230
		// an AJAX request, in which case redirect-with-cookie
231
		// won't work: we should just serve the content in the
232
		// POST response. This is safe because:
233
		//
234
		// * We're supplying an attachment, not inline
235
		//   content, so we don't need to convert the POST to
236
		//   a GET and avoid the "really resubmit form?"
237
		//   problem.
238
		//
239
		// * The token isn't embedded in the URL, so we don't
240
		//   need to worry about bookmarks and copy/paste.
241
		tokens = append(tokens, formToken)
242
	} else if formToken != "" {
243
		// The client provided an explicit token in the query
244
		// string, or a form in POST body. We must put the
245
		// token in an HttpOnly cookie, and redirect to the
246
		// same URL with the query param redacted and method =
247
		// GET.
248
		h.seeOtherWithCookie(w, r, "", credentialsOK)
249
		return
250
	}
251

    
252
	targetPath := pathParts[stripParts:]
253
	if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
254
		// http://ID.example/t=TOKEN/PATH...
255
		// /c=ID/t=TOKEN/PATH...
256
		//
257
		// This form must only be used to pass scoped tokens
258
		// that give permission for a single collection. See
259
		// FormValue case above.
260
		tokens = []string{targetPath[0][2:]}
261
		pathToken = true
262
		targetPath = targetPath[1:]
263
		stripParts++
264
	}
265

    
266
	if tokens == nil {
267
		if credentialsOK {
268
			reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
269
		}
270
		tokens = append(reqTokens, h.Config.AnonymousTokens...)
271
	}
272

    
273
	if len(targetPath) > 0 && targetPath[0] == "_" {
274
		// If a collection has a directory called "t=foo" or
275
		// "_", it can be served at
276
		// //collections.example/_/t=foo/ or
277
		// //collections.example/_/_/ respectively:
278
		// //collections.example/t=foo/ won't work because
279
		// t=foo will be interpreted as a token "foo".
280
		targetPath = targetPath[1:]
281
		stripParts++
282
	}
283

    
284
	forceReload := false
285
	if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
286
		forceReload = true
287
	}
288

    
289
	var collection *arvados.Collection
290
	tokenResult := make(map[string]int)
291
	for _, arv.ApiToken = range tokens {
292
		var err error
293
		collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
294
		if err == nil {
295
			// Success
296
			break
297
		}
298
		if srvErr, ok := err.(arvadosclient.APIServerError); ok {
299
			switch srvErr.HttpStatusCode {
300
			case 404, 401:
301
				// Token broken or insufficient to
302
				// retrieve collection
303
				tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
304
				continue
305
			}
306
		}
307
		// Something more serious is wrong
308
		statusCode, statusText = http.StatusInternalServerError, err.Error()
309
		return
310
	}
311
	if collection == nil {
312
		if pathToken || !credentialsOK {
313
			// Either the URL is a "secret sharing link"
314
			// that didn't work out (and asking the client
315
			// for additional credentials would just be
316
			// confusing), or we don't even accept
317
			// credentials at this path.
318
			statusCode = http.StatusNotFound
319
			return
320
		}
321
		for _, t := range reqTokens {
322
			if tokenResult[t] == 404 {
323
				// The client provided valid token(s), but the
324
				// collection was not found.
325
				statusCode = http.StatusNotFound
326
				return
327
			}
328
		}
329
		// The client's token was invalid (e.g., expired), or
330
		// the client didn't even provide one.  Propagate the
331
		// 401 to encourage the client to use a [different]
332
		// token.
333
		//
334
		// TODO(TC): This response would be confusing to
335
		// someone trying (anonymously) to download public
336
		// data that has been deleted.  Allow a referrer to
337
		// provide this context somehow?
338
		w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
339
		statusCode = http.StatusUnauthorized
340
		return
341
	}
342

    
343
	kc, err := keepclient.MakeKeepClient(arv)
344
	if err != nil {
345
		statusCode, statusText = http.StatusInternalServerError, err.Error()
346
		return
347
	}
348

    
349
	basename := targetPath[len(targetPath)-1]
350
	applyContentDispositionHdr(w, r, basename, attachment)
351

    
352
	fs := collection.FileSystem(&arvados.Client{
353
		APIHost:   arv.ApiServer,
354
		AuthToken: arv.ApiToken,
355
		Insecure:  arv.ApiInsecure,
356
	}, kc)
357
	if webdavMethod[r.Method] {
358
		h := webdav.Handler{
359
			Prefix:     "/" + strings.Join(pathParts[:stripParts], "/"),
360
			FileSystem: &webdavFS{httpfs: fs},
361
			LockSystem: h.webdavLS,
362
			Logger: func(_ *http.Request, err error) {
363
				if os.IsNotExist(err) {
364
					statusCode, statusText = http.StatusNotFound, err.Error()
365
				} else if err != nil {
366
					statusCode, statusText = http.StatusInternalServerError, err.Error()
367
				}
368
			},
369
		}
370
		h.ServeHTTP(w, r)
371
		return
372
	}
373

    
374
	openPath := "/" + strings.Join(targetPath, "/")
375
	if f, err := fs.Open(openPath); os.IsNotExist(err) {
376
		// Requested non-existent path
377
		statusCode = http.StatusNotFound
378
	} else if err != nil {
379
		// Some other (unexpected) error
380
		statusCode, statusText = http.StatusInternalServerError, err.Error()
381
	} else if stat, err := f.Stat(); err != nil {
382
		// Can't get Size/IsDir (shouldn't happen with a collectionFS!)
383
		statusCode, statusText = http.StatusInternalServerError, err.Error()
384
	} else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
385
		// If client requests ".../dirname", redirect to
386
		// ".../dirname/". This way, relative links in the
387
		// listing for "dirname" can always be "fnm", never
388
		// "dirname/fnm".
389
		h.seeOtherWithCookie(w, r, basename+"/", credentialsOK)
390
	} else if stat.IsDir() {
391
		h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
392
	} else {
393
		http.ServeContent(w, r, basename, stat.ModTime(), f)
394
		if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
395
			// If we wrote fewer bytes than expected, it's
396
			// too late to change the real response code
397
			// or send an error message to the client, but
398
			// at least we can try to put some useful
399
			// debugging info in the logs.
400
			n, err := f.Read(make([]byte, 1024))
401
			statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
402

    
403
		}
404
	}
405
}
406

    
407
// dirListingTemplate is the html/template source of the directory
// listing page rendered by serveDirectory. It expects the data keys
// "CollectionName", "Files" (entries with Name and Size), "Request"
// and "StripParts", plus a template function named "nbsp".
//
// The page title uses .CollectionName, the same key as the heading:
// serveDirectory supplies no "Collection" entry, so a reference to
// .Collection.Name could never show the collection's name.
var dirListingTemplate = `<!DOCTYPE HTML>
<HTML><HEAD>
  <META name="robots" content="NOINDEX">
  <TITLE>{{ .CollectionName }}</TITLE>
  <STYLE type="text/css">
    body {
      margin: 1.5em;
    }
    pre {
      background-color: #D9EDF7;
      border-radius: .25em;
      padding: .75em;
      overflow: auto;
    }
    .footer p {
      font-size: 82%;
    }
    ul {
      padding: 0;
    }
    ul li {
      font-family: monospace;
      list-style: none;
    }
  </STYLE>
</HEAD>
<BODY>
<H1>{{ .CollectionName }}</H1>

<P>This collection of data files is being shared with you through
Arvados.  You can download individual files listed below.  To download
the entire collection with wget, try:</P>

<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>

<H2>File Listing</H2>

{{if .Files}}
<UL>
{{range .Files}}  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
</UL>
{{else}}
<P>(No files; this collection is empty.)</P>
{{end}}

<HR noshade>
<DIV class="footer">
  <P>
    About Arvados:
    Arvados is a free and open source software bioinformatics platform.
    To learn more, visit arvados.org.
    Arvados is not responsible for the files listed on this page.
  </P>
</DIV>

</BODY>
`
464

    
465
// fileListEnt is one row of the directory listing rendered from
// dirListingTemplate.
type fileListEnt struct {
	// Name is the file's path relative to the listed directory.
	Name string

	// Size is the file size as reported by the directory entry.
	Size int64
}
469

    
470
func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
471
	var files []fileListEnt
472
	var walk func(string) error
473
	if !strings.HasSuffix(base, "/") {
474
		base = base + "/"
475
	}
476
	walk = func(path string) error {
477
		dirname := base + path
478
		if dirname != "/" {
479
			dirname = strings.TrimSuffix(dirname, "/")
480
		}
481
		d, err := fs.Open(dirname)
482
		if err != nil {
483
			return err
484
		}
485
		ents, err := d.Readdir(-1)
486
		if err != nil {
487
			return err
488
		}
489
		for _, ent := range ents {
490
			if ent.IsDir() {
491
				err = walk(path + ent.Name() + "/")
492
				if err != nil {
493
					return err
494
				}
495
			} else {
496
				files = append(files, fileListEnt{
497
					Name: path + ent.Name(),
498
					Size: ent.Size(),
499
				})
500
			}
501
		}
502
		return nil
503
	}
504
	if err := walk(""); err != nil {
505
		http.Error(w, err.Error(), http.StatusInternalServerError)
506
		return
507
	}
508

    
509
	funcs := template.FuncMap{
510
		"nbsp": func(s string) template.HTML {
511
			return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
512
		},
513
	}
514
	tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
515
	if err != nil {
516
		http.Error(w, err.Error(), http.StatusInternalServerError)
517
		return
518
	}
519
	sort.Slice(files, func(i, j int) bool {
520
		return files[i].Name < files[j].Name
521
	})
522
	w.WriteHeader(http.StatusOK)
523
	tmpl.Execute(w, map[string]interface{}{
524
		"CollectionName": collectionName,
525
		"Files":          files,
526
		"Request":        r,
527
		"StripParts":     stripParts,
528
	})
529
}
530

    
531
// applyContentDispositionHdr sets the Content-Disposition response
// header on w when there is something other than a bare "inline" to
// say.
//
// The disposition type is "attachment" if isAttachment is true and
// "inline" otherwise. A filename parameter is appended only when the
// request URI contains a query string.
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
	value := "inline"
	if isAttachment {
		value = "attachment"
	}

	if hasQuery := strings.IndexByte(r.RequestURI, '?') >= 0; hasQuery {
		// Without an explicit filename the UA may take the
		// name to be "filename.txt?disposition=attachment"
		// rather than just "filename.txt".
		//
		// TODO(TC): Follow advice at RFC 6266 appendix D
		value = value + "; filename=" + strconv.QuoteToASCII(filename)
	}

	if value == "inline" {
		// Nothing worth sending: leave the header unset.
		return
	}
	w.Header().Set("Content-Disposition", value)
}
548

    
549
func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
550
	if !credentialsOK {
551
		// It is not safe to copy the provided token
552
		// into a cookie unless the current vhost
553
		// (origin) serves only a single collection or
554
		// we are in TrustAllContent mode.
555
		w.WriteHeader(http.StatusBadRequest)
556
		return
557
	}
558

    
559
	if formToken := r.FormValue("api_token"); formToken != "" {
560
		// The HttpOnly flag is necessary to prevent
561
		// JavaScript code (included in, or loaded by, a page
562
		// in the collection being served) from employing the
563
		// user's token beyond reading other files in the same
564
		// domain, i.e., same collection.
565
		//
566
		// The 303 redirect is necessary in the case of a GET
567
		// request to avoid exposing the token in the Location
568
		// bar, and in the case of a POST request to avoid
569
		// raising warnings when the user refreshes the
570
		// resulting page.
571

    
572
		http.SetCookie(w, &http.Cookie{
573
			Name:     "arvados_api_token",
574
			Value:    auth.EncodeTokenCookie([]byte(formToken)),
575
			Path:     "/",
576
			HttpOnly: true,
577
		})
578
	}
579

    
580
	// Propagate query parameters (except api_token) from
581
	// the original request.
582
	redirQuery := r.URL.Query()
583
	redirQuery.Del("api_token")
584

    
585
	u := r.URL
586
	if location != "" {
587
		newu, err := u.Parse(location)
588
		if err != nil {
589
			w.WriteHeader(http.StatusInternalServerError)
590
			return
591
		}
592
		u = newu
593
	}
594
	redir := (&url.URL{
595
		Host:     r.Host,
596
		Path:     u.Path,
597
		RawQuery: redirQuery.Encode(),
598
	}).String()
599

    
600
	w.Header().Add("Location", redir)
601
	w.WriteHeader(http.StatusSeeOther)
602
	io.WriteString(w, `<A href="`)
603
	io.WriteString(w, html.EscapeString(redir))
604
	io.WriteString(w, `">Continue</A>`)
605
}