2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/*
 * Expands to two comma separated values: the offset of stat.x inside
 * struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Statistics table: every entry pairs a name string with a VCPU_STAT()
 * reference to a counter in the vcpu's stat member. A few names differ from
 * the field they reference (e.g. "userspace_handled" -> exit_userspace,
 * "instruction_sigp_set_arch" -> instruction_sigp_arch).
 * NOTE(review): the end of the initializer is not visible in this excerpt.
 */
32 struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_null", VCPU_STAT(exit_null) },
35 { "exit_validity", VCPU_STAT(exit_validity) },
36 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
37 { "exit_external_request", VCPU_STAT(exit_external_request) },
38 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
39 { "exit_instruction", VCPU_STAT(exit_instruction) },
40 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
41 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
42 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
43 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
44 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
45 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
46 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
47 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
48 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
49 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
50 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
51 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
52 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
53 { "instruction_spx", VCPU_STAT(instruction_spx) },
54 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
55 { "instruction_stap", VCPU_STAT(instruction_stap) },
56 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
57 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
58 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
59 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
60 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
61 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
62 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
63 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
64 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
65 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
66 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
67 { "diagnose_44", VCPU_STAT(diagnose_44) },
72 /* Section: not file related */
/*
 * Hardware enable hook. The only visible body content is the remark below;
 * no statements are visible in this excerpt.
 */
73 void kvm_arch_hardware_enable(void *garbage)
75 /* every s390 is virtualization enabled ;-) */
/* NOTE(review): only the signature is visible in this excerpt; body not shown. */
78 void kvm_arch_hardware_disable(void *garbage)
/*
 * NOTE(review): only the signature is visible in this excerpt; the returned
 * value cannot be determined from here.
 */
82 int kvm_arch_hardware_setup(void)
/* NOTE(review): only the signature is visible in this excerpt; body not shown. */
87 void kvm_arch_hardware_unsetup(void)
/*
 * NOTE(review): only the signature is visible in this excerpt; whether and
 * how *rtn is written cannot be determined from here.
 */
91 void kvm_arch_check_processor_compat(void *rtn)
/*
 * NOTE(review): only the signature is visible in this excerpt; the returned
 * value cannot be determined from here.
 */
95 int kvm_arch_init(void *opaque)
/* NOTE(review): only the signature is visible in this excerpt; body not shown. */
100 void kvm_arch_exit(void)
104 /* Section: device related */
/*
 * Device level ioctl handler. A KVM_S390_ENABLE_SIE request is forwarded to
 * s390_enable_sie() and that function's result is returned.
 * NOTE(review): the result for any other ioctl number is not visible in this
 * excerpt.
 */
105 long kvm_arch_dev_ioctl(struct file *filp,
106 unsigned int ioctl, unsigned long arg)
108 if (ioctl == KVM_S390_ENABLE_SIE)
109 return s390_enable_sie();
/*
 * NOTE(review): only the signature is visible in this excerpt; which
 * extensions are reported cannot be determined from here.
 */
113 int kvm_dev_ioctl_check_extension(long ext)
121 /* Section: vm related */
123 * Get (and clear) the dirty memory log for a memory slot.
/*
 * NOTE(review): only the signature is visible in this excerpt; the body and
 * the returned value are not shown.
 */
125 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
126 struct kvm_dirty_log *log)
/*
 * VM level ioctl handler. The VM is taken from filp->private_data and arg is
 * treated as a user space pointer.
 *
 * KVM_S390_INTERRUPT: a struct kvm_s390_interrupt is copied in from user
 * space and handed to kvm_s390_inject_vm(); the result is stored in r.
 *
 * NOTE(review): the reaction to a failed copy_from_user(), the handling of
 * other ioctl numbers and the final return are not visible in this excerpt.
 */
131 long kvm_arch_vm_ioctl(struct file *filp,
132 unsigned int ioctl, unsigned long arg)
134 struct kvm *kvm = filp->private_data;
135 void __user *argp = (void __user *)arg;
139 case KVM_S390_INTERRUPT: {
140 struct kvm_s390_interrupt s390int;
143 if (copy_from_user(&s390int, argp, sizeof(s390int)))
145 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and set up a new VM.
 *
 * Visible steps: call s390_enable_sie(), allocate a zeroed struct kvm,
 * allocate one zeroed page for the sca block, register a debug area named
 * "kvm-<pid of current>", initialise the floating interrupt lock and list,
 * attach the sprintf debug view and log "vm created".
 *
 * NOTE(review): the error checks between these steps and the return
 * statements are not visible in this excerpt.
 */
155 struct kvm *kvm_arch_create_vm(void)
161 rc = s390_enable_sie();
166 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
170 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
/*
 * NOTE(review): unbounded sprintf; the size of debug_name is not visible
 * here. Confirm it can hold "kvm-" plus the longest pid, or use snprintf.
 */
174 sprintf(debug_name, "kvm-%u", current->pid);
176 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
180 spin_lock_init(&kvm->arch.float_int.lock);
181 INIT_LIST_HEAD(&kvm->arch.float_int.list);
183 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
184 VM_EVENT(kvm, 3, "%s", "vm created");
/* presumably part of the error unwinding path -- TODO confirm */
188 free_page((unsigned long)(kvm->arch.sca));
/*
 * Tear down a vcpu: log "free cpu", release the page holding the sie block
 * and call kvm_vcpu_uninit().
 * NOTE(review): freeing of the vcpu structure itself is not visible in this
 * excerpt.
 */
195 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
197 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
198 free_page((unsigned long)(vcpu->arch.sie_block));
199 kvm_vcpu_uninit(vcpu);
/*
 * Walk all KVM_MAX_VCPUS slots of kvm->vcpus[], destroy the vcpu found there
 * and reset the slot to NULL.
 * NOTE(review): any guard that skips empty slots is not visible in this
 * excerpt.
 */
203 static void kvm_free_vcpus(struct kvm *kvm)
207 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
209 kvm_arch_vcpu_destroy(kvm->vcpus[i]);
210 kvm->vcpus[i] = NULL;
/* NOTE(review): only the signature is visible in this excerpt; body not shown. */
215 void kvm_arch_sync_events(struct kvm *kvm)
/*
 * Release VM resources. Visible steps: free the guest physical memory
 * bookkeeping via kvm_free_physmem(), free the sca page and unregister the
 * VM's debug area.
 * NOTE(review): further steps (lines before and after) are not visible in
 * this excerpt.
 */
219 void kvm_arch_destroy_vm(struct kvm *kvm)
222 kvm_free_physmem(kvm);
223 free_page((unsigned long)(kvm->arch.sca));
224 debug_unregister(kvm->arch.dbf);
228 /* Section: vcpu related */
/*
 * NOTE(review): only the signature is visible in this excerpt; the returned
 * value cannot be determined from here.
 */
229 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/* NOTE(review): only the signature is visible in this excerpt; body not shown. */
234 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
/*
 * Switch register context from host to guest: the host floating point and
 * access registers are saved into vcpu->arch, then the guest copies are
 * restored. The cpu argument is not used by the visible statements.
 */
239 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
241 save_fp_regs(&vcpu->arch.host_fpregs);
242 save_access_regs(vcpu->arch.host_acrs);
/* strip bits outside FPC_VALID_MASK from the guest fpc before restoring it */
243 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
244 restore_fp_regs(&vcpu->arch.guest_fpregs);
245 restore_access_regs(vcpu->arch.guest_acrs);
/*
 * Switch register context from guest back to host: the guest floating point
 * and access registers are saved into vcpu->arch, then the previously saved
 * host copies are restored.
 */
248 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
250 save_fp_regs(&vcpu->arch.guest_fpregs);
251 save_access_regs(vcpu->arch.guest_acrs);
252 restore_fp_regs(&vcpu->arch.host_fpregs);
253 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Put the vcpu's sie block and guest fpc into their reset state: PSW mask
 * and address, prefix, cpu timer, clock comparator and todpr are zeroed,
 * ihcpu is set to 0xffff, all 16 control registers are cleared and then
 * gcr[0] and gcr[14] receive fixed values, the guest fpc is zeroed and
 * gbea is set to 1.
 */
256 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
258 /* this equals initial cpu reset in pop, but we don't switch to ESA */
259 vcpu->arch.sie_block->gpsw.mask = 0UL;
260 vcpu->arch.sie_block->gpsw.addr = 0UL;
261 vcpu->arch.sie_block->prefix = 0UL;
262 vcpu->arch.sie_block->ihcpu = 0xffff;
263 vcpu->arch.sie_block->cputm = 0UL;
264 vcpu->arch.sie_block->ckc = 0UL;
265 vcpu->arch.sie_block->todpr = 0;
266 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
/* NOTE(review): presumably the architected reset values of CR0/CR14 -- confirm */
267 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
268 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
269 vcpu->arch.guest_fpregs.fpc = 0;
/* execute lfpc with the just zeroed guest fpc as its operand */
270 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
271 vcpu->arch.sie_block->gbea = 1;
274 /* The current code can have up to 256 pages for virtio */
/* 256 * 4096 bytes = 1 MiB */
275 #define VIRTIODESCSPACE (256ul * 4096ul)
/*
 * Fill in the static parts of a vcpu's sie block and related state:
 * cpuflags, guest memory origin and limit, ecb/eca, the ckc timer and the
 * cpu id.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
277 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
279 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
/* limit = origin + guest memory size + virtio descriptor space, inclusive */
280 vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
281 vcpu->kvm->arch.guest_origin +
282 VIRTIODESCSPACE - 1ul;
283 vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
/* NOTE(review): meaning of the ecb/eca bit patterns is not shown here */
284 vcpu->arch.sie_block->ecb = 2;
285 vcpu->arch.sie_block->eca = 0xC1002001U;
/* timer callback kvm_s390_idle_wakeup() receives the vcpu pointer as data */
286 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
287 (unsigned long) vcpu);
/* take the cpu id via get_cpu_id(), then override its version field */
288 get_cpu_id(&vcpu->arch.cpu_id);
289 vcpu->arch.cpu_id.version = 0xfe;
/*
 * Allocate a vcpu and wire it into the VM.
 *
 * Visible steps: allocate a zeroed struct kvm_vcpu and a zeroed page for
 * its sie block, record id in the sie block, publish the sie block address
 * in the VM's sca entry for id, store the sca address (split into upper and
 * lower 32 bits) in the sie block, initialise the local interrupt state and
 * register it in the VM's floating interrupt structure, call
 * kvm_vcpu_init() and log the creation.
 *
 * NOTE(review): the rest of the parameter list, the error paths and the
 * return statements are not visible in this excerpt. id is used as an index
 * into sca->cpu[] and float_int.local_int[] without a range check in the
 * visible lines -- confirm the caller validates it.
 */
293 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
296 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
302 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
303 get_zeroed_page(GFP_KERNEL);
305 if (!vcpu->arch.sie_block)
308 vcpu->arch.sie_block->icpua = id;
/* the sca must exist and its slot for this id must still be unused */
309 BUG_ON(!kvm->arch.sca);
310 BUG_ON(kvm->arch.sca->cpu[id].sda);
311 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
/* sca address split into upper (scaoh) and lower (scaol) 32 bits */
312 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
313 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
315 spin_lock_init(&vcpu->arch.local_int.lock);
316 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
317 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
/* registration in the floating interrupt structure happens under its lock */
318 spin_lock_bh(&kvm->arch.float_int.lock);
319 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
320 init_waitqueue_head(&vcpu->arch.local_int.wq);
321 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
322 spin_unlock_bh(&kvm->arch.float_int.lock);
324 rc = kvm_vcpu_init(vcpu, kvm, id);
327 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
328 vcpu->arch.sie_block);
/*
 * Per the remark below this function exists only because common code
 * references it.
 * NOTE(review): the statements of the body are not visible in this excerpt.
 */
337 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
339 /* kvm common code refers to this, but never calls it */
/*
 * ioctl wrapper that performs kvm_s390_vcpu_initial_reset() on the vcpu.
 * NOTE(review): statements around the call and the returned value are not
 * visible in this excerpt.
 */
344 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
347 kvm_s390_vcpu_initial_reset(vcpu);
/*
 * Copy the general purpose registers supplied in regs->gprs into the vcpu's
 * guest_gprs; sizeof(regs->gprs) bytes are copied.
 * NOTE(review): statements around the copy and the returned value are not
 * visible in this excerpt.
 */
352 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
355 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * Copy the vcpu's guest_gprs out into regs->gprs; sizeof(regs->gprs) bytes
 * are copied.
 * NOTE(review): statements around the copy and the returned value are not
 * visible in this excerpt.
 */
360 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
363 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/*
 * Load special registers from sregs: the access registers go into the
 * vcpu's guest_acrs, the control registers into the sie block's gcr.
 * NOTE(review): statements around the copies and the returned value are not
 * visible in this excerpt.
 */
368 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
369 struct kvm_sregs *sregs)
372 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
373 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Store special registers into sregs: the access registers come from the
 * vcpu's guest_acrs, the control registers from the sie block's gcr.
 * NOTE(review): statements around the copies and the returned value are not
 * visible in this excerpt.
 */
378 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
379 struct kvm_sregs *sregs)
382 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
383 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Load the guest floating point state from fpu: the registers (fprs) and
 * the control word (fpc) are copied into vcpu->arch.guest_fpregs.
 * NOTE(review): statements around the copies and the returned value are not
 * visible in this excerpt.
 */
388 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
391 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
392 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/*
 * Store the guest floating point state into fpu: the registers (fprs) and
 * the control word (fpc) are copied out of vcpu->arch.guest_fpregs.
 * NOTE(review): statements around the copies and the returned value are not
 * visible in this excerpt.
 */
397 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
400 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
401 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * Set the guest PSW in the sie block to psw. The CPUSTAT_RUNNING bit of
 * cpuflags is tested first.
 * NOTE(review): what happens when that bit is set, and the returned values,
 * are not visible in this excerpt.
 */
406 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
411 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
414 vcpu->arch.sie_block->gpsw = psw;
/* Stub: always fails with -EINVAL; neither argument is used. */
419 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
420 struct kvm_translation *tr)
422 return -EINVAL; /* not implemented yet */
/* Stub: always fails with -EINVAL; neither argument is used. */
425 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
426 struct kvm_debug_guest *dbg)
428 return -EINVAL; /* not implemented yet */
/* Stub: always fails with -EINVAL; neither argument is used. */
431 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
432 struct kvm_mp_state *mp_state)
434 return -EINVAL; /* not implemented yet */
/* Stub: always fails with -EINVAL; neither argument is used. */
437 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
438 struct kvm_mp_state *mp_state)
440 return -EINVAL; /* not implemented yet */
/*
 * Declaration only; the definition lives outside this file.
 * NOTE(review): no call is visible in this excerpt -- presumably used by the
 * TIF_MCCK_PENDING check in __vcpu_run(); confirm.
 */
443 extern void s390_handle_mcck(void);
/*
 * Run the guest once through sie64a().
 *
 * Before entry 16 bytes starting at guest_gprs[14] are copied into the sie
 * block's gg14 field, pending interrupts are delivered and icptcode is
 * cleared. After exit the 16 bytes are copied back from gg14. A non-zero
 * result from sie64a() is logged and answered by injecting a PGM_ADDRESSING
 * program interrupt into the guest.
 *
 * NOTE(review): the action taken when TIF_MCCK_PENDING is set and other
 * statements between the visible lines are not shown in this excerpt.
 */
445 static void __vcpu_run(struct kvm_vcpu *vcpu)
447 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
452 if (test_thread_flag(TIF_MCCK_PENDING))
455 kvm_s390_deliver_pending_interrupts(vcpu);
457 vcpu->arch.sie_block->icptcode = 0;
461 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
462 atomic_read(&vcpu->arch.sie_block->cpuflags));
463 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
464 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
465 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
467 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
468 vcpu->arch.sie_block->icptcode);
473 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * Run a vcpu until an intercept needs user space or a signal is pending.
 *
 * Visible flow: optionally install the vcpu's signal mask, set
 * CPUSTAT_RUNNING, take over the PSW from kvm_run when the previous exit
 * reason was KVM_EXIT_S390_SIEIC, loop over kvm_handle_sie_intercept() while
 * no signal is pending and rc is 0, translate -ENOTSUPP into a
 * KVM_EXIT_S390_SIEIC exit described in kvm_run, restore the signal mask
 * and count the exit in stat.exit_userspace.
 *
 * NOTE(review): the loop head, the call that actually enters the guest, the
 * rc adjustments in the individual branches and the return statement are
 * not visible in this excerpt.
 */
476 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
483 if (vcpu->sigset_active)
484 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
486 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
/* the vcpu must have been registered in the floating interrupt structure */
488 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
/* exit_reason still holds the value of the previous exit */
490 switch (kvm_run->exit_reason) {
491 case KVM_EXIT_S390_SIEIC:
/* user space may have changed the PSW while handling the intercept */
492 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
493 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
495 case KVM_EXIT_UNKNOWN:
496 case KVM_EXIT_S390_RESET:
506 rc = kvm_handle_sie_intercept(vcpu);
507 } while (!signal_pending(current) && !rc);
509 if (signal_pending(current) && !rc)
512 if (rc == -ENOTSUPP) {
513 /* intercept cannot be handled in-kernel, prepare kvm-run */
514 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
515 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
516 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
517 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
518 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
519 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
523 if (rc == -EREMOTE) {
524 /* intercept was handled, but userspace support is needed
525 * kvm_run has been prepared by the handler */
/* put back the signal mask that was saved on entry */
529 if (vcpu->sigset_active)
530 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
534 vcpu->stat.exit_userspace++;
/*
 * Copy n bytes from 'from' to guest address guestdest, using either
 * copy_to_guest() or copy_to_guest_absolute(); the result of the chosen
 * helper is returned.
 * NOTE(review): the condition selecting between the two helpers is not
 * visible in this excerpt -- presumably the prefix argument; confirm.
 */
538 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
539 unsigned long n, int prefix)
542 return copy_to_guest(vcpu, guestdest, from, n);
544 return copy_to_guest_absolute(vcpu, guestdest, from, n);
548 * store status at address
549 * there are two special cases:
550 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
551 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the vcpu's register state into a save area in guest memory.
 *
 * For KVM_S390_STORE_STATUS_NOADDR and KVM_S390_STORE_STATUS_PREFIXED the
 * byte archmode (1) is first written to guest address 163 and addr is
 * replaced by SAVE_AREA_BASE; the two cases use copy_to_guest_absolute() and
 * copy_to_guest() respectively. Afterwards each register group is copied to
 * its offset within struct save_area_s390x through __guestcopy().
 *
 * NOTE(review): how 'prefix' is set, what is returned when a copy fails and
 * the final return value are not visible in this excerpt.
 */
553 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
555 const unsigned char archmode = 1;
558 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
559 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
561 addr = SAVE_AREA_BASE;
563 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
564 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
566 addr = SAVE_AREA_BASE;
/* floating point registers, 128 bytes */
571 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
572 vcpu->arch.guest_fpregs.fprs, 128, prefix))
/* general purpose registers, 128 bytes */
575 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
576 vcpu->arch.guest_gprs, 128, prefix))
/* guest PSW, 16 bytes */
579 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
580 &vcpu->arch.sie_block->gpsw, 16, prefix))
/* prefix register, 4 bytes */
583 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
584 &vcpu->arch.sie_block->prefix, 4, prefix))
/* floating point control word, 4 bytes */
587 if (__guestcopy(vcpu,
588 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
589 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
/* todpr, 4 bytes */
592 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
593 &vcpu->arch.sie_block->todpr, 4, prefix))
/* cpu timer, 8 bytes */
596 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
597 &vcpu->arch.sie_block->cputm, 8, prefix))
/* clock comparator, 8 bytes */
600 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
601 &vcpu->arch.sie_block->ckc, 8, prefix))
/* access registers, 64 bytes */
604 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
605 &vcpu->arch.guest_acrs, 64, prefix))
/* control registers, 128 bytes */
608 if (__guestcopy(vcpu,
609 addr + offsetof(struct save_area_s390x, ctrl_regs),
610 &vcpu->arch.sie_block->gcr, 128, prefix))
/*
 * Wrapper that stores the result of __kvm_s390_vcpu_store_status() in rc.
 * NOTE(review): the statements around the call and the return are not
 * visible in this excerpt.
 */
615 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
620 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * vcpu level ioctl dispatcher. The vcpu is taken from filp->private_data
 * and arg doubles as a user space pointer (argp).
 *
 *  KVM_S390_INTERRUPT       copy a struct kvm_s390_interrupt from user
 *                           space and inject it into this vcpu
 *  KVM_S390_STORE_STATUS    store status at the address given in arg
 *  KVM_S390_SET_INITIAL_PSW copy a psw from user space and set it
 *  KVM_S390_INITIAL_RESET   perform an initial reset
 *
 * NOTE(review): the results for a failed copy_from_user() and for unknown
 * ioctl numbers are not visible in this excerpt.
 */
625 long kvm_arch_vcpu_ioctl(struct file *filp,
626 unsigned int ioctl, unsigned long arg)
628 struct kvm_vcpu *vcpu = filp->private_data;
629 void __user *argp = (void __user *)arg;
632 case KVM_S390_INTERRUPT: {
633 struct kvm_s390_interrupt s390int;
635 if (copy_from_user(&s390int, argp, sizeof(s390int)))
637 return kvm_s390_inject_vcpu(vcpu, &s390int);
639 case KVM_S390_STORE_STATUS:
640 return kvm_s390_vcpu_store_status(vcpu, arg);
641 case KVM_S390_SET_INITIAL_PSW: {
644 if (copy_from_user(&psw, argp, sizeof(psw)))
646 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
648 case KVM_S390_INITIAL_RESET:
649 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
656 /* Section: memory related */
/*
 * Record the guest memory layout. The visible checks test for a non-zero
 * guest_phys_addr and for a userspace_addr or memory_size that is not a
 * multiple of PAGE_SIZE; afterwards guest_origin and guest_memsize are taken
 * from mem.
 * NOTE(review): the remaining parameters, the result of a failed check and
 * the return statement are not visible in this excerpt. The comment below
 * says "guest virtual zero" while the code tests mem->guest_phys_addr.
 */
657 int kvm_arch_set_memory_region(struct kvm *kvm,
658 struct kvm_userspace_memory_region *mem,
659 struct kvm_memory_slot old,
662 /* A few sanity checks. We can have exactly one memory slot which has
663 to start at guest virtual zero and which has to be located at a
664 page boundary in userland and which has to end at a page boundary.
665 The memory in userland is ok to be fragmented into various different
666 vmas. It is okay to mmap() and munmap() stuff in this slot after
667 doing this call at any time */
672 if (mem->guest_phys_addr)
675 if (mem->userspace_addr & (PAGE_SIZE - 1))
678 if (mem->memory_size & (PAGE_SIZE - 1))
681 kvm->arch.guest_origin = mem->userspace_addr;
682 kvm->arch.guest_memsize = mem->memory_size;
684 /* FIXME: we do want to interrupt running CPUs and update their memory
685 configuration now to avoid race conditions. But hey, changing the
686 memory layout while virtual CPUs are running is usually bad
687 programming practice. */
/* NOTE(review): only the signature is visible in this excerpt; body not shown. */
692 void kvm_arch_flush_shadow(struct kvm *kvm)
/*
 * NOTE(review): only the signature is visible in this excerpt; the returned
 * value cannot be determined from here.
 */
696 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/*
 * Module init: hands over to kvm_init() with a NULL first argument, the
 * size of struct kvm_vcpu and THIS_MODULE, and returns its result.
 */
701 static int __init kvm_s390_init(void)
703 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
/* NOTE(review): only the signature is visible in this excerpt; body not shown. */
706 static void __exit kvm_s390_exit(void)
/* Register kvm_s390_init()/kvm_s390_exit() as the module entry and exit points. */
711 module_init(kvm_s390_init);
712 module_exit(kvm_s390_exit);