/*
 * s390host.c --  hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>

#include "kvm-s390.h"
#include "gaccess.h"
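/*
 * VCPU_STAT() records the offset of a statistics counter within
 * struct kvm_vcpu so that the generic KVM debugfs code can find and
 * export every per-vcpu counter named in debugfs_entries[] below.
 */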
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ NULL }
};
/* Section: not file related */
void kvm_arch_hardware_enable(void *garbage)
{
	/* every s390 is virtualization enabled ;-) */
}

void kvm_arch_hardware_disable(void *garbage)
{
}

int kvm_arch_hardware_setup(void)
{
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
	return 0;
}

void kvm_arch_exit(void)
{
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
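/*
 * Note on KVM_S390_ENABLE_SIE above: s390_enable_sie() converts the
 * page tables of the calling process into a format the SIE
 * instruction can work with, so userspace should issue this ioctl
 * early, before its address space is populated.
 */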
int kvm_dev_ioctl_check_extension(long ext)
{
	switch (ext) {
	case KVM_CAP_USER_MEMORY:
		return 1;
	default:
		return 0;
	}
}
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	return 0;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	default:
		r = -EINVAL;
	}

	return r;
}
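/*
 * KVM_S390_INTERRUPT on the VM file descriptor injects a floating
 * interrupt, one that may be delivered to any vcpu; the same ioctl on
 * a vcpu file descriptor (kvm_arch_vcpu_ioctl) injects a cpu-local
 * interrupt instead.
 */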
struct kvm *kvm_arch_create_vm(void)
{
	struct kvm *kvm;
	int rc;
	char debug_name[16];

	rc = s390_enable_sie();
	if (rc)
		goto out_nokvm;

	rc = -ENOMEM;
	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
	if (!kvm)
		goto out_nokvm;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_nosca;

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_nodbf;

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	try_module_get(THIS_MODULE);

	return kvm;
out_nodbf:
	free_page((unsigned long)(kvm->arch.sca));
out_nosca:
	kfree(kvm);
out_nokvm:
	return ERR_PTR(rc);
}
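/*
 * VM teardown below mirrors kvm_arch_create_vm() in reverse order:
 * unregister the debug feature, release guest memory, then free the
 * SCA page.
 */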
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	debug_unregister(kvm->arch.dbf);
	kvm_free_physmem(kvm);
	free_page((unsigned long)(kvm->arch.sca));
	kfree(kvm);
	module_put(THIS_MODULE);
}
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but doesn't call it */
	BUG();
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_regs(&vcpu->arch.host_fpregs);
	save_access_regs(vcpu->arch.host_acrs);
	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
	restore_fp_regs(&vcpu->arch.guest_fpregs);
	restore_access_regs(vcpu->arch.guest_acrs);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	save_fp_regs(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->arch.guest_acrs);
	restore_fp_regs(&vcpu->arch.host_fpregs);
	restore_access_regs(vcpu->arch.host_acrs);
}
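/*
 * The load/put pair above swaps floating point and access registers
 * between host and guest context; general purpose registers are
 * handled separately around each SIE entry in __vcpu_run().
 */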
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	vcpu->arch.sie_block->prefix = 0UL;
	vcpu->arch.sie_block->ihcpu = 0xffff;
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
}
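/*
 * The non-zero values written to gcr[0] and gcr[14] above are the
 * architected reset values of control registers 0 and 14, covering
 * external interruption and machine check subclass masks.
 */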
/* The current code can have up to 256 pages for virtio */
#define VIRTIODESCSPACE (256ul * 4096ul)

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
	vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
				      vcpu->kvm->arch.guest_origin +
				      VIRTIODESCSPACE - 1ul;
	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
	vcpu->arch.sie_block->ecb = 2;
	vcpu->arch.sie_block->eca = 0xC1002001U;
	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
		    (unsigned long) vcpu);
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xfe;
	return 0;
}
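/*
 * gmsor and gmslm above give SIE the guest memory window: the origin
 * and limit of the host area backing guest storage. VIRTIODESCSPACE
 * extends the window beyond guest memory so the virtio descriptor
 * pages mapped there by userspace stay accessible to the guest.
 */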
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
	int rc = -ENOMEM;

	if (!vcpu)
		goto out_nomem;

	vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
					get_zeroed_page(GFP_KERNEL);

	if (!vcpu->arch.sie_block)
		goto out_free_cpu;

	vcpu->arch.sie_block->icpua = id;
	BUG_ON(!kvm->arch.sca);
	BUG_ON(kvm->arch.sca->cpu[id].sda);
	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	spin_lock_bh(&kvm->arch.float_int.lock);
	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
	init_waitqueue_head(&vcpu->arch.local_int.wq);
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	spin_unlock_bh(&kvm->arch.float_int.lock);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_cpu;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);

	try_module_get(THIS_MODULE);

	return vcpu;
out_free_cpu:
	kfree(vcpu);
out_nomem:
	return ERR_PTR(rc);
}
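/*
 * Each vcpu's SIE block is entered into the VM-wide system control
 * area above (sda), and scaoh/scaol point the SIE block back at the
 * SCA; the hardware consults the SCA when vcpus signal each other.
 */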
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
	free_page((unsigned long)(vcpu->arch.sie_block));
	kfree(vcpu);
	module_put(THIS_MODULE);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	kvm_s390_vcpu_initial_reset(vcpu);
	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);
	memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	vcpu_load(vcpu);
	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
		rc = -EBUSY;
	else
		vcpu->arch.sie_block->gpsw = psw;
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
				    struct kvm_debug_guest *dbg)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	return -EINVAL; /* not implemented yet */
}
extern void s390_handle_mcck(void);

static void __vcpu_run(struct kvm_vcpu *vcpu)
{
	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_thread_flag(TIF_MCCK_PENDING))
		s390_handle_mcck();

	kvm_s390_deliver_pending_interrupts(vcpu);

	vcpu->arch.sie_block->icptcode = 0;
	local_irq_disable();
	kvm_guest_enter();
	local_irq_enable();
	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
		   atomic_read(&vcpu->arch.sie_block->cpuflags));
	if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
	}
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	local_irq_disable();
	kvm_guest_exit();
	local_irq_enable();

	memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
}
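/*
 * SIE keeps guest r14 and r15 in the control block (gg14/gg15), so
 * each 16-byte memcpy above shuttles exactly those two registers in
 * and out around the SIE run; the remaining guest gprs are passed to
 * sie64a() directly.
 */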
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	vcpu_load(vcpu);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);

	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
		break;
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_S390_RESET:
		break;
	default:
		BUG();
	}

	might_sleep();

	do {
		__vcpu_run(vcpu);
		rc = kvm_handle_sie_intercept(vcpu);
	} while (!signal_pending(current) && !rc);

	if (signal_pending(current) && !rc)
		rc = -EINTR;

	if (rc == -ENOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
		kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu_put(vcpu);

	vcpu->stat.exit_userspace++;
	return rc;
}
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
		       unsigned long n, int prefix)
{
	if (prefix)
		return copy_to_guest(vcpu, guestdest, from, n);
	else
		return copy_to_guest_absolute(vcpu, guestdest, from, n);
}
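/*
 * With prefix set, __guestcopy stores through the guest's prefixed
 * address space via copy_to_guest(); otherwise the destination is a
 * guest absolute address.
 */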
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	const unsigned char archmode = 1;
	int prefix;

	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 0;
	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
			return -EFAULT;
		addr = SAVE_AREA_BASE;
		prefix = 1;
	} else
		prefix = 0;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
			vcpu->arch.guest_fpregs.fprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
			vcpu->arch.guest_gprs, 128, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
			&vcpu->arch.sie_block->gpsw, 16, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
			&vcpu->arch.sie_block->prefix, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
			&vcpu->arch.sie_block->todpr, 4, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
			&vcpu->arch.sie_block->cputm, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
			&vcpu->arch.sie_block->ckc, 8, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
			&vcpu->arch.guest_acrs, 64, prefix))
		return -EFAULT;

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area_s390x, ctrl_regs),
			&vcpu->arch.sie_block->gcr, 128, prefix))
		return -EFAULT;
	return 0;
}
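/*
 * The offsets above come from struct save_area_s390x, so the register
 * image stored at addr has the same layout as the save area a real
 * machine fills on a SIGP store status order.
 */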
static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	int rc;

	vcpu_load(vcpu);
	rc = __kvm_s390_vcpu_store_status(vcpu, addr);
	vcpu_put(vcpu);
	return rc;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390int);
	}
	case KVM_S390_STORE_STATUS:
		return kvm_s390_vcpu_store_status(vcpu, arg);
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		if (copy_from_user(&psw, argp, sizeof(psw)))
			return -EFAULT;
		return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
	}
	case KVM_S390_INITIAL_RESET:
		return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
	default:
		break;
	}
	return -EINVAL;
}
/* Section: memory related */
int kvm_arch_set_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot old,
				int user_alloc)
{
	/* A few sanity checks. We can have exactly one memory slot which has
	   to start at guest virtual zero and which has to be located at a
	   page boundary in userland and which has to end at a page boundary.
	   The memory in userland is ok to be fragmented into various different
	   vmas. It is okay to mmap() and munmap() stuff in this slot after
	   doing this call at any time */

	if (mem->slot)
		return -EINVAL;

	if (mem->guest_phys_addr)
		return -EINVAL;

	if (mem->userspace_addr & (PAGE_SIZE - 1))
		return -EINVAL;

	if (mem->memory_size & (PAGE_SIZE - 1))
		return -EINVAL;

	kvm->arch.guest_origin = mem->userspace_addr;
	kvm->arch.guest_memsize = mem->memory_size;

	/* FIXME: we do want to interrupt running CPUs and update their memory
	   configuration now to avoid race conditions. But hey, changing the
	   memory layout while virtual CPUs are running is usually bad
	   programming practice. */

	return 0;
}
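/*
 * Userspace is thus expected to create exactly one slot, e.g. slot 0
 * at guest physical 0 with a page-aligned userspace_addr and
 * memory_size; anything else is rejected by the checks above.
 */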
void kvm_arch_flush_shadow(struct kvm *kvm)
{
}

gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
	return gfn;
}

static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);