/*
 * s390host.c -- hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include "kvm-s390.h"	/* VM_EVENT/VCPU_EVENT and other s390 KVM internals */
#include "gaccess.h"	/* copy_to_guest()/copy_to_guest_absolute() helpers */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
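
/*
 * Per-vcpu statistics exported through the generic KVM debugfs code.
 * Each entry names a counter and gives its offset inside struct kvm_vcpu,
 * so the common code can create one debugfs file per counter.
 */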
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
67 { "diagnose_44", VCPU_STAT(diagnose_44) },

/* Section: not file related */
void kvm_arch_hardware_enable(void *garbage)
	/* every s390 is virtualization enabled ;-) */

void kvm_arch_hardware_disable(void *garbage)

int kvm_arch_hardware_setup(void)

void kvm_arch_hardware_unsetup(void)

void kvm_arch_check_processor_compat(void *rtn)

int kvm_arch_init(void *opaque)

void kvm_arch_exit(void)

/* Section: device related */
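/*
 * KVM_S390_ENABLE_SIE is issued on the /dev/kvm device fd.  It calls
 * s390_enable_sie(), which switches the calling process' address space to a
 * page table layout that the SIE instruction can use for guest memory.
 * Illustrative userspace call (assumed usage, not part of this file):
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0);
 */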
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();

int kvm_dev_ioctl_check_extension(long ext)

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
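
	/*
	 * KVM_S390_INTERRUPT on the VM fd injects a "floating" interrupt,
	 * i.e. one that is not bound to a specific vcpu.
	 */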
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
		r = kvm_s390_inject_vm(kvm, &s390int);
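
/*
 * kvm_arch_create_vm() allocates the VM structure, the system control
 * area (SCA) that SIE uses to manage the vcpus, the s390 debug feature
 * buffer used by the VM_EVENT/VCPU_EVENT macros, and the list of pending
 * floating interrupts.
 */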
struct kvm *kvm_arch_create_vm(void)
	rc = s390_enable_sie();

	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));

	spin_lock_init(&kvm->arch.float_int.lock);
	INIT_LIST_HEAD(&kvm->arch.float_int.list);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	free_page((unsigned long)(kvm->arch.sca));

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	free_page((unsigned long)(vcpu->arch.sie_block));
	kvm_vcpu_uninit(vcpu);

static void kvm_free_vcpus(struct kvm *kvm)
	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		kvm_arch_vcpu_destroy(kvm->vcpus[i]);
		kvm->vcpus[i] = NULL;

void kvm_arch_destroy_vm(struct kvm *kvm)
	kvm_free_physmem(kvm);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
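
/*
 * On load/put we swap the host and guest floating point and access
 * registers: the host state is saved and the guest state restored when the
 * vcpu is scheduled in, and the reverse happens when it is scheduled out.
 */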
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	save_fp_regs(&vcpu->arch.host_fpregs);
	save_access_regs(vcpu->arch.host_acrs);
	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
	restore_fp_regs(&vcpu->arch.guest_fpregs);
	restore_access_regs(vcpu->arch.guest_acrs);

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
	save_fp_regs(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->arch.guest_acrs);
	restore_fp_regs(&vcpu->arch.host_fpregs);
	restore_access_regs(vcpu->arch.host_acrs);

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
	/* this equals the initial cpu reset in the POP (Principles of
	   Operation), but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	vcpu->arch.sie_block->prefix = 0UL;
	vcpu->arch.sie_block->ihcpu = 0xffff;
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;

/* The current code can have up to 256 pages for virtio */
#define VIRTIODESCSPACE (256ul * 4096ul)
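
/*
 * kvm_arch_vcpu_setup() programs the SIE control block: guest origin and
 * limit (gmsor/gmslm) cover the guest memory plus the virtio descriptor
 * space above it, ecb/eca select SIE features, and the clock comparator
 * timer is used to wake up a vcpu that sleeps in wait state.
 */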
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
	vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
				      vcpu->kvm->arch.guest_origin +
				      VIRTIODESCSPACE - 1ul;
	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
	vcpu->arch.sie_block->ecb = 2;
	vcpu->arch.sie_block->eca = 0xC1002001U;
	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
		    (unsigned long) vcpu);
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xfe;
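
/*
 * kvm_arch_vcpu_create() allocates the vcpu and its SIE control block,
 * enters the block into this vcpu's slot in the VM-wide SCA, and links the
 * per-vcpu local interrupt structure with the VM's floating interrupt
 * structure under the floating interrupt lock.
 */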
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);

	vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
		get_zeroed_page(GFP_KERNEL);

	if (!vcpu->arch.sie_block)

	vcpu->arch.sie_block->icpua = id;
	BUG_ON(!kvm->arch.sca);
	BUG_ON(kvm->arch.sca->cpu[id].sda);
	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;

	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	spin_lock_bh(&kvm->arch.float_int.lock);
	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
	init_waitqueue_head(&vcpu->arch.local_int.wq);
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	spin_unlock_bh(&kvm->arch.float_int.lock);

	rc = kvm_vcpu_init(vcpu, kvm, id);

	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
	/* kvm common code refers to this, but never calls it */

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
	kvm_s390_vcpu_initial_reset(vcpu);
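
/*
 * Standard KVM register accessor ioctls: general purpose registers live in
 * vcpu->arch.guest_gprs, access registers in guest_acrs, control registers
 * in the SIE block (gcr), and floating point state in guest_fpregs.
 */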
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
	memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)

	vcpu->arch.sie_block->gpsw = psw;

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
	return -EINVAL; /* not implemented yet */

int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
				    struct kvm_debug_guest *dbg)
	return -EINVAL; /* not implemented yet */

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
	return -EINVAL; /* not implemented yet */

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
	return -EINVAL; /* not implemented yet */

extern void s390_handle_mcck(void);
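
/*
 * __vcpu_run() performs one entry into SIE: it copies gprs 14/15 into the
 * SIE control block, handles a pending machine check, delivers pending
 * interrupts, runs the guest via sie64a(), and copies gprs 14/15 back.  A
 * fault on the SIE instruction itself is reported to the guest as an
 * addressing exception.
 */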
static void __vcpu_run(struct kvm_vcpu *vcpu)
	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);

	if (test_thread_flag(TIF_MCCK_PENDING))

	kvm_s390_deliver_pending_interrupts(vcpu);

	vcpu->arch.sie_block->icptcode = 0;

	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
		   atomic_read(&vcpu->arch.sie_block->cpuflags));
	if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
	}
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);

	memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
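
/*
 * The main run loop: with the vcpu marked running, deliver interrupts and
 * enter SIE repeatedly until either a signal is pending or an intercept
 * needs to be handled in userspace.  -ENOTSUPP from the intercept handler
 * means "pass the raw SIE intercept to userspace" (exit_reason
 * KVM_EXIT_S390_SIEIC); -EREMOTE means the handler already prepared
 * kvm_run for userspace.
 */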
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);

	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);

	switch (kvm_run->exit_reason) {
	case KVM_EXIT_S390_SIEIC:
		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
	case KVM_EXIT_UNKNOWN:
	case KVM_EXIT_S390_RESET:

		rc = kvm_handle_sie_intercept(vcpu);
	} while (!signal_pending(current) && !rc);

	if (signal_pending(current) && !rc)

	if (rc == -ENOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
		kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed;
		 * kvm_run has been prepared by the handler */

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
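
/*
 * Helper for the store-status code below: @prefix selects whether the
 * destination is copied via copy_to_guest() (honouring the guest's prefix
 * area) or via copy_to_guest_absolute().
 */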
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
		       unsigned long n, int prefix)
	if (prefix)
		return copy_to_guest(vcpu, guestdest, from, n);
	else
		return copy_to_guest_absolute(vcpu, guestdest, from, n);

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
	const unsigned char archmode = 1;

	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
		addr = SAVE_AREA_BASE;
	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
		addr = SAVE_AREA_BASE;

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
			vcpu->arch.guest_fpregs.fprs, 128, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
			vcpu->arch.guest_gprs, 128, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
			&vcpu->arch.sie_block->gpsw, 16, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
			&vcpu->arch.sie_block->prefix, 4, prefix))

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
			&vcpu->arch.guest_fpregs.fpc, 4, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
			&vcpu->arch.sie_block->todpr, 4, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
			&vcpu->arch.sie_block->cputm, 8, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
			&vcpu->arch.sie_block->ckc, 8, prefix))

	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
			&vcpu->arch.guest_acrs, 64, prefix))

	if (__guestcopy(vcpu,
			addr + offsetof(struct save_area_s390x, ctrl_regs),
			&vcpu->arch.sie_block->gcr, 128, prefix))

static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
	rc = __kvm_s390_vcpu_store_status(vcpu, addr);
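
/*
 * VCPU ioctls specific to s390: KVM_S390_INTERRUPT injects an interrupt
 * directly into this vcpu, KVM_S390_STORE_STATUS writes the architected
 * save area to guest memory, KVM_S390_SET_INITIAL_PSW loads a starting PSW,
 * and KVM_S390_INITIAL_RESET performs the initial cpu reset.
 */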
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
		return kvm_s390_inject_vcpu(vcpu, &s390int);
	case KVM_S390_STORE_STATUS:
		return kvm_s390_vcpu_store_status(vcpu, arg);
	case KVM_S390_SET_INITIAL_PSW: {
		if (copy_from_user(&psw, argp, sizeof(psw)))
		return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
	case KVM_S390_INITIAL_RESET:
		return kvm_arch_vcpu_ioctl_initial_reset(vcpu);

/* Section: memory related */
int kvm_arch_set_memory_region(struct kvm *kvm,
			       struct kvm_userspace_memory_region *mem,
			       struct kvm_memory_slot old,
	/* A few sanity checks: there can be exactly one memory slot, it has
	   to start at guest address zero, and it has to begin and end on a
	   page boundary in userland.  The userland memory may be fragmented
	   into multiple vmas, and it is fine to mmap() and munmap() regions
	   within this slot at any time after this call. */

	if (mem->guest_phys_addr)

	if (mem->userspace_addr & (PAGE_SIZE - 1))

	if (mem->memory_size & (PAGE_SIZE - 1))

	kvm->arch.guest_origin = mem->userspace_addr;
	kvm->arch.guest_memsize = mem->memory_size;

	/* FIXME: we should interrupt running CPUs and update their memory
	   configuration here to avoid race conditions; then again, changing
	   the memory layout while virtual CPUs are running is usually bad
	   programming practice. */

void kvm_arch_flush_shadow(struct kvm *kvm)

gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)

static int __init kvm_s390_init(void)
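	/* nothing s390 specific to pass; register with the generic KVM module */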
	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);

static void __exit kvm_s390_exit(void)

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);